採用上文中的第一種方法實現:
host.c
#include
#include
#pragma warning( disable : 4996 )
#define matrix_dim 1*1024
int main()
//建立上下文
context = clcreatecontext(null, 1, &devices, null, null, &error);
if (error != 0)
//建立程式
program_handle = fopen("kernel.cl", "rb");
if (program_handle == null)
fseek(program_handle, 0, seek_end);
program_size = ftell(program_handle);
rewind(program_handle);
program_buffer = (char *)malloc(program_size + 1);
program_buffer[program_size] = '\0';
error = fread(program_buffer, sizeof(char), program_size, program_handle);
if (error == 0)
fclose(program_handle);
program = clcreateprogramwithsource(context, 1, (const
char **)&program_buffer, &program_size, &error);
if (error < 0)
free(program_buffer);
//編譯程式
error = clbuildprogram(program, 1, &devices, null, null, null);
if (error < 0)
//建立命令佇列
queue = clcreatecommandqueue(context, devices, cl_queue_profiling_enable, &error);
if (error < 0)
//建立核心
kernel = clcreatekernel(program, kernel_name, &error);
if (kernel == null)
//初始化引數
float result[matrix_dim + 31];
float a_in[32];
float b_in[matrix_dim];
float c_in[matrix_dim + 31];
for (int i = 0; i < matrix_dim; i++)
for (int i = 0; i < matrix_dim + 31; i++)
for (int i = 0; i < 32; i++)
for (int j = 0; j < matrix_dim+31 ; j++) }}
printf("\n");
//建立快取物件
cl_mem memobject1 = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(float) * matrix_dim, a_in, &error);
if (error < 0)
cl_mem memobject2 = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(float) * matrix_dim, b_in, &error);
if (error < 0)
cl_mem memobject3 = clcreatebuffer(context, cl_mem_write_only, sizeof(float) * matrix_dim, null, &error);
if (error < 0)
//設定核心引數
error = clsetkernelarg(kernel, 0, sizeof(cl_mem), &memobject1);
error |= clsetkernelarg(kernel, 1, sizeof(cl_mem), &memobject2);
error |= clsetkernelarg(kernel, 2, sizeof(cl_mem), &memobject3);
if (error != cl_success)
//執行核心
size_t globalworksize[1] = ;
size_t localworksize[1] = ;
error = clenqueuendrangekernel(queue, kernel, 1, null, globalworksize, localworksize, 0, null, null);
if (error != cl_success)
//讀取執行結果
error = clenqueuereadbuffer(queue, memobject3, cl_true, 0, matrix_dim * sizeof(float), result, 0, null, null);
if (error != cl_success)
//顯示結果
int check = 1;
for (int i = 0; i < matrix_dim; i++)
}printf("\n");
if (check)
printf("successed!\n");
else
printf("failed!\n");
clreleaseprogram(program);
clreleasecontext(context);
clreleasekernel(kernel);
clreleasecommandqueue(queue);
clreleasememobject(memobject1);
clreleasememobject(memobject2);
clreleasememobject(memobject3);
return
0;}
kernel.cl
/*
 * OpenCL kernel `createbuffer`.
 *
 * Parameters (all pointers into __global memory):
 *   a_in   - read-only float input
 *   b_in   - read-only float input
 *   result - writable float output
 *
 * NOTE(review): the opening brace and the whole kernel body are missing from
 * this listing (only the closing brace remains), so what the kernel computes
 * cannot be determined here -- restore the body from the original source.
 */
__kernel void createbuffer(__global const
float *a_in,
__global const
float *b_in,
__global float *result)
}
點雲壓縮的opencl實現
2.解碼 3.OpenCL演算法 4.演算法實現 5.參考文獻 點雲壓縮的八叉樹演算法 通過迴圈遞迴的方法對大小為 $2^n \times 2^n \times 2^n$ 的八叉樹空間8等分劃分,最多剖分 $n$ 次。在完成逐層劃分之後,對資料編碼,編碼方式為 假設點雲座標p x...
C 實現卷積
來自 include include using namespace std int main 定義卷積核矩陣 其實也是一個陣列,陣列元素的個數3×3 int const kernel 3 float b kernel kernel 計算卷積輸出矩陣的維數 其實是輸出陣列元素個數的開根號 int c...
tensorflow實現卷積與反卷積自編碼框架
從DCGAN中了解到了反卷積的操作,所以我本來打算能通過卷積操作作為編碼器將一幀影象轉換為一個20維的向量,而後再通過反卷積實現解碼功能從而達到影象恢復效果,先把程式貼上,後續有空再調整網路層數和引數吧 from tensorflow.examples.tutorials.mnist import ...