OpenCL實現序列卷積

2021-07-23 09:34:54 字數 3402 閱讀 6616

採用上文中的第一種方法實現:

host.c

#include

#include

#pragma warning( disable : 4996 )

#define matrix_dim 1*1024

// Host-side driver for the sequence-convolution example: creates an OpenCL
// context, builds the program read from "kernel.cl", creates three buffers,
// runs the kernel once, reads the output back and prints a pass/fail message.
// NOTE(review): this listing appears to have lost its braces, most statement
// bodies, variable declarations and identifier casing during extraction;
// restore them from the original source before attempting to compile.
int main()

// Create the context

context = clcreatecontext(null, 1, &devices, null, null, &error);

// NOTE(review): this check uses `!= 0` while later checks use `< 0` — confirm
// which convention is intended.
if (error != 0)

// Create the program: load the whole kernel source file into memory

program_handle = fopen("kernel.cl", "rb");

if (program_handle == null)

// Determine the file size by seeking to the end, then rewind for reading

fseek(program_handle, 0, seek_end);

program_size = ftell(program_handle);

rewind(program_handle);

// One extra byte is allocated for the terminating '\0'.
// NOTE(review): no check of the malloc result is visible before the write below.
program_buffer = (char *)malloc(program_size + 1);

program_buffer[program_size] = '\0';

// The element count returned by fread is stored in `error`; zero is treated
// as the failure case by the check that follows.
error = fread(program_buffer, sizeof(char), program_size, program_handle);

if (error == 0)

fclose(program_handle);

program = clcreateprogramwithsource(context, 1, (const

char **)&program_buffer, &program_size, &error);

if (error < 0)

// The source text is released once the program object has been created
free(program_buffer);

// Build the program

error = clbuildprogram(program, 1, &devices, null, null, null);

if (error < 0)

// Create the command queue
// NOTE(review): the queue is created with the profiling flag, but no
// profiling query is visible in this listing — confirm it is needed.

queue = clcreatecommandqueue(context, devices, cl_queue_profiling_enable, &error);

if (error < 0)

// Create the kernel

kernel = clcreatekernel(program, kernel_name, &error);

// NOTE(review): this check tests the returned handle rather than `error`.
if (kernel == null)

// Initialize the arguments
// result and c_in hold matrix_dim + 31 floats, a_in holds 32, b_in holds matrix_dim.

float result[matrix_dim + 31];

float a_in[32];

float b_in[matrix_dim];

float c_in[matrix_dim + 31];

// NOTE(review): the bodies of the following loops are missing from this listing.
for (int i = 0; i < matrix_dim; i++)

for (int i = 0; i < matrix_dim + 31; i++)

for (int i = 0; i < 32; i++)

for (int j = 0; j < matrix_dim+31 ; j++) }}

printf("\n");

// Create the buffer objects
// NOTE(review): a_in is declared with 32 elements, but the size requested
// here is sizeof(float) * matrix_dim; if the flag is the copy-host-pointer
// flag, the copy would read past the end of a_in — confirm and fix the size.

cl_mem memobject1 = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(float) * matrix_dim, a_in, &error);

if (error < 0)

cl_mem memobject2 = clcreatebuffer(context, cl_mem_read_only | cl_mem_copy_host_ptr, sizeof(float) * matrix_dim, b_in, &error);

if (error < 0)

// NOTE(review): the output buffer holds matrix_dim floats while `result` is
// declared with matrix_dim + 31 — confirm the intended output length.
cl_mem memobject3 = clcreatebuffer(context, cl_mem_write_only, sizeof(float) * matrix_dim, null, &error);

if (error < 0)

// Set the kernel arguments: indices 0 and 1 are the two input buffers,
// index 2 is the output buffer. The three status codes are OR-ed together.

error = clsetkernelarg(kernel, 0, sizeof(cl_mem), &memobject1);

error |= clsetkernelarg(kernel, 1, sizeof(cl_mem), &memobject2);

error |= clsetkernelarg(kernel, 2, sizeof(cl_mem), &memobject3);

if (error != cl_success)

// Execute the kernel over a one-dimensional range
// NOTE(review): the work-size initializers are missing from this listing.

size_t globalworksize[1] = ;

size_t localworksize[1] = ;

error = clenqueuendrangekernel(queue, kernel, 1, null, globalworksize, localworksize, 0, null, null);

if (error != cl_success)

// Read back the execution result: matrix_dim floats are copied into `result`.
// NOTE(review): presumably a blocking read (third argument) — confirm.

error = clenqueuereadbuffer(queue, memobject3, cl_true, 0, matrix_dim * sizeof(float), result, 0, null, null);

if (error != cl_success)

// Display the result
// NOTE(review): the loop body that would update `check` is missing from this
// listing, so how the comparison is done cannot be determined from here.

int check = 1;

for (int i = 0; i < matrix_dim; i++)

}printf("\n");

if (check)

printf("successed!\n");

else

printf("failed!\n");

// Release the OpenCL objects created above

clreleaseprogram(program);

clreleasecontext(context);

clreleasekernel(kernel);

clreleasecommandqueue(queue);

clreleasememobject(memobject1);

clreleasememobject(memobject2);

clreleasememobject(memobject3);

return

0;}

kernel.cl

// Kernel entry point `createbuffer`.
// Parameters: a_in and b_in are const float pointers in global memory
// (inputs); result is a non-const float pointer in global memory.
// NOTE(review): the kernel body and its opening brace are missing from this
// listing, so the computation performed cannot be determined from here.
__kernel void createbuffer(__global const

float *a_in,

__global const

float *b_in,

__global float *result)

}

點雲壓縮的opencl實現

2.解碼 3.OpenCL演算法 4.演算法實現 5.參考文獻 點雲壓縮的八叉樹演算法 通過迴圈遞迴的方法對大小為 $2^n \times 2^n \times 2^n$ 的八叉樹空間進行8等分劃分,最多剖分 $n$ 次。在完成逐層劃分之後,對資料編碼,編碼方式為 假設點雲座標p x...

C 實現卷積

來自 include include using namespace std int main 定義卷積核矩陣 其實也是一個陣列,陣列元素的個數3×3 int const kernel 3 float b kernel kernel 計算卷積輸出矩陣的維數 其實是輸出陣列元素個數的開根號 int c...

tensorflow實現卷積與反卷積自編碼框架

從dcgan中了解到了反卷積的操作,所以我本來打算能通過卷積操作作為編碼器將一幀影象轉換為一個20維的向量,而後再通過反卷積實現解碼功能從而達到影象恢復效果,先把程式貼上,後續有空再調整網路層數和引數吧 from tensorflow.examples.tutorials.mnist import ...