CUDA第一個程式(只有一個執行緒)

2021-05-22 13:35:56 字數 2214 閱讀 9539

#include

#include

#include

#define total_num 50000

bool cuda_initial(void)

else

printf("there is %d device beyond 1.0/n",device_count);

for(i=0;iif(cudasetdevice(i)==cudaerrorinvaliddevice)

return true;

}void generate_num(int *num,int data_num)

{int i;

for(i=0;i

__global__ void square_sum(int *num,int num_of_num,int * result,clock_t *time)

{int i;

int sum=0;

clock_t start,end;

start=clock();

for(i=0;i

int main()

{if(cuda_initial()==true)

printf("cuda initial successed!/n");

int num_str[total_num];

generate_num(num_str,total_num);

int *gpudata;

int *result;

clock_t *time;

cudamalloc((void **)&gpudata,sizeof(int)*total_num);

cudamalloc((void **)&result,sizeof(int));

cudamalloc((void **)&time,sizeof(clock_t));

cudamemcpy((void *)gpudata,num_str,sizeof(int)*total_num,cudamemcpyhosttodevice);

square_sum<<<1,1>>>(gpudata,total_num,result,time);

int result_in_gpu;

cudamemcpy((void *)&result_in_gpu,result,sizeof(int),cudamemcpydevicetohost);

clock_t time_used;

cudamemcpy((void *)&time_used,time,sizeof(clock_t),cudamemcpydevicetohost);

printf("in gpu result is %d/n",result_in_gpu);

printf("in gpu time used is %d/n",time_used);

int result_in_cpu=0;

int i;

for(i=0;i

測試結果:

there is 1 device beyond 1.0

device properties is :

device name is geforce 9800 gt

totalglobalmem is 536543232

sharedmemperblock is 16384

regsperblock is 8192

warpsize is 32

mempitch is 262144

maxthreadsperblock is 512

maxthreadsdim [3] is 512 x 512 x 64

maxgridsize [3] is 65535 x 65535 x 1

totalconstmem is 65536

device version is major 1 ,minor 1

clockrate is 1350000

texturealignment is 256

deviceoverlap is 1

multiprocessorcount is 14

cuda initial successed!

in gpu result is 1419240

in gpu time used is 29763916

in cpu result is 1419240

請按任意鍵繼續. . .

記憶體頻寬:(50000 × 4 bytes ÷ 1048576) ÷ 0.022 s ≈ 8.67 MB/s(其中 0.022 s ≈ 29763916 個時脈週期 ÷ 1.35 GHz,即上面輸出的 time used 除以 clockrate)

CUDA程式設計(一)第一個CUDA程式

CUDA(Compute Unified Device Architecture)是顯示卡廠商 NVIDIA 推出的運算平台,是一種通用平行計算架構,該架構使 GPU 能夠解決複雜的計算問題。說白了就是我們可以使用 GPU 來並行完成像神經網路、影象處理演算法這些在 CPU 上跑起來比較吃力的程式。通過 GPU 和高並...

CUDA程式設計(一)第一個CUDA程式

CUDA(Compute Unified Device Architecture)是顯示卡廠商 NVIDIA 推出的運算平台,是一種通用平行計算架構,該架構使 GPU 能夠解決複雜的計算問題。說白了就是我們可以使用 GPU 來並行完成像神經網路、影象處理演算法這些在 CPU 上跑起來比較吃力的程式。通過 GPU 和高並...

CUDA 第一個CUDA程式 addVector

本文主要將兩個浮點陣列中的資料相加,並將結果放入第三個陣列中。此演算法分別在 CPU 與 GPU 上執行,並比較了所需時間,可以強烈感受到 GPU 的平行計算能力。這裡,每個陣列的元素個數為 30000000 個。include include include include for the cuda r...