cuda 複雜問題 + 細節問題 解答 見 cuda複雜問題 + 細節問題 解答
首先把程式貼上:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>

#define width 10
#define height 11

using std::cout;
using std::endl;

// Abort with a readable message if a CUDA runtime call fails. Kernel launches
// do not return an error themselves, so the launch below is followed by
// cudaGetLastError() / cudaDeviceSynchronize() wrapped in this macro.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// 2D texture reference over float data, bound to pitched linear memory in main().
// NOTE: the texture *reference* API (texture<>, cudaBindTexture2D) is deprecated
// and was removed in CUDA 12; this listing needs a CUDA 11.x or older toolkit.
texture<float, 2, cudaReadModeElementType> tex;

// Copies the bound 2D texture into a dense row-major buffer.
//   arr_cpy : device buffer of width * height floats (no pitch).
// Launch layout: 2D grid of 2D blocks; the x index selects the row (i) and the
// y index selects the column (j). Threads outside the image return early.
__global__ void kernel(float *arr_cpy)
{
    int i = threadIdx.x + blockIdx.x * blockDim.x;  // row
    int j = threadIdx.y + blockIdx.y * blockDim.y;  // column
    if (i >= height || j >= width)
        return;
    // tex2D takes (x, y) = (column, row); +0.5f addresses the texel centre
    // when using unnormalized coordinates.
    arr_cpy[i * width + j] = tex2D(tex, j + 0.5f, i + 0.5f);
}

// Host-side source image, width * height floats, row-major.
float *arr;

int main(void)
{
    // Host input: element k holds the value k, so the round trip through the
    // texture is easy to verify by eye.
    arr = (float*)malloc(width * height * sizeof(float));
    if (arr == 0) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < width * height; i++) {
        arr[i] = (float)i;
    }

    // Pitched device allocation + 2D copy so every row starts aligned.
    size_t pitch, tex_ofs;
    float *arr_d = 0;
    CUDA_CHECK(cudaMallocPitch((void**)&arr_d, &pitch, width * sizeof(float), height));
    CUDA_CHECK(cudaMemcpy2D(arr_d, pitch, arr, width * sizeof(arr[0]),
                            width * sizeof(arr[0]), height, cudaMemcpyHostToDevice));

    // Bind the pitched buffer to the texture reference using texel coordinates.
    tex.normalized = false;
    CUDA_CHECK(cudaBindTexture2D(&tex_ofs, &tex, arr_d, &tex.channelDesc, width, height, pitch));

    // Dense output buffers (device and host).
    float *arr_cpy = 0;
    float *hos_c = 0;
    hos_c = (float*)malloc(width * height * sizeof(float));
    if (hos_c == 0) {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void**)&arr_cpy, width * height * sizeof(float)));

    // Ceil-divide so the grid covers every row and column; a fixed 2x2 grid of
    // 5x5 blocks would only reach 10 of the 11 rows and leave the rest of
    // arr_cpy uninitialized.
    dim3 threads(5, 5);
    dim3 blocks((height + threads.x - 1) / threads.x,
                (width + threads.y - 1) / threads.y);
    kernel<<<blocks, threads>>>(arr_cpy);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran

    CUDA_CHECK(cudaMemcpy(hos_c, arr_cpy, width * height * sizeof(float), cudaMemcpyDeviceToHost));
    for (int i = 0; i < width * height; i++) {
        cout << hos_c[i] << endl;
    }

    // Release everything acquired above.
    CUDA_CHECK(cudaUnbindTexture(tex));
    CUDA_CHECK(cudaFree(arr_cpy));
    CUDA_CHECK(cudaFree(arr_d));
    free(hos_c);
    free(arr);

    system("pause");
    return EXIT_SUCCESS;
}
注意幾個問題:
第一,一維紋理不管是 linear memory 還是使用 cudaMallocPitch,都是可以使用 tex1Dfetch 和 tex1D 這兩個函式進行取樣的。而對於二維紋理,不管是 cudaArray 還是 cudaMallocPitch 都是使用 tex2D。
第二,程式中定義了 `#define width 10` 與 `#define height 11`:height 必須大於 width,否則會報錯。
第三,取樣的時候長寬是顛倒的:
// Body of kernel(): i is the row (from the x index), j is the column (from the y index).
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
// tex2D takes (x, y) = (column, row), hence j first and i second; +0.5f addresses the texel centre.
arr_cpy[i * width + j] = tex2D(tex, j + 0.5f, i + 0.5f);
前面是 j,後面是 i。
動態建立和釋放二維陣列
c動態建立和釋放二維陣列 include include define row 5 define col 4 main int i int arr arr int malloc row sizeof int for i 0 i使用calloc申請記憶體時,記憶體會清0,而malloc並不進行這項工作...
動態建立和釋放二維陣列
define crt secure no warnings include include include 動態建立二維陣列,指標做輸出 int get2darr char arr out int row,int col for int i 0 i row i arr p 掛上 return 0 完...
一維動態陣列和二維動態陣列的建立和使用
include include void main for i 0 i printf d a i 陣列元素輸出 printf free a 動態釋放指標a所指向的n歌記憶體空間 二維動態陣列的建立和使用 include include 建立二維動態陣列的函式 int make2darray int ...