1、cudaMemcpy() <--> cudaMalloc() // 線性記憶體拷貝
// 線性記憶體拷貝
cudaMalloc((void**)&dev_a, data_size);
cudaMemcpy(dev_a, host_a, data_size, cudaMemcpyHostToDevice);
2、cudaMemcpy2D() <--> cudaMallocPitch() // 線性記憶體拷貝
cudaError_t cudaMemcpy2D(void *dst,
                         size_t dpitch,
                         const void *src,
                         size_t spitch,
                         size_t width,
                         size_t height,
                         enum cudaMemcpyKind kind
                        )
例:
cudaMallocPitch((void**)&devPtr, &pitch, width * sizeof(float), height);
cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind)
3、cudaMemcpy2DToArray() <--> cudaMallocArray() // (二維)線性記憶體到2維陣列的拷貝
例:
cudaError_t cudaMemcpy2DToArray(
    struct cudaArray *dst,
    size_t wOffset,
    size_t hOffset,
    const void *src,
    size_t spitch,
    size_t width,
    size_t height,
    enum cudaMemcpyKind kind
)
void mv(float *y, float *a, float *x, int m, int n) ...
4、cudaMemcpyToArray() <--> cudaMallocArray() // (1維)線性記憶體到2維陣列的拷貝
例:
cudaError_t cudaMemcpyToArray(
    struct cudaArray *dst,
    size_t wOffset,
    size_t hOffset,
    const void *src,
    size_t count,
    enum cudaMemcpyKind kind
)
void initCudaTexture(float *h_volume, float2 *velocity) ...
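5、cudaMemcpy3D() <--> cudaMalloc3DArray() // (1維)線性記憶體到3維陣列的拷貝
cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p)

struct cudaExtent {
    size_t width;
    size_t height;
    size_t depth;
};
struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d);

struct cudaPos {
    size_t x;
    size_t y;
    size_t z;
};
struct cudaPos make_cudaPos(size_t x, size_t y, size_t z);

struct cudaMemcpy3DParms {
    struct cudaArray     *srcArray;
    struct cudaPos        srcPos;
    struct cudaPitchedPtr srcPtr;
    struct cudaArray     *dstArray;
    struct cudaPos        dstPos;
    struct cudaPitchedPtr dstPtr;
    struct cudaExtent     extent;
    enum cudaMemcpyKind   kind;
};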
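例:
void initCudaTexture(const uchar *h_volume, cudaExtent volumeSize)
{
    // create 3D array
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
    cutilSafeCall(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));

    // copy data to 3D array
    cudaMemcpy3DParms copyParams = {0};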
    copyParams.srcPtr   = make_cudaPitchedPtr((void*)h_volume, volumeSize.width * sizeof(uchar), volumeSize.width, volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent   = volumeSize;
    copyParams.kind     = cudaMemcpyHostToDevice;
    cutilSafeCall(cudaMemcpy3D(&copyParams));

    tex.normalized = true;
    tex.filterMode = cudaFilterModeLinear;
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.addressMode[2] = cudaAddressModeWrap;

    cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
}
6、cudaMemcpyToSymbol() // 拷貝到常數儲存器
__constant__ float constData[256];
float data[256];
cudaMemcpyToSymbol(constData, data, sizeof(data));
cudaMemcpyFromSymbol(data, constData, sizeof(data));
__device__ float devData;
float value = 3.14f;
cudaMemcpyToSymbol(devData, &value, sizeof(float));
__device__ float* devPointer;
float* ptr;
cudaMalloc(&ptr, 256 * sizeof(float));
cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));
CUDA記憶體拷貝
1 cudamemcpy cudamalloc 線性記憶體拷貝 1 線性記憶體拷貝 2 cudamalloc void dev a,data size 3 cudamemcpy dev a,host a,data size,cudamemcpyhosttodevice 2 cudamemcpy2d ...
CUDA學習之零拷貝記憶體
當使用零拷貝記憶體來共享主機和裝置間的資料時,必須同步主機和裝置間的記憶體訪問;同時更改主機和裝置的零拷貝記憶體中的資料將導致不可預知的後果。有兩種常見的異構計算系統架構:整合架構和離散架構。在整合架構中,CPU 和 GPU 整合在一個晶片上,並且在實體地址上共享主存。在這種架構中,由於無須在 PCIe 匯流...
CUDA記憶體使用
CUDA 執行緒可以在執行過程中從多種記憶體空間訪問資料,分為三個層次:1,區域性記憶體:每一個執行緒有其私有的區域性記憶體。2,共享記憶體:每一個執行緒塊(thread block)有一個共享記憶體,可以被該執行緒塊中的所有執行緒訪問。3,全域性記憶體:所有的執行緒都能訪問。此外還有兩個能被所有執行...