CUDA記憶體拷貝

2021-08-13 17:56:30 字數 3467 閱讀 3539

1、cudaMemcpy() <--> cudaMalloc()  //線性記憶體拷貝

//線性記憶體拷貝
cudaMalloc((void**)&dev_a, data_size);
cudaMemcpy(dev_a, host_a, data_size, cudaMemcpyHostToDevice);

2、cudaMemcpy2D() <--> cudaMallocPitch() //線性記憶體拷貝

cudaError_t cudaMemcpy2D(
    void *dst,
    size_t dpitch,
    const void *src,
    size_t spitch,
    size_t width,
    size_t height,
    enum cudaMemcpyKind kind
)

例:

cudaMallocPitch((void**)&devPtr, &pitch, width * sizeof(float), height);
cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind)

3、cudaMemcpy2DToArray() <--> cudaMallocArray() //(二維)線性記憶體到2維陣列的拷貝

cudaError_t cudaMemcpy2DToArray(
    struct cudaArray *dst,
    size_t wOffset,
    size_t hOffset,
    const void *src,
    size_t spitch,
    size_t width,
    size_t height,
    enum cudaMemcpyKind kind
)

例:

void mv(float *y, float *a, float *x, int m, int n)

4、cudaMemcpyToArray() <--> cudaMallocArray()  //(1維)線性記憶體到2維陣列的拷貝

cudaError_t cudaMemcpyToArray(
    struct cudaArray *dst,
    size_t wOffset,
    size_t hOffset,
    const void *src,
    size_t count,
    enum cudaMemcpyKind kind
)

例:

void initCudaTexture(float *h_volume, float2 *velocity)

5、cudaMemcpy3D() <--> cudaMalloc3DArray() //(1維)線性記憶體到3維陣列的拷貝

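cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p)

struct cudaExtent {
    size_t width;
    size_t height;
    size_t depth;
};
struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d);

struct cudaPos {
    size_t x;
    size_t y;
    size_t z;
};
struct cudaPos make_cudaPos(size_t x, size_t y, size_t z);

struct cudaMemcpy3DParms {
    struct cudaArray     *srcArray;
    struct cudaPos        srcPos;
    struct cudaPitchedPtr srcPtr;
    struct cudaArray     *dstArray;
    struct cudaPos        dstPos;
    struct cudaPitchedPtr dstPtr;
    struct cudaExtent     extent;
    enum cudaMemcpyKind   kind;
};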

例: 

void initCudaTexture(const uchar *h_volume, cudaExtent volumeSize)
{
    // …(原始程式的第 2–7 行在擷取時遺失;下方用到的 channelDesc、d_volumeArray、copyParams 應在此處宣告並配置)
    copyParams.srcPtr = make_cudaPitchedPtr((void*)h_volume, volumeSize.width * sizeof(uchar), volumeSize.width, volumeSize.height);
    copyParams.dstArray = d_volumeArray;
    copyParams.extent = volumeSize;
    copyParams.kind = cudaMemcpyHostToDevice;
    cutilSafeCall(cudaMemcpy3D(&copyParams));

    tex.normalized = true;
    tex.filterMode = cudaFilterModeLinear;
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.addressMode[2] = cudaAddressModeWrap;

    cutilSafeCall(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
}

6、cudaMemcpyToSymbol()  //拷貝到常數儲存器

__constant__ float constData[256];
float data[256];
cudaMemcpyToSymbol(constData, data, sizeof(data));
cudaMemcpyFromSymbol(data, constData, sizeof(data));

__device__ float devData;
float value = 3.14f;
cudaMemcpyToSymbol(devData, &value, sizeof(float));

__device__ float* devPointer;
float* ptr;
cudaMalloc(&ptr, 256 * sizeof(float));
cudaMemcpyToSymbol(devPointer, &ptr, sizeof(ptr));

CUDA記憶體拷貝

1 cudamemcpy cudamalloc 線性記憶體拷貝 1 線性記憶體拷貝 2 cudamalloc void dev a,data size 3 cudamemcpy dev a,host a,data size,cudamemcpyhosttodevice 2 cudamemcpy2d ...

CUDA學習之零拷貝記憶體

當使用零拷貝記憶體來共享主機和裝置間的資料時,必須同步主機和裝置間的記憶體訪問,同時更改主機和裝置的零拷貝記憶體中的資料將導致不可預知的後果。有兩種常見的異構計算系統架構:整合架構和離散架構。在整合架構中,CPU和GPU整合在一個晶片上,並且在實體位址上共享主存。在這種架構中,由於無須在PCIe匯流...

CUDA記憶體使用

CUDA執行緒可以在執行過程中從多種記憶體空間訪問資料,分為三個層次:1,區域性記憶體:每一個執行緒有其私有的區域性記憶體。2,共享記憶體:每一個執行緒塊(thread block)有一個共享記憶體,可以被該執行緒塊中的所有執行緒訪問。3,全域性記憶體:所有的執行緒都能訪問。此外還有兩個能被所有執行...