您的当前位置:首页正文

CUDA实例练习(一)

2023-03-24 来源:好走旅游网
CUDA实例练习(一)

题目:用1000000个线程向一个含10个元素的数组写入(每个线程对其中一个元素做原子加1)。

#include <stdio.h>

#include <cuda_runtime.h>

#include "device_launch_parameters.h"
#include "gputimer.h"

// Problem-size configuration. Each directive must sit on its own line:
// a `#define` swallows everything up to the end of the line into its
// replacement list, so fusing them leaves the later names undefined.
#define num_threads 1000000   // total number of GPU threads launched
#define block_width 1000      // threads per block (num_threads must be a multiple of this)
#define array_size 10         // number of int counters the threads increment

// Prints `size` ints from `array` to stdout, separated by spaces.
void print_array(int * array, int size);

// Kernel: every thread atomically adds 1 to one counter in `g`.
//
// Launch layout: 1D grid of 1D blocks. The flat thread index is folded
// into the range [0, array_size) with a modulo, so any number of threads
// may be launched; `g` must be device-accessible and hold at least
// array_size ints. atomicAdd keeps concurrent increments of the same
// counter from losing updates.
__global__ void increment_atomic(int * g)
{
    // Flat index of this thread across the whole grid.
    const int global_tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Counter this thread is responsible for.
    const int slot = global_tid % array_size;

    atomicAdd(g + slot, 1);
}

// Writes the first `size` elements of `array` to stdout, each followed by
// a single space. No trailing newline is emitted; a non-positive `size`
// prints nothing.
//
// array: host-readable buffer with at least `size` ints.
// size:  number of elements to print.
void print_array(int * array, int size)
{
    for (int i = 0; i < size; i++) {
        printf("%d ", array[i]);
    }
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status, const char * what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Launches num_threads threads that atomically increment an array_size-
// element counter array on the device, then prints the counters and the
// time reported by GpuTimer for the kernel.
//
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
int main()
{
    GpuTimer timer;

    // Integer division: num_threads is expected to be a multiple of
    // block_width, otherwise fewer than num_threads threads are launched.
    const int num_blocks = num_threads / block_width;

    printf("%d total threads in %d blocks writing into %d arrays\n",
           num_threads, num_blocks, array_size);

    int h_array[array_size];
    const int array_bytes = array_size * sizeof(int);

    int * d_array = 0;
    if (!cuda_ok(cudaMalloc((void **)&d_array, array_bytes), "cudaMalloc")) {
        return 1;
    }

    // Counters must start at zero; cudaMalloc does not initialise memory.
    bool ok = cuda_ok(cudaMemset((void *)d_array, 0, array_bytes), "cudaMemset");

    if (ok) {
        timer.Start();
        increment_atomic<<<num_blocks, block_width>>>(d_array);
        timer.Stop();

        // A kernel launch returns nothing; an invalid configuration only
        // shows up through cudaGetLastError().
        ok = cuda_ok(cudaGetLastError(), "increment_atomic launch");
    }

    if (ok) {
        // Blocking copy: also waits for the kernel to finish, so any
        // execution error from the kernel is reported here.
        ok = cuda_ok(cudaMemcpy(h_array, d_array, array_bytes, cudaMemcpyDeviceToHost),
                     "cudaMemcpy");
    }

    if (ok) {
        print_array(h_array, array_size);
        printf("\nTime elapsed = %g ms\n", timer.Elapsed());
    }

    // Release the device buffer on both the success and the failure path.
    cudaFree(d_array);
    return ok ? 0 : 1;
}

编译环境:Visual Studio 2013

因篇幅问题不能全部显示,请点此查看更多更全内容