// NOTE(review): the original #include directives had lost their header names.
// The headers below are the ones this translation unit needs; if the original
// pulled in a further header, add it back here.
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Evaluates a CUDA runtime call and terminates the program with a diagnostic
// (file, line, error string) if it did not return cudaSuccess. Ignored return
// codes would otherwise let later calls fail or produce garbage silently.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t cuda_check_err_ = (call);                                \
        if (cuda_check_err_ != cudaSuccess) {                                \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(cuda_check_err_));                    \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

// Implemented in another translation unit. Both are called below with
// pointers obtained from cudaMalloc.
extern "C" void vecadd(int, float*, float*, float*);
extern "C" void set_value(int, float*, float*);

// Allocates three device buffers of n floats, hands them to set_value() and
// vecadd(), copies the third buffer back to the host and prints its first
// few elements to stdout.
//
// Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
// CUDA runtime call fails.
int main()
{
    const int n = 1024;
    const size_t bytes = (size_t)n * sizeof(float);
    // Never print more elements than were actually copied back.
    const int print_count = (n < 10) ? n : 10;

    float *a = NULL;
    float *b = NULL;
    float *c = NULL;

    float *result = (float*)malloc(bytes);
    if (result == NULL) {
        fprintf(stderr, "failed to allocate %lu bytes of host memory\n",
                (unsigned long)bytes);
        return EXIT_FAILURE;
    }

    CUDA_CHECK(cudaMalloc((void**)&a, bytes));
    CUDA_CHECK(cudaMalloc((void**)&b, bytes));
    CUDA_CHECK(cudaMalloc((void**)&c, bytes));

    // Presumably set_value() fills a and b, and vecadd() writes a + b into c
    // -- confirm against their definitions.
    set_value(n, a, b);
    vecadd(n, a, b, c);

    // Surfaces any error left pending by the two calls above (e.g. a bad
    // kernel launch configuration, if they launch kernels).
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: also reports asynchronous execution errors from work
    // queued earlier on the device.
    CUDA_CHECK(cudaMemcpy(result, c, bytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < print_count; i++)
        fprintf(stdout, "c[%d] = %f\n", i, result[i]);

    // Release device and host buffers; the original leaked all four.
    CUDA_CHECK(cudaFree(a));
    CUDA_CHECK(cudaFree(b));
    CUDA_CHECK(cudaFree(c));
    free(result);

    return 0;
}