So I see a parent question about how to copy from host to the constant memory on GPU using cudaMemcpyToSymbol.
My question is how to do the reverse, copying from device constant memory to the host using cudaMemcpyFromSymbol.
In the following minimal reproducible example, I either got
- 1)
invalid device symbolerror usingcudaMemcpyFromSymbol(const_d_a, b, size);, or - 2) got
segmentation faultif I usecudaMemcpyFromSymbol(&b, const_d_a, size, cudaMemcpyDeviceToHost).
I have consulted with the manual which suggests I code as in 1), and this SO question that suggests I code as in 2). Neither of them work here.
Could anyone kindly help suggesting a workaround with this? I must be understanding something improperly... Thanks!
Here is the code:
// a basic CUDA function to test working with device constant memory
#include <stdio.h>
#include <cuda.h>
const unsigned int N = 10; // size of vectors
__constant__ float const_d_a[N * sizeof(float)];
int main()
{
float * a, * b; // a and b are vectors. c is the result
a = (float *)calloc(N, sizeof(float));
b = (float *)calloc(N, sizeof(float));
/**************************** Exp 1: sequential ***************************/
int i;
int size = N * sizeof(float);
for (i = 0; i < N; i++){
a[i] = (float)i / 0.23 + 1;
}
// 1. copy a to constant memory
cudaError_t err = cudaMemcpyToSymbol(const_d_a, a, size);
if (err != cudaSuccess){
printf("%s in %s at line %d\n", cudaGetErrorString(err), __FILE__, __LINE__);
exit(EXIT_FAILURE);
}
cudaError_t err2 = cudaMemcpyFromSymbol(const_d_a, b, size);
if (err2 != cudaSuccess){
printf("%s in %s at line %d\n", cudaGetErrorString(err2), __FILE__, __LINE__);
exit(EXIT_FAILURE);
}
double checksum0, checksum1;
for (i = 0; i < N; i++){
checksum0 += a[i];
checksum1 += b[i];
}
printf("Checksum for elements in host memory is %f\n.", checksum0);
printf("Checksum for elements in constant memory is %f\n.", checksum1);
return 0;
}