I am trying to learn how to program for NVIDIA cards. Here is my code:
#include <iostream>
__global__ void add_one(int* i)
{
    i[0]++;
}
template<class TYPE>
void gpu_load(TYPE data)
{
    int size = 1;
    cudaMalloc((void**) &data, size * sizeof(TYPE));
}
template<class TYPE>
void copy_to_gpu(TYPE* cpu_var, TYPE* gpu_var)
{
    int size = 1;
    cudaMemcpy(gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyHostToDevice);
}
template<class TYPE>
void copy_to_cpu(TYPE* cpu_var, TYPE* gpu_var)
{
    int size = 1;
    cudaMemcpy(gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyDeviceToHost);
}
int main() 
{
    int gpu_i[1];
    int cpu_i[1];
    cpu_i[0] = 5;
    gpu_load(cpu_i);
    copy_to_gpu(cpu_i, gpu_i);
    add_one<<<1, 1>>>(gpu_i);
    int res[1];
    copy_to_cpu(res, gpu_i);
    std::cout << res[0];
}
Why does the cout display 0 instead of 5+1? I tried my best to make it work, but it seems like nothing happens on the GPU.
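For reference, this is my understanding of what the flow is supposed to look like without my template helpers (a minimal sketch using only the standard cudaMalloc / cudaMemcpy runtime calls; error checking omitted):

#include <cuda_runtime.h>
#include <iostream>

__global__ void add_one(int* i)
{
    i[0]++;
}

int main()
{
    int cpu_i = 5;
    int* gpu_i = nullptr;

    // allocate one int on the device; cudaMalloc writes the device address into gpu_i
    cudaMalloc((void**) &gpu_i, sizeof(int));

    // copy the host value to the device (destination first, then source)
    cudaMemcpy(gpu_i, &cpu_i, sizeof(int), cudaMemcpyHostToDevice);

    add_one<<<1, 1>>>(gpu_i);

    // copy the result back to the host
    int res = 0;
    cudaMemcpy(&res, gpu_i, sizeof(int), cudaMemcpyDeviceToHost);

    cudaFree(gpu_i);
    std::cout << res; // I would expect 6 here
}

Is the difference something in how my template helpers pass the pointers around?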