when I use this code in cuda it only increase a[0],a[1],a[2] other was 0 (didn't increased)
   __global__ void inc2(int * a){
        int i= threadIdx.x;
        i%=10;
        atomicAdd(&(a[i]),1);
    }
when I write
__global__ void inc2(int * a){
    int i= threadIdx.x;
    i%=10;
    atomicAdd(&(a[6]),1);
}
it didn't increase a[6]
what's wrong? sorry
all of the code is this
__global__ void inc2(int * a){
    int i= threadIdx.x;
    i%=10;
    atomicAdd(&(a[6]),1);
}
int main()
{
    //=============================================
    int aaa[10]={0};
    int *q;
    cudaMalloc((void**)&q,100);
    cudaMemcpy(q,aaa,10,cudaMemcpyHostToDevice);
    inc2<<<100,100>>>(q);
    cudaMemcpy(aaa,q,10,cudaMemcpyDeviceToHost);
    printf("\n\n");
    for(int i=0;i<10;i++){
        printf("%d\t",aaa[i]);
    }
    cudaFree(q);
    return 0;
 }
 
     
    