I have to work with a float2 matrix as a 1D array. I wanted to check some things and I have written this code:
#include <stdio.h>
#include <stdlib.h>
#define index(x,y) x+y*N
__global__ void test(float2* matrix_CUDA,int N)
{   
    int i,j;
    i=blockIdx.x*blockDim.x+threadIdx.x;
    j=blockIdx.y*blockDim.y+threadIdx.y;
    matrix_CUDA[index(i,j)].x=i;
    matrix_CUDA[index(i,j)].y=j;
}
int main()
{
    int N=256;
    int i,j;
    //////////////////////////////////////////
    float2* matrix;
    matrix=(float2*)malloc(N*N*sizeof(float2));
    //////////////////////////////////////////
    float2* matrix_CUDA;
    cudaMalloc((void**)&matrix_CUDA,N*N*sizeof(float2));
    //////////////////////////////////////////
    dim3 block_dim(32,2,0);
    dim3 grid_dim(2,2,0);
    test <<< grid_dim,block_dim >>> (matrix_CUDA,N);
    //////////////////////////////////////////
    cudaMemcpy(matrix,matrix_CUDA,N*N*sizeof(float2),cudaMemcpyDeviceToHost);
    for(i=0;i<N;i++)
    {
        for(j=0;j<N;j++)
        {
            printf("%d %d, %f %f\n",i,j,matrix[index(i,j)].x,matrix[index(i,j)].y);
        }
    }
    return 0;
}
I was waiting for a output like:
0 0, 0 0
0 1, 0 1
0 2, 0 2
0 3, 0 3
...
But the thing I find is:
0 0, -nan 7.265723657
0 1, -nan 152345
0 2, 25.2135235 -nan
0 3, 52354.324534 24.52354234523
...
That means I have some problems with the memory allocation (I suppose) but I can't find what is wrong with my code. Could someone help me?
 
     
    