I am working on HD image processing using CUDA 7.5 with NVIDIA GEFORCE 840M on Ubuntu 14.04. I have a 3750*3750 image, and I have troubles to initialize an array of this dimension. The following code runs until k is about 4000.
__device__ int sImg;
__device__ int *B;
/* ############################### INITILIAZE ############################## */
__global__ void initialize(int *tab, int v, int s)
{    
  int k = blockDim.x*blockIdx.x + threadIdx.x ;
  if ( k < s )
    tab[k] = v;
}
/* ########################### The parent kernel ########################### */
__global__ void EDGE(int *A, int *C ,int h, int w, int dim, int nbScales)
{
  sImg = dim*dim;
  cudaMalloc((void**)&B,sImg*sizeof(int));
  int threadsPerBlock = 256;
  int blocksPerGrid = (sImg + threadsPerBlock -1) / threadsPerBlock;
  /// I have troubles here, it does not complete the process
  initialize<<<blocksPerGrid,threadsPerBlock>>>(B,0,sImg);
  cudaDeviceSynchronize();
  initialize<<<blocksPerGrid,threadsPerBlock>>>(C,0,sImg);
  cudaDeviceSynchronize();  
  /// A transormation into frequency domain
  FSDWT <<< 1 , nbScales >>> (A,B, h, w,dim,nbScales);
  cudaDeviceSynchronize();
  /// Tresholding the transform                                     
  Treshold<<<1,1>>>(B,C,dim*dim);
  cudaDeviceSynchronize();
  cudaFree(B);
}
/* ############################  call from host ############################ */
extern "C" void EDGE_host(int *A,int *B,int h,int w,int dim, int nbScales)
{
  EDGE <<< 1 , 1 >>> (A,B, h, w,dim,nbScales);
}
Thank you very much
 
     
    