I have an array of arrays (with different length) in C i would like to process within the "CUDA kernel".
const int N_ARRAYS = 1000;
int *arrayOfArrays[N_ARRAYS];
int arr1[3] = {1,2,3};
int arr2[2] = {1,4};
int arr3[4] = {1,5,3,6};
//....
int arr1000[5] = {9,9,9,10,10};
arrayOfArrays[0] = arr1;
arrayOfArrays[1] = arr2;
arrayOfArrays[2] = arr3;
//...
arrayOfArrays[1000] = arr1000;
I found this post: CUDA allocating array of arrays which gave a good idea on how it could work. But honestly i did not get it to work.
I will summarize the steps again:
- You have to allocate the pointers to a host memory,
 - then allocate device memory for each array
 - and store it's pointer in the host memory.
 - Then allocate the memory for storing the pointers into the device
 - and then copy the host memory to the device memory.
 
Here is what i tried so far, based on the given answer. For illustration purposes i will show it with N_ARRAYS = 3, but in reality its way higher (>1000).
int main(){
    const int N_ARRAYS = 3;
    int *arrayOfArrays[N_ARRAYS];
    int arr1[1] = {1,2,3};
    int arr2[2] = {1,4};
    int arr3[3] = {1,5,3};
    arrayOfArrays[0] = arr1;
    arrayOfArrays[1] = arr2;
    arrayOfArrays[2] = arr3;
    // 1) You have to allocate the pointers to a host memory, 
    //void *h_array = malloc(sizeof(void*) * N_ARRAYS); // i use arrayOfArrays instead
    for(int i = 0; i < N_ARRAYS; i++){
        //2) then allocate device memory for each array
        cudaMalloc(&arrayOfArrays[i], i * sizeof(void*));
    }
    // 4) Allocate the memmory for storing the pointers into the device to *d_array
    void *d_array = cudaMalloc(sizeof(void*) * N_ARRAYS);
    // 5) Copy arrayOfArrays to d_array of size sizeof(void*) * N_ARRAYS from Host to device
    cudaMemcpy(d_array, arrayOfArrays, sizeof(void*) * N_ARRAYS, cudaMemcpyHostToDevice);
    // Call kernel
    multi_array_kernel<1,1>(N_ARRAYS, d_array);
    cudaThreadSynchronize();
    for(int i = 0; i < N_ARRAYS; i++){
        cudaFree(arrayOfArrays[i]); //host not device memory
        //TODO: check error
    }
    cudaFree(d_array);
    free(arrayOfArrays);
}
and the kernel:
__global__ void multi_array_kernel( int N, void** arrays ){
    int nr;
    int sum = 0;
    for(nr = 0; nr < N; nr++){
        if(arrays[nr+0] == arrays[nr-1+0]) sum +=1; // some sample calc.
    }
}