The original problem was launching more threads per block than is possible, like this:
someKernel<<<1 , 1025>>> ( ... );
and not detecting the error, because I did not know how to detect kernel launch errors. This is explained well in talonmies' answer to this question:
What is the canonical way to check for errors using the CUDA runtime API?
Instead of modifying the code I presented, I wrote my own for conciseness:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
// Checks the status returned by a CUDA runtime call and reports a failure
// together with the call site (__FILE__ / __LINE__).
// The do { } while (0) wrapper makes the macro expand to exactly one
// statement, so `gpuErrchk(x);` stays correct inside an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Prints a diagnostic to stderr when cudaError is not cudaSuccess.
//
//   cudaError  status code returned by a CUDA runtime call
//   file       source file of the call site (gpuErrchk passes __FILE__)
//   line       source line of the call site (gpuErrchk passes __LINE__)
//   abort      accepted but NOT acted upon: this function only reports the
//              error and always returns to the caller
//
// `file` is a pointer to const because __FILE__ is a string literal, which
// cannot legally be bound to a plain `char *` in modern C++.
inline void gpuAssert(cudaError_t cudaError, const char *file, int line, bool abort=true)
{
   (void)abort;  // intentionally unused; silences the unused-parameter warning

   if (cudaError == cudaSuccess)
      return;

   fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(cudaError), file, line);
}
// Element-wise sum of two int arrays: dev_c[i] = dev_a[i] + dev_b[i] for the
// first 5 elements.
//
//   dev_a, dev_b  input arrays, read at indices 0..4
//   dev_c         output array, written at indices 0..4
//
// The element index is the thread's flattened position inside its block, so
// each element is written by exactly one thread whether the block is 1D, 2D
// or 3D (for a 1D block this is simply threadIdx.x). blockIdx is not
// consulted, so every block of a multi-block launch would repeat the same
// work.
__global__ void addKernel(const int *dev_a, const int *dev_b, int *dev_c)
{
    const int i = threadIdx.x
                + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    // Guard: threads beyond the 5 elements do nothing.
    if ( i < 5 )
        dev_c[i] = dev_a[i] + dev_b[i];
}
// Demonstrates detection of a kernel launch failure: two 5-element arrays are
// copied to the device, addKernel is launched with an oversized block, and
// every CUDA runtime call (including the post-launch checks and the final
// frees) is routed through gpuErrchk so any error is reported on stderr.
// Always returns 0.
int main()
{
    const int arraySize = 5;
    const size_t bytes = arraySize * sizeof(int);

    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    int *dev_a(nullptr), *dev_b(nullptr), *dev_c(nullptr);

    // Device buffers for the two inputs and the output.
    gpuErrchk( cudaMalloc((void**)&dev_a, bytes) );
    gpuErrchk( cudaMalloc((void**)&dev_b, bytes) );
    gpuErrchk( cudaMalloc((void**)&dev_c, bytes) );

    gpuErrchk( cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice) );
    gpuErrchk( cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice) );

    // Deliberately oversized block shapes used to provoke a launch error:
    // 1025 threads in 1D (kept as the alternative configuration, not used
    // below) and 32 * 33 = 1056 threads in 2D.
    const int testMax1D = 1025; 
    (void)testMax1D;
    dim3 testMax2D ( 32, 33 );

    addKernel<<<1, testMax2D>>> ( dev_a , dev_b, dev_c );
    // A kernel launch returns no status itself: query the launch error
    // first, then synchronize to surface errors raised during execution.
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );

    gpuErrchk(  cudaMemcpy( c, dev_c, bytes, cudaMemcpyDeviceToHost) );
    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        c[0], c[1], c[2], c[3], c[4]);

    // Frees are checked as well so that a failure here is not silently lost.
    gpuErrchk( cudaFree(dev_a) );
    gpuErrchk( cudaFree(dev_b) );
    gpuErrchk( cudaFree(dev_c) );
    return 0;
}
I now get correct error reports. Thank you for your patience.
I don't understand this call in the gpuAssert function, so I omitted it:
if (abort) exit(code);
Is exit a custom written function or something I missed?
 
     
     
    