I tried to run a simple CUDA program that adds two vectors, but the result is only a vector of zeros.
[EDIT] CUDA 11.0 compiles for a GPU with compute capability 5.2 by default. You can change this with the `-arch=` flag when compiling with nvcc (`nvcc -arch=sm_50 file.cu` in my case). See also: "cuda 11 kernel doesn't run".
It seems like the kernel doesn't do anything, because I also tried storing an integer directly in c[0]. The program runs on CUDA 11.0 with an M1200 on Ubuntu 20.04.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
// Element-wise vector addition: thread i stores a[i] + b[i] into c[i].
//
// Launch layout: the element index is taken from threadIdx.x alone (blockIdx
// is ignored), so only the first blockDim.x elements are ever processed,
// whatever the grid size. There is no bounds check: a, b and c must each hold
// at least blockDim.x ints, otherwise the kernel accesses memory out of range.
//
// The former debugging store `c[0] = 100;` was removed: every thread executed
// it, so it raced with thread 0's store of the real result into c[0].
__global__ void vectorAdd(int* a, int* b, int* c){
    const int i = threadIdx.x;
    c[i] = a[i] + b[i];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise; `what` names the
// call that produced `status` so the message points at the failing step.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Adds two small host arrays on the GPU and prints the element count followed
// by each element of the result, one per line.
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
int main() {
    int a[]= {1,2,3,4,5,6,7,8,9};
    int b[]= {1,2,3,4,5,6,7,8,9};
    // const with a constant initializer, so c below is a regular fixed-size
    // array instead of a variable-length array (a compiler extension in C++).
    const int sa = sizeof(a) / sizeof(a[0]);
    int c[sa] = {0};

    int* cudaA = 0;
    int* cudaB = 0;
    int* cudaC = 0;

    // Short-circuit chain: the first failing call is reported and every later
    // step is skipped.
    bool ok = cudaOk(cudaMalloc(&cudaA, sizeof(a)), "cudaMalloc(cudaA)")
           && cudaOk(cudaMalloc(&cudaB, sizeof(b)), "cudaMalloc(cudaB)")
           && cudaOk(cudaMalloc(&cudaC, sizeof(c)), "cudaMalloc(cudaC)")
           && cudaOk(cudaMemcpy(cudaA, a, sizeof(a), cudaMemcpyHostToDevice),
                     "cudaMemcpy(a -> cudaA)")
           && cudaOk(cudaMemcpy(cudaB, b, sizeof(b), cudaMemcpyHostToDevice),
                     "cudaMemcpy(b -> cudaB)");

    if (ok) {
        std::cout << sa << std::endl;
        vectorAdd<<<1, sa>>>(cudaA, cudaB, cudaC);
        // A kernel launch returns no status itself. Launch failures (for
        // example a binary that contains no kernel image for the installed
        // GPU's architecture) are only visible through cudaGetLastError();
        // without this check the program silently prints zeros.
        ok = cudaOk(cudaGetLastError(), "vectorAdd launch")
          // The blocking copy also waits for the kernel to finish and reports
          // any error raised while it was executing.
          && cudaOk(cudaMemcpy(c, cudaC, sizeof(c), cudaMemcpyDeviceToHost),
                    "cudaMemcpy(cudaC -> c)");
    }

    // Release device memory on every path; cudaFree on a null pointer is a no-op.
    cudaFree(cudaA);
    cudaFree(cudaB);
    cudaFree(cudaC);

    if (!ok) {
        return 1;
    }

    for (int x = 0; x < sa; x++){
        std::cout << c[x]<< std::endl;
    }

    return 0;
}
The code is from a video on YouTube.
