I am new to OpenMP. I am trying to implement a 1000x1000 matrix multiplication and speed it up with OpenMP. My code compiles without any errors, but when I run the executable it does not print any results. When I change the dimension to 10x10, it works immediately! I have no idea what is wrong with my code.
#define n 1000 // Dimension of the square matrices (each is n x n)
'''
int i, j, k, th_id, num_thread;
int max = 100;
double a[n][n], b[n][n], mul[n][n];
double norm;
srand(time(0));
double start = omp_get_wtime();
omp_set_num_threads(6);
#pragma omp parallel shared(a, b, mul, norm, num_thread) private(th_id, i, j, k)
{
    th_id = omp_get_thread_num();
    #pragma omp for
    for (i=0; i<n; i++){
        for (j=0; j<n; j++){
            a[i][j] = double(rand()%max)/100;
        }
    }
    #pragma omp for
    for (i=0; i<n; i++){
        for (j=0; j<n; j++){
            b[i][j] = double(rand()%max)/100;
        }
    }
    #pragma omp for
    for (i=0; i<n; i++){
        for (j=0; j<n; j++){
            mul[i][j] = 0;
        }
    }
    
    printf("Thread %d starting matrix multiply...\n", th_id);
    #pragma omp for
    for (i=0; i<n; i++) {
        printf("Thread = %d did row = %d\n", th_id, i);
        for (j=0; j<n; j++) {
            for (k=0; k<n; k++){
                mul[i][j] += a[i][k]*b[k][j]; 
            } 
        }
    }
    #pragma omp for reduction(+:norm)
    // Calculate Frobenius norm
    for (i=0; i<n; i++) {
        for (j=0; j<n; j++){
            norm += mul[i][j]*mul[i][j];
        }
    }
}
double end = omp_get_wtime();
printf("Frobenuis norm of parallel node is %lf\n", sqrt(norm));
printf("Elapsed time = %lf s\n", end-start);
printf("Precision = %lf s\n", omp_get_wtick());
printf("End Program");
return 0;
'''
 
    