I have a piece of code that I want to parallelize, but the OpenMP version is much slower than the serial version. What is wrong with my implementation? This is the code of the program:
#include <iostream>
#include <gsl/gsl_math.h>
#include "Chain.h"
using namespace std;
// Drives the run: builds a Chain of N sites, resets it, then for every time
// step updates q, recomputes mx/my from q and recomputes the forces.
int main(){
  int const N=1000;           // number of sites in the chain
  int const timeSteps=100;    // number of iterations of the update loop
  double const delta=0.0001;  // changeQ() is called with delta*t at step t
  Chain ch(N);
  ch.initCond();
  for (int t=0; t<timeSteps; t++){
    ch.changeQ(delta*t);
    ch.calMag_i();
    ch.calForce001();
  }
  // NOTE(review): printSomething() is not among the members declared in the
  // Chain.h shown with this program -- confirm it exists in the real header.
  ch.printSomething();
}
The Chain.h is
// Per-site state of a chain of N sites, stored in four heap arrays of length N.
// A Chain owns its arrays: they are allocated by the constructor and released
// by the destructor.
class Chain{
  public:
    int N;          // number of sites; length of each array below
    double *q;      // per-site value advanced by changeQ()
    double *mx;     // cos(q[i]), filled by calMag_i()
    double *my;     // sin(q[i]), filled by calMag_i()
    double *force;  // per-site result of calForce001()
    Chain(int const Np);
    // Frees the four arrays; without this every Chain leaked 4*N doubles.
    ~Chain(){
      delete[] q;
      delete[] mx;
      delete[] my;
      delete[] force;
    }
    void initCond();
    void changeQ(double delta);
    void calMag_i();
    void calForce001();
    // NOTE(review): main() calls printSomething(), which is not declared
    // here -- confirm whether its declaration was simply left out.
  private:
    // A copy would share the same four pointers and both objects would delete
    // them, so copying is disabled: declared private and never defined.
    Chain(const Chain&);
    Chain& operator=(const Chain&);
};
And the Chain.cpp is
// Builds a chain of Np sites: records the size and allocates one array of Np
// doubles for each of q, mx, my and force. The array contents are not set
// here; initCond() and calMag_i() fill them in.
Chain::Chain(int const Np)
  : N(Np),
    q(new double[Np]),
    mx(new double[Np]),
    my(new double[Np]),
    force(new double[Np])
{
}
// Puts the chain in its starting state: every q entry and every force entry
// becomes 0.0. mx and my are left untouched.
void Chain::initCond(){
  int site = 0;
  while (site < N){
    force[site] = 0.0;
    q[site]     = 0.0;
    ++site;
  }
}
// Adds a site-dependent increment to every entry of q:
//   q[i] = q[i] + delta*i + i/N      (i/N evaluated in floating point)
// Each iteration touches only its own q[i], so the loop is divided among the
// OpenMP threads without any synchronisation.
void Chain::changeQ(double delta){
  // The index is declared in the loop header; a second, unused `i` in the
  // enclosing scope would only be shadowed by it.
  #pragma omp parallel for
  for (int i=0; i<N; i++){
    q[i] = q[i] + delta*i + 1.0*i/N;
  }
}
// Recomputes mx and my from q: mx[i] = cos(q[i]) and my[i] = sin(q[i]).
// Every iteration writes only its own mx[i]/my[i], so the iterations are
// shared among the OpenMP threads.
void Chain::calMag_i(){
  #pragma omp parallel for
  for (int site=0; site<N; ++site){
    double const angle = q[site];
    mx[site] = cos(angle);
    my[site] = sin(angle);
  }
}
void Chain::calForce001(){
  int i;
  int j;
  double fij =0.0;
  double start_time = omp_get_wtime();
  #pragma omp parallel
  {
    #pragma omp for private(j, fij)
    for (i=0; i<N; i++){
      force[i] = 0.0;
      for (j=0; j<i; j++){
        fij = my[i]*mx[j] - mx[i]*my[j];
        #pragma omp critical
        {
          force[i] +=  fij;
          force[j] += -fij;
        }
      }
    }
  }
  double time = omp_get_wtime() - start_time;
  cout <<"time = " << time <<endl;
}
So the methods changeQ() and calMag_i() are in fact faster than the serial code, but my problem is calForce001(). The execution times are:
- with OpenMP: 3.939s
- without OpenMP: 0.217s
Now, clearly I'm doing something wrong, or the code can't be parallelized. Any help would be useful. Thanks in advance. Carlos
Edit: To clarify the question, I added calls to omp_get_wtime() to measure the execution time of the function calForce001(). The times for one execution are:
- with omp :0.0376656
- without omp: 0.00196766
So with OpenMP this method is about 20 times slower.
In addition, I also measured the time for the calMag_i() method:
- with omp: 3.3845e-05
- without omp: 9.9516e-05
For this method, the OpenMP version is about 3 times faster.
I hope this confirms that the performance problem is in the calForce001() method.
 
    