I am surprised at the results of the following code using gcc 4.7.2 on Opensuse Linux:
#include <cmath>
#include <chrono>
#include <cstdlib>
#include <vector>
#include <chrono>
#include <iostream>
#include <future>
int main(void)
{
  const long N = 10*1000*1000;
  std::vector<double> array(N);
  for (auto& i : array)
    i = rand()/333.;
  std::chrono::time_point<std::chrono::system_clock> start, end;
  start = std::chrono::system_clock::now();
  for (auto& i : array)
    pow(i,i);
  end = std::chrono::system_clock::now();
  std::chrono::duration<double> elapsed_seconds = end-start;
  std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";
  start = std::chrono::system_clock::now();
  for (auto& i : array)
    std::packaged_task<double(double,double)> myTask(pow);
  elapsed_seconds = std::chrono::system_clock::now()-start;
  std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";
  start = std::chrono::system_clock::now();
  for (auto& i : array)
    std::packaged_task<double()> myTask(std::bind(pow,i,i));
  elapsed_seconds = std::chrono::system_clock::now()-start;
  std::cout << "elapsed time: " << elapsed_seconds.count() << "s\n";
  return 0;
}
The results look like this (and are fairly consistent amongst runs):
elapsed time: 0.694315s
elapsed time: 6.49907s
elapsed time: 8.42619s
If I interpret the results correctly, just creating a std::packaged_task (not even executing it or storing its arguments yet) is already ten times more expensive than executing pow. Is that a valid conclusion?
Why is this so?
Is this by accident gcc specific?
 
    