I have a simple program that measures floating point multiplication (and random number generation); it is compiled with g++ -O0. When run on the host (Ubuntu 16.04) it takes ~1.6 s per 100000000 multiplications; when run in a container built from the 'ubuntu' image (without recompilation) it takes ~3.6 s. Can someone explain why it is more than twice as slow in the container?
P.S. I did multiple runs of the program to get rid of outliers. I do not need to optimize it; I just want a detailed explanation of what is happening here.
test.cpp
#include <cstdio>
#include <cstdlib>  // rand(), RAND_MAX
#include <math.h>
#include <chrono>
using namespace std;
using namespace std::chrono;
// timer cribbed from
// https://gist.github.com/gongzhitaao/7062087
class Timer
{
    public:
    Timer() : beg_(clock_::now()) {}
    void reset() { beg_ = clock_::now(); }
    double elapsed() const
    {
        return duration_cast<second_>(clock_::now() - beg_).count();
    }
    private:
    typedef high_resolution_clock clock_;
    typedef duration<double, ratio<1>> second_;
    time_point<clock_> beg_;
};
#define randf() (((double)rand()) / ((double)(RAND_MAX)))  // uniform double in [0, 1]
double warmup(Timer tmr) {
    tmr.reset();
    for (int i = 0; i < 100000000; i++)
    {
        double r1 = randf();
        double r2 = randf();
    }
    double elapsed = tmr.elapsed();
    return elapsed;
}
double test(Timer tmr) {
    double total = 0.0;
    tmr.reset();
    for (int i = 0; i < 100000000; i++)
    {
        double r1 = randf();
        double r2 = randf();
        total += r1*r2;
    }
    double elapsed = tmr.elapsed();
    return elapsed;
}
double avg(double* arr) {
    double res = 0.0;
    for (int i = 0; i < 10; i++) {
        res += *(arr + i);
    }
    return res / 10;
}
int main()
{
    Timer tmr;
    
    double warmup_runs[10];
    for (int i = 0; i < 10; i++)
    {
        warmup_runs[i] = warmup(tmr);
        printf("warm - %f\n", warmup_runs[i]);
    }
    double avg_warmup = avg(warmup_runs);
    printf("avg warm - %f\n", avg_warmup);
    const int runs = 10;
    double result[runs];
    for (int i = 0; i < runs; i++)
    {
        result[i] = test(tmr);
        printf("real - %f\n", result[i]);
    }
    double avg_result = avg(result);
    printf("avg real - %f\n", avg_result);
    printf("d - %f\n", avg_result - avg_warmup);
}
Dockerfile
FROM ubuntu
WORKDIR /arythmetics
COPY a.out .
compile (on the host): g++ -O0 test.cpp
To run it inside the container after building the image, I use:
docker run -it <image> /bin/bash
./a.out
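For reference, the whole sequence is roughly the following (the image tag fp-bench is just a placeholder):
g++ -O0 test.cpp                  # produces a.out next to the Dockerfile
docker build -t fp-bench .
docker run -it fp-bench /bin/bash
./a.out                           # inside the container, in /arythmetics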
UPDATE:
After compiling with the -static flag, the program's run time is the same in both environments.
That raises another question: why is it practically the same? Shouldn't there be at least some containerization overhead?
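If it helps, one way to check which shared libraries the binary resolves against in each environment (assuming ldd is available both on the host and in the ubuntu image) would be:
ldd ./a.out                               # run on the host and inside the container, compare the listed libc paths
g++ -O0 -static test.cpp && ldd ./a.out   # the static build should report "not a dynamic executable"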