I have a simple C# and C++ code that computes a sum of dot products.
The C# code is:
using System;
namespace DotPerfTestCS
{
    class Program
    {
        struct Point3D
        {
            public double X, Y, Z;
            public Point3D(double x, double y, double z)
            {
                X = x;
                Y = y;
                Z = z;
            }
        }
        static void RunTest()
        {
            unchecked
            {
                const int numPoints = 100000;
                const int numIters = 100000000;
                Point3D[] pts = new Point3D[numPoints];
                for (int i = 0; i < numPoints; i++) pts[i] = new Point3D(i, i + 1, i + 2);
                var begin = DateTime.Now;
                double sum = 0.0;
                var u = new Point3D(1, 2, 3);
                for (int i = 0; i < numIters; i++)
                {
                    var v = pts[i % numPoints];
                    sum += u.X * v.X + u.Y * v.Y + u.Z * v.Z;
                }
                var end = DateTime.Now;
                Console.WriteLine("Sum: {0} Time elapsed: {1} ms", sum, (end - begin).TotalMilliseconds);
            }
        }
        static void Main(string[] args)
        {
            for (int i = 0; i < 5; i++) RunTest();
        }
    }
}
and the C++ is
#include <iostream>
#include <vector>
#include <time.h>
using namespace std;
typedef struct point3d
{
    double x, y, z;
    point3d(double x, double y, double z)
    {
        this->x = x;
        this->y = y;
        this->z = z;
    }
} point3d_t;
double diffclock(clock_t clock1,clock_t clock2)
{
    double diffticks=clock1-clock2;
    double diffms=(diffticks*10)/CLOCKS_PER_SEC;
    return diffms;
}
void runTest()
{
    const int numPoints = 100000;
    const int numIters = 100000000;
    vector<point3d_t> pts;
    for (int i = 0; i < numPoints; i++) pts.push_back(point3d_t(i, i + 1, i + 2));
    auto begin = clock();
    double sum = 0.0, dum = 0.0;
    point3d_t u(1, 2, 3);
    for (int i = 0; i < numIters; i++) 
    {
        point3d_t v = pts[i % numPoints];
        sum += u.x * v.x + u.y * v.y + u.z * v.z;
    }
    auto end = clock();
    cout << "Sum: " << sum << " Time elapsed: " << double(diffclock(end,begin)) << " ms" << endl;
}
int main()
{
    for (int i = 0; i < 5; i++) runTest();
    return 0;
}
The C# version (Release x86 with optimization on, x64 is even slower) output is
Sum: 30000500000000 Time elapsed: 551.0299 ms 
Sum: 30000500000000 Time elapsed: 551.0315 ms 
Sum: 30000500000000 Time elapsed: 552.0294 ms
Sum: 30000500000000 Time elapsed: 551.0316 ms 
Sum: 30000500000000 Time elapsed: 550.0315 ms
while C++ (default VS2010 Release build settings) yields
Sum: 3.00005e+013 Time elapsed: 4.27 ms
Sum: 3.00005e+013 Time elapsed: 4.27 ms
Sum: 3.00005e+013 Time elapsed: 4.25 ms
Sum: 3.00005e+013 Time elapsed: 4.25 ms
Sum: 3.00005e+013 Time elapsed: 4.25 ms
Now I would expect the C# code would be a little slower. But 130 times slower seems way too much to me. Can someone please explain to me what is going on here?
EDIT
I am not a C++ programmer and I just took the diffclock code somewhere from the internet without really checking if it's correct.
Using std::difftime the C++ results are
Sum: 3.00005e+013 Time elapsed: 457 ms
Sum: 3.00005e+013 Time elapsed: 452 ms
Sum: 3.00005e+013 Time elapsed: 451 ms
Sum: 3.00005e+013 Time elapsed: 451 ms
Sum: 3.00005e+013 Time elapsed: 451 ms
which seems about right.
 
     
     
     
     
    