I am comparing the time it takes Julia to compute the Euclidean distances between two sets of points in 3D space against an equivalent implementation in C. I was very surprised to observe that (for this particular case and my particular implementations) Julia is 22% faster than C. When I also included @fastmath in the Julia version, it would be even 83% faster than C.
This leads to my question: why? Either Julia is more amazing than I originally thought or I am doing something very inefficient in C. I am betting my money on the latter.
Some particulars about the implementation:
- In Julia I use 2D arrays of Float64.
- In C I use dynamically allocated 1D arrays of double.
- In C I use the sqrtfunction frommath.h.
- The computations are very fast, therefore I compute them a 1000 times to avoid comparing on the micro/millisecond level.
Some particulars about the compilation:
- Compiler: gcc 5.4.0
- Optimisation flags: -O3 -ffast-math
Timings:
- Julia (without @fastmath): 90 s
- Julia (with @fastmath): 20 s
- C: 116 s
- I use the bash command timefor the timings- $ time ./particleDistance.jl(with shebang in file)
- $ time ./particleDistance
 
particleDistance.jl
#!/usr/local/bin/julia
function distance!(x::Array{Float64, 2}, y::Array{Float64, 2}, r::Array{Float64, 2})
    nx = size(x, 1)
    ny = size(y, 1)
    for k = 1:1000
        for j = 1:ny
            @fastmath for i = 1:nx
                @inbounds dx = y[j, 1] - x[i, 1]
                @inbounds dy = y[j, 2] - x[i, 2]
                @inbounds dz = y[j, 3] - x[i, 3]
                rSq = dx*dx + dy*dy + dz*dz
                @inbounds r[i, j] = sqrt(rSq)
            end
        end
    end
end
function main()
    n = 4096
    m = 4096
    x = rand(n, 3)
    y = rand(m, 3)
    r = zeros(n, m)
    distance!(x, y, r)
    println("r[n, m] = $(r[n, m])")
end
main()
particleDistance.c
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
void distance(int n, int m, double* x, double* y, double* r)
{
    int i, j, I, J;
    double dx, dy, dz, rSq;
    for (int k = 0; k < 1000; k++)
    {
        for (j = 0; j < m; j++)
        {
            J = 3*j;
            for (i = 0; i < n; i++)
            {
                I = 3*i;
                dx = y[J] - x[I];
                dy = y[J+1] - x[I+1];
                dz = y[J+2] - x[I+2];
                rSq = dx*dx + dy*dy + dz*dz;
                r[j*n+i] = sqrt(rSq);
            }
        }
    }
}
int main()
{
    int i;
    int n = 4096;
    int m = 4096;
    double *x, *y, *r;
    size_t xbytes = 3*n*sizeof(double);
    size_t ybytes = 3*m*sizeof(double);
    x = (double*) malloc(xbytes);
    y = (double*) malloc(ybytes);
    r = (double*) malloc(xbytes*ybytes/9);
    for (i = 0; i < 3*n; i++)
    {
        x[i] = (double) rand()/RAND_MAX*2.0-1.0;
    }
    for (i = 0; i < 3*m; i++)
    {
        y[i] = (double) rand()/RAND_MAX*2.0-1.0;
    }
    distance(n, m, x, y, r);
    printf("r[n*m-1] = %f\n", r[n*m-1]);
    free(x);
    free(y);
    free(r);
    return 0;
}
Makefile
all: particleDistance.c
    gcc -o particleDistance particleDistance.c -O3 -ffast-math -lm
 
     
    