Most implementations I know use a 1D array with wrappers either via MACROS or methods to access the 2D data. I myself can't say which is faster and I can hardly see a case where this would really matter.
However this being said I would still recommend using 1D arrays for the following reasons:
- easier to allocate and release dynamically (If you want to allocate your data dynamically and maintain the matrix[i][j] pattern then you are allocating an array of arrays - slower to allocate more bug prone, you have to release each array separately)
- if you allocate dynamically and have to copy the data to a 3d party library say OpenGL or OpenCV you are into some trouble since they need the data to be contiguous.
- It will be easier to store and read the matrix from files or other persistent storage.
- even if you don't allocate dynamically you will have to do all sorts of ugly castings to handle the transfer of memory to 3d party libraries.
- it's super easy to create a structure with methods to have efficient 2d access to your data without multiplications or unclear indexing. Or for that matter use MACROS to encapsulate the ugly multiplications for you.
- You could even create a structure to look like the next sample and maintain the same benefits of a 1d array with the readability of a 2d array:
template<typename T>
struct Matrix4x4
{
    struct index
    {
        int row, col;
        index(int r = 0, int c = 0)
            : row(r), col(c){}
        index(index const & cp)
            : row(cp.row), col(cp.col)
        {
        }
        //Assignment ommited for brevity
    };
    /*
        Constructors, Assignments etc. ommited for brevity
    */
    T m00, m01, m02, m03;
    T m10, m11, m12, m13;
    T m20, m21, m22, m23;
    T m30, m31, m32, m33;
    T * toArray() const
    {
        return &m00;
    }
    T * toArray()
    {
        return &m00;
    }
    T * row(int r)
    {
        return (&m00) + r*4;
    }
    T * row(int r) const
    {
        return (&m00) + r*4;
    }
    T & operator()(int r, int c)
    {
        return *row(r)[c];
    }
    T const & operator()(int r, int c) const
    {
        return *row(r)[c];
    }
    T & operator[](index const & idx)
    {
        return row(idx.row)[idx.col];
    }
    T const & operator[](index const & idx) const
    {
        return row(idx.row)[idx.col];
    }
};
In your code you can do:
typedef Matrix4x4<double> Matrix4x4d;
Matrix4x4d mat;
/* You can do any of the following */
mat.m23 = 6.0;
mat(2,3) = 6.0;
mat[Matrix4x4d::index(2,3)] = 6.0;
mat.row(2)[3] = 6.0;
mat.toArray()[2*4 + 3] = 6.0;
#define M(m,r,c) (*((&m.m00) + r*4 + c))
M(mat,2,3) = 6.0;
I myself implemented several matrix libraries over the years and always opted for the 1d solution.