operator [] handles one-and-only-one index until the C++23 Standard becomes official and C++23 compilers are widely available.
Things are further complicated by the desire for the ability to access 2D structures the same way you access 1D structures. You cannot override [] (or any other function) with a change of return type in order to return something you can invoke the inner dimension's [] on AND also return the value.
First, the smart way: Use operator(). This should easily convert to C++23's improved operator[]
template<typename T>
class matrix
{
private:
    std::vector<T> mat; // faster and more versatile as a 1D data structure.
                        // we will perform the indexing math, if necessary,
                        // on demand below. This may sound slow, extra math,
                        // but guess what [][] is doing behind the scenes
                        // anyway? That's right! Same dang thing.
                        // What we get is 1D matrixes are now dead simple
                        // and we have one contiguous block of memory and
                        // this make it dead easy to for the CPU to cache
    size_t rows;    // using size_t instead of in for larger size and who
    size_t cols;    // needs a negative array size?
public:
    // just regular old constructors pretty much unchanged from the asker's 
    // originals other than flattening out the nested vectors
    matrix(int numRows, int numCols) :
            mat(numRows * numCols), rows(numRows), cols(numCols)
    {
    }
    matrix(std::initializer_list<T> initlist) :
            mat( { initlist }), rows(1), cols(initlist.size())
    {
    }
    matrix(std::initializer_list<std::initializer_list<T>> initlist) :
            rows(initlist.size()), cols(initlist.begin()->size())
    {
        for (const auto &rowElement : initlist)
        {
            mat.insert(mat.end(), rowElement.begin(), rowElement.end());
            if (rowElement.size() != cols)
            {
                throw std::runtime_error("Initializer list is not rectangular.");
            }
        }
    }
    // uses operator ()
    T& operator()(size_t index)
    {
        return mat[index];
    }
    T& operator()(size_t row, size_t col)
    {
        return mat[row * cols + col]; // flatten 2D indexing into 1D
    }
    T operator()(size_t index) const
    {
        return mat[index];
    }
    T operator()(size_t row, size_t col) const
    {
        return mat[row * cols + col];
    }
};
Usage:
np::matrix<int> mat(3, 3);
np::matrix<int> mat1 = { { 1, 2, 3 }, { 4, 5, 6 } };
np::matrix<int> mat2 = { 1, 2, 3, 4, 5, 6 };
const np::matrix<int> mat3 = mat1; // test for const-correctness
std::cout << mat1(1,2) << std::endl;
std::cout << mat1(4) << std::endl;
std::cout << mat3(1,2) << std::endl;
std::cout << mat3(4) << std::endl;
Now here's the simplest way I can come up with to do it with the pre C++23 operator[] and preserve the [i][j] syntax.
template<typename T>
class matrix
{
private:
    std::vector<T> mat; 
    size_t rows;    
    size_t cols;    
public:
    // you cannot pass more than one index to operator [] (at least not
    // until C++23). But what you can do is have [] return a proxy object
    // that allows access to the inner dimension
    template <class U>
    class proxy
    {
        matrix &ref;
        size_t index;
        // converts the proxy to a T (int in this case), but can be called
        // on a constant proxy. Needed for operator << because operator T&()
        // cannot perform T x = val; when val is const
        operator T() const
        {
            return ref.mat[index];
        }
    public:
        // just constructs the proxy around the around the matrix and holds
        // onto the index used so we have it later.
        proxy(matrix &ref,
              size_t index): ref(ref), index(index)
        {
        }
        // converts the proxy to a T (int in this case)
        operator T&()
        {
            return ref.mat[index]; // by reading the T out of the matrix directly
        }
        // accesses the proxy as an array so that we can get the value at
        // the inner dimension
        T& operator [](size_t col)
        {
            return ref.mat[index * ref.cols + col];
        }
        // allows transparent printing of the value referenced by the proxy
        // reading will be something similar, though, obviously, not on a
        // constant proxy
        friend std::ostream & operator <<(std::ostream & out,
                                          const proxy & val)
        {
            T x = val;
            out << x;
            return out;
        }
    };
    // this does the same thing as the basic proxy, but for const matrixes.
    // proxy's ref member cannot reference a const matrix, and this was the
    // best way I could think of to avoid const_casting away const
    template <class U>
    class const_proxy
    {
        const matrix &ref;
        size_t index;
    public:
        const_proxy(const matrix &ref,
                    size_t index): ref(ref), index(index)
        {
        }
        operator T() const
        {
            return ref.mat[index];
        }
        T operator [](size_t col)const
        {
            return ref.mat[index * ref.cols + col];
        }
        friend std::ostream & operator <<(std::ostream & out,
                                          const const_proxy & val)
        {
            T x = val;  // this time everything's const, so we don't need
                        // the extra method
            out << x;
            return out;
        }
    };
    // The following friendship declarations are required before C++11.
    // friend class proxy<T>;
    // friend class const_proxy<T>;
    matrix(int numRows, int numCols) :
            mat(numRows * numCols), rows(numRows), cols(numCols)
    {
    }
    matrix(std::initializer_list<T> initlist) :
            mat( { initlist }), rows(1), cols(initlist.size())
    {
    }
    matrix(std::initializer_list<std::initializer_list<T>> initlist) :
            rows(initlist.size()), cols(initlist.begin()->size())
    {
        for (const auto &rowElement : initlist)
        {
            mat.insert(mat.end(), rowElement.begin(), rowElement.end());
            if (rowElement.size() != cols)
            {
                throw std::runtime_error("Initializer list is not rectangular.");
            }
        }
    }
    // rather than returning the value directly, returns the proxy
    proxy<T> operator[](size_t index)
    {
        return proxy<T>(*this, index);
    }
    const_proxy<T> operator[](size_t index) const
    {
        return const_proxy<T>(*this, index);
    }
};
And usage
np::matrix<int> mat(3, 3);
np::matrix<int> mat1 = { { 1, 2, 3 }, { 4, 5, 6 } };
np::matrix<int> mat2 = { 1, 2, 3, 4, 5, 6 };
const np::matrix<int> mat3 = mat1; // test for const-correctness
std::cout << mat1[1][2] << std::endl;
// what just happened: mat1[1] returned a proxy. [2] is invoked on the proxy
// retrieving the value at mat1[1][2]. << is called with the returned
// value
std::cout << mat1[4] << std::endl;
// what just happened: mat1[4] returned a proxy. << is called on the proxy,
// which in turn retrieves the value at index 4 of our flattened-to-1D array
// << is called with the retrieved value
std::cout << mat3[1][2] << std::endl;
std::cout << mat3[4] << std::endl;
Now... Are you certain you want to inflict this much complexity on people?