I'm trying to implement my own reference-counted struct, compiled with the Visual Studio 2008 C++ compiler for Windows. I came up with this:
struct STOP_FLAG
{
    STOP_FLAG() 
        : mRefCount(0)
        , pbStopFlag(NULL)
    {
        pbStopFlag = new (std::nothrow) volatile BOOL;
        ASSERT(pbStopFlag);
        this->grab();
    }
    ~STOP_FLAG()
    {
        this->release();
    }
    STOP_FLAG(const STOP_FLAG& s) 
        : pbStopFlag(s.pbStopFlag)
        , mRefCount(s.mRefCount)
    {
        //Copy constructor
        this->grab();
    }
    STOP_FLAG& operator = (const STOP_FLAG& s)
    {
        //Assignment operator
        if(pbStopFlag != s.pbStopFlag)
        {
            this->release();
            s.grab();
            pbStopFlag = s.pbStopFlag;
            mRefCount = s.mRefCount;
        }
        return *this;
    }
    //Helper methods
    volatile BOOL* operator->() const {return pbStopFlag;}      //x->member
    volatile BOOL& operator*() const {return *pbStopFlag;}      //*x, (*x).member
    operator volatile BOOL*() const {return pbStopFlag;}        //T* y = x;
    operator bool() const {return pbStopFlag != NULL;}          //if(x)
private:
    void grab() const 
    {
        //++mRefCount;
        ::InterlockedIncrement(&mRefCount);
    }
    void release() const
    {
        ASSERT(mRefCount > 0);
        //--mRefCount;
        LONG mCnt = ::InterlockedDecrement(&mRefCount);
        if(mCnt == 0)
        {
            ASSERT(pbStopFlag);
            if(pbStopFlag)
            {
                delete pbStopFlag;
                pbStopFlag = NULL;
            }
        }
    }
private:
    mutable volatile BOOL* pbStopFlag;
    mutable LONG mRefCount;
};
But when I test the following (running in a single thread) with a debugger:
// Single-threaded exercise of STOP_FLAG construction, copying and assignment.
// There are six default constructions below, each of which allocates a flag,
// so a balanced implementation must have freed six flags by the time the
// outer scope closes.
{
    STOP_FLAG sf;               // default construction #1
    {
        STOP_FLAG s2(sf);       // copy construction: s2 refers to sf's flag
        s2 = sf;                // both already refer to the same flag, so this is a no-op
        STOP_FLAG s3;           // default construction #2
        s3 = s2;                // s3 gives up its own flag and refers to s2's (i.e. sf's)
        STOP_FLAG s4[3];        // default constructions #3, #4 and #5
        s4[0] = s3;             // each element gives up its own flag and refers to s3's
        s4[1] = s3;
        s4[2] = s3;
        STOP_FLAG s5;           // default construction #6
        s3 = s5;                // s3 switches from sf's flag to s5's flag
    }                           // s5, s4[2], s4[1], s4[0], s3 and s2 are destroyed here
}                               // sf is destroyed here
I see `new volatile BOOL` executed 6 times, but `delete` executed only 5 times.
So where is my memory leak coming from?
EDIT: Here's an updated version after a suggestion below. It still produces the same result though:
struct _S_FLAG{
    volatile BOOL* pbStopFlag;
    LONG mRefCount;
    _S_FLAG(volatile BOOL* pb, LONG cntr)
    {
        pbStopFlag = pb;
        mRefCount = cntr;
    }
};
struct STOP_FLAG
{
    STOP_FLAG() 
        : _sf(NULL, 0)
    {
        _sf.pbStopFlag = new (std::nothrow) volatile BOOL;
        TRACE("new\n");
        ASSERT(_sf.pbStopFlag);
        this->grab();
    }
    ~STOP_FLAG()
    {
        this->release();
    }
    STOP_FLAG(const STOP_FLAG& s) 
        : _sf(s._sf.pbStopFlag, s._sf.mRefCount)
    {
        //Copy constructor
        this->grab();
    }
    STOP_FLAG& operator = (const STOP_FLAG& s)
    {
        //Assignment operator
        if(_sf.pbStopFlag != s._sf.pbStopFlag)
        {
            this->release();
            s.grab();
            _sf.pbStopFlag = s._sf.pbStopFlag;
            _sf.mRefCount = s._sf.mRefCount;
        }
        return *this;
    }
    //Helper methods
    volatile BOOL* operator->() const {return _sf.pbStopFlag;}      //x->member
    volatile BOOL& operator*() const {return *_sf.pbStopFlag;}      //*x, (*x).member
    operator volatile BOOL*() const {return _sf.pbStopFlag;}        //T* y = x;
    operator bool() const {return _sf.pbStopFlag != NULL;}          //if(x)
private:
    void grab() const 
    {
        //++mRefCount;
        ::InterlockedIncrement(&_sf.mRefCount);
    }
    void release() const
    {
        ASSERT(_sf.mRefCount > 0);
        //--mRefCount;
        LONG mCnt = ::InterlockedDecrement(&_sf.mRefCount);
        if(mCnt == 0)
        {
            ASSERT(_sf.pbStopFlag);
            if(_sf.pbStopFlag)
            {
                delete _sf.pbStopFlag;
                TRACE("delete\n");
                _sf.pbStopFlag = NULL;
            }
        }
    }
private:
    mutable _S_FLAG _sf;
};
 
     
    