After some works I wrote final version of this method. This XPathAppend does not use recursion, instead it uses some trick for inline parameter pack expansion for variadic template (look at  pretty-print a tuple (from https://stackoverflow.com/a/6245777/273767)). 
For comparison of the methods speed I added sample in main() body. So:
#include <iostream>
#include <string>
#include <utility>
#include <tuple>
#include <chrono>
// TCHAR strings as in MSVC++
#define TCHAR wchar_t
#define _T(x) L ## x
namespace std {
    typedef basic_string<TCHAR> tstring;
}
// xml node name normalization (trim spaces and delimeter slashes)
template<typename T>
inline std::tstring XPathNormalizeNodeName(T&& itm) {
    std::tstring val(std::forward<T>(itm));
    if (!val.empty()) {
        auto posS = val.find_first_not_of(_T(" /"));
        auto posE = (posS != std::tstring::npos) ? val.find_last_not_of(_T(" /")) : std::tstring::npos;
        if ((posS != std::tstring::npos) & (posE != std::tstring::npos)) {
            if (((++posE) < val.length()) | (posS > 0)) {
                val = val.substr(posS, posE - posS);
            }
        }
        else {
            val.clear();
        }
    }
    return val;
}
template<typename TFirst>
inline std::tstring XPathAppend_Old(TFirst&& itm) {
    return XPathNormalizeNodeName(std::forward<TFirst>(itm));
}
// build xpath by the names concatenations (recusrion method)
template<typename TFirst, typename ...Types>
inline std::tstring XPathAppend_Old(TFirst&& first, Types&&... args) {
    std::tstring left(XPathAppend_Old(std::forward<TFirst>(first)));
    std::tstring right(XPathAppend_Old(std::forward<Types>(args)...));
    if (!left.empty() & !right.empty()) {
        left += _T("/");
    }
    left += std::move(right);
    return left;
}
// build xpath by the names concatenations (parameter pack expansion method)
template<typename TFirst, typename ...Types>
inline std::tstring XPathAppend(TFirst&& first, Types&&... args) {
    std::tstring res(XPathNormalizeNodeName(std::forward<TFirst>(first)));
    using swallow = int[];
    (void)swallow{0, (void(res += _T('/')), void(res += XPathNormalizeNodeName(std::forward<Types>(args))), 0)...};
    return res;
}
// Test
int main()
{
    const size_t nIterations = 400000;
    {
        std::tstring a(_T("A")); 
        std::wcout << XPathAppend(a, std::tstring(_T("B/")), std::tstring(_T("/C")));
        auto start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend(a, std::tstring(_T("B/")), std::tstring(_T("/C")));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend_Old(a, std::tstring(_T("B/")), std::tstring(_T("/C")));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        std::wcout << std::endl;
    }
    {
        std::wcout << XPathAppend(std::tstring(_T("A")), std::tstring(_T("B")), std::tstring(_T("C")));
        auto start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend(std::tstring(_T("A")), std::tstring(_T("B")), std::tstring(_T("C")));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend_Old(std::tstring(_T("A")), std::tstring(_T("B")), std::tstring(_T("C")));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        std::wcout << std::endl;
    }
    {
        std::wcout << XPathAppend(_T("A"), _T("B"), _T("C"));
        auto start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend(_T("A"), _T("B"), _T("C"));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        start = std::chrono::high_resolution_clock::now();
        for(size_t nIndex = 0; nIndex<nIterations; nIndex++) {
            XPathAppend_Old(_T("A"), _T("B"), _T("C"));
        }
        std::wcout << _T(" (") << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start).count() << _T("ms.)");
        std::wcout << std::endl;
    }
}
Online demo.
Results, first duration for iteration method, last - for old version, with recursion:
A/B/C (202ms.) (233ms.)
A/B/C (169ms.) (196ms.)
A/B/C (167ms.) (195ms.)
As you can see performance difference is minimal - recursion and iteration method are very similar for the optimized code.