I wrote generic version of index sort.
template <class RAIter, class Compare>
void argsort(RAIter iterBegin, RAIter iterEnd, Compare comp, 
    std::vector<size_t>& indexes) {
    std::vector< std::pair<size_t,RAIter> > pv ;
    pv.reserve(iterEnd - iterBegin) ;
    RAIter iter ;
    size_t k ;
    for (iter = iterBegin, k = 0 ; iter != iterEnd ; iter++, k++) {
        pv.push_back( std::pair<int,RAIter>(k,iter) ) ;
    }
    std::sort(pv.begin(), pv.end(), 
        [&comp](const std::pair<size_t,RAIter>& a, const std::pair<size_t,RAIter>& b) -> bool 
        { return comp(*a.second, *b.second) ; }) ;
    indexes.resize(pv.size()) ;
    std::transform(pv.begin(), pv.end(), indexes.begin(), 
        [](const std::pair<size_t,RAIter>& a) -> size_t { return a.first ; }) ;
}
Usage is the same as that of std::sort except for an index container to receive sorted indexes.
testing:
int a[] = { 3, 1, 0, 4 } ;
std::vector<size_t> indexes ;
argsort(a, a + sizeof(a) / sizeof(a[0]), std::less<int>(), indexes) ;
for (size_t i : indexes) printf("%d\n", int(i)) ;
you should get 2 1 0 3.
for the compilers without c++0x support, replace the lamba expression as a class template:
template <class RAIter, class Compare> 
class PairComp {
public:
  Compare comp ;
  PairComp(Compare comp_) : comp(comp_) {}
  bool operator() (const std::pair<size_t,RAIter>& a, 
    const std::pair<size_t,RAIter>& b) const { return comp(*a.second, *b.second) ; }        
} ;
and rewrite std::sort as
std::sort(pv.begin(), pv.end(), PairComp(comp)()) ;