If performance is the concern, I strongly suggest not wrapping the function twice.
You can do without the extra wrappers:
/// Wraps a callable so that each invocation reports how long it took.
///
/// @param task  Caption streamed to std::cout in the report; copied into the
///              returned closure.
/// @param f     Callable to wrap; forwarded (moved or copied) into the closure.
/// @return A generic closure that forwards its arguments to `f`, returns exactly
///         what `f` returns (references included), and prints
///         " -- (<task> completed in <N>µs)" once the call has finished.
template <typename Caption, typename F>
auto timed(Caption const& task, F&& f) {
    // decltype(auto) preserves reference return types of the wrapped callable;
    // a plain deduced return type would silently hand back a copy.
    return [f=std::forward<F>(f), task](auto&&... args) -> decltype(auto) {
        using namespace std::chrono;
        // Scope guard: its destructor runs when this call's scope exits, i.e.
        // after the wrapped callable has produced its result.
        struct measure {
            // steady_clock is monotonic, so the interval cannot be skewed by
            // wall-clock adjustments.
            steady_clock::time_point start;
            Caption const& task;
            ~measure() { std::cout << " -- (" << task << " completed in " << duration_cast<microseconds>(steady_clock::now() - start).count() << "µs)\n"; }
        } timing { steady_clock::now(), task };
        return f(std::forward<decltype(args)>(args)...);
    };
}
See live demo:
Live On Coliru
#include <chrono>
#include <iostream>
/// Wraps a callable so that each invocation reports how long it took.
///
/// @param task  Caption streamed to std::cout in the report; copied into the
///              returned closure.
/// @param f     Callable to wrap; forwarded (moved or copied) into the closure.
/// @return A generic closure that forwards its arguments to `f`, returns exactly
///         what `f` returns (references included), and prints
///         " -- (<task> completed in <N>µs)" once the call has finished.
template <typename Caption, typename F>
auto timed(Caption const& task, F&& f) {
    // decltype(auto) preserves reference return types of the wrapped callable;
    // a plain deduced return type would silently hand back a copy.
    return [f=std::forward<F>(f), task](auto&&... args) -> decltype(auto) {
        using namespace std::chrono;
        // Scope guard: its destructor runs when this call's scope exits, i.e.
        // after the wrapped callable has produced its result.
        struct measure {
            // steady_clock is monotonic, so the interval cannot be skewed by
            // wall-clock adjustments.
            steady_clock::time_point start;
            Caption const& task;
            ~measure() { std::cout << " -- (" << task << " completed in " << duration_cast<microseconds>(steady_clock::now() - start).count() << "µs)\n"; }
        } timing { steady_clock::now(), task };
        return f(std::forward<decltype(args)>(args)...);
    };
}
#include <thread>
int main() {
    // Timed task that prints a greeting and yields the eventual exit code.
    auto io_task = timed("IO", [] {
        std::cout << "hello world\n";
        return 42;
    });

    // Timed task that blocks the current thread for the given number of seconds.
    auto sleep_task = timed("Sleep", [](int seconds) {
        std::this_thread::sleep_for(std::chrono::seconds(seconds));
    });

    sleep_task(1);
    io_task();
    sleep_task(2);

    // The timed closure can also be stored behind std::function; the result of
    // that final call becomes the process exit code.
    std::function<int()> erased_io = io_task;
    return erased_io();
}
Prints (e.g.):
 -- (Sleep completed in 1000188µs)
hello world
 -- (IO completed in 2µs)
 -- (Sleep completed in 2000126µs)
hello world
 -- (IO completed in 1µs)
exitcode: 42
UPDATE: C++11 version
Live On Coliru
#include <chrono>
#include <functional>
#include <iostream>
#include <string>
#include <type_traits>
#include <utility>
namespace detail {
    /// Function object that invokes a stored callable and reports the elapsed
    /// time of each invocation on std::cout (C++11-compatible).
    ///
    /// @tparam F  Type of the stored callable.
    template <typename F>
    struct timed_impl {
        std::string _caption;  ///< Label printed in the timing report.
        F _f;                  ///< The wrapped callable.

        /// Stores a copy of the caption and takes ownership of the callable.
        timed_impl(std::string const& task, F f)
            : _caption(task), _f(std::move(f)) { }

        /// Forwards `args` to the stored callable and returns its result.
        /// Prints " -- (<caption> completed in <N>µs)" when the call finishes.
        template <typename... Args>
        auto operator()(Args&&... args) const -> decltype(_f(std::forward<Args>(args)...))
        {
            using namespace std::chrono;
            // Scope guard: its destructor runs when this call's scope exits,
            // i.e. after the wrapped callable has produced its result.
            struct measure {
                // steady_clock is monotonic, so the interval cannot be skewed
                // by wall-clock adjustments.
                steady_clock::time_point start;
                std::string const& task;
                ~measure() { std::cout << " -- (" << task << " completed in " << duration_cast<microseconds>(steady_clock::now() - start).count() << "µs)\n"; }
            } timing { steady_clock::now(), _caption };
            // Same forwarding expression as in the trailing return type.
            return _f(std::forward<Args>(args)...);
        }
    };
}
/// Builds a timing wrapper around `f` (C++11-compatible).
///
/// @param task  Caption printed with every timing report.
/// @param f     Callable to wrap; rvalues are moved and lvalues are copied into
///              the returned object.
/// @return A detail::timed_impl holding its own copy of the callable.
///
/// The stored type is decayed: with the bare deduced `F`, passing an lvalue
/// callable would instantiate timed_impl with a reference type, whose
/// constructor cannot bind its reference member to `std::move(f)`.
template <typename F>
detail::timed_impl<typename std::decay<F>::type> timed(std::string const& task, F&& f) {
    return { task, std::forward<F>(f) };
}
#include <thread>
int main() {
    // Timed task that prints a greeting and yields the eventual exit code.
    auto io_task = timed("IO", [] {
        std::cout << "hello world\n";
        return 42;
    });

    // Timed task that blocks the current thread for the given number of seconds.
    auto sleep_task = timed("Sleep", [](int seconds) {
        std::this_thread::sleep_for(std::chrono::seconds(seconds));
    });

    sleep_task(1);
    io_task();
    sleep_task(2);

    // The timed wrapper can also be stored behind std::function; the result of
    // that final call becomes the process exit code.
    std::function<int()> erased_io = io_task;
    return erased_io();
}