Edit Fixed the potential race in all versions:
1./1b Employs a semaphore built from a mutex + condition variable + counter, as outlined in "C++0x has no semaphores? How to synchronize threads?"
2. uses a 'reverse' wait to ensure that a signal got ack-ed by the intended worker
I'd really suggest using C++11-style <thread> and <condition_variable> to achieve this.
I have two (and a half) demonstrations. They each assume you have 1 master that drives 10 workers. Each worker awaits a signal before it does its work.
We'll use std::condition_variable (which works in conjunction with a std::mutex) to do the signaling. The difference between the first and second version will be the way in which the signaling is done:
- 1. Notifying any worker, one at a time:
- 1b. With a worker struct
- 2. Notifying all threads, coordinating which recipient worker is to respond
1. Notifying any worker, one at a time:
This is the simplest to do, because there's little coordination going on:
#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <vector>
using namespace std;
/// Minimal counting semaphore assembled from a mutex, a condition variable and
/// a counter.
/// See https://stackoverflow.com/questions/4792449/c0x-has-no-semaphores-how-to-synchronize-threads
class semaphore
{
    std::mutex mx;               // locked by notify() and wait() while they touch `count`
    std::condition_variable cv;  // waited on / notified together with `mx`
    unsigned long count = 0;     // a fresh semaphore starts at zero
public:
    semaphore() = default;

    /// Increments the counter and wakes one waiter.
    void notify();

    /// Blocks until the counter is non-zero, then decrements it.
    void wait();
};
// Forward declaration: master's constructor hands `run` to each thread.
static void run(int id, struct master& m);

/// Owns ten worker threads. Each is started in the constructor with
/// run(i, *this), and drive() posts one semaphore notification per thread.
struct master
{
    // Declared before `threads` so it is constructed first and destroyed last.
    mutable semaphore sem;

    /// Starts the ten workers, numbered 0 through 9.
    master()
    {
        int id = 0;
        while (id < 10)
        {
            threads.emplace_back(run, id, std::ref(*this));
            ++id;
        }
    }

    /// Joins every worker that is still joinable, then prints "done".
    ~master()
    {
        for (std::thread& worker : threads)
        {
            if (!worker.joinable())
                continue;
            worker.join();
        }
        std::cout << "done\n";
    }

    /// Posts one notification per worker thread, sleeping a pseudo-random
    /// 0-99 ms before each one.
    void drive()
    {
        // do wakeups
        for (auto pending = threads.size(); pending != 0; --pending)
        {
            const auto pause = std::chrono::milliseconds(rand() % 100);
            std::this_thread::sleep_for(pause);
            sem.notify();
        }
    }

  private:
    std::vector<std::thread> threads;
};
/// Worker entry point: waits on the master's semaphore, then prints its id.
static void run(int id, master& m)
{
    m.sem.wait();  // park here until the semaphore count becomes non-zero

    // One mutex shared by all calls, held while writing to std::cout.
    static std::mutex io_mx;
    const std::lock_guard<std::mutex> io_guard(io_mx);
    std::cout << "signaled: " << id << "\n";
}
int main()
{
    master instance;
    instance.drive();
}
/// semaphore members
/// Adds one unit to the counter and wakes a single waiter, with the mutex held
/// for the whole operation.
void semaphore::notify()
{
    std::lock_guard<std::mutex> guard(mx);
    count += 1;
    cv.notify_one();
}
void semaphore::wait()
{
    unique_lock<mutex> lk(mx);
    while(!count)
        cv.wait(lk);
    --count;
}
1b. With a worker struct
Note, if you had worker classes with worker::run a non-static member function, you can do the same with minor modifications: 
/// A worker identified by an integer id; run() is the function each worker
/// thread executes.
struct worker
{
    int id;  // printed by run() once the worker has been signaled

    worker(int id) : id(id) {}

    /// Waits on the master's semaphore, then reports `id`.
    void run(master& m) const;
};
// ...
struct master
{
    // ...
    /// Creates workers 0 through 9, then starts one thread per worker that
    /// executes worker::run on it with this master as the argument.
    master()
    {
        // All workers are created before any thread takes a reference to one.
        int next_id = 0;
        while (next_id < 10)
        {
            workers.emplace_back(next_id);
            ++next_id;
        }

        for (auto& w : workers)
        {
            threads.emplace_back(&worker::run, std::ref(w), std::ref(*this));
        }
    }
// ...
/// Thread body of a worker: waits on the master's semaphore, then prints the
/// worker's id.
void worker::run(master& m) const
{
    m.sem.wait();  // park here until the semaphore count becomes non-zero

    // One mutex shared by all calls, held while writing to std::cout.
    static std::mutex io_mx;
    const std::lock_guard<std::mutex> io_guard(io_mx);
    std::cout << "signaled: " << id << "\n";
}
A caveat
- cv.wait() could suffer spurious wake-ups, in which the condition variable wasn't actually raised (e.g. in the event of OS signal handlers). This is a common thing to happen with condition variables on any platform.
The following approach fixes this:
2. Notifying all threads, coordinating which recipient worker is to respond
Use a flag to signal which thread was intended to receive the signal:
struct master
{
    mutable mutex mx;
    mutable condition_variable cv;
    int signaled_id;               // ADDED
    master() : signaled_id(-1)
    {
Let's pretend that the driver got a lot more interesting and wants to signal all workers in a specific (random...) order:
    /// Signals ids 0 through 9 exactly once each, in a freshly shuffled order,
    /// sleeping a random 0-999 ms before each signal.
    void drive()
    {
        // std::random_shuffle was removed in C++17; std::shuffle with an
        // explicit engine replaces it. The same engine also supplies the
        // delays, so this function no longer depends on rand().
        std::mt19937 rng{std::random_device{}()};

        // generate random wakeup order
        std::vector<int> wakeups(10);
        std::iota(wakeups.begin(), wakeups.end(), 0);
        std::shuffle(wakeups.begin(), wakeups.end(), rng);

        // do wakeups
        std::uniform_int_distribution<int> delay_ms(0, 999);
        for (const int id : wakeups)
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms(rng)));
            signal(id);
        }
    }
  private:
    /// Publishes `id` as the intended recipient, wakes every waiter, and then
    /// blocks until signaled_id has been set back to -1.
    void signal(int id)                // ADDED id
    {
        std::unique_lock<std::mutex> guard(mx);
        std::cout << "signaling " << id << "\n";
        signaled_id = id;              // ADDED put it in the shared field
        cv.notify_all();

        // 'Reverse' wait: stay here until the field is reset to -1.
        while (signaled_id != -1)
        {
            cv.wait(guard);
        }
    }
Now all we have to do is make sure that the receiving thread checks that its id matches:
// Sleep until the master has stored this worker's id in signaled_id.
m.cv.wait(lk, [&] { return m.signaled_id == id; });
// Acknowledge: reset the shared field to -1, the value master::signal() waits for.
m.signaled_id = -1;
// Wake every thread waiting on the condition variable so the reset is seen.
m.cv.notify_all();
This puts an end to spurious wake-ups.
Full code listings/live demos: