I am trying to use ZeroMQ for fast message passing. Messages need to be delivered in less than 1 [ms]. I did some testing (inproc, single process on Linux, no TCP) and see that usually there is no problem with that. The latency is about 10 - 100 [us], depending on how often the messages are sent (why?). Sometimes, however, messages are received after 6 [ms], which is unacceptable. 
What can be the cause that some messages are delayed?
Maybe the process is preempted?
Or is it because of the polling used (zmq_poll())?
Example results from my test :
avg lag =    28    [us]
max lag =  5221    [us]
std dev =    25.85 [us]
big lag =   180    x above 200 [us]
"big lag" means the number of cases where the latency was over 200 [us]. In my tests 500 000 messages are sent, so the value 180 means that a latency over 200 [us] was recorded in 180 / 500000 = 0.036% of cases. That is quite a low number, but I'd like it to be zero, even at the expense of average latency.
The test source code is below :
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <zmq.h>
/* Number of PUSH/PULL socket pairs; also the number of worker threads started per run. */
#define SOCKETS_NUM 5
/* Number of times main() calls run_test(). */
#define RUNS 100000
/* ZeroMQ context created in main() and used by every worker thread to create its socket. */
void *context;
/* numbers[i] is set to i in main(); &numbers[i] is handed to worker i as its thread argument. */
int numbers[SOCKETS_NUM];
/*
 * One timestamp pair per message (SOCKETS_NUM messages per run, RUNS runs).
 * ptimes points at the first slot of the run currently in progress; main()
 * advances it by SOCKETS_NUM after each run.
 */
struct {
    struct timespec send_time;    /* written by the worker just before zmq_send() */
    struct timespec receive_time; /* written by run_test() when the socket polls readable */
} times[SOCKETS_NUM * RUNS], *ptimes;
/*
 * Thread entry point for one sender.
 *
 * dummy points to an int holding this worker's index. The worker connects a
 * PUSH socket to "inproc://endpoint<index>", sleeps for rand() / 3000
 * nanoseconds, stores the send timestamp in ptimes[index].send_time, sends the
 * 5-byte message "Hello" and closes the socket. Always returns NULL.
 *
 * A ZeroMQ failure is reported on stderr and terminates the process: the
 * receiving side polls with an infinite timeout, so a worker that silently
 * skipped its send would leave the whole test hanging.
 *
 * NOTE(review): CLOCK_REALTIME can be stepped by time adjustments; for latency
 * measurement CLOCK_MONOTONIC is usually preferable, but the receiver must be
 * switched to the same clock at the same time.
 */
static void * worker_thread(void * dummy) {
    const int index = *(const int *)dummy;

    /* The trailing 'X' (last character before the terminator) is replaced by the index digit. */
    char endpoint[] = "inproc://endpointX";
    endpoint[sizeof endpoint - 2] = (char)('0' + index);

    void * socket = zmq_socket(context, ZMQ_PUSH);
    if (socket == NULL) {
        perror("zmq_socket");
        exit(EXIT_FAILURE);
    }
    if (zmq_connect(socket, endpoint) != 0) {
        perror("zmq_connect");
        exit(EXIT_FAILURE);
    }

    /* Random delay so that the workers do not all send at the same instant. */
    struct timespec sleeptime;
    sleeptime.tv_sec = 0;
    sleeptime.tv_nsec = rand() / 3000;
    nanosleep(&sleeptime, NULL);

    /* Timestamp immediately before the send so the lag covers only message delivery. */
    clock_gettime(CLOCK_REALTIME, &(ptimes[index].send_time));
    if (zmq_send(socket, "Hello", 5, 0) < 0) {
        perror("zmq_send");
        exit(EXIT_FAILURE);
    }

    if (zmq_close(socket) != 0) {
        perror("zmq_close");
    }
    return NULL;
}
/*
 * Runs one measurement round.
 *
 * Starts SOCKETS_NUM worker threads (worker i receives &numbers[i]), then polls
 * the SOCKETS_NUM entries of items until exactly one message has been received
 * from each round's workers, i.e. SOCKETS_NUM messages in total. For every
 * readable socket j the receive timestamp is stored in ptimes[j].receive_time
 * before the message is read. Finally joins all workers.
 *
 * items: array of SOCKETS_NUM poll items, one per PULL socket, with
 *        ZMQ_POLLIN requested.
 *
 * Unlike a fixed number of poll iterations, the loop keeps going until all
 * messages are in, so an interrupted zmq_poll() (EINTR) cannot leave a message
 * behind to pollute the next round. Any other failure is reported on stderr and
 * terminates the process.
 *
 * NOTE(review): CLOCK_REALTIME can be stepped by time adjustments; for latency
 * measurement CLOCK_MONOTONIC is usually preferable, but the sender must be
 * switched to the same clock at the same time.
 */
static void run_test(zmq_pollitem_t items[]) {
    pthread_t threads[SOCKETS_NUM];
    for (int i = 0; i < SOCKETS_NUM; i++) {
        int err = pthread_create(&threads[i], NULL, worker_thread, &numbers[i]);
        if (err != 0) {
            fprintf(stderr, "pthread_create: error %d\n", err);
            exit(EXIT_FAILURE);
        }
    }

    char buffer[10];
    int to_receive = SOCKETS_NUM;
    while (to_receive > 0) {
        int rc = zmq_poll(items, SOCKETS_NUM, -1);
        if (rc < 0) {
            if (errno == EINTR) {
                continue; /* interrupted by a signal: just poll again */
            }
            perror("zmq_poll");
            exit(EXIT_FAILURE);
        }
        for (int j = 0; j < SOCKETS_NUM; j++) {
            if (!(items[j].revents & ZMQ_POLLIN)) {
                continue;
            }
            /* Timestamp first, so the time spent copying the message is not counted. */
            clock_gettime(CLOCK_REALTIME, &(ptimes[j].receive_time));
            if (zmq_recv(items[j].socket, buffer, sizeof buffer, 0) < 0) {
                perror("zmq_recv");
                exit(EXIT_FAILURE);
            }
            to_receive--;
        }
    }

    for (int i = 0; i < SOCKETS_NUM; i++) {
        pthread_join(threads[i], NULL);
    }
}
/*
 * Time from *sent to *received in whole microseconds (truncated toward zero).
 * The arithmetic is done in long long so the seconds-to-nanoseconds
 * multiplication cannot overflow where time_t/long are 32 bits wide.
 */
static long lag_us(const struct timespec *sent, const struct timespec *received)
{
    long long ns = (long long)(received->tv_sec - sent->tv_sec) * 1000000000LL
                   + (received->tv_nsec - sent->tv_nsec);
    return (long)(ns / 1000);
}

/*
 * Test driver: binds SOCKETS_NUM PULL sockets on inproc endpoints, executes
 * RUNS measurement rounds via run_test(), then prints the average, maximum and
 * standard deviation of the per-message lag plus the number of lags above
 * 200 us.
 *
 * Returns 0 on success and EXIT_FAILURE if ZeroMQ setup or the allocation of
 * the lag buffer fails (remaining resources are then reclaimed by process
 * exit).
 */
int main(void)
{
    const int samples = SOCKETS_NUM * RUNS;

    context = zmq_ctx_new();
    if (context == NULL) {
        perror("zmq_ctx_new");
        return EXIT_FAILURE;
    }

    /*
     * Scheduling options must be set before the first socket is created.
     * A failure here is only a warning; the test still runs.
     * NOTE(review): these options apply to ZeroMQ's I/O threads; the inproc
     * transport is generally documented as not using I/O threads, so they may
     * have no effect on this test - confirm against the libzmq documentation.
     */
    if (zmq_ctx_set(context, ZMQ_THREAD_SCHED_POLICY, SCHED_FIFO) != 0) {
        perror("zmq_ctx_set(ZMQ_THREAD_SCHED_POLICY)");
    }
    if (zmq_ctx_set(context, ZMQ_THREAD_PRIORITY, 99) != 0) {
        perror("zmq_ctx_set(ZMQ_THREAD_PRIORITY)");
    }

    void * responders[SOCKETS_NUM];
    zmq_pollitem_t poll_items[SOCKETS_NUM];
    char endpoint[] = "inproc://endpointX";
    for (int i = 0; i < SOCKETS_NUM; i++) {
        responders[i] = zmq_socket(context, ZMQ_PULL);
        if (responders[i] == NULL) {
            perror("zmq_socket");
            return EXIT_FAILURE;
        }
        /* Replace the trailing 'X' with the socket index digit. */
        endpoint[sizeof endpoint - 2] = (char)('0' + i);
        if (zmq_bind(responders[i], endpoint) != 0) {
            perror("zmq_bind");
            return EXIT_FAILURE;
        }
        numbers[i] = i;

        poll_items[i].socket = responders[i];
        poll_items[i].fd = 0;
        poll_items[i].events = ZMQ_POLLIN;
        poll_items[i].revents = 0;
    }

    srand((unsigned int)time(NULL));

    ptimes = times;
    for (int i = 0; i < RUNS; i++) {
        run_test(poll_items);
        ptimes += SOCKETS_NUM;
    }

    /* Heap allocation: several megabytes of lags would be risky on the stack. */
    long *lags = malloc((size_t)samples * sizeof *lags);
    if (lags == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    long total_lag = 0;
    long max_lag = 0;
    long big_lag = 0;
    for (int i = 0; i < samples; i++) {
        lags[i] = lag_us(&times[i].send_time, &times[i].receive_time);
        if (lags[i] > max_lag) {
            max_lag = lags[i];
        }
        total_lag += lags[i];
        if (lags[i] > 200) {
            big_lag++;
        }
    }

    /* The printed average is a whole number; the deviation uses the exact mean. */
    long avg_lag = total_lag / samples;
    double mean = (double)total_lag / samples;
    double sq_sum = 0.0;
    for (int i = 0; i < samples; ++i) {
        double d = (double)lags[i] - mean;
        sq_sum += d * d;
    }
    double std_lag = sqrt(sq_sum / samples);

    printf("avg lag = %5ld    [us]\n", avg_lag);
    printf("max lag = %5ld    [us]\n", max_lag);
    printf("std dev = %8.2f [us]\n", std_lag);
    printf("big lag = %5ld    x above 200 [us]\n", big_lag);

    free(lags);
    for (int i = 0; i < SOCKETS_NUM; i++) {
        zmq_close(responders[i]);
    }
    zmq_ctx_destroy(context);
    return 0;
}