Perf allows you to use control file descriptors to enable or disable counting. It is documented under perf stat's man page, but it also works for perf record.
In short:
- You need to have a file descriptor ready and pass it to both
perf stat or perf record (with --control fd:${ctl_fd}) and your application (via argv, environment variables, or however else you want). When your application wants perf to start measuring, it writes enable\n to that file descriptor; to stop measuring, it writes disable\n.
- Optionally, if you want perf to acknowledge that it did what you asked, you need to have a second file descriptor ready and pass both to perf with
--control fd:${ctl_fd},${ctl_fd_ack}. Perf will respond by writing 5 bytes back (i.e., ack\n followed by a null-terminator) that your application can read and check.
If the example in the documentation looks cryptic or too terse for you, here is an expanded version (NOTE: this code is intended to be short, it is NOT safe or suitable for production):
#include <assert.h> // assert
#include <stddef.h> // size_t
#include <stdio.h> // printf
#include <stdlib.h> // atoi, getenv
#include <string.h> // strcmp
#include <unistd.h> // read, write
/*
 * Return the n-th Fibonacci number (fib(0) == 0, fib(1) == 1).
 *
 * Uses plain double recursion with no memoisation, so the running time grows
 * exponentially with n. main() calls it between the perf "enable" and
 * "disable" commands, i.e. it is the workload being measured.
 */
size_t fib(size_t n)
{
    /* Base cases: fib(0) and fib(1) are equal to their own index. */
    if (n < 2)
    {
        return n;
    }

    return fib(n - 1) + fib(n - 2);
}
/* perf acknowledges every command with "ack\n" plus a terminating NUL. */
enum { PERF_ACK_SIZE = 5 };

/*
 * Read a file descriptor number from the environment variable `name`.
 *
 * Returns the descriptor, or -1 (after printing a diagnostic to stderr) when
 * the variable is unset, empty, or not a non-negative decimal integer that
 * fits in an int.
 */
static int perf_fd_from_env(const char* name)
{
    const char* text = getenv(name);
    char* end = NULL;
    long value;

    if (text == NULL || *text == '\0')
    {
        fprintf(stderr, "Environment variable %s is not set.\n", name);
        return -1;
    }

    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || value < 0 || (long)(int)value != value)
    {
        fprintf(stderr, "Environment variable %s=\"%s\" is not a valid file descriptor.\n", name, text);
        return -1;
    }

    return (int)value;
}

/*
 * Send one newline-terminated command (e.g. "enable\n") to perf on `ctl_fd`
 * and wait for the acknowledgement on `ack_fd`.
 *
 * Returns 0 once the full acknowledgement has been received and verified,
 * -1 (after printing a diagnostic to stderr) on any I/O error, premature
 * end-of-file, or unexpected reply.
 */
static int perf_send_command(int ctl_fd, int ack_fd, const char* command)
{
    const size_t length = strlen(command);
    size_t sent = 0;
    size_t received = 0;
    char ack[PERF_ACK_SIZE];

    /* Only the command text and its newline are sent, not the string's NUL. */
    while (sent < length)
    {
        ssize_t count = write(ctl_fd, command + sent, length - sent);
        if (count <= 0)
        {
            perror("write to perf control fd");
            return -1;
        }
        sent += (size_t)count;
    }

    /* read() may return fewer bytes than requested, so loop until complete. */
    while (received < sizeof ack)
    {
        ssize_t count = read(ack_fd, ack + received, sizeof ack - received);
        if (count < 0)
        {
            perror("read from perf ack fd");
            return -1;
        }
        if (count == 0)
        {
            fprintf(stderr, "perf ack fd closed before a full acknowledgement arrived.\n");
            return -1;
        }
        received += (size_t)count;
    }

    /* Compare all PERF_ACK_SIZE bytes, including the trailing NUL. */
    if (memcmp(ack, "ack\n", sizeof ack) != 0)
    {
        fprintf(stderr, "Unexpected acknowledgement from perf.\n");
        return -1;
    }

    return 0;
}

/*
 * Demo entry point: enables perf counting, runs fib(45), disables counting,
 * and prints the result.
 *
 * The control and ack file descriptors are taken from the environment
 * variables PERF_CTL_FD and PERF_CTL_ACK_FD (they could equally be passed on
 * the command line; argc/argv are currently unused).
 *
 * Returns 0 on success, 1 if the descriptors are missing/invalid or if perf
 * does not acknowledge a command.
 */
int main(int argc, char* argv[])
{
    int perf_ctl_fd;
    int perf_ctl_ack_fd;
    size_t n;
    size_t r;

    (void)argc;
    (void)argv;

    perf_ctl_fd = perf_fd_from_env("PERF_CTL_FD");
    perf_ctl_ack_fd = perf_fd_from_env("PERF_CTL_ACK_FD");
    if (perf_ctl_fd < 0 || perf_ctl_ack_fd < 0)
    {
        return 1;
    }

    n = 45;

    // Start the performance counter and wait for the ack
    if (perf_send_command(perf_ctl_fd, perf_ctl_ack_fd, "enable\n") != 0)
    {
        return 1;
    }

    // Compute the fibonacci number
    r = fib(n);

    // Stop the performance counter and wait for the ack
    if (perf_send_command(perf_ctl_fd, perf_ctl_ack_fd, "disable\n") != 0)
    {
        return 1;
    }

    // Print the result
    printf("Result: %zu\n", r);
    return 0;
}
+ mkfifo ctl_fd.fifo
+ exec {ctl_fd}<>ctl_fd.fifo
+ mkfifo ctl_fd_ack.fifo
+ exec {ctl_fd_ack}<>ctl_fd_ack.fifo
+ echo ctl_fd: $ctl_fd
ctl_fd: 10
+ echo ctrl_fd_ack: $ctl_fd_ack
ctrl_fd_ack: 11
+ PERF_CTL_FD=$ctl_fd PERF_CTL_ACK_FD=$ctl_fd_ack perf stat --delay=-1 --control fd:${ctl_fd},${ctl_fd_ack} -- build/prog
++ PERF_CTL_FD=10
++ PERF_CTL_ACK_FD=11
++ perf stat --delay=-1 --control fd:10,11 -- build/prog
Events disabled
Events enabled
Events disabled
Result: 1134903170
Performance counter stats for 'build/prog':
2,259.30 msec task-clock # 1.000 CPUs utilized
4 context-switches # 1.770 /sec
0 cpu-migrations # 0.000 /sec
0 page-faults # 0.000 /sec
7,162,104,467 cycles # 3.170 GHz
22,618,760,242 instructions # 3.16 insn per cycle
4,023,428,346 branches # 1.781 G/sec
14,050,645 branch-misses # 0.35% of all branches
2.260329786 seconds time elapsed
0.000000000 seconds user
0.000000000 seconds sys
+ exec {ctl_fd}>&-
+ exec {ctl_fd_ack}>&-
+ unlink ctl_fd.fifo
+ unlink ctl_fd_ack.fifo
The same approach works with perf record:
+ mkfifo ctl_fd.fifo
+ exec {ctl_fd}<>ctl_fd.fifo
+ mkfifo ctl_fd_ack.fifo
+ exec {ctl_fd_ack}<>ctl_fd_ack.fifo
+ PERF_CTL_FD=$ctl_fd PERF_CTL_ACK_FD=$ctl_fd_ack perf record --delay=-1 --control fd:${ctl_fd},${ctl_fd_ack} -- build/prog
++ PERF_CTL_FD=10
++ PERF_CTL_ACK_FD=11
++ perf record --delay=-1 --control fd:10,11 -- build/prog
Events disabled
Events enabled
Events disabled
Result: 1134903170
[ perf record: Woken up 7 times to write data ]
[ perf record: Captured and wrote 0.435 MB perf.data (9053 samples) ]
+ exec {ctl_fd}>&-
+ exec {ctl_fd_ack}>&-
+ perf report
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 9K of event 'cycles'
# Event count (approx.): 7173277849
#
# Overhead Command Shared Object Symbol
# ........ ....... ................. .............................
#
99.99% prog prog [.] fib
0.01% prog [kernel.kallsyms] [k] rcu_check_callbacks
+ unlink ctl_fd.fifo
+ unlink ctl_fd_ack.fifo