Taken from this SO thread, this piece of code calculates the number of CPU cycles elapsed running code between lines //1 and //2.
$ cat cyc.c 
#include<stdio.h>
static __inline__ unsigned long long rdtsc(void)
{
  unsigned long long int x;
     __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
     return x;
}
int main() {
    unsigned long long cycles = rdtsc(); //1
    cycles = rdtsc() - cycles;           //2
    printf("Time is %d\n", (unsigned)cycles);
    return 0;
}
$ gcc cyc.c -o cyc
$ ./cyc
Time is 73
$ ./cyc
Time is 74
$ ./cyc
Time is 63
$ ./cyc
Time is 73
$
How does the rdtsc() function work?
 
     
    