The following has always proven to be fairly efficient for me:
return (a < b) ? -1 : (a > b);
With gcc -O2 -S, this compiles down to the following five instructions:
xorl    %edx, %edx
cmpl    %esi, %edi
movl    $-1, %eax
setg    %dl
cmovge  %edx, %eax
As a follow-up to Ambroz Bizjak's excellent companion answer, I was not convinced that his program tested the same assembly code what was posted above. And, when I was studying the compiler output more closely, I noticed that the compiler was not generating the same instructions as was posted in either of our answers. So, I took his test program, hand modified the assembly output to match what we posted, and compared the resulting times. It seems the two versions compare roughly identically.
./opt_cmp_branchless: 0m1.070s
./opt_cmp_branch:     0m1.037s
I am posting the assembly of each program in full so that others may attempt the same experiment, and confirm or contradict my observation.
The following is the version with the cmovge instruction ((a < b) ? -1 : (a > b)):
        .file   "cmp.c"
        .text
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%d=0\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB20:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        movl    $arr.2789, %ebx
        subq    $8, %rsp
        .cfi_def_cfa_offset 32
.L9:
        leaq    4(%rbx), %rbp
.L10:
        call    rand
        movb    %al, (%rbx)
        addq    $1, %rbx
        cmpq    %rbx, %rbp
        jne     .L10
        cmpq    $arr.2789+4096, %rbp
        jne     .L9
        xorl    %r8d, %r8d
        xorl    %esi, %esi
        orl     $-1, %edi
.L12:
        xorl    %ebp, %ebp
        .p2align 4,,10
        .p2align 3
.L18:
        movl    arr.2789(%rbp), %ecx
        xorl    %eax, %eax
        .p2align 4,,10
        .p2align 3
.L15:
        movl    arr.2789(%rax), %edx
        xorl    %ebx, %ebx
        cmpl    %ecx, %edx
        movl    $-1, %edx
        setg    %bl
        cmovge  %ebx, %edx
        addq    $4, %rax
        addl    %edx, %esi
        cmpq    $4096, %rax
        jne     .L15
        addq    $4, %rbp
        cmpq    $4096, %rbp
        jne     .L18
        addl    $1, %r8d
        cmpl    $500, %r8d
        jne     .L12
        movl    $.LC0, %edi
        xorl    %eax, %eax
        call    printf
        addq    $8, %rsp
        .cfi_def_cfa_offset 24
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE20:
        .size   main, .-main
        .local  arr.2789
        .comm   arr.2789,4096,32
        .section        .note.GNU-stack,"",@progbits
The version below uses the branchless method ((a > b) - (a < b)):
        .file   "cmp.c"
        .text
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "%d=0\n"
        .text
        .p2align 4,,15
.globl main
        .type   main, @function
main:
.LFB20:
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        movl    $arr.2789, %ebx
        subq    $8, %rsp
        .cfi_def_cfa_offset 32
.L9:
        leaq    4(%rbx), %rbp
.L10:
        call    rand
        movb    %al, (%rbx)
        addq    $1, %rbx
        cmpq    %rbx, %rbp
        jne     .L10
        cmpq    $arr.2789+4096, %rbp
        jne     .L9
        xorl    %r8d, %r8d
        xorl    %esi, %esi
.L19:
        movl    %ebp, %ebx
        xorl    %edi, %edi
        .p2align 4,,10
        .p2align 3
.L24:
        movl    %ebp, %ecx
        xorl    %eax, %eax
        jmp     .L22
        .p2align 4,,10
        .p2align 3
.L20:
        movl    arr.2789(%rax), %ecx
.L22:
        xorl    %edx, %edx
        cmpl    %ebx, %ecx
        setg    %cl
        setl    %dl
        movzbl  %cl, %ecx
        subl    %ecx, %edx
        addl    %edx, %esi
        addq    $4, %rax
        cmpq    $4096, %rax
        jne     .L20
        addq    $4, %rdi
        cmpq    $4096, %rdi
        je      .L21
        movl    arr.2789(%rdi), %ebx
        jmp     .L24
.L21:
        addl    $1, %r8d
        cmpl    $500, %r8d
        jne     .L19
        movl    $.LC0, %edi
        xorl    %eax, %eax
        call    printf
        addq    $8, %rsp
        .cfi_def_cfa_offset 24
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE20:
        .size   main, .-main
        .local  arr.2789
        .comm   arr.2789,4096,32
        .section        .note.GNU-stack,"",@progbits