Consider the following example. (CPU: Intel(R) Core(TM) i7-4790; compiler: gcc (GCC) 4.8.5 20150623)
// file test.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Seed the PRNG from the clock, draw one pseudo-random float scaled to
 * [0, 1], and print it when it is below 0.042.
 *
 * The threshold is intentionally written as the double constant 0.042
 * (not 0.042f): comparing a float against it promotes the float to double,
 * which is the behaviour this example is meant to exhibit.
 */
int main()
{
    srand(time(0));
    /* rand() yields an int in [0, RAND_MAX]; divide to scale it to [0, 1].
     * Without the division, i < 0.042 could only hold when rand() == 0. */
    float i = (float)rand() / (float)(RAND_MAX);
    if (i < 0.042) {
        printf("i %f\n", i);
    }
}
Compile the source code with:
gcc test.c -O0 -g3
Show the disassembly with:
objdump -S a.out
The result is shown here:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int main()
{
  4005fd:   55                      push   %rbp
  4005fe:   48 89 e5                mov    %rsp,%rbp
  400601:   48 83 ec 10             sub    $0x10,%rsp
    srand(time(0));
  400605:   bf 00 00 00 00          mov    $0x0,%edi
  40060a:   e8 e1 fe ff ff          callq  4004f0 <time@plt>
  40060f:   89 c7                   mov    %eax,%edi
  400611:   e8 ba fe ff ff          callq  4004d0 <srand@plt>
    float i = (float)rand() / (float)(RAND_MAX);
  400616:   e8 e5 fe ff ff          callq  400500 <rand@plt>
  40061b:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  40061f:   f3 0f 10 0d d1 00 00    movss  0xd1(%rip),%xmm1        # 4006f8 <__dso_handle+0x10>
  400626:   00 
  400627:   f3 0f 5e c1             divss  %xmm1,%xmm0
  40062b:   f3 0f 11 45 fc          movss  %xmm0,-0x4(%rbp)
    if (i < 0.042) {
  400630:   f3 0f 10 45 fc          movss  -0x4(%rbp),%xmm0
  400635:   0f 5a c0                cvtps2pd %xmm0,%xmm0
  400638:   f2 0f 10 0d c0 00 00    movsd  0xc0(%rip),%xmm1        # 400700 <__dso_handle+0x18>
  40063f:   00 
  400640:   66 0f 2e c8             ucomisd %xmm0,%xmm1
  400644:   76 17                   jbe    40065d <main+0x60>
        printf("i %f\n", i);
  400646:   f3 0f 10 45 fc          movss  -0x4(%rbp),%xmm0
  40064b:   0f 5a c0                cvtps2pd %xmm0,%xmm0
  40064e:   bf f0 06 40 00          mov    $0x4006f0,%edi
  400653:   b8 01 00 00 00          mov    $0x1,%eax
  400658:   e8 53 fe ff ff          callq  4004b0 <printf@plt>
    }
}
I found that it loads 0.042 as a double-precision value, converts "i" to double precision, and then performs the comparison in double precision. Why doesn't it just load a single-precision 0.042 and compare in single precision (which I think may give better performance)? And if I want to force it to do that, what should I do?
 
     
    