I always believed that the function below foo2 is faster than foo3 untile after a test.
All code below:
#include <iostream>
#include <boost/timer.hpp>
#include <boost/lexical_cast.hpp>
#include <stdint.h>
struct session {
bool operator==(const session& r) const;
uint8_t proto;
uint16_t sport;
uint16_t dport;
uint32_t sip;
uint32_t dip;
};
bool session::operator==(const session& r) const {
return proto == r.proto && sport == r.sport && dport == r.dport
&& sip == r.sip && dip == r.dip;
}
// my L1,L2,L3 total cache size is 16MB, so set it 32MB to overflow all 16MB caches.
static const int SIZE = 32 * 1024 * 1024 / sizeof(session);
int sum;
void foo1(session* p) {
session s = {1, 2, 3, 4, 5};
for (int i = 0; i < SIZE; i++)
if (p[i] == s)
sum++;
}
void foo2(session* p) {
session s = {1, 2, 3, 4, 5};
int n = SIZE - SIZE % 4;
int i;
for (i = 0; i < n; i += 4) {
if (p[i + 0] == s)
sum++;
if (p[i + 1] == s)
sum++;
if (p[i + 2] == s)
sum++;
if (p[i + 3] == s)
sum++;
}
/*
for (; i < SIZE; i++)
if (p[i] == s)
sum++;
*/
}
void foo3(session* p) {
session s = {1, 2, 3, 4, 5};
int n = SIZE - SIZE % 4;
int i;
for (i = 0; i < n; i += 4) {
if (p[i + 0] == s)
sum++;
else if (p[i + 1] == s)
sum++;
else if (p[i + 2] == s)
sum++;
else if (p[i + 3] == s)
sum++;
}
/*
for (; i < SIZE; i++)
if (p[i] == s)
sum++;
*/
}
int main(int argc, char* argv[]) {
if (argc < 2)
return -1;
int n = boost::lexical_cast<int>(argv[1]);
session* p = new session[SIZE];
boost::timer t;
for (int i = 0; i < n; i++)
foo1(p);
std::cout << t.elapsed() << std::endl;
t.restart();
for (int i = 0; i < n; i++)
foo2(p);
std::cout << t.elapsed() << std::endl;
t.restart();
for (int i = 0; i < n; i++)
foo3(p);
std::cout << t.elapsed() << std::endl;
delete [] p;
return 0;
}
test 1000 times, ./a.out 1000
output:
4.36
3.98
3.96
My machine:
CPU: Intel(R) Xeon(R) CPU E5-2420 0 @ 1.90GHz
Caches:
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 15360K
In the test, foo2 and foo3 have equivalence performance. Due to foo2 could case
CPU execut all unrolled expressions in parallel, so foo3 is the same. It that right? If so, the else if syntax is violate the C/C++ basic else if semantics.
Is there someone explain it? Thanks a lot.
Update
My compiler is gcc 4.4.6 ins RedHat
g++ -Wall -O2 a.cpp