The class A has a template member function A::runImpl. The function A::run calls a specialized implementation based on the value of A::m_case, which is set within the class constructor.
In my project, the run function is called very frequently. The program would run over 5% faster if the branch inside it could be eliminated. Is there a template technique that can do this?
My project is compiled with GCC 7.3.1 in C++14 mode.
#include <iostream>
#include <cstdlib>
#include <cassert>
using namespace std;
class A {
public:
  A (uint32_t * arr, size_t len) : m_case(0) {
    for (size_t i = 0; i < len; ++i) {
      m_case += arr[i];
    }
  }
  template <size_t> void runImpl() { assert(0); };
  void run();
private:
  size_t m_case;
};
template <>
inline void A::runImpl<0>() {
  cout << "Default execution path." << endl;
}
template <>
inline void A::runImpl<1>() {
  cout << "Optimized execution path 1." << endl;
}
template <>
inline void A::runImpl<2>() {
  cout << "Optimized execution path 2." << endl;
}
void A::run() {
  switch (m_case) {
    case 1:
      runImpl<1>();
      break;
    case 2:
      runImpl<2>();
      break;
    default:
      runImpl<0>();
      break;
  }
}
int main() {
  uint32_t arr[] = {1, 1};
  A a(arr, 2);
  a.run();
  return 0;
}
 
     
    