My code is like this:
auto t1 = std::chrono::steady_clock::now();
for (int t{0}; t < 100; ++t) {
vector<int> table(256, 0);
Mat im2 = cv::imread(impth, cv::ImreadModes::IMREAD_COLOR);
im2.forEach<cv::Vec3b>([&table](cv::Vec3b &pix, const int* pos) {
for (int i{0}; i < 3; ++i) ++table[pix[i]];
});
}
auto t2 = std::chrono::steady_clock::now();
cout << "time is: " << std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count() << endl;
auto t3 = std::chrono::steady_clock::now();
for (int t{0}; t < 100; ++t) {
vector<int> table(256, 0);
Mat im2 = cv::imread(impth, cv::ImreadModes::IMREAD_COLOR);
for (int r{0}; r < im2.rows; ++r) {
auto ptr = im2.ptr<uint8_t>(r);
for (int c{0}; c < im2.cols; ++c) {
for (int i{0}; i < 3; ++i) ++table[ptr[i]];
ptr += 3;
}
}
}
auto t4 = std::chrono::steady_clock::now();
cout << "time is: " << std::chrono::duration_cast<std::chrono::milliseconds>(t4 - t3).count() << endl;
Intuitively, I feel that foreach should work faster since it used multi-thread mechanism to do the work, but the result turns out that the foreach methods took 14759ms while the naive loop method took only 6791ms. What is the cause of this slower foreach method, and how could make it faster ?