I am currently trying to translate a Python image detection algorithm in C++ (in order to use it in a smartphone app) using OpenCV. I have similar results to a certain point, where the two algorithms seem to diverge due to the accumulation of small differences in the images: I am applying some transformations to the image, but the number of decimal digits in Python is much larger than in C++ (where I use CV_64FC1 matrices), so I get more and more different results after some iterations...
Do you have any idea on how to overcome this issue?
Thanks in advance :)
EDIT: here is a code that gives a different result in Python and C++
void conv2(const Mat &img, const Mat& kernel, Mat& dest) {
  Mat source = img;
  Point anchor(kernel.cols - kernel.cols/2 - 1, kernel.rows - kernel.rows/2 - 1);
  flip(kernel, kernel, 0);
  filter2D(source, dest, -1, kernel, anchor);
}
void myFunc() {
  Mat im = imread("3.png", IMREAD_GRAYSCALE);
  im.convertTo(im, CV_64F);
  int rows = im.rows;
  int cols = im.cols;
  int sze = 7;
  Mat gauss = getGaussianKernel(sze, 1);
  Mat f = gauss * gauss.t();
  Mat fx, fy;
  Sobel(f, fx, -1, 1, 0);
  Sobel(f, fy, -1, 0, 1);
  Mat Gx, Gy, Gxx, Gyy, Gxy;
  filter2D(im, Gx, -1, fx);
  filter2D(im, Gy, -1, fy);
  pow(Gx, 2, Gxx);
  pow(Gy, 2, Gyy);
  Gxy = Gx.mul(Gy);
  gauss = getGaussianKernel(sze, 38);
  f = gauss * gauss.t();
  conv2(Gxx, f, Gxx);
  conv2(Gyy, f, Gyy);
  conv2(Gxy, f, Gxy);
  Gxy *= 2;
  Mat Gxx_minus_Gyy = Gxx - Gyy;
  Mat Gxy_squared, Gxx_minus_Gyy_squared;
  pow(Gxy, 2, Gxy_squared);
  pow(Gxx_minus_Gyy, 2, Gxx_minus_Gyy_squared);
  Mat denom;
  sqrt(Gxy_squared + Gxx_minus_Gyy_squared, denom);
  // denom += numeric_limits<double>::epsilon();
  Mat sin2theta = Gxy / denom;
  cout.precision(dbl::max_digits10);
  cout << fixed << sum(sin2theta) << endl;
  exit(1);
}
And Python:
def conv2(img, kernel):
    source = img
    kernel = cv2.flip(kernel, 0)
    rows, cols = kernel.shape
    anchor = (int(cols - cols/2 - 1), int(rows - rows/2 - 1))
    return cv2.filter2D(source, -1, kernel, anchor=anchor)
def myFunc():
    im = cv2.imread('3.png', cv2.IMREAD_GRAYSCALE)
    im = np.float64(im)
    rows, cols = im.shape;
    sze = 7
    gauss = cv2.getGaussianKernel(sze, 1);
    f = gauss * gauss.T;
    fx = cv2.Sobel(f, -1, 1, 0)
    fy = cv2.Sobel(f, -1, 0, 1)
    Gx = cv2.filter2D(im, -1, fx)
    Gy = cv2.filter2D(im, -1, fy)
    Gxx = cv2.pow(Gx,2);
    Gyy = cv2.pow(Gy,2);
    Gxy = cv2.multiply(Gx, Gy);
    gauss = cv2.getGaussianKernel(sze, 38);
    f = gauss * gauss.T;
    Gxx = conv2(Gxx, f)
    Gyy = conv2(Gyy, f)
    Gxy = 2*conv2(Gxy, f)
    Gxx_minus_Gyy = Gxx - Gyy
    Gxy_squared = cv2.pow(Gxy, 2)
    Gxx_minus_Gyy_squared = cv2.pow(Gxx_minus_Gyy, 2)
    denom = cv2.sqrt(Gxy_squared + Gxx_minus_Gyy_squared)
    # denom += np.finfo(float).eps;
    sin2theta = Gxy/denom
    print(cv2.sumElems(sin2theta))
    exit()
The call to myFunc() prints the sum of the elements in the "sin2theta" matrix: Python gives 86587.44928456949, and C++ gives 86825.05505451805947814, which starts to make a pretty important difference.
