My sequential java program is faster than than parallel version. Below is the code of my parallel version. Sequential version is exact same code without a Thread. I don't see that much of a process creation overhead, I am processing 2 million calculation in a thread and I have only 2 threads. But its sequential version is twice much faster than this parallel version.
In the program, I am trying to merge two sorted arrays into one large sorted array, concept from Merge Sort in parallel.
import java.util.Date;
public class MergePP {
    public static final int n = 2000000;
    public static void main(String[] args) {
        Integer[] X = new Integer[n];
        Integer[] Y = new Integer[n];
        Integer[] Z = new Integer[X.length + Y.length];
        /* input sorted lists X and Y */
        for (int i = 0; i < n; i++) {
            X[i] = i * 2;
        }
        for (int i = 0; i < n; i++) {
            Y[i] = 1 + i * 2;
        }
        Date t = new Date();
        Thread threadX = new Thread(() -> {
            int j;
            for (int i = 0; i < X.length; i++) {
                // find the position of x[i] and add to Z
                j = searchY(Y, X[i]);
                Z[i + j] = X[i];
            }
        });
        Thread threadY = new Thread(() -> {
            int m;
            for (int k = 0; k < Y.length; k++) {
                // find the position of Y[k] and add to Z
                m = searchX(X, Y[k]);
                Z[k + m] = Y[k];
            }
        });
        threadX.start();
        threadY.start();
        // wait for thread to complete
        try {
            threadX.join();
            threadY.join();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        Date s = new Date();
        System.out.print("Elapsed Time: ");
        System.out.println(s.getTime() - t.getTime());
    }
    private static Integer searchY(Integer[] arr, int k) {
        int startIndex = 0;
        int endIndex = arr.length - 1;
        while (startIndex <= endIndex) {
            int midIndex = (startIndex + endIndex) / 2;
            if (arr[midIndex] == k) {
                // treat it like a k > arr[midIndex]
                return midIndex + 1;
            } else if (k < arr[midIndex]) {
                endIndex = midIndex - 1;
            } else if (k > arr[midIndex]) {
                startIndex = midIndex + 1;
            }
        }
        return endIndex + 1;
    }
    private static Integer searchX(Integer[] arr, int k) {
        int startIndex = 0;
        int endIndex = arr.length - 1;
        while (startIndex <= endIndex) {
            int midIndex = (startIndex + endIndex) / 2;
            if (arr[midIndex] == k) {
                // treat it like a k < arr[midIndex]
                return midIndex - 1;
            } else if (k < arr[midIndex]) {
                endIndex = midIndex - 1;
            } else if (k > arr[midIndex]) {
                startIndex = midIndex + 1;
            }
        }
        return endIndex + 1;
    }
}
Note: I am using Mac Book Pro with quad core processor.
