I'm learning to use Python's Multiprocessing package for embarrassingly parallel problems, so I wrote serial and parallel versions for determining the number of primes less than or equal to a natural number n. Based on what I read from a blog post and a Stack Overflow question, I came up with the following code:
Serial
import math
import time
def is_prime(start, end):
    """determine how many primes within given range"""
    numPrime = 0
    for n in range(start, end+1):
        isPrime = True
        for i in range(2, math.floor(math.sqrt(n))+1):
            if n % i == 0:
                isPrime = False
                break
        if isPrime:
            numPrime += 1
    if start == 1:
        numPrime -= 1  # since 1 is not prime
    return numPrime
if __name__ == "__main__":
    natNum = 0
    while natNum < 2:
        natNum = int(input('Enter a natural number greater than 1: '))
    startTime = time.time()
    finalResult = is_prime(1, natNum)
    print('Elapsed time:', time.time()-startTime, 'seconds')
    print('The number of primes <=', natNum, 'is', finalResult)
Parallel
import math
import multiprocessing as mp
import numpy
import time
def is_prime(vec, output):
    """determine how many primes in vector"""
    numPrime = 0
    for n in vec:
        isPrime = True
        for i in range(2, math.floor(math.sqrt(n))+1):
            if n % i == 0:
                isPrime = False
                break
        if isPrime:
            numPrime += 1
    if vec[0] == 1:
        numPrime -= 1  # since 1 is not prime
    output.put(numPrime)
def chunks(vec, n):
    """evenly divide list into n chunks"""
    for i in range(0, len(vec), n):
        yield vec[i:i+n]
if __name__ == "__main__":
    natNum = 0
    while natNum < 2:
        natNum = int(input('Enter a natural number greater than 1: '))
    numProc = 0
    while numProc < 1:
        numProc = int(input('Enter the number of desired parallel processes: '))
    startTime = time.time()
    numSplits = math.ceil(natNum/numProc)
    splitList = list(chunks(tuple(range(1, natNum+1)), numSplits))
    output = mp.Queue()
    processes = [mp.Process(target=is_prime, args=(splitList[jobID], output))
                 for jobID in range(numProc)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print('Elapsed time:', time.time()-startTime, 'seconds')
    procResults = [output.get() for p in processes]
    finalResult = numpy.sum(numpy.array(procResults))
    print('Results from each process:\n', procResults)
    print('The number of primes <=', natNum, 'is', finalResult)
Here is what I get for n=10000000 (for parallel I request 8 processes):
$ python serial_prime_test.py 
Enter a natural number greater than 1: 10000000
Elapsed time: 162.1960825920105 seconds
The number of primes <= 10000000 is 664579
$ python parallel_prime_test.py
Enter a natural number greater than 1: 10000000
Enter the number of desired parallel processes: 8
Elapsed time: 49.41204643249512 seconds
Results from each process:
[96469, 86603, 83645, 80303, 81796, 79445, 78589, 77729]
The number of primes <= 10000000 is 664579
So it looks like I can get a little over 3x speedup. Here are my questions:
- Clearly this is not linear speedup, so how much better can I do (or what speedup should I realistically expect)?
- It looks like Amdahl's Law answers this, but I don't know how to determine what fraction of my program is strictly serial.
Any help is appreciated.
Edit: There are 4 physical cores, capable of hyperthreading.
 
     
     
     
    