Here's an answer comparing some of the possible methods on 2 different datasets: one consists of many little arrays, the other of a few large arrays:
import random
import timeit
from itertools import chain
def f1(a, b):
    """Interleave a and b (truncating to the shorter) via itertools.chain."""
    pairs = zip(a, b)
    return list(chain.from_iterable(pairs))
def f2(a, b):
    """Interleave a and b by summing the zipped pairs into one tuple.

    Tuple concatenation via sum() is quadratic, which is exactly what
    this benchmark is meant to expose on large inputs.
    """
    flat = sum(zip(a, b), ())
    return list(flat)
def f3(a, b):
    """Interleave a and b (truncating to the shorter) with an append loop."""
    out = []
    for left, right in zip(a, b):
        out.append(left)
        out.append(right)
    return out
def f4(a, b):
    """Interleave a and b by explicit indexing, truncating to the shorter list.

    Deliberately index-based (while loop + append) so the benchmark measures
    the cost of manual iteration versus the iterator-based variants.
    """
    # Fix: the original assigned `result = []` twice; once is enough.
    result = []
    stop = min(len(a), len(b))
    i = 0
    while i < stop:
        result.append(a[i])
        result.append(b[i])
        i += 1
    return result
# Small benchmark: many repetitions (N) over tiny 4-element lists.
N = 5000000
a_small = ['a', 'b', 'c', 'd']
b_small = ['e', 'f', 'g', 'h']
benchmark1 = [
    timeit.timeit(
        'f1(a_small, b_small)', setup='from __main__ import f1, a_small,b_small', number=N),
    timeit.timeit(
        'f2(a_small, b_small)', setup='from __main__ import f2, a_small,b_small', number=N),
    timeit.timeit(
        'f3(a_small, b_small)', setup='from __main__ import f3, a_small,b_small', number=N),
    timeit.timeit(
        'f4(a_small, b_small)', setup='from __main__ import f4, a_small,b_small', number=N)
]
for index, value in enumerate(benchmark1):
    # print() with a single argument works identically on Python 2 and 3.
    print(" - Small sample with {0} elements -> f{1}={2}".format(len(a_small), index + 1, value))
# Large benchmark: few repetitions (N) over large random lists.
N = 5000
K = 100000  # value range sampled from
P = 1000    # number of elements per list
a_large = random.sample(range(K), P)
b_large = random.sample(range(K), P)
benchmark2 = [
    timeit.timeit(
        'f1(a_large, b_large)', setup='from __main__ import f1, a_large,b_large', number=N),
    timeit.timeit(
        'f2(a_large, b_large)', setup='from __main__ import f2, a_large,b_large', number=N),
    timeit.timeit(
        'f3(a_large, b_large)', setup='from __main__ import f3, a_large,b_large', number=N),
    timeit.timeit(
        'f4(a_large, b_large)', setup='from __main__ import f4, a_large,b_large', number=N)
]
for index, value in enumerate(benchmark2):
    # Bug fix: report the actual list length (P == len(a_large)), not the
    # sampling range K, matching the small-benchmark report.
    # print() with a single argument works identically on Python 2 and 3.
    print(" - Large sample with {0} elements -> f{1}={2}".format(len(a_large), index + 1, value))
- Small sample with 4 elements -> f1=7.50175959666
- Small sample with 4 elements -> f2=5.52386084127
- Small sample with 4 elements -> f3=7.12457549607
- Small sample with 4 elements -> f4=7.24530968309
- Large sample with 100000 elements -> f1=0.512278885906
- Large sample with 100000 elements -> f2=28.0679210232
- Large sample with 100000 elements -> f3=1.05977378475
- Large sample with 100000 elements -> f4=1.17144886156
Conclusion: the f2 function seems to be slightly faster when N is big and the lists are little. When the arrays are large and the number of repetitions is small, f1 is the winner.
Specs: Python2.7.11(64) , N=5000000 on a i-7 2.6Ghz