I have 340,000 line file data , when I reading file with Python 3.5 timing is good but when I run it with Python 2.7 reading is very slow , don't have any clue what is going on here, here is code:
import codecs as cds
INPUT_DATA_DIR = 'some_tsv_file.tsv'
ENT = "entities"
def train_data_getter(input_dir=INPUT_DATA_DIR):
    file_h = cds.open(input_dir,  encoding='utf-8')
    data = file_h.read()
    file_h.close()
    sentences = data.split("\n\n")
    parsed_data = parser(sentences[0])
    return parsed_data
def parser(raw_data):
    words = [line for line in raw_data.split("\n")]
    temp_l = []
    temp_s = ""
    for word in words:
        token, ent = word.split('\t')
        temp_s += token
        temp_s += " "
        temp_l.append(ent)
    data = [(temp_s), {ENT: temp_l}]
    return data
Edit
Thanks to @PM 2Ring , problem was string concatenation inside a for loop but still reason for huge difference between Python2.7 and 3.5 is not clear for me.
 
    