I am learning how to work with Python's multiprocessing, and the following are virtually identical block of codes, one is written in single threaded, single process, and the other is multiprocessing, the speed of processing for multi-processing is so much worse than single one,
both working on the same 38910 records of gettext PO file, with english and Vietnamese translations
- single time:
# Execution took: 4.229461978015024 second(s)
and
- multi-processing took:
# Execution took: 35.94734842295293 second(s).
nearly 9 times worse off
here is the code:
#!/usr/bin/env python3
import re
from matcher import MatcherRecord
import os
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
from babel.messages import Message
from sphinx_intl import catalog as c
from collections import OrderedDict
from translation_finder import TranslationFinder
from pattern_utils import PatternUtils as pu
from definition import Definitions as df
import time
class PatternFoundResult:
    def __init__(self):
        self.found_dict: OrderedDict = None
    def updateFoundResult(self, found_dict: OrderedDict):
        self.found_dict = found_dict
        is_found = len(found_dict) > 0
        if not is_found:
            return
        print(found_dict)
def task001(arg):
    def genericAbbrevFormat(orig_txt: str, trans_txt: str, is_reverse=False):
        def replaceAbbrev(entry):
            loc = entry[0]
            mm: MatcherRecord = entry[1]
            sub_list = mm.getSubEntriesAsList()
            (abbrev_loc, abbrev_txt) = sub_list[1]
            changing_txt = replaceAbbrev.txt
            try:
                (exp_loc, exp_txt) = sub_list[3]
            except Exception as e:
                (exp_loc, exp_txt) = sub_list[2]
            abbrev_txt = f'[{abbrev_txt} - {exp_txt}]'
            changed_txt = pu.jointText(changing_txt, abbrev_txt, loc)
            replaceAbbrev.txt = changed_txt
            return True
        abbrev_dict = pu.patternMatchAll(df.ABBR_WITH_PRE_POST_QUOTES, trans_txt, is_reversed=True)
        has_abbrev_embedded = (len(abbrev_dict) > 0)
        if has_abbrev_embedded:
            replaceAbbrev.txt = trans_txt
            list(filter(replaceAbbrev, abbrev_dict.items()))
            trans_txt = replaceAbbrev.txt
        orig_txt = df.squareBracket(orig_txt)
        is_tran = (trans_txt is not None)
        if not is_tran:
            format_string = f':abbr:`{orig_txt} ()`'
            return format_string
        trans_txt = df.squareBracket(trans_txt)
        if is_reverse:
            format_string = f':abbr:`{orig_txt} ({trans_txt})`'
        else:
            format_string = f':abbr:`{trans_txt} ({orig_txt})`'
        return format_string
    def isGlossary(m: Message):
        check_string = 'manual/glossary/index'
        locations = m.locations
        is_glossary = False
        for loc in locations:
            is_glossary = (check_string in locations)
            if is_glossary:
                break
        return is_glossary
    def formatFoundEntry(entry):
        mm: MatcherRecord = None
        (loc, mm) = entry
        en_txt = mm.getComponent(2, 1)
        vn_txt = tf.isInDict(en_txt)
        has_tran = (vn_txt is not None)
        if has_tran:
            in_catalog = tf.isEnGoesFirst(en_txt)
            is_en_coming_first = (in_catalog or is_glossary)
            abbrev_txt = genericAbbrevFormat(en_txt, vn_txt, is_reverse=is_en_coming_first)
            front_filler = mm.getComponent(1, 1)
            back_filler = mm.getComponent(3, 1)
            ast_txt = f'{front_filler}{abbrev_txt}{back_filler}'
            return ast_txt
        else:
            return en_txt
    pat: re.Pattern = None
    m: Message = None
    tf: TranslationFinder = None
    (index, m, tf, pat, is_simple) = arg
    en_txt = m.id
    is_glossary = isGlossary(m)
    is_repeat = tf.isRepeat(en_txt)
    found_dict = pu.patternMatchAll(pat, en_txt)
    is_found = len(found_dict) > 0
    if is_found:
        result_string_list = list(map(formatFoundEntry, found_dict.items()))
    else:
        result_string_list = []
    return result_string_list
if __name__ == "__main__":
    time_start = time.perf_counter()
    is_debug = False
    home_dev = os.environ['DEV']
    input_path = os.path.join(home_dev, "current_blender_manual_merge_flat_0001.po")
    input_cat = c.load_po(input_path)
    BaseManager.register('TranslationFinder', TranslationFinder)
    manager = BaseManager()
    manager.start()
    tf = manager.TranslationFinder()
    pat:re.Pattern = df.QUOTEDTEXT_UNTRANSLATED_PATTERN
    is_simple = True
    result_handler = PatternFoundResult()
    with Pool() as pool:
        m: Message = None
        for (index, m) in enumerate(input_cat):
            arg = (index, m, tf, pat, is_simple)
            pool.apply_async(task001, args=[arg], callback=result_handler.updateFoundResult)
        pool.close()
        pool.join()
    print(f'Execution took: {time.perf_counter() - time_start} second(s).')
and here is the single threaded, the main section:
if __name__ == "__main__":
    time_start = time.perf_counter()
    is_debug = False
    home_dev = os.environ['DEV']
    input_path = os.path.join(home_dev, "current_blender_manual_merge_flat_0001.po")
    input_cat = c.load_po(input_path)
    result_handler = PatternFoundResult()
    tf = TranslationFinder()
    pat:re.Pattern = df.QUOTEDTEXT_UNTRANSLATED_PATTERN
    is_simple = True
    for (index, m) in enumerate(input_cat):
        arg = (index, m, tf, pat, is_simple)
        result_string_list = task001(arg)
        result_handler.updateFoundResult(result_string_list)
    print(f'Execution took: {time.perf_counter() - time_start} second(s) - records: {len(input_cat)}')
Could you please tell me how so, where did I go wrong?
