Hello, I am trying to filter bad words out of a word list. The script usually processes lists of 5 to 10 million lines. I tried threading to make it fast, but after the first 20k words it gets slower and slower — why is that? Would it be faster if I used multiprocessing instead? I run this script on Ubuntu with 48 CPU cores and 200 GB of RAM.
from tqdm import tqdm
import queue
import threading
a=input("The List: ")+".txt"
thr=input('Threads: ')
c=input("clear old[y]: ")
inputQueue = queue.Queue()
if c == 'y' or c == 'Y':#clean
    if c =="y":
        open("goodWord.txt",'w').close()
s = ["bad_word"]#bad words list
class myclass:
    def dem(self,my_word):
        for key in s:
            if key in my_word:
                return 1
        return 0
    def chk(self):
        while 1:
            old = open("goodWord.txt","r",encoding='utf-8',errors='ignore').readlines()
            y = inputQueue.get()
            if my_word not in old:
                rez = self.dem(my_word)
                if rez == 0:
                    sav = open("goodWord.txt","a+")
                    sav.write(my_word+"\n")
                    sav.close()
                    self.pbar.update(1)
                else :
                    self.pbar.update(1)
            inputQueue.task_done()
    def run_thread(self):
        for y in tqdm(open(a, 'r',encoding='utf-8', errors='ignore').readlines()):
            inputQueue.put(y)
        tqdm.write("All in the Queue")
        self.pbar = tqdm(total=inputQueue.qsize(),unit_divisor=1000)
        for x in range(int(thr)):
            t = threading.Thread(target=self.chk)
            t.setDaemon(True)
            t.start()
        inputQueue.join()
try:
    open("goodWord.txt","a")
except:
    open("goodWord.txt","w")
old = open("goodWord.txt","r",encoding='utf-8',errors='ignore').readlines()
myclass=myclass()
omyclass.run_thread()
 
    