I have a simple concurrency script that reads from an Oracle database. I'm querying it with python-oracledb, and each worker thread acquires its own connection from a session pool and fetches one slice of the table (each of the 12 threads fetches maxrows / threads rows at its own offset, i.e. 100,000 rows per thread for a 1.2-million-row table). However, I have noticed that this is very slow for larger tables: with 1.2 million rows the program hangs and my IDE crashes, whereas with 80,000 rows it completes in a few seconds. What is causing this apparent memory overflow at the larger row count, and how can I correct it?
import threading
import time

import oracledb
import pandas as pd

SQL = ('SELECT /*+ ENABLE_PARALLEL_DML PARALLEL(AUTO) */ * FROM USER_TABLE '
       'OFFSET :rowoffset ROWS FETCH NEXT :maxrows ROWS ONLY')
MAX_ROWS = 1_200_000
NUM_THREADS = 12

# Decorator: run `fn` once per thread, passing each call its thread index,
# then block until every thread has finished.
def start_workload(fn):
    def wrapped(self, threads, *args, **kwargs):
        assert isinstance(threads, int)
        assert threads > 0
        ts = []
        for i in range(threads):
            new_args = (self, i, *args)
            t = threading.Thread(target=fn, args=new_args, kwargs=kwargs)
            t.start()
            ts.append(t)
        for t in ts:
            t.join()
    return wrapped

class TEST:
    def __init__(self, batchsize, maxrows, *args):
        # Fixed-size pool: one connection per worker thread
        # (args[0]/args[1] are the username and password).
        self._pool = oracledb.create_pool(
            user=args[0], password=args[1],
            host="localhost", port=1521, service_name="service_name",
            min=NUM_THREADS, max=NUM_THREADS,
        )
        self._batchsize = batchsize
        self._maxrows = maxrows
        
    @start_workload
    def do_query(self, tn):
        with self._pool.acquire() as connection:
            with connection.cursor() as cursor:
                # Each thread fetches its own slice of the table:
                # maxrows / batchsize rows (1,200,000 / 12 = 100,000 here),
                # starting at offset tn * rows_per_thread.
                rows_per_thread = int(self._maxrows / self._batchsize)
                cursor.arraysize = 10000
                cursor.prefetchrows = 1000000
                cursor.execute(SQL, dict(rowoffset=tn * rows_per_thread,
                                         maxrows=rows_per_thread))
                columns = [col[0] for col in cursor.description]
                cursor.rowfactory = lambda *args: dict(zip(columns, args))
                pd.DataFrame(cursor.fetchall()).to_csv(f'TH_{tn}_customer.csv')
if __name__ == '__main__':
    # username and password are defined elsewhere.
    result = TEST(NUM_THREADS, MAX_ROWS, username, password)
    start = time.time()
    result.do_query(NUM_THREADS)
    end = time.time()
    print('Total Time: %s' % (end - start))
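
To help narrow this down, here is a minimal single-connection sketch of the same paged fetch, with the pool, threads, and pandas stripped out and `prefetchrows` left at its default. The table name, service name, and credentials are the same placeholders as in the script above:

import time

import oracledb

# Same placeholder table/credentials as the script above.
SQL = ('SELECT * FROM USER_TABLE '
       'OFFSET :rowoffset ROWS FETCH NEXT :maxrows ROWS ONLY')

connection = oracledb.connect(user="username", password="password",
                              host="localhost", port=1521,
                              service_name="service_name")

with connection.cursor() as cursor:
    cursor.arraysize = 10000  # rows transferred per network round trip
    start = time.time()
    cursor.execute(SQL, dict(rowoffset=0, maxrows=100000))
    rows = cursor.fetchall()
    print('%d rows in %.2fs' % (len(rows), time.time() - start))

Timing this at maxrows=100000 versus smaller values should show whether the raw fetch itself slows down, or whether the per-thread DataFrame/CSV step in the full script is where the time goes.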