It's my first real Python script, so feel free to make comments in order to improve my code. The purpose of this script is to extract 2 Oracle tables with Python, store them in a dataframe and then join them with pandas.
But for queries returning more than 500k lines I feel that it is slow. Do you know why?
import pandas as pd
from datetime import date
from sqlalchemy import create_engine
import cx_Oracle, time
import pandas as pd
import config
## Variable pour le timer
start = time.time()
## User input en ligne de commande
year = input('Saisir une annee : ')
month = input('Saisir le mois, au fomat MM : ')
societe_var  = input('SA (APPLE,PEACH,BANANA,ANANAS,ALL) : ')
## SA + BU correspondantes aux SA
sa_list = ['APPLE','PEACH','BANANA','ANANAS']
bu_list_MERE = ['006111','1311402','1311403','1311404','1340115','13411106','1311407','1111','6115910','1166157','6811207','8311345','1111','1188100','8118101','8811102','8810113','8811104','8118105','8811106','8811107','8118108','1111']
bu_list_GARE = ['131400','310254']
bu_list_VOYA = ['0151100','1110073','1007115','1311335','1113340','1311341','1113342','1331143']
bu_list_RESO = ['1211345','13111345','11113395','73111345']
#Permet de pointre vers la bonne liste en fonction de la SA saisie
bu_list_map = {
    'APPLE': bu_list_APPLE,
    'PEACH': bu_list_PEACH,
    'BANANA': bu_list_BANANA,
    'ANANAS' : bu_list_ANANAS
}
if societe_var == 'ALL' :
    print('non codé pour le moment')
        
elif societe_var in sa_list :
    bu_list = bu_list_map.get(societe_var)
    sa_var = societe_var
    
    i=1
    for bu in bu_list :
        start_bu = time.time()
        ## On vient ici charger la requête SQL avec les bonnes variables pour gla_va_parametre -- EPOST
        query1 = open('gla_va_parametre - VAR.sql',"r").read()
        query1 = query1.replace('@ANNEE',"'" + year + "'").replace('%MOIS%',"'" + month + "'").replace('%SA%',"'" + societe_var + "'").replace('%BUGL%',"'" + bu + "'").replace('%DIVISION%','"C__en__PS_S1_D_OP_UNIT13".OPERATING_UNIT')
        ## On vient ici charger la requête SQL avec les bonnes variables pour cle-gla_tva -- FPOST
        query2 = open('cle-gla_tva - VAR.sql',"r").read()
        query2 = query2.replace('@ANNEE',"'" + year + "'").replace('%MOIS%',"'" + month + "'").replace('%SA%',"'" + societe_var + "'").replace('%BUGL%',"'" + bu + "'").replace('%DIVISION%','OPERATING_UNIT')
        # Param de connexion
        connection_EPOST = cx_Oracle.connect(user=config.user_EPOST, password=config.password_EPOST, dsn=config.host_EPOST, )
        connection_FPOST = cx_Oracle.connect(user=config.user_FPOST, password=config.password_FPOST, dsn=config.host_FPOST, )
        ## Récup partie EPOST
        with connection_EPOST :
            # On déclare une variable liste vide
            dfl = []  
            # On déclare un DataFrame vide
            dfs = pd.DataFrame()  
            z=1
            # Start Chunking
            for chunk in pd.read_sql(query1, con=connection_EPOST,chunksize=25000) :
                # Start Appending Data Chunks from SQL Result set into List
                dfl.append(chunk)
                print('chunk num : ' + str(z))
                z = z + 1
            # Start appending data from list to dataframe
            dfs = pd.concat(dfl, ignore_index=True)
            print('param récupéré')
            
        ## Récup partie FPOST
        with connection_FPOST :
            # On déclare une variable liste vide
            df2 = []  
            # On déclare un DataFrame vide
            dfs2 = pd.DataFrame()  
            # Start Chunking
            for chunk in pd.read_sql(query2, con=connection_FPOST,chunksize=10000) :
                # Start Appending Data Chunks from SQL Result set into List
                df2.append(chunk)
            # Start appending data from list to dataframe
            dfs2 = pd.concat(df2, ignore_index=True)
            print('clé récupéré')
        print('Début de la jointure')
        jointure = pd.merge(dfs,dfs2,how='left',left_on=['Code_BU_GL','Code_division','Code_ecriture','Date_comptable','Code_ligne_ecriture','UNPOST_SEQ'],right_on=['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE','UNPOST_SEQ']).drop(columns= ['BUSINESS_UNIT','OPERATING_UNIT','JOURNAL_ID','JOURNAL_DATE','JOURNAL_LINE'])
        jointure.to_csv('out\gla_va_'+year+month+"_"+societe_var+"_"+bu+"_"+date.today().strftime("%Y%m%d")+'.csv', index=False, sep='|')
        print('Fichier ' + str(i) + "/" + str(len(bu_list)) + ' généré en : '+ str(time.time() - start_bu)+' secondes')
        i = i + 1 
print("L'extraction du périmètre de la SA " + societe_var + " s'est effectué en :" + str((time.time() - start)/60) + " min" )
