I'm trying to iterate through a lot of xml files that have ~1000 individual nodes that I want to iterate through to extract specific attributes (each node has 15 or so attributes, I only want one). In the end, there should be about 4 million rows. My code is below, but I have a feeling that it's not time efficient. What can I optimize about this?
import os, pandas as pd, xml.etree.ElementTree as xml
#init master df as accumulator of temp dfs
master_df = pd.DataFrame(
    columns = [
        'col1',
        'col2',
        'col3',
        'col4'
        ])
dir = 'C:\\somedir'
#iterate through files
for file in os.listdir(dir):
    #init xml handle and parse
    file = open(str(dir+"{}").format('\\'+file)
    parse = xml.parse(file)
    root = parse.getroot()
    
    #var assignments with desired data
    parent_node1 = str(root[0][0].get('pn1'))
    parent_node2 = str(root[0][1].get('pn2'))
    
    #resetting iteration dependent variables
    count = 0
    a_dict = {}
    
    #iterating through list of child nodes
    for i in list(root[1].iter())[1:]:
        child_node1 = str(i.get('cn1'))
        child_node2 = str(i.get('cn2'))
        a_dict.update({
            count: {
                "col1" : parent_node1,
                'col2': child_node1,
                "col3": parent_node2,
                "col4" : child_node2
                }})
        count = count+1
    temp_df = pd.DataFrame(a_dict).T
    master_df = pd.merge(
        left = master_df,
        right = temp_df,
        how = 'outer'
        )
