I have to process a large XML document on which I need to perform several data cleaning and manipulation tasks.
The basic code below uses xml.etree.ElementTree.
As the file is very large (about 2 GB), I would like to be able to print the value of my tagCounts accumulator variable at regular intervals.
What is the cleanest way to implement a timer with ElementTree that prints the content of self.tagCounts every 3 minutes?
Thanks
import xml.etree.ElementTree as ET
import pprint
class TagCounter:
    """Parser target that tallies how many times each element tag is opened.

    Provides the ``start``/``end``/``data``/``close`` methods so that an
    instance can be handed to ``ET.XMLParser`` as its ``target``.
    """

    def __init__(self):
        """Begin with an empty tally."""
        # Maps tag name -> number of start events seen for that tag.
        self.tagCounts = {}

    def start(self, tag, attrib):
        """Record one opening of ``tag``; ``attrib`` is not used."""
        # dict.get replaces the membership test plus second lookup.
        self.tagCounts[tag] = self.tagCounts.get(tag, 0) + 1

    def end(self, tag):
        """Ignore element end events."""

    def data(self, data):
        """Ignore text content."""

    def close(self):
        """Return the tag -> count dict accumulated so far."""
        return self.tagCounts
def count_tags(filename, report_interval=None):
    """Count how often each element tag is opened in the XML file ``filename``.

    The file is read in fixed-size chunks and fed to an ``ET.XMLParser``
    whose target is a ``TagCounter``.

    Args:
        filename: Path of the XML file to parse.
        report_interval: If not ``None``, the number of seconds between
            progress reports. Once at least that much time has elapsed, the
            running tally is pretty-printed after the current chunk has been
            fed (e.g. ``180`` for roughly every 3 minutes). ``None`` (the
            default) produces no progress output.

    Returns:
        The value returned by ``parser.close()``.
    """
    import time  # local import: only this function needs the clock

    chunk_size = 64 * 1024

    counter = TagCounter()
    parser = ET.XMLParser(target=counter)

    next_report = None
    if report_interval is not None:
        next_report = time.monotonic() + report_interval

    # Binary mode hands the raw bytes to the parser so it can apply the
    # encoding declared by the document itself; text mode would decode with
    # the platform's default encoding, which may not match the file.
    with open(filename, mode="rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            parser.feed(chunk)
            # Checking the clock once per chunk keeps the overhead negligible.
            if next_report is not None and time.monotonic() >= next_report:
                pprint.pprint(counter.tagCounts)
                next_report = time.monotonic() + report_interval

    return parser.close()
if __name__ == "__main__":
    # Script entry point: count the tags in file.osm and pretty-print the tally.
    pprint.pprint(count_tags("file.osm"))