I need to make my script usable for UNIX commands, in order to make it possible for example to process 2 input files at once with my script. This script works perfectly well with command line arguments:
newlist = []
def f1()
 .... 
  return places 
  return persons
  return unknown
def f2(input_file):
  volume_id = sys.argv[3]   
  for line in input_data:  
     if any(place+'</dfn>' in line.decode('utf-8') for place in places):
         line = line.replace('"person"', '"place"')
         line = line.replace('id="', 'id="'+volume_id)
     elif any(unk+'</dfn>' in line.decode('utf-8') for unk in unknown):
         line = line.replace('"person"', '"undefined"')
         line = line.replace('id="', 'id="'+volume_id)
     elif 'class="person"' in line.decode('utf-8') and '<dfn' not in line:
         line = line.replace('class="person"', '')
         line = line.replace('id="', 'id="'+volume_id)
     elif 'id="' in line:
         line = line.replace('id="', 'id="'+volume_id)
     newlist.append(line)
  return  newlist                
def main():
   if len(sys.argv) < 4:
     print 'usage: ./myscript.py [file_in... file_out... volume_id]'
     sys.exit(1)
   else:
    filename = sys.argv[1]
    filename_out = sys.argv[2]
    tree = etree.parse(filename)
    extract(tree)
    input_file = open(filename, 'rU')
    change_class(input_file)
    file_new = open(filename_out, 'w')
    for x in newlist:
      if '\n' in x:                   
         x = x.replace('\n', '')                
      print>>file_new, x
When I tried to add stdin stdout to it, I first had a problem with reading the same input file first, and for this reason made some chages so that it would be actually open only once. I modified the following:
  def f2(input_data) #instead of input_file
and I modified main():
        filename = sys.argv[1]
        filename_out = sys.argv[2]
        if filename == '-':
           input_file = sys.stdin
        else:
            input_file = open(filename, 'rU')
        if filename_out == '-':
            filename_out = sys.stdout
            file_new = filename_out
        else:
            file_new = open(filename_out, 'w')
        input_data = input_file.read()
        tree = etree.fromstring(input_data)
        extract(tree)
        change_class(input_data)
        for x in newlist:
            if '\n' in x:                   
               x = x.replace('\n', '')                
            print>>file_new, x
I run the program from the command line: ./myscript.py - - volumeid < inputfile > outputfile
And now I get an encoding problem:
Traceback (most recent call last):
  File "./exportXMLstd.py", line 192, in <module>
    main()
  File "./exportXMLstd.py", line 182, in main
    change_class(input_data)
  File "./exportXMLstd.py", line 135, in change_class
    if any(place+'</dfn>' in line.decode('utf-8') for place in places):
  File "./exportXMLstd.py", line 135, in <genexpr>
    if any(place+'</dfn>' in line.decode('utf-8') for place in places):
  File "/usr/lib/python2.7/encodings/utf_8.py", line 16, in decode
    return codecs.utf_8_decode(input, errors, True)
UnicodeDecodeError: 'utf8' codec can't decode byte 0xe2 in position 0: unexpected end of data
What I am doing wrong?
 
    