I'm currently writing a script in python that takes a number of flags. This is my first attempt at such a program, and I am getting an output from the bash script that I don't quite understand. For example when I run the script in the bash shell:
$ python my_script.py -f <input_file.txt> -k <test_string> -c <user_input>
I get this output before my script's output:
usage: rm [-f | -i] [-dPRrvW] file ...
       unlink file
I can't seem to get rid of this, which is frustrating for the prettiness of the output. Any help would be great!
The code I'm using:
import sys, getopt, re, subprocess, collections, itertools
def find_kmers( arguments=sys.argv[1:] ):
    required_opts = ['-f','-c','-k']
    opts, args = getopt.getopt(arguments,'f:k:c:')
    opt_dic = dict(opts)
    for opt in required_opts:
        if opt not in opt_dic:
            return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"
    def rev_comp(sequence):
        reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}
        return ''.join(reversed_dic[_] for _ in sequence[::-1])
    kmer = opt_dic['-k']
    subprocess.call(['bash','-c',"grep '>' S288C_R64.fasta > grep.tmp"])
    chromosomes = [_[1:].strip() for _ in open('grep.tmp')]
    subprocess.call(['bash','-c','rm','grep.tmp'])
    found = False
    if any(opt_dic['-c']==_ for _ in chromosomes):
        found = True
    def get_sequence(file):
        sequence = ''
        for line in file:
            if line.startswith('>'): break
            sequence += line.strip()
        return sequence.upper()
    ofile = open(opt_dic['-f'])
    if found == True:
        for line in ofile:
            if line.startswith('>'):
                if line[1:].strip() == opt_dic['-c']:
                    sequence = get_sequence(ofile)
                    break
    else:
        return 'chromosome not found in %s. \n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))
    kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)
    kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))
    def print_statement(start,strand):
        return '%s\thw1_script\tkmer=%s\t%s\t%s\t.\t%s\t.\tID=S288C;Name=S288C\n'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)
    pos_strand = collections.deque()
    neg_strand = collections.deque()
    for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):
        pos_strand.append(match1.start()+1)
        neg_strand.append(match2.start()+1)
    wfile = open('answer.gff3','w')
    while len(pos_strand)>0 and len(neg_strand)>0:
        if pos_strand[0]<neg_strand[0]:
            start = pos_strand.popleft()
            wfile.write(print_statement(start,'+'))
        else:
            start = neg_strand.popleft()
            wfile.write(print_statement(start,'-'))
    while len(pos_strand)>0:
        start = pos_strand.popleft()
        wfile.write(print_statement(start,'+'))
    while len(neg_strand)>0:
        start = neg_strand.popleft()
        wfile.write(print_statement(start,'-'))
    wfile.close()
    return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))
if __name__ == '__main__':
    print find_kmers()
 
     
    