I am trying to match two string with position of the string using set but not able to fix it.
In this function I am trying to allow two miss match of the x3 = 'NKXD' in the position of N can be T or A or G and in the position of K can be L and in the position of D can be E
I have define spl_amino but not sure its working can anyone plz suggest how to fix this problem?
Edited
Script
import csv
def match(X,Y):
    mismatch = 0
    for x,y in zip(X,Y):
            #print x, y
            if not (x == 'X' or x == y):
                    mismatch += 1
                    if mismatch > 1:
                        return False
    return True
def g4_match(X,Y):
    spl_amino = set('T','A','G','L','E')
    mismatch, spl_mismatch = 0, 0
    for x,y in zip(X,Y):
            print x, y
            if not (x == 'X' and y == x):
                    if (y in spl_amino):
                           spl_mismatch += 1
                    else:
                           mismatch += 1
            if mismatch > 1 or spl_mismatch > 1:
                        return False
    return True
def mean(arr):
    return (max(arr) + min(arr))/2
def H(protein,x1,x2,x3,x4, protein_name, pdb_id, source):
    def find_matches(x, g4_match):
        match_positions = []
        matches         = []
        for i in range(len(protein) - len(x)):
            candidate = protein[i : i + len(x)]
            if match(x, candidate):
                match_positions.append(i)
                matches        .append(candidate)
        return matches, match_positions
    L1, pL1 = find_matches(x1, match)
    L2, pL2 = find_matches(x2, match)
    L3, pL3 = find_matches(x3, g4_match)
    L4, pL4 = find_matches(x4, match)
    candidates = []
    for a in zip(pL1, L1):
        for b in zip(pL2, L2):
            for c in zip(pL3, L3):
                for d in zip(pL4, L4):
                    if (40 <= b[0] - a[0] <= 80 and
                        40 <= c[0] - b[0] <= 80 and
                        20 <= d[0] - c[0] <= 80    ):
                        #print(a,b,c,d)
                        candidates.append((a,b,c,d))
                    elif (80 <= b[0] - a[0] <= 120 and
                          40 <= c[0] - b[0] <= 80 and 
                          120 <= d[0] - c[0] <= 180 ):
                        #print(a,b,c,d)
                        candidates.append((a,b,c,d))
    with open('output_test.csv', 'a') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow([pdb_id])
        wr.writerow([protein_name])
        wr.writerow([protein])
        wr.writerow([source])
        for i in candidates: 
            wr.writerow([i])
    return candidates
x1 = 'GXXXXGK'
x2 = 'DXXG'
x3 = 'NKXD'
x4 = 'EXSAX'
with open('input_file_1.csv') as infile:
    lines = csv.reader(infile, delimiter=',', skipinitialspace=True, dialect=csv.excel_tab)
    next(lines) # skip header
    for line in lines:
        protein = line[2]
    protein_name = line[1]
        pdb_id = line[0]
        source = line[3]
        H(protein,x1,x2,x3,x4, protein_name, pdb_id, source)
Input Like that in csv file
S No.   PDB ID  Protein Name    Sequence    Source
1   121P    H-RAS P21 PROTEIN   MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQH  Homo sapiens
2   1A12    REGULATOR OF CHROMOSOME CONDENSATION 1  RRSPPADAIPKSKKVKVSHRSHSTEPGLVLTLGQGDVGQLGLGENVMERKKPALVSIPEDVVQAEAGGMHTVCLSKSGQVYSFGCNDEGALGRDTSVEGSEMVPGKVELQEKVVQVSAGDSHTAALTDDGRVFLWGSFRDNNGVIGLLEPMKKSMVPVQVQLDVPVVKVASGNDHLVMLTADGDLYTLGCGEQGQLGRVPELFANRGGRQGLERLLVPKCVMLKSRGSRGHVRFQDAFCGAYFTFAISHEGHVYGFGLSNYHQLGTPGTESCFIPQNLTSFKNSTKSWVGFSGGQHHTVCMDSEGKAYSLGRAEYGRLGLGEGAEEKSIPTLISRLPAVSSVACGASVGYAVTKDGRVFAWGMGTNYQLGTGQDEDAWSPVEMMGKQLENRVVLSVSSGGQHTVLLVKDKEQS   Homo sapiens
