def JoinCtgs(smb, f1, f2, id1, id2, submat, abet):
    """Merge contig ``f2`` into contig ``f1`` and drop ``f2`` from ``smb``.

    ``smb[f1][id1]`` is aligned against ``smb[f2][id2]`` with
    ``dynprog.FastSW``.  The two gap lists returned by ``SWBacktrace2`` are
    handed to ``GapCtgByList`` for each contig, after which every entry of
    ``smb[f2]`` is copied into ``smb[f1]`` and ``smb[f2]`` is deleted.

    Args:
        smb: Mapping of contig key -> mapping of sequence id -> sequence.
            Modified in place.
        f1: Key of the contig that receives the merged sequences.
        f2: Key of the contig that is merged in and then removed.
        id1: Id of the anchoring sequence inside ``smb[f1]``.
        id2: Id of the anchoring sequence inside ``smb[f2]``.
        submat: Substitution matrix passed through to
            ``dynprog.FastSubMatrix``.
        abet: Alphabet passed through to ``dynprog.FastSubMatrix``.

    Returns:
        None.  The result is the mutated ``smb``.
    """
    # print() call form: the statement form is a SyntaxError on Python 3.
    print('JoinCtgs', f1, f2)

    seq1 = smb[f1][id1]
    seq2 = smb[f2][id2]
    bv = dynprog.FastSubMatrix(submat, abet, seq1, seq2)
    scm, ar = dynprog.FastSW(bv, seq1, seq2)
    # Only the gap lists are needed here; the aligned strings are discarded.
    _, _, vgap, hgap = SWBacktrace2(scm, ar, seq1, seq2)

    # GapCtgByList's return value is unused, so it is relied on to update
    # the contig it is given in place.
    GapCtgByList(smb[f1], hgap)
    GapCtgByList(smb[f2], vgap)

    # Move every sequence of f2 into f1 (same-id entries are overwritten).
    # NOTE(review): assumes the contigs are plain dicts - confirm.
    smb[f1].update(smb[f2])
    del smb[f2]
def InitAligns(ids, seqs, submat, abet):
    """Align every unordered pair of sequences once with Smith-Waterman.

    For each pair ``(i, j)`` with ``j < i`` the sequences are scored with
    ``dynprog.FastSubMatrix`` / ``dynprog.FastSW`` and traced back with
    ``SWBacktrace2``.

    Args:
        ids: Identifiers, indexed in parallel with ``seqs``.
        seqs: Sequences to align.
        submat: Substitution matrix passed through to
            ``dynprog.FastSubMatrix``.
        abet: Alphabet passed through to ``dynprog.FastSubMatrix``.

    Returns:
        A list of ``(ids[i], ids[j], t1, t2)`` tuples, where ``t1`` and
        ``t2`` are the first two values returned by ``SWBacktrace2`` for
        ``seqs[i]`` and ``seqs[j]``.  The list is empty when fewer than two
        sequences are given.
    """
    lines = []
    # j always runs below i, so no pair is visited twice and a sequence is
    # never paired with itself; i == 0 has no partner and is skipped.
    for i in range(1, len(seqs)):
        for j in range(i):
            bv = dynprog.FastSubMatrix(submat, abet, seqs[i], seqs[j])
            sc, ar = dynprog.FastSW(bv, seqs[i], seqs[j])
            # The gap lists from the backtrace are not needed here.
            t1, t2, _, _ = SWBacktrace2(sc, ar, seqs[i], seqs[j])
            lines.append((ids[i], ids[j], t1, t2))
    return lines
def AddToCtg(smb, fm, idm, idum, ilinei, submat, abet):
    """Add the sequence ``idum`` to contig ``fm`` using a pairwise record.

    ``ilinei`` is a pairwise alignment record; one of its two aligned
    strings belongs to ``idm`` (already in the contig) and the other to
    ``idum`` (the newcomer).  The contig's copy of ``idm`` is aligned
    against the record's copy, the resulting gap lists are applied to the
    contig and to the new sequence, and the new sequence is stored under
    ``smb[fm][idum]``.

    Args:
        smb: Mapping of contig key -> mapping of sequence id -> sequence.
            Modified in place.
        fm: Key of the contig being extended.
        idm: Id of the sequence that is already in ``smb[fm]``.
        idum: Id under which the new sequence is stored.
        ilinei: Indexable record read at positions 0, 2 and 3; position 0
            is compared with ``idm`` to decide which of positions 2 / 3 is
            the matching string.  Presumably a tuple as produced by
            ``InitAligns`` - confirm against the caller.
        submat: Substitution matrix passed through to
            ``dynprog.FastSubMatrix``.
        abet: Alphabet passed through to ``dynprog.FastSubMatrix``.

    Returns:
        None.  The result is the mutated ``smb``.
    """
    # print() call form: the statement form is a SyntaxError on Python 3.
    print('AddToCtg', idum, 'added to ', fm)

    # matseq: the record's string for the sequence that is also in smb;
    # umatseq: the record's string for the sequence being added.
    if ilinei[0] == idm:
        matseq, umatseq = ilinei[2], ilinei[3]
    else:
        matseq, umatseq = ilinei[3], ilinei[2]

    # Align the matching string with the same sequence inside the contig.
    in_contig = smb[fm][idm]
    bv = dynprog.FastSubMatrix(submat, abet, in_contig, matseq)
    scm, ar = dynprog.FastSW(bv, in_contig, matseq)
    # Only the gap lists are needed; the aligned strings are discarded.
    _, _, vgap, hgap = SWBacktrace2(scm, ar, in_contig, matseq)

    # Gap the contig (GapCtgByList's return value is unused, so it is
    # relied on to update the contig in place).
    GapCtgByList(smb[fm], hgap)

    # Create the new sequence from the unmatched string and add it.
    newseq = LeadTrailGaps(GapSeqByList(umatseq, vgap))
    smb[fm][idum] = newseq
st2 += '-' v -= 1 elif arrow[v, h] == 1: st1 += '-' st2 += seq2[h - 1] h -= 1 elif arrow[v, h] == 2: st1 += seq1[v - 1] st2 += seq2[h - 1] v -= 1 h -= 1 if v == 0 and h == 0: ok = 0 #reverse the strings st1 = st1[::-1] st2 = st2[::-1] return st1, st2 st1, st2 = dpg.Backtrace(arrow, s1, s2) print(st1) print(st2) import blosum sq1 = 'KMTIFFMILK' sq2 = 'NQTIFF' subvals = dpg.FastSubValues(blosum.BLOSUM50, blosum.PBET, sq1, sq2) scmat, arrow = dpg.FastSW(subvals, sq1, sq2) t1, t2 = dpg.SWBacktrace(scmat, arrow, sq1, sq2) print(subvals, scmat, arrow, t1, t2)
# Exercise:
#   Create a second string with the form YAYBY, where
#     - Y is a different set of random letters, and
#     - each Y can have a different length.
#   Align the two sequences using Smith-Waterman.  The scoring matrix will
#   have two major maxima, one for the alignment of the A regions and one
#   for the alignment of the B regions.
#   Modify the program to extract both alignments.
import random as rd

import dynprog as dpg
from blosum import BLOSUM50, PBET

# Two shared regions (A and B) of 12 random letters each, drawn from PBET.
a, b = (''.join(rd.choices(PBET, k=12)) for _ in range(2))

# Both strings are built by the same helper around the same a and b.
Xstr = stringXAXBX(a, b)
Ystr = stringXAXBX(a, b)

# Smith-Waterman alignment of the two strings, then a single backtrace.
subvals = dpg.FastSubValues(BLOSUM50, PBET, Xstr, Ystr)
scmat, arrow = dpg.FastSW(subvals, Xstr, Ystr)
t1, t2 = dpg.SWBacktrace(scmat, arrow, Xstr, Ystr)
print(t1, t2)

# Disabled earlier experiment, kept for reference:
# import numpy as np
# import random as rd
# import string
# N=12
# r1 = "abc-ABC"
# r2 = "abd-ABd"
# r3 = ''.join(rd.choices(string.ascii_uppercase + string.ascii_lowercase, k=N))
# r4 = ''.join(rd.choices(string.ascii_uppercase + string.ascii_lowercase, k=N))
# r5 = ''.join(rd.choices(string.ascii_uppercase + string.ascii_lowercase, k=N))
# r6 = ''.join(rd.choices(string.ascii_uppercase + string.ascii_lowercase, k=N))