def sims(f, debug=False, align="fit"):
    """Run the SIMS driver on input ``f`` and print the outcome.

    The two sequences read by ``ro.fafsa_values(f)`` are handed to
    ``ra.optimal_alignment`` together with a ``ra.FixedCost(1, -1)`` scoring
    object and a gap value of -1 for both gap arguments.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded unchanged to ``ra.optimal_alignment``.
    :param align: alignment mode forwarded to ``ra.optimal_alignment``.

    Prints, one per line, the first value returned by the aligner
    (presumably the alignment score) followed by the two aligned strings.
    Nothing is returned.
    """
    sequences = ro.fafsa_values(f)
    scoring = ra.FixedCost(1, -1)
    gap = -1
    s, t = sequences
    outcome = ra.optimal_alignment(s, t, scoring, gap, gap, align=align, debug=debug)
    value, (aligned_s, aligned_t) = outcome
    for line in (value, aligned_s, aligned_t):
        print(line)
def glob(f):
    """Align the two sequences found in ``f`` and print the result.

    ``optimal_alignment`` is called with the ``ra.BLOSUM62`` scoring object
    and a gap value of -5.

    :param f: input forwarded to ``ro.fafsa_values``.

    Prints the first value returned by the aligner (presumably the score)
    and then the two aligned strings, each on its own line.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment(first, second, ra.BLOSUM62, -5)
    value, aligned = outcome
    aligned_first, aligned_second = aligned
    print(value)
    print(aligned_first)
    print(aligned_second)
def gcon(f, debug=False, optimal_alignment=optimal_alignment_separate_arrays):
    """Run the GCON driver on ``f`` with an injectable alignment routine.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to the alignment routine.
    :param optimal_alignment: callable invoked as
        ``optimal_alignment(x, y, ra.BLOSUM62, -5, 0, debug=debug)``.
    :return: the first value produced by the alignment routine
        (presumably the alignment score).

    The two aligned strings are printed; the returned value is not.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment(first, second, ra.BLOSUM62, -5, 0, debug=debug)
    value, (aligned_first, aligned_second) = outcome
    print(aligned_first)
    print(aligned_second)
    return value
def gaff(f, score=ra.BLOSUM62, gap_init=-11, gap_ext=-1, debug=False):
    """Run the GAFF driver on ``f`` and print the result.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param score: scoring object passed to ``ra.optimal_alignment``.
    :param gap_init: first gap argument passed to the aligner.
    :param gap_ext: second gap argument passed to the aligner.
    :param debug: forwarded to the aligner.

    Prints the first value returned by ``ra.optimal_alignment`` and the two
    aligned strings, one per line.  Nothing is returned.
    """
    first, second = ro.fafsa_values(f)
    outcome = ra.optimal_alignment(first, second, score, gap_init, gap_ext, debug=debug)
    value, (aligned_first, aligned_second) = outcome
    for line in (value, aligned_first, aligned_second):
        print(line)
def mult(f, match_score=0, mismatch_score=-1, gap_score=-1, debug=False):
    """Align the sequences in ``f`` with ``align4`` and print the result.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param match_score: value the pair scorer gives to equal symbols.
    :param mismatch_score: value the pair scorer gives to unequal symbols.
    :param gap_score: forwarded to ``align4`` as ``gap_score``.
    :param debug: accepted but not used by this function.

    Prints, newline-separated, element ``[0]`` of the last cell of the table
    returned by ``align4`` followed by everything ``alignment4`` yields.
    """
    sequences = ro.fafsa_values(f)

    def pair_score(a, b):
        # Symbol-level scorer handed to align4.
        if a == b:
            return match_score
        return mismatch_score

    table = align4(sequences, pair_score, gap_score=gap_score)
    lines = [table[-1, -1, -1, -1][0]]
    lines.extend(alignment4(sequences, table))
    print(ro.join_list(lines, delimiter='\n'))
def edta(f, debug=False):
    """Print the result of ``edit_distance_alignment`` for the pair in ``f``.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``edit_distance_alignment``.

    Prints the first returned value (presumably the edit distance) and then
    the two aligned strings, one per line.  Nothing is returned.
    """
    first, second = ro.fafsa_values(f)
    outcome = edit_distance_alignment(first, second, debug=debug)
    value, (aligned_first, aligned_second) = outcome
    for line in (value, aligned_first, aligned_second):
        print(line)
def osym(f, debug=1):
    """Combine forward and reversed global-alignment matrices and print totals.

    For the two sequences in ``f`` this builds three equally shaped arrays:

    * the matrix from ``ra.global_alignment_score_matrix`` for ``(x, y)``
      with its last row and column dropped;
    * the same for the reversed sequences, flipped along both axes;
    * the table of ``score(x[i], y[j])`` for every symbol pair.

    Their element-wise sum is ``m``.  The bottom-right entry of ``m`` and the
    sum of all its entries are printed.

    NOTE(review): the first printed value is ``m[-1, -1]``, not the maximum
    of ``m`` - confirm that this is the intended quantity.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``ra.global_alignment_score_matrix``.
    """
    x, y = ro.fafsa_values(f)
    score = ra.FixedCost(1, -1)
    gap_score = -1

    forward = ra.global_alignment_score_matrix((x, y), score, gap_score, debug=debug)

    reversed_pair = (''.join(reversed(x)), ''.join(reversed(y)))
    backward = ra.global_alignment_score_matrix(reversed_pair, ra.FixedCost(1, -1), -1, debug=debug)
    # Flip both axes so that the entry at (i, j) lines up with the forward matrix.
    backward = np.flipud(np.fliplr(backward[:-1, :-1]))

    pairwise = np.array([[score(x[i], y[j]) for j in xrange(len(y))]
                         for i in xrange(len(x))])

    m = forward[:-1, :-1] + backward + pairwise
    print(m[-1, -1])
    print(np.sum(m))
def gcon(f, debug=False):
    """Run the GCON driver on ``f``: print the alignment and return its value.

    ``ra.optimal_alignment`` is called with ``ra.BLOSUM62`` and the gap
    arguments -5 and 0.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``ra.optimal_alignment``.
    :return: the first value produced by the aligner (presumably the score),
        which is also printed ahead of the two aligned strings.
    """
    first, second = ro.fafsa_values(f)
    outcome = ra.optimal_alignment(first, second, ra.BLOSUM62, -5, 0, debug=debug)
    value, (aligned_first, aligned_second) = outcome
    for line in (value, aligned_first, aligned_second):
        print(line)
    return value
def laff(f, debug=0):
    '''Main driver to solve the LOCA problem.

    Reads two sequences with ro.fafsa_values(f), aligns them with
    optimal_alignment_array_form using ra.BLOSUM62, gap arguments -11 and -1
    and '-' as the gap symbol, and returns the three results (first returned
    value, then both aligned strings) joined by newlines via ro.join_list.

    When debug is truthy the input sequences and the value of ra.score_of
    for the produced alignment are printed as well.

    NOTE(review): the docstring names LOCA although the function is called
    laff - confirm which problem this driver targets.
    NOTE(review): the original indentation was lost; the three prints are
    assumed to all belong to the `if debug:` body - confirm.
    '''
    x, y = ro.fafsa_values(f)
    d, (xx, yy) = optimal_alignment_array_form(x, y, ra.BLOSUM62, -11, -1, debug=debug, gap_symbol='-')
    if debug:
        print x
        print y
        # Re-score the produced alignment with the same parameters as a check.
        print ra.score_of(xx, yy, ra.BLOSUM62, -11, -1)  # , debug=True)
    return ro.join_list((d, xx, yy), delimiter='\n')
def oap(f, debug=False):
    """Run ``semi_global_alignment`` on the pair in ``f`` and print the result.

    Symbols are scored +1 when equal and -2 otherwise; -2 is also passed as
    the gap argument.  The end cell is whichever is larger: the maximum of
    the last column or the maximum of the last row of the returned matrix
    (the last row wins ties).

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``semi_global_alignment``.

    Prints the matrix, both candidate end cells, the value at the chosen end
    cell, and the newline-joined output of ``alignment_from_matrix``.
    """
    x, y = ro.fafsa_values(f)

    def score(a, b):
        if a == b:
            return 1
        return -2

    c, I, J = semi_global_alignment((x, y), score, -2, debug=debug)

    # Best cell in the last column and best cell in the last row.
    row_max = (np.argmax(c[:, -1]), len(y))
    col_max = (len(x), np.argmax(c[-1, :]))
    print(c)
    print('%s %s' % (row_max, col_max))

    if c[row_max] > c[col_max]:
        overall_max = row_max
    else:
        overall_max = col_max
    print(c[overall_max])

    aligned = alignment_from_matrix((x, y), overall_max, c, I, J)
    print(ro.join_list(aligned, delimiter='\n'))
def gaff(f, score=ra.BLOSUM62, gap_init=-11, gap_ext=-1, debug=False,
         optimal_alignment=optimal_alignment_separate_arrays):
    """Run the GAFF driver on ``f`` with an injectable alignment routine.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param score: scoring object passed to the aligner and to ``ra.score_of``.
    :param gap_init: first gap argument for the aligner and ``ra.score_of``.
    :param gap_ext: second gap argument for the aligner and ``ra.score_of``.
    :param debug: forwarded to the aligner.
    :param optimal_alignment: callable invoked as
        ``optimal_alignment(x, y, score, gap_init, gap_ext, debug=debug)``.
    :return: the first value produced by the aligner (presumably the score).

    Prints the two input sequences, the value of ``ra.score_of`` for the
    produced alignment, an empty line, and then the aligner's value and the
    two aligned strings.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment(first, second, score, gap_init, gap_ext, debug=debug)
    value, (aligned_first, aligned_second) = outcome

    print(first)
    print(second)
    # Independent re-scoring of the alignment that was just produced.
    print(ra.score_of(aligned_first, aligned_second, score, gap_init, gap_ext))
    print('')
    print(value)
    print(aligned_first)
    print(aligned_second)
    return value
def smgb(f, debug=False):
    """Main driver to solve this problem.

    Reads two sequences with ro.fafsa_values(f), runs semi_global_alignment
    on them and picks the end cell as the larger of the last-column maximum
    and the last-row maximum of the returned table c (the last row wins
    ties).  Prints the value at that cell and the newline-joined output of
    alignment_from_matrix.

    debug is forwarded to semi_global_alignment; when it is >= 2 extra
    diagnostic output is printed.

    NOTE(review): the original indentation was lost; `print overall_max` is
    assumed to belong to the `if debug >= 2:` body - confirm.
    """
    x, y = ro.fafsa_values(f)  # [r.seq for r in SeqIO.parse('rosalind_smgb.dat', 'fasta')]
    c, p = semi_global_alignment((x, y), debug=debug)
    # row_max = (np.argmax(c[:, -1]), len(y))
    # col_max = (len(x), np.argmax(c[-1, :]))
    m, n = len(x), len(y)
    # c is indexed as c[i][j] here (nested indexing instead of c[i, j]).
    row_max = (np.argmax([c[i][n] for i in xrange(m + 1)]), n)
    col_max = (m, np.argmax(c[-1]))
    overall_max = row_max if c[row_max[0]][row_max[1]] > c[col_max[0]][col_max[1]] else col_max
    if debug >= 2:
        print np.array(c)
        print np.array(p)
        # print row_max, col_max
        print overall_max
    print c[overall_max[0]][overall_max[1]]
    print ro.join_list(alignment_from_matrix((x, y), overall_max, c, p), delimiter="\n")
def laff_yesimon(f):
    """Print a local alignment with affine gaps for the two sequences in ``f``.

    Symbols are scored with ``ra.BLOSUM62``; opening a gap costs 12 and
    extending one costs 1 (both hard-coded).  Three tables are filled:

    * ``F[i, j]`` - best value ending with ``s[i-1]`` aligned to ``t[j-1]``;
    * ``G[i, j]`` - best value ending in a gap (either direction);
    * ``V[i, j]`` - ``max(F, G, 0)``.

    ``T`` records the move that produced each ``V`` entry: ``'D'`` diagonal,
    ``'L'`` from ``(i-1, j)``, ``'U'`` from ``(i, j-1)``, empty for a restart.

    Prints the maximum of ``V`` as an int, then the two aligned substrings
    without gap symbols.  Nothing is returned.

    Fixes relative to the previous version:

    * the direction test compared only the first candidate with ``v``; the
      second operand of ``or`` was evaluated for truthiness, so almost every
      gap was recorded as ``'L'``;
    * the traceback now consumes a character of the sequence whose index
      actually moves (``s`` for ``'L'``, ``t`` for ``'U'``);
    * ``T`` uses an explicit one-character unicode dtype instead of the
      abstract ``np.character``.

    :param f: input forwarded to ``ro.fafsa_values``.
    """
    score = ra.BLOSUM62
    s, t = ro.fafsa_values(f)
    sl, tl = len(s), len(t)

    T = np.zeros((sl + 1, tl + 1), dtype='U1')
    V = np.zeros((sl + 1, tl + 1))
    F = np.zeros((sl + 1, tl + 1))
    G = np.zeros((sl + 1, tl + 1))

    for i, j in product(xrange(1, sl + 1), xrange(1, tl + 1)):
        cost = score(s[i - 1], t[j - 1])
        F[i, j] = V[i - 1, j - 1] + cost
        # Best gap-ending value arriving from (i-1, j) and from (i, j-1):
        # open a new gap (-12) or extend an existing one (-1).
        from_prev_i = max(F[i - 1, j] - 12, G[i - 1, j] - 1)
        from_prev_j = max(F[i, j - 1] - 12, G[i, j - 1] - 1)
        G[i, j] = max(from_prev_i, from_prev_j)
        V[i, j] = v = max(F[i, j], G[i, j], 0)
        if v == F[i, j]:
            T[i, j] = 'D'
        elif v == G[i, j]:
            # v equals one of the (i-1, j) candidates exactly when it equals
            # their maximum.
            T[i, j] = 'L' if v == from_prev_i else 'U'
        elif v == 0:
            T[i, j] = ''

    i, j = np.unravel_index(np.argmax(V), V.shape)
    print(int(V[i, j]))

    # Characters are collected back-to-front and reversed at the end.
    sa = []
    ta = []
    while T[i, j]:
        direction = T[i, j]
        if direction == 'D':
            sa.append(s[i - 1])
            ta.append(t[j - 1])
            i, j = i - 1, j - 1
        elif direction == 'L':
            # Move to (i-1, j): s[i-1] is aligned against a gap.
            sa.append(s[i - 1])
            i = i - 1
        elif direction == 'U':
            # Move to (i, j-1): t[j-1] is aligned against a gap.
            ta.append(t[j - 1])
            j = j - 1
    print(''.join(reversed(sa)))
    print(''.join(reversed(ta)))
def sseq(f):
    """Return the output of ``sseq_indices`` as a space-separated string.

    :param f: input forwarded to ``ro.fafsa_values``; its values are passed
        to ``sseq_indices`` as positional arguments.
    :return: the yielded items converted with ``str`` and joined by spaces.
    """
    sequences = ro.fafsa_values(f)
    indices = sseq_indices(*sequences)
    return " ".join(str(index) for index in indices)
def pmch(f):
    """Return ``count('A')! * count('G')!`` for the first sequence in ``f``.

    :param f: input forwarded to ``ro.fafsa_values``; only element ``[0]``
        of the result is used.
    :return: product of the factorials of the 'A' and 'G' symbol counts.
    """
    first = ro.fafsa_values(f)[0]
    counts = Counter(first)
    a_total = counts['A']
    g_total = counts['G']
    return factorial(a_total) * factorial(g_total)
def corr(f):
    """Return the pairs produced by ``match_errors`` as ``a->b`` lines.

    The original author estimated the average cost as probably
    O(n m log m) for n strings of size m; that depends on ``match_errors``.

    :param f: input forwarded to ``ro.fafsa_values``.
    :return: one ``'%s->%s'`` formatted line per item, joined by newlines.
    """
    sequences = ro.fafsa_values(f)
    lines = []
    for match in match_errors(sequences):
        lines.append('%s->%s' % match)
    return '\n'.join(lines)
def ctea(f, debug=False):
    """Return ``edit_distance_and_count`` for the pair of sequences in ``f``.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``edit_distance_and_count``.
    :return: whatever ``edit_distance_and_count`` returns.
    """
    first, second = ro.fafsa_values(f)
    # 2**27 - 1 = 134217727; presumably the modulus applied to the count -
    # confirm against edit_distance_and_count.
    third_argument = 2 ** 27 - 1
    return edit_distance_and_count(first, second, third_argument, debug=debug)
def assemble_fafsa(file_name, fraction, hash_type='rolling'):
    """Call ``assemble`` on the values read from ``file_name``.

    :param file_name: input forwarded to ``ro.fafsa_values``.
    :param fraction: forwarded positionally to ``assemble``.
    :param hash_type: forwarded to ``assemble`` as ``hash_type``.
    :return: whatever ``assemble`` returns.
    """
    sequences = ro.fafsa_values(file_name)
    return assemble(sequences, fraction, hash_type=hash_type)
# NOTE(review): the statements down to `return s` are the tail of a function
# whose header lies outside this excerpt; they are reproduced unchanged.
# They walk back from (m, n), prepending x[i - 1] to s whenever the two
# current symbols are equal and otherwise stepping towards the larger of
# c[i - 1, j] and c[i, j - 1].
i, j, s = m, n, ''
while i > 0 and j > 0:
    if x[i - 1] == y[j - 1]:
        i, j, s = i - 1, j - 1, x[i - 1] + s
    else:
        if c[i - 1, j] > c[i, j - 1]:
            i -= 1
        else:
            j -= 1
return s


def lcsq2(x, y):
    """Return a longest common subsequence of x and y.

    Only two rows of the table are kept: c/c_old hold subsequence lengths
    and s/s_old hold the matching subsequence strings.  The arguments are
    swapped when x is shorter so that the rows have len(shorter) + 1 slots.
    """
    # Integrated DP+backtracking, O(mn) time, O(min(m,n)) storage
    # (the slots of s hold strings, so memory also grows with their length)
    m, n = len(x), len(y)
    if m < n:
        return lcsq2(y, x)
    c_old, c, s_old, s = [0] * (n + 1), [0] * (n + 1), [''] * (n + 1), [''] * (n + 1)
    for xi in x:
        # Snapshot the previous row before overwriting c and s in place.
        c_old[:] = c[:]; s_old[:] = s[:]
        for j, yj in enumerate(y, 1):
            if xi == yj:
                c[j], s[j] = c_old[j - 1] + 1, s_old[j - 1] + xi
            else:
                if c_old[j] > c[j - 1]:
                    c[j], s[j] = c_old[j], s_old[j]
                else:
                    c[j], s[j] = c[j - 1], s[j - 1]
    return s[-1]


# File-based entry point: applies lcsq2 to the values read from f.
lcsq_solution = lambda f: lcsq2(*ro.fafsa_values(f))

if __name__ == "__main__":
    # Run both implementations on the same inputs so the outputs can be
    # compared, then solve the sample and full data files.
    print lcsq('TAC', 'TCA')
    print lcsq2('TAC', 'TCA')
    print lcsq('AACCTTGG', 'ACACTGTGA')
    print lcsq2('AACCTTGG', 'ACACTGTGA')
    print lcsq_solution('rosalind_lcsq_sample.dat')
    print lcsq_solution('rosalind_lcsq.dat')
def tran(f):
    """Return the ratio of the two counts produced by ``count_tran``.

    :param f: input forwarded to ``ro.fafsa_values``; its values are passed
        to ``count_tran`` as positional arguments.
    :return: ``transitions / transversions`` as a float.
    :raises ZeroDivisionError: if the second count is zero.
    """
    transitions, transversions = count_tran(*ro.fafsa_values(f))
    # Force true division: with two ints, Python 2's ``/`` would silently
    # truncate the ratio unless the module enables __future__.division.
    return float(transitions) / transversions
def gaff_burschka(f):
    """Return ``optimal_alignment_gaff_burschka`` for the pair in ``f``.

    The helper is called with the gap arguments -11 and -1.

    :param f: input forwarded to ``ro.fafsa_values``.
    :return: whatever ``optimal_alignment_gaff_burschka`` returns.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment_gaff_burschka(first, second, -11, -1)
    return outcome
s, t, u, v, i, j, k, l = '', '', '', '', len(x), len(y), len(z), len(w) # print i, j, k, l while i or j or k or l: ip, jp, kp, lp = c[i, j, k, l][1] s, t, u, v = \ (gap_symbol if ip == i else x[i - 1]) + s, \ (gap_symbol if jp == j else y[j - 1]) + t, \ (gap_symbol if kp == k else z[k - 1]) + u, \ (gap_symbol if lp == l else w[l - 1]) + v i, j, k, l = ip, jp, kp, lp # print i, j, k,l return s, t, u, v def test_align2(f, (i, j), match_score=0, mismatch_score= -1, gap_score= -1, debug=False): '''Two-string test.''' s = ro.fafsa_values(f) score = lambda x, y: match_score if x == y else mismatch_score print (s[i], s[j]) c = ra.global_alignment_matrix((s[i], s[j]), score, gap_score=gap_score) # print c print ro.join_list([c[-1, -1][0]] + list(ra.alignment_from_matrix((s[i], s[j]), c)), delimiter='\n') def test_align3(f, (i, j, k), match_score=0, mismatch_score= -1, gap_score= -1, debug=False): '''Three-string test.''' s = ro.fafsa_values(f) score = lambda x, y: match_score if x == y else mismatch_score print (s[i], s[j], s[k]) c = align3((s[i], s[j], s[k]), score, gap_score=gap_score) # print c print ro.join_list([c[-1, -1, -1][0]] + list(alignment3((s[i], s[j], s[k]), c)), delimiter='\n')
def gaff_yesimon(f):
    """Return ``optimal_alignment_gaff_yesimon`` for the pair in ``f``.

    :param f: input forwarded to ``ro.fafsa_values``.
    :return: whatever ``optimal_alignment_gaff_yesimon`` returns.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment_gaff_yesimon(first, second)
    return outcome
def gcon_yesimon(f):
    """Return ``optimal_alignment_yesimon`` for the pair in ``f``.

    The helper receives -5 as its third argument.

    :param f: input forwarded to ``ro.fafsa_values``.
    :return: whatever ``optimal_alignment_yesimon`` returns.
    """
    first, second = ro.fafsa_values(f)
    outcome = optimal_alignment_yesimon(first, second, -5)
    return outcome
def loca(f, debug=False):
    """Run the LOCA driver on ``f`` and return the result as text.

    ``ra.optimal_alignment`` is called with ``ra.PAM250``, -5 for both gap
    arguments, ``align='local'`` and an empty gap symbol.

    :param f: input forwarded to ``ro.fafsa_values``.
    :param debug: forwarded to ``ra.optimal_alignment``.
    :return: the aligner's first value and the two aligned strings joined
        by newlines via ``ro.join_list``.
    """
    first, second = ro.fafsa_values(f)
    outcome = ra.optimal_alignment(first, second, ra.PAM250, -5, -5,
                                   align='local', debug=debug, gap_symbol='')
    value, (aligned_first, aligned_second) = outcome
    parts = (value, aligned_first, aligned_second)
    return ro.join_list(parts, delimiter='\n')
def mmch(f):
    """Return both matching counts for the first sequence in ``f``.

    The sequence and its length are printed before the counts are computed.

    :param f: input forwarded to ``ro.fafsa_values``; only element ``[0]``
        of the result is used.
    :return: a pair ``(num_max_matching(s), long(num_max_matching_luiz(s)))``.
    """
    sequence = ro.fafsa_values(f)[0]
    print(sequence)
    print(len(sequence))
    own_count = num_max_matching(sequence)
    luiz_count = long(num_max_matching_luiz(sequence))
    return own_count, luiz_count
def edit(f):
    """Return the results of both edit-distance implementations for ``f``.

    The input is parsed once and the same values are handed to both
    implementations.  The previous version parsed ``f`` twice.

    :param f: input forwarded to ``ro.fafsa_values``; its values are passed
        to both functions as positional arguments.
    :return: a pair ``(edit_distance(...), edit_distance_lean(...))``.
    """
    # Materialise the parsed values so they can be unpacked twice.
    sequences = tuple(ro.fafsa_values(f))
    return edit_distance(*sequences), edit_distance_lean(*sequences)