def optimal_alignment_gaff_yesimon(s, t, gap_init=-11, gap_ext=-1):
    """Score a global alignment of ``s`` and ``t`` with affine gap penalties.

    Substitution scores are read from ``blosum62``; each residue pair is
    looked up as ``(a, b)`` first and as ``(b, a)`` if that key is missing.
    The first position of a gap is charged ``gap_init`` and every further
    position of the same gap is charged ``gap_ext``.

    If ``s`` is shorter than ``t`` the two sequences are swapped before the
    table is filled, so the predecessor coordinates in the result always refer
    to the orientation with the longer sequence on the first axis.

    Args:
        s: First sequence (anything supporting ``len`` and indexing).
        t: Second sequence.
        gap_init: Score added for the first position of a gap.
        gap_ext: Score added for each additional position of a gap.

    Returns:
        The ``(score, predecessor)`` entry of the final table cell, where
        ``predecessor`` is the ``(i, j)`` cell the best score was reached
        from, or ``None`` when both sequences are empty.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations (the looked-up cost is then ``None``).
    """
    sl, tl = len(s), len(t)
    if sl < tl:
        return optimal_alignment_gaff_yesimon(t, s, gap_init, gap_ext)

    # m: best score per cell plus the cell it came from.
    # g / h: best score of an alignment ending in a gap that was extended
    # along the first / second axis respectively.
    m = {(0, 0): (0, None)}
    g = {}
    h = {}
    for i in range(1, sl + 1):
        m[(i, 0)] = (gap_init + gap_ext * (i - 1), (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (gap_init + gap_ext * (j - 1), (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        res_s, res_t = s[i - 1], t[j - 1]
        cost = blosum62.get((res_s, res_t))
        if cost is None:
            cost = blosum62.get((res_t, res_s))

        diag = m[(i - 1, j - 1)][0] + cost

        # Open a new gap from the best score, or extend an existing one.
        # A missing gap state is skipped explicitly instead of relying on
        # None comparing lower than every number.
        up = m[(i - 1, j)][0] + gap_init
        open_up = g.get((i - 1, j))
        if open_up is not None:
            up = max(up, open_up + gap_ext)

        left = m[(i, j - 1)][0] + gap_init
        open_left = h.get((i, j - 1))
        if open_left is not None:
            left = max(left, open_left + gap_ext)

        g[(i, j)] = up
        h[(i, j)] = left

        # Ties prefer the diagonal, then the first-axis gap.
        best = max(diag, up, left)
        if best == diag:
            m[(i, j)] = (best, (i - 1, j - 1))
        elif best == up:
            m[(i, j)] = (best, (i - 1, j))
        else:
            m[(i, j)] = (best, (i, j - 1))

    # Index by the lengths rather than by the loop variables, which are
    # unbound when the loop body never runs (an empty sequence).
    return m[(sl, tl)]
def maxAlignscorgMat(s, t):
    """Return the best global-alignment score of ``s`` against ``t``.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``); every gap position costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table. When one
        sequence is empty this is ``-5`` times the length of the other.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    string_s, string_t = len(s), len(t)

    # AlnScore maps (row, col) -> (best score, predecessor cell).
    AlnScore = {(0, 0): (0, None)}
    for m in range(1, string_s + 1):
        AlnScore[(m, 0)] = (m * -5, (m - 1, 0))
    for n in range(1, string_t + 1):
        AlnScore[(0, n)] = (n * -5, (0, n - 1))

    for m, n in product(range(1, string_s + 1), range(1, string_t + 1)):
        cost = blosum62.get((s[m - 1], t[n - 1]))
        if cost is None:
            cost = blosum62.get((t[n - 1], s[m - 1]))

        a = AlnScore[(m - 1, n - 1)][0] + cost
        b = AlnScore[(m - 1, n)][0] - 5
        c = AlnScore[(m, n - 1)][0] - 5

        # Ties prefer the diagonal move, then the move along the first axis.
        max_align = max(a, b, c)
        if a == max_align:
            AlnScore[(m, n)] = (max_align, (m - 1, n - 1))
        elif b == max_align:
            AlnScore[(m, n)] = (max_align, (m - 1, n))
        else:
            AlnScore[(m, n)] = (max_align, (m, n - 1))

    # Use the lengths, not the loop variables: those are unbound when either
    # sequence is empty and the loop body never executes.
    return AlnScore[(string_s, string_t)][0]
def globAlign(s, t):
    """Return the best global-alignment score under a constant gap penalty.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``). A gap costs 5 when it is opened and
    nothing more when it is extended, so a gap of any length costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    protein_s, protein_t = len(s), len(t)

    # maximumAlnScore maps (row, col) -> (best score, predecessor cell).
    # b / c hold the best score of an alignment ending in a gap running
    # along the first / second axis.
    maximumAlnScore = {(0, 0): (0, None)}
    b, c = {}, {}
    for m in range(1, protein_s + 1):
        maximumAlnScore[(m, 0)] = (-5, (m - 1, 0))
    for n in range(1, protein_t + 1):
        maximumAlnScore[(0, n)] = (-5, (0, n - 1))

    for m, n in product(range(1, protein_s + 1), range(1, protein_t + 1)):
        cost = blosum62.get((s[m - 1], t[n - 1]))
        if cost is None:
            cost = blosum62.get((t[n - 1], s[m - 1]))

        diag = maximumAlnScore[(m - 1, n - 1)][0] + cost

        # Either open a gap (-5) or continue an existing one for free.
        # A missing gap state is skipped explicitly; comparing an int with
        # None is an error on Python 3.
        up = maximumAlnScore[(m - 1, n)][0] - 5
        running_up = b.get((m - 1, n))
        if running_up is not None:
            up = max(up, running_up)

        left = maximumAlnScore[(m, n - 1)][0] - 5
        running_left = c.get((m, n - 1))
        if running_left is not None:
            left = max(left, running_left)

        b[(m, n)] = up
        c[(m, n)] = left

        # Ties prefer the diagonal move, then the first-axis gap.
        score = max(diag, up, left)
        if score == diag:
            maximumAlnScore[(m, n)] = (score, (m - 1, n - 1))
        elif score == up:
            maximumAlnScore[(m, n)] = (score, (m - 1, n))
        else:
            maximumAlnScore[(m, n)] = (score, (m, n - 1))

    # Use the lengths, not the loop variables, so empty input is handled.
    return maximumAlnScore[(protein_s, protein_t)][0]
def calcBLOSUMfeatures(SAV_coords):
    """Build a structured array with one BLOSUM62 score per variant.

    Args:
        SAV_coords: Sized iterable of records indexable by ``'aa_wt'`` and
            ``'aa_mut'`` (the two residues of each variant).

    Returns:
        A NumPy structured array of length ``len(SAV_coords)`` with a single
        float field ``'BLOSUM'``. Each entry is the ``blosum62`` value for
        ``(aa_wt, aa_mut)``, or for the swapped pair when that key is absent.
    """
    blosum_dtype = np.dtype([('BLOSUM', 'f')])
    n_variants = len(SAV_coords)
    features = np.zeros(n_variants, dtype=blosum_dtype)

    for idx, variant in enumerate(SAV_coords):
        wild_type, mutant = variant['aa_wt'], variant['aa_mut']
        # The swapped key is the fallback when the direct key is missing.
        swapped_score = blosum62.get((mutant, wild_type))
        features[idx] = blosum62.get((wild_type, mutant), swapped_score)

    return features
def score(A, B):
    """Sum the BLOSUM62 scores of ``A`` and ``B`` compared position by position.

    Args:
        A: First sequence; its length determines how many positions are read.
        B: Second sequence, indexed at the same positions as ``A``.

    Returns:
        The sum of the per-position scores (``0`` when ``A`` is empty).
    """
    pair_scores = []
    for pos in range(len(A)):
        res_a, res_b = A[pos], B[pos]
        # Try get(A, B) and also get(B, A): the blosum table is stored as a
        # triangular matrix, so only one of the two keys exists.
        pair_scores.append(
            blosum62.get((res_a, res_b), blosum62.get((res_b, res_a)))
        )
    return sum(pair_scores)
def get_score(s, t):
    """Return the best global-alignment score under a constant gap penalty.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``). Opening a gap costs 5 and extending it
    costs nothing, so a gap of any length costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    sl, tl = len(s), len(t)

    # m maps (i, j) -> (best score, predecessor cell); g / h track the best
    # score of an alignment ending in a gap along the first / second axis.
    m = {(0, 0): (0, None)}
    g, h = {}, {}
    for i in range(1, sl + 1):
        m[(i, 0)] = (-5, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (-5, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))

        diag = m[(i - 1, j - 1)][0] + cost

        # Open a gap (-5) or keep an existing one running at no extra cost.
        # The gap state is absent on the first row/column; skip it explicitly
        # because an int cannot be compared with None on Python 3.
        up = m[(i - 1, j)][0] - 5
        running_up = g.get((i - 1, j))
        if running_up is not None:
            up = max(up, running_up)

        left = m[(i, j - 1)][0] - 5
        running_left = h.get((i, j - 1))
        if running_left is not None:
            left = max(left, running_left)

        g[(i, j)] = up
        h[(i, j)] = left

        # Ties prefer the diagonal move, then the first-axis gap.
        v = max(diag, up, left)
        if v == diag:
            m[(i, j)] = (v, (i - 1, j - 1))
        elif v == up:
            m[(i, j)] = (v, (i - 1, j))
        else:
            m[(i, j)] = (v, (i, j - 1))

    # Use the lengths, not the loop variables, so empty input is handled.
    return m[(sl, tl)][0]
def get_max_alignment(s, t):
    """Return the best global-alignment score of ``s`` against ``t``.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``); every gap position costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table. When one
        sequence is empty this is ``-5`` times the length of the other.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    sl, tl = len(s), len(t)

    # m maps (i, j) -> (best score, predecessor cell).
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (i * -5, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (j * -5, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))

        d = m[(i - 1, j - 1)][0] + cost
        l = m[(i - 1, j)][0] - 5
        u = m[(i, j - 1)][0] - 5

        # Ties prefer the diagonal move, then the move along the first axis.
        b = max(d, l, u)
        if d == b:
            m[(i, j)] = (b, (i - 1, j - 1))
        elif l == b:
            m[(i, j)] = (b, (i - 1, j))
        else:
            m[(i, j)] = (b, (i, j - 1))

    # Use the lengths, not the loop variables: those are unbound when either
    # sequence is empty and the loop body never executes.
    return m[(sl, tl)][0]
def get_score(s, t):
    """Return the best global-alignment score under a constant gap penalty.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``). Opening a gap costs 5 and extending it
    costs nothing, so a gap of any length costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    sl, tl = len(s), len(t)

    # m maps (i, j) -> (best score, predecessor cell); g / h track the best
    # score of an alignment ending in a gap along the first / second axis.
    m = {(0, 0): (0, None)}
    g, h = {}, {}
    for i in range(1, sl + 1):
        m[(i, 0)] = (-5, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (-5, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))

        diag = m[(i - 1, j - 1)][0] + cost

        # Open a gap (-5) or keep an existing one running at no extra cost.
        # The gap state is absent on the first row/column; skip it explicitly
        # because an int cannot be compared with None on Python 3.
        up = m[(i - 1, j)][0] - 5
        running_up = g.get((i - 1, j))
        if running_up is not None:
            up = max(up, running_up)

        left = m[(i, j - 1)][0] - 5
        running_left = h.get((i, j - 1))
        if running_left is not None:
            left = max(left, running_left)

        g[(i, j)] = up
        h[(i, j)] = left

        # Ties prefer the diagonal move, then the first-axis gap.
        v = max(diag, up, left)
        if v == diag:
            m[(i, j)] = (v, (i - 1, j - 1))
        elif v == up:
            m[(i, j)] = (v, (i - 1, j))
        else:
            m[(i, j)] = (v, (i, j - 1))

    # Use the lengths, not the loop variables, so empty input is handled.
    return m[(sl, tl)][0]
def get_max_alignment(s, t):
    """Return the best global-alignment score of ``s`` against ``t``.

    Substitution scores come from ``blosum62`` (each pair is tried as
    ``(a, b)`` and then ``(b, a)``); every gap position costs 5.

    Args:
        s: First sequence.
        t: Second sequence.

    Returns:
        The score stored in the last cell of the alignment table. When one
        sequence is empty this is ``-5`` times the length of the other.

    Raises:
        TypeError: If a residue pair is missing from ``blosum62`` in both
            orientations.
    """
    sl, tl = len(s), len(t)

    # m maps (i, j) -> (best score, predecessor cell).
    m = {(0, 0): (0, None)}
    for i in range(1, sl + 1):
        m[(i, 0)] = (i * -5, (i - 1, 0))
    for j in range(1, tl + 1):
        m[(0, j)] = (j * -5, (0, j - 1))

    for i, j in product(range(1, sl + 1), range(1, tl + 1)):
        cost = blosum62.get((s[i - 1], t[j - 1]))
        if cost is None:
            cost = blosum62.get((t[j - 1], s[i - 1]))

        d = m[(i - 1, j - 1)][0] + cost
        l = m[(i - 1, j)][0] - 5
        u = m[(i, j - 1)][0] - 5

        # Ties prefer the diagonal move, then the move along the first axis.
        b = max(d, l, u)
        if d == b:
            m[(i, j)] = (b, (i - 1, j - 1))
        elif l == b:
            m[(i, j)] = (b, (i - 1, j))
        else:
            m[(i, j)] = (b, (i, j - 1))

    # Use the lengths, not the loop variables: those are unbound when either
    # sequence is empty and the loop body never executes.
    return m[(sl, tl)][0]
def match_callback(c1, c2):
    """Return the substitution score for the residue pair ``(c1, c2)``.

    The pair is looked up in ``blosum62`` as ``(c1, c2)`` and, if that key is
    missing, as ``(c2, c1)``. Only when neither orientation is present does
    the score fall back to ``1`` for identical characters and ``-4``
    otherwise.

    Args:
        c1: Character from the first sequence.
        c2: Character from the second sequence.

    Returns:
        The table score for the pair, or the ``1`` / ``-4`` fallback.
    """
    pair_score = blosum62.get((c1, c2))
    if pair_score is None:
        # The table may hold each unordered pair under a single key order,
        # so a miss on (c1, c2) does not mean the pair is unknown.
        pair_score = blosum62.get((c2, c1))
    if pair_score is None:
        pair_score = 1 if c1 == c2 else -4
    return pair_score
iy = [[0] * (len(s2) + 1) for i in xrange(len(s1) + 1)] for i in xrange(1, len(s1) + 1): m[i][0] = -100 ix[i][0] = -b - (i - 1) * a iy[i][0] = -100 for j in xrange(1, len(s2) + 1): m[0][j] = -100 ix[0][j] = -100 iy[0][j] = -b - (j - 1) * a l1, l2 = len(s1), len(s2) for i in xrange(1, l1 + 1): for j in xrange(1, l2 + 1): sc = blosum62.get((s1[i - 1], s2[j - 1])) if sc == None: sc = blosum62.get((s2[j - 1], s1[i - 1])) m[i][j] = max(m[i - 1][j - 1] + sc, ix[i - 1][j - 1] + sc, iy[i - 1][j - 1] + sc) ix[i][j] = max(m[i - 1][j] - b, ix[i - 1][j] - a, iy[i - 1][j] - b) iy[i][j] = max(m[i][j - 1] - b, ix[i - 1][j] - b, iy[i][j - 1] - a) print max(m[l1][l2], ix[l1][l2], iy[l1][l2]) def traceback(dist, s1, s2): t1, t2, i, j = '', '', len(s1), len(s2) while not (i == 0 and j == 0): l, d, t = dist[i][j - 1], dist[i - 1][j - 1], dist[i - 1][j] if s1[i - 1] == s2[j - 1] or (d == max(l, d, t) and d > l and d > t):
def create_figures(feature_to_weighted_sums, weight_totals, min_total, report_dir, filetype):
    """Plot amino-acid feature correlations next to BLOSUM62 and compare them.

    Two heatmaps are saved under ``report_dir`` (``aa_corr_to.pdf`` and
    ``blosum62.pdf``), and three lines are printed: the amino acids excluded
    on each side and the Pearson correlation between the two matrices.

    Args:
        feature_to_weighted_sums: Mapping from feature name to an array of
            weighted sums. Only names of the form ``aa_to_<X>``, with ``<X>``
            a single uppercase letter, are used.
        weight_totals: Array the weighted sums are divided by; presumably the
            same shape as each weighted-sums array (TODO confirm).
        min_total: Entries whose ``weight_totals`` value is below this are
            left out of the feature vectors.
        report_dir: Directory for the figures; must support the ``/``
            operator (presumably a ``pathlib.Path``).
        filetype: Passed to ``plt.savefig`` as ``format``.
    """
    # Collect every amino-acid letter that appears in a blosum62 key.
    aa_blosum = set()
    for aa1, aa2 in blosum62.keys():
        aa_blosum.add(aa1)
        aa_blosum.add(aa2)
    include_mask = weight_totals >= min_total
    p = re.compile(r'aa_to_([A-Z])$')
    aa_to_features = {}
    for feature_name, weighted_sums in feature_to_weighted_sums.items():
        m = p.match(feature_name)
        if m:
            # Group 1 is the amino-acid letter captured from the feature name.
            aa = m[1]
            # Masked-out entries are tagged with -1 and then filtered away.
            # NOTE(review): a genuine mean of exactly -1 would be dropped too.
            mean_by_heads = np.where(include_mask, weighted_sums / weight_totals, -1)
            feature_vector = mean_by_heads.flatten()
            feature_vector = feature_vector[feature_vector != -1]
            aa_to_features[aa] = feature_vector
    aas = sorted(aa_to_features.keys())
    aas_set = set(aas)
    print('Excluding following AAs not in feature set', aa_blosum - aas_set)
    print('Excluding following AAs not in blosum62', aas_set - aa_blosum)
    # Only amino acids present on both sides are compared.
    aa_list = sorted(list(aas_set & aa_blosum))
    n_aa = len(aa_list)
    # Pairwise Pearson correlation between the amino acids' feature vectors.
    corr = np.zeros((n_aa, n_aa))
    for i, aa1 in enumerate(aa_list):
        vector1 = aa_to_features[aa1]
        for j, aa2 in enumerate(aa_list):
            if i == j:
                # Diagonal is set to None; presumably this blanks the
                # self-comparison cells in the heatmap (TODO confirm).
                corr[i, j] = None
            else:
                vector2 = aa_to_features[aa2]
                corr[i, j], _ = pearsonr(vector1, vector2)
    cmap = 'Blues'
    ax = sns.heatmap(corr, cmap=cmap, vmin=-0.5)
    ax.set_xticklabels(aa_list)
    ax.set_yticklabels(aa_list)
    # NOTE(review): the file name always ends in .pdf while the format follows
    # `filetype`; confirm whether the extension should track `filetype`.
    plt.savefig(report_dir / f'aa_corr_to.pdf', format=filetype)
    plt.close()
    # Same layout, filled with BLOSUM62 scores for each ordered pair.
    blosum = np.zeros((n_aa, n_aa))
    for i, aa1 in enumerate(aa_list):
        for j, aa2 in enumerate(aa_list):
            if i == j:
                blosum[i, j] = None
            else:
                # Fall back to the swapped key when (aa1, aa2) is not stored.
                if blosum62.get((aa1, aa2)) is not None:
                    blosum[i, j] = blosum62.get((aa1, aa2))
                else:
                    blosum[i, j] = blosum62.get((aa2, aa1))
    ax = sns.heatmap(blosum, cmap=cmap, vmin=-4, vmax=4)
    ax.set_xticklabels(aa_list)
    ax.set_yticklabels(aa_list)
    # NOTE(review): same hard-coded .pdf extension as above.
    plt.savefig(report_dir / f'blosum62.pdf', format=filetype)
    plt.close()
    # Compare the two matrices over the strict lower triangle only, which
    # skips the diagonal and counts each unordered pair once.
    corr_scores = []
    blos_scores = []
    for i in range(n_aa):
        for j in range(i):
            corr_scores.append(corr[i, j])
            blos_scores.append(blosum[i, j])
    print('Pearson Correlation between feature corr and blosum', pearsonr(corr_scores, blos_scores)[0])
def match_callback(c1, c2):
    """Return the substitution score for the residue pair ``(c1, c2)``.

    The pair is looked up in ``blosum62`` as ``(c1, c2)`` and, if that key is
    missing, as ``(c2, c1)``. Only when neither orientation is present does
    the score fall back to ``1`` for identical characters and ``-4``
    otherwise.

    Args:
        c1: Character from the first sequence.
        c2: Character from the second sequence.

    Returns:
        The table score for the pair, or the ``1`` / ``-4`` fallback.
    """
    for key in ((c1, c2), (c2, c1)):
        # The table may hold each unordered pair under a single key order,
        # so both orientations are tried before giving up on the table.
        table_score = blosum62.get(key)
        if table_score is not None:
            return table_score
    return 1 if c1 == c2 else -4
def score_function(x, y):
    """Return the ``blosum62`` score for ``x`` against ``y``.

    The key ``(x, y)`` is used when present; otherwise the swapped key
    ``(y, x)`` is looked up, giving ``None`` if that is missing as well.
    """
    direct_key = (x, y)
    if direct_key in blosum62:
        return blosum62[direct_key]
    return blosum62.get((y, x))
def score_function(x, y):
    """Score one alignment column, charging ``-5`` when either side is a gap.

    Args:
        x: Character from the first sequence, or ``'-'`` for a gap.
        y: Character from the second sequence, or ``'-'`` for a gap.

    Returns:
        ``-5`` if ``x`` or ``y`` is ``'-'``; otherwise the ``blosum62`` value
        for ``(x, y)``, falling back to the swapped key ``(y, x)``.
    """
    if x != '-' and y != '-':
        swapped_score = blosum62.get((y, x))
        return blosum62.get((x, y), swapped_score)
    return -5
import sys from itertools import product from Bio.SubsMat.MatrixInfo import blosum62 if __name__ == '__main__': s, t = sys.stdin.read().strip().split('\n') sl, tl = len(s), len(t) m = {(0, 0): (0, None)} f = {} g = {} h = {} m.update({((i, 0), (-11 - i, (i-1, 0))) for i in range(1, sl+1)}) m.update({((0, i), (-11 - i, (0, i-1))) for i in range(1, tl+1)}) for i, j in product(range(1, sl+1), range(1, tl+1)): cost = blosum62.get((s[i-1], t[j-1])) if cost == None: cost = blosum62.get((t[j-1], s[i-1])) f[(i, j)] = m[(i-1, j-1)][0] + cost gg = g.get((i-1, j)) if gg != None: gg -= 1 hh = h.get((i, j-1)) if hh != None: hh -= 1 g[(i, j)] = max(m[(i-1, j)][0] - 12, gg) h[(i, j)] = max(m[(i, j-1)][0] - 12, hh) v = max(f[(i, j)], g[(i, j)], h[(i, j)]) if v == f[(i, j)]: m[(i, j)] = (v, (i-1, j-1)) elif v == g[(i, j)]:
def blosum62_score(x, y):
    """Return the ``blosum62`` score for the residue pair ``(x, y)``.

    The pair is looked up as ``(x, y)`` first and as ``(y, x)`` when that key
    is missing.

    Returns:
        The stored score, or ``None`` if neither orientation is present.
    """
    cost = blosum62.get((x, y))
    if cost is None:
        cost = blosum62.get((y, x))
    return cost