def bestInfo(self, id1, id2, alignment, coevolution):
    """Post-process the score file of one coevolution run.

    Reads ``self.dirname + alignment + "_" + coevolution + ".txt"`` and
    hands it to ``bestResults`` (output suffix ``_best.txt``).  Depending on
    the ``results_histogram`` / ``results_heatmap`` settings read through
    ``LP``, it also calls ``drawHistogram`` (``_hg.png``) and
    ``drawHeatmap`` (``_hm.png``) on the same score file.

    Args:
        id1: identifier passed to ``parseSurfacePDB``, ``parseInterfacePDB``
            and ``drawHeatmap``.
        id2: identifier passed to ``parseSurfacePDB`` and ``drawHeatmap``.
        alignment: name used as the first part of every file name.
        coevolution: name used as the second part of every file name.

    Returns:
        None.
    """
    seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                         self.chain1, self.chain2, self.parameterfile,
                         self.dirname)
    histogram = LP(self.parameterfile, "results_histogram")
    heatmap = LP(self.parameterfile, "results_heatmap")
    best_info = LP(self.parameterfile, "best_results")

    # Structural annotations are optional: when a lookup fails the
    # corresponding list simply stays empty.  Only ordinary errors are
    # swallowed, so Ctrl-C and SystemExit still propagate.
    surface1 = []
    surface2 = []
    interface = []
    try:
        surface1 = seq.parseSurfacePDB(id1)
        surface2 = seq.parseSurfacePDB(id2)
    except Exception:
        pass
    try:
        interface = seq.parseInterfacePDB(id1)
    except Exception:
        pass

    # Every file of a run shares this prefix; build it once.
    prefix = self.dirname + alignment + "_" + coevolution
    scores_path = prefix + ".txt"

    bestResults(scores_path, prefix + "_best.txt", best_info,
                surface1, surface2, interface)

    # NOTE(review): the explicit "== True" is kept on purpose.  LP's return
    # type is not visible here, and a plain truthiness test would also
    # accept values such as a non-empty string.
    if histogram == True:
        drawHistogram(scores_path, prefix + "_hg.png")
    if heatmap == True:
        drawHeatmap(id1, id2, scores_path, prefix + "_hm.png")
def coevolAnalysis(self, file1, file2, id1, id2, chain1, chain2, alignment,
                   coevolution):
    """Score every column pair of two alignments and write the result file.

    Both inputs are cut with ``class_alignment.cutAlignment`` and transposed
    into columns.  Each pair (column i of the first alignment, column j of
    the second) is scored with the method named by ``coevolution``.  Non-zero
    scores are divided by the largest score, and one line per pair is
    written to ``self.dirname + alignment + "_" + coevolution + ".txt"``.

    Recognised methods: "mi", "mie", "rcwmi", "cpvn", "clm", "vol", "omes",
    "pearson", "spearman", "mcbasc", "quartets", "sca", "elsc".  Any other
    value yields an empty output file.

    ``Flash`` is called with the method title and then once per column of
    the first alignment.

    Args:
        file1, file2: passed to ``cutAlignment`` together with the ids.
        id1, id2: identifiers passed to ``cutAlignment`` and
            ``matchResiduePosition``.
        chain1, chain2: passed to ``matchResiduePosition``.
        alignment: passed to ``cutAlignment``; also part of the output name.
        coevolution: name of the scoring method (see above).

    Returns:
        None.  The scores are only written to the output file.

    Raises:
        StandardError: if the two cut alignments differ in length.
    """
    seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                         self.chain1, self.chain2, self.parameterfile,
                         self.dirname)
    aln = class_alignment(self.id1, self.id2, self.alignment,
                          self.parameterfile, self.dirname)
    alignment1 = aln.cutAlignment(file1, id1, alignment)
    alignment2 = aln.cutAlignment(file2, id2, alignment)
    # Explicit check rather than ``assert``: asserts vanish under "python -O".
    if len(alignment1) != len(alignment2):
        raise StandardError(
            "Alignments must have the same number of sequences")

    # Residue labels are optional; without them the output falls back to
    # 1-based column numbers.  Only ordinary errors are swallowed.
    protein1 = []
    protein2 = []
    try:
        protein1 = seq.matchResiduePosition(id1, chain1)
        protein2 = seq.matchResiduePosition(id2, chain2)
    except Exception:
        pass

    alignment1 = list(alignment1)
    columns1 = transpose(alignment1)
    alignment2 = list(alignment2)
    columns2 = transpose(alignment2)
    n1 = len(columns1)
    n2 = len(columns2)

    def tabulate(pair_score, n_rows, n_cols):
        """Score each (i, j) pair, announcing every row through Flash."""
        table = dict()
        for i in range(n_rows):
            Flash('Column ' + str(i))
            for j in range(n_cols):
                table[(i, j)] = pair_score(i, j)
        return table

    def normalise(raw):
        """Divide every non-zero score by the largest score in ``raw``."""
        # NOTE(review): if the largest score is 0 while some score is
        # negative, the division below raises ZeroDivisionError; the
        # intended result for that case is unclear, so it is left as is.
        max_val = max(raw[(i, j)] for i in range(n1) for j in range(n2))
        scaled = dict()
        for i in range(n1):
            for j in range(n2):
                value = raw[(i, j)]
                if value != 0.0:
                    scaled[(i, j)] = value * 1.0 / max_val
                else:
                    scaled[(i, j)] = 0.0
        return scaled

    def information_scorer(measure):
        """Scorer for measures taking indices, columns and probabilities."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        return lambda i, j: measure(i, j, columns1, columns2, pD1, pD2)

    def weighted_information_scorer():
        """Scorer for "rcwmi": MI weighed by its row and column totals."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        # Each pairwise MI is computed once and reused for the row totals,
        # the column totals and the final score (assumes mutualInformation
        # returns the same value for the same arguments).
        mi = dict()
        for i in range(n1):
            for j in range(n2):
                mi[(i, j)] = mutualInformation(i, j, columns1, columns2,
                                               pD1, pD2)
        i_all = dict()
        for i in range(n1):
            i_all[i] = sum(mi[(i, j)] for j in range(n2))
        all_j = dict()
        for j in range(n2):
            all_j[j] = sum(mi[(i, j)] for i in range(n1))
        n = len(columns1[0])
        return lambda i, j: rowColumnWeighed(mi[(i, j)], i_all[i],
                                             all_j[j], n)

    def contact_scorer(matrix_name):
        """Scorer that looks residues up in the named score matrix."""
        score_matrix = mapMatrix(matrix_name)

        def pair_score(i, j):
            res1 = str(alignment1[0][i])
            res2 = str(alignment2[0][j])
            average = []
            for a, b in zip(columns1[i], columns2[j]):
                if a in aa and b in aa:
                    # NOTE(review): the lookup uses the first-row residues
                    # (res1, res2), not (a, b), so every appended value is
                    # identical -- confirm this is intended.
                    average.append(
                        float(matchScore(res1, res2, score_matrix)))
            return mean(average)
        return pair_score

    def substitution_scorer(correlate):
        """Scorer correlating two columns via the "MCLACHLAN" matrix."""
        score_matrix = mapMatrix("MCLACHLAN")
        N = len(columns1[0])

        def pair_score(i, j):
            d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
            d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
            return correlate(d_matrix1, d_matrix2, N)
        return pair_score

    pair_score = None
    n_rows = n1
    n_cols = n2
    if coevolution == "mi":
        Flash('Mutual Information')
        pair_score = information_scorer(mutualInformation)
    elif coevolution == "mie":
        Flash('Mutual Information by Pair Entropy')
        pair_score = information_scorer(miEntropy)
    elif coevolution == "rcwmi":
        Flash('Row and Column Weighed Mutual Information')
        pair_score = weighted_information_scorer()
    elif coevolution == "cpvn":
        Flash('Contact Preferences, Volume Normalized')
        pair_score = contact_scorer("CPVN")
    elif coevolution == "clm":
        Flash('Contact PDB-derived Likelihood Matrix')
        pair_score = contact_scorer("CLM")
        # This method sizes its loops from the first sequence, as before.
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "vol":
        Flash('Residue-residue Volume Normalized')
        pair_score = contact_scorer("VOL")
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "omes":
        Flash('Observed Minus Expected Squared')
        pair_score = lambda i, j: covarianceOMES(columns1[i], columns2[j])
    elif coevolution == "pearson":
        Flash("Pearson's correlation")
        pair_score = substitution_scorer(pearsonsCorrelation)
    elif coevolution == "spearman":
        Flash("Spearman's rank correlation")
        pair_score = substitution_scorer(spearmansCorrelation)
    elif coevolution == "mcbasc":
        Flash('McLachlan Based Substitution Correlation')
        pair_score = substitution_scorer(mcbascCorrelation)
    elif coevolution == "quartets":
        Flash('Quartets')
        pair_score = lambda i, j: quartetsCorrelation(columns1[i],
                                                      columns2[j])
    elif coevolution == "sca":
        Flash('Statistical Coupling Analysis')
        pair_score = lambda i, j: perturbationSCA(columns1[i], columns2[j],
                                                  j, columns2)
    elif coevolution == "elsc":
        Flash('Explicit Likelihood of Subset Covariation')
        pair_score = lambda i, j: perturbationELSC(columns1[i], columns2[j],
                                                   j, columns2)

    # An unrecognised method leaves ``info`` empty, so the file is empty too.
    info = dict()
    if pair_score is not None:
        info = normalise(tabulate(pair_score, n_rows, n_cols))

    output = self.dirname + alignment + "_" + coevolution + ".txt"
    results = open(output, "w")
    try:
        for i, j in sorted(info):
            if protein1 != [] and protein2 != []:
                first, second = protein1[i], protein2[j]
            elif protein1 != [] and protein2 == []:
                # Only the first lookup produced labels: use it for both.
                first, second = protein1[i], protein1[j]
            else:
                first, second = str(i + 1), str(j + 1)
            results.write("%s %s %s\n"
                          % (first, second, round(info[(i, j)], 4)))
    finally:
        # Close the file even if a label lookup or a write fails.
        results.close()
def coevolAnalysis(self, file1, file2, id1, id2, chain1, chain2, alignment,
                   coevolution):
    """Score every column pair of two alignments and write the result file.

    Both inputs are cut with ``class_alignment.cutAlignment`` and transposed
    into columns.  Each pair (column i of the first alignment, column j of
    the second) is scored with the method named by ``coevolution``.  Non-zero
    scores are divided by the largest score, and one line per pair is
    written to ``self.dirname + alignment + "_" + coevolution + ".txt"``.

    Recognised methods: "mi", "mie", "rcwmi", "cpvn", "clm", "vol", "omes",
    "pearson", "spearman", "mcbasc", "quartets", "sca", "elsc".  Any other
    value yields an empty output file.

    ``Flash`` is called with the method title and then once per column of
    the first alignment.

    Args:
        file1, file2: passed to ``cutAlignment`` together with the ids.
        id1, id2: identifiers passed to ``cutAlignment`` and
            ``matchResiduePosition``.
        chain1, chain2: passed to ``matchResiduePosition``.
        alignment: passed to ``cutAlignment``; also part of the output name.
        coevolution: name of the scoring method (see above).

    Returns:
        None.  The scores are only written to the output file.

    Raises:
        StandardError: if the two cut alignments differ in length.
    """
    seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                         self.chain1, self.chain2, self.parameterfile,
                         self.dirname)
    aln = class_alignment(self.id1, self.id2, self.alignment,
                          self.parameterfile, self.dirname)
    alignment1 = aln.cutAlignment(file1, id1, alignment)
    alignment2 = aln.cutAlignment(file2, id2, alignment)
    # Explicit check rather than ``assert``: asserts vanish under "python -O".
    if len(alignment1) != len(alignment2):
        raise StandardError(
            "Alignments must have the same number of sequences")

    # Residue labels are optional; without them the output falls back to
    # 1-based column numbers.  Only ordinary errors are swallowed.
    protein1 = []
    protein2 = []
    try:
        protein1 = seq.matchResiduePosition(id1, chain1)
        protein2 = seq.matchResiduePosition(id2, chain2)
    except Exception:
        pass

    alignment1 = list(alignment1)
    columns1 = transpose(alignment1)
    alignment2 = list(alignment2)
    columns2 = transpose(alignment2)
    n1 = len(columns1)
    n2 = len(columns2)

    def tabulate(pair_score, n_rows, n_cols):
        """Score each (i, j) pair, announcing every row through Flash."""
        table = dict()
        for i in range(n_rows):
            Flash('Column ' + str(i))
            for j in range(n_cols):
                table[(i, j)] = pair_score(i, j)
        return table

    def normalise(raw):
        """Divide every non-zero score by the largest score in ``raw``."""
        # NOTE(review): if the largest score is 0 while some score is
        # negative, the division below raises ZeroDivisionError; the
        # intended result for that case is unclear, so it is left as is.
        max_val = max(raw[(i, j)] for i in range(n1) for j in range(n2))
        scaled = dict()
        for i in range(n1):
            for j in range(n2):
                value = raw[(i, j)]
                if value != 0.0:
                    scaled[(i, j)] = value * 1.0 / max_val
                else:
                    scaled[(i, j)] = 0.0
        return scaled

    def information_scorer(measure):
        """Scorer for measures taking indices, columns and probabilities."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        return lambda i, j: measure(i, j, columns1, columns2, pD1, pD2)

    def weighted_information_scorer():
        """Scorer for "rcwmi": MI weighed by its row and column totals."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        # Each pairwise MI is computed once and reused for the row totals,
        # the column totals and the final score (assumes mutualInformation
        # returns the same value for the same arguments).
        mi = dict()
        for i in range(n1):
            for j in range(n2):
                mi[(i, j)] = mutualInformation(i, j, columns1, columns2,
                                               pD1, pD2)
        i_all = dict()
        for i in range(n1):
            i_all[i] = sum(mi[(i, j)] for j in range(n2))
        all_j = dict()
        for j in range(n2):
            all_j[j] = sum(mi[(i, j)] for i in range(n1))
        n = len(columns1[0])
        return lambda i, j: rowColumnWeighed(mi[(i, j)], i_all[i],
                                             all_j[j], n)

    def contact_scorer(matrix_name):
        """Scorer that looks residues up in the named score matrix."""
        score_matrix = mapMatrix(matrix_name)

        def pair_score(i, j):
            res1 = str(alignment1[0][i])
            res2 = str(alignment2[0][j])
            average = []
            for a, b in zip(columns1[i], columns2[j]):
                if a in aa and b in aa:
                    # NOTE(review): the lookup uses the first-row residues
                    # (res1, res2), not (a, b), so every appended value is
                    # identical -- confirm this is intended.
                    average.append(
                        float(matchScore(res1, res2, score_matrix)))
            return mean(average)
        return pair_score

    def substitution_scorer(correlate):
        """Scorer correlating two columns via the "MCLACHLAN" matrix."""
        score_matrix = mapMatrix("MCLACHLAN")
        N = len(columns1[0])

        def pair_score(i, j):
            d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
            d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
            return correlate(d_matrix1, d_matrix2, N)
        return pair_score

    pair_score = None
    n_rows = n1
    n_cols = n2
    if coevolution == "mi":
        Flash('Mutual Information')
        pair_score = information_scorer(mutualInformation)
    elif coevolution == "mie":
        Flash('Mutual Information by Pair Entropy')
        pair_score = information_scorer(miEntropy)
    elif coevolution == "rcwmi":
        Flash('Row and Column Weighed Mutual Information')
        pair_score = weighted_information_scorer()
    elif coevolution == "cpvn":
        Flash('Contact Preferences, Volume Normalized')
        pair_score = contact_scorer("CPVN")
    elif coevolution == "clm":
        Flash('Contact PDB-derived Likelihood Matrix')
        pair_score = contact_scorer("CLM")
        # This method sizes its loops from the first sequence, as before.
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "vol":
        Flash('Residue-residue Volume Normalized')
        pair_score = contact_scorer("VOL")
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "omes":
        Flash('Observed Minus Expected Squared')
        pair_score = lambda i, j: covarianceOMES(columns1[i], columns2[j])
    elif coevolution == "pearson":
        Flash("Pearson's correlation")
        pair_score = substitution_scorer(pearsonsCorrelation)
    elif coevolution == "spearman":
        Flash("Spearman's rank correlation")
        pair_score = substitution_scorer(spearmansCorrelation)
    elif coevolution == "mcbasc":
        Flash('McLachlan Based Substitution Correlation')
        pair_score = substitution_scorer(mcbascCorrelation)
    elif coevolution == "quartets":
        Flash('Quartets')
        pair_score = lambda i, j: quartetsCorrelation(columns1[i],
                                                      columns2[j])
    elif coevolution == "sca":
        Flash('Statistical Coupling Analysis')
        pair_score = lambda i, j: perturbationSCA(columns1[i], columns2[j],
                                                  j, columns2)
    elif coevolution == "elsc":
        Flash('Explicit Likelihood of Subset Covariation')
        pair_score = lambda i, j: perturbationELSC(columns1[i], columns2[j],
                                                   j, columns2)

    # An unrecognised method leaves ``info`` empty, so the file is empty too.
    info = dict()
    if pair_score is not None:
        info = normalise(tabulate(pair_score, n_rows, n_cols))

    output = self.dirname + alignment + "_" + coevolution + ".txt"
    results = open(output, "w")
    try:
        for i, j in sorted(info):
            if protein1 != [] and protein2 != []:
                first, second = protein1[i], protein2[j]
            elif protein1 != [] and protein2 == []:
                # Only the first lookup produced labels: use it for both.
                first, second = protein1[i], protein1[j]
            else:
                first, second = str(i + 1), str(j + 1)
            results.write("%s %s %s\n"
                          % (first, second, round(info[(i, j)], 4)))
    finally:
        # Close the file even if a label lookup or a write fails.
        results.close()
def coevolAnalysis(self, file1, file2, id1, id2, chain1, chain2, alignment,
                   coevolution):
    """Score every column pair of two alignments and write the result file.

    Both inputs are cut with ``class_alignment.cutAlignment`` and transposed
    into columns.  Each pair (column i of the first alignment, column j of
    the second) is scored with the method named by ``coevolution``, and one
    line per pair is written to
    ``"./Results/" + alignment + "_" + coevolution + ".txt"``.

    Methods written as raw scores: "mi", "mie", "rcwmi", "cpvnmie", "cpvn",
    "clm", "vol", "pearson", "mcbasc".  Methods whose non-zero scores are
    divided by the largest score: "omes", "spearman", "quartets", "sca",
    "elsc".  Any other value yields an empty output file.

    Args:
        file1, file2: passed to ``cutAlignment`` together with the ids.
        id1, id2: identifiers passed to ``cutAlignment`` and
            ``matchResiduePosition``.
        chain1, chain2: passed to ``matchResiduePosition``.
        alignment: passed to ``cutAlignment``; also part of the output name.
        coevolution: name of the scoring method (see above).

    Returns:
        None.  The scores are only written to the output file.

    Raises:
        StandardError: if the two cut alignments differ in length.
    """
    seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                         self.chain1, self.chain2)
    aln = class_alignment(self.id1, self.id2, self.alignment)
    alignment1 = aln.cutAlignment(file1, id1, alignment)
    alignment2 = aln.cutAlignment(file2, id2, alignment)
    # Explicit check rather than ``assert``: asserts vanish under "python -O".
    if len(alignment1) != len(alignment2):
        raise StandardError(
            "Alignments must have the same number of sequences")

    # Residue labels are optional; without them the output falls back to
    # 1-based column numbers.  Only ordinary errors are swallowed.
    protein1 = []
    protein2 = []
    try:
        protein1 = seq.matchResiduePosition(id1, chain1)
        protein2 = seq.matchResiduePosition(id2, chain2)
    except Exception:
        pass

    alignment1 = list(alignment1)
    columns1 = transpose(alignment1)
    alignment2 = list(alignment2)
    columns2 = transpose(alignment2)
    n1 = len(columns1)
    n2 = len(columns2)

    def tabulate(pair_score, n_rows, n_cols):
        """Score each (i, j) pair and return the scores keyed by (i, j)."""
        table = dict()
        for i in range(n_rows):
            for j in range(n_cols):
                table[(i, j)] = pair_score(i, j)
        return table

    def normalise(raw):
        """Divide every non-zero score by the largest score in ``raw``."""
        # NOTE(review): if the largest score is 0 while some score is
        # negative, the division below raises ZeroDivisionError; the
        # intended result for that case is unclear, so it is left as is.
        max_val = max(raw[(i, j)] for i in range(n1) for j in range(n2))
        scaled = dict()
        for i in range(n1):
            for j in range(n2):
                value = raw[(i, j)]
                if value != 0.0:
                    scaled[(i, j)] = value * 1.0 / max_val
                else:
                    scaled[(i, j)] = 0.0
        return scaled

    def information_scorer(measure):
        """Scorer for measures taking indices, columns and probabilities."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        return lambda i, j: measure(i, j, columns1, columns2, pD1, pD2)

    def weighted_information_scorer():
        """Scorer for "rcwmi": MI weighed by its row and column totals."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)
        # Each pairwise MI is computed once and reused for the row totals,
        # the column totals and the final score (assumes mutualInformation
        # returns the same value for the same arguments).
        mi = dict()
        for i in range(n1):
            for j in range(n2):
                mi[(i, j)] = mutualInformation(i, j, columns1, columns2,
                                               pD1, pD2)
        i_all = dict()
        for i in range(n1):
            i_all[i] = sum(mi[(i, j)] for j in range(n2))
        all_j = dict()
        for j in range(n2):
            all_j[j] = sum(mi[(i, j)] for i in range(n1))
        n = len(columns1[0])
        return lambda i, j: rowColumnWeighed(mi[(i, j)], i_all[i],
                                             all_j[j], n)

    def contact_information_scorer():
        """Scorer for "cpvnmie": miEntropy fed to contactPreferenceMI."""
        pD1 = probabilityDict(columns1)
        pD2 = probabilityDict(columns2)

        def pair_score(i, j):
            res1 = str(alignment1[0][i])
            res2 = str(alignment2[0][j])
            mie = miEntropy(i, j, columns1, columns2, pD1, pD2)
            return contactPreferenceMI(mie, res1, res2)
        return pair_score

    def contact_scorer(matrix_name):
        """Scorer that looks residues up in the named score matrix."""
        score_matrix = mapMatrix(matrix_name)

        def pair_score(i, j):
            res1 = str(alignment1[0][i])
            res2 = str(alignment2[0][j])
            average = []
            for a, b in zip(columns1[i], columns2[j]):
                if a in aa and b in aa:
                    # NOTE(review): the lookup uses the first-row residues
                    # (res1, res2), not (a, b), so every appended value is
                    # identical -- confirm this is intended.
                    average.append(
                        float(matchScore(res1, res2, score_matrix)))
            return mean(average)
        return pair_score

    def substitution_scorer(correlate):
        """Scorer correlating two columns via the "MCLACHLAN" matrix."""
        score_matrix = mapMatrix("MCLACHLAN")
        N = len(columns1[0])

        def pair_score(i, j):
            d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
            d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
            return correlate(d_matrix1, d_matrix2, N)
        return pair_score

    pair_score = None
    rescale = False  # only some methods are normalised in this version
    n_rows = n1
    n_cols = n2
    if coevolution == "mi":
        pair_score = information_scorer(mutualInformation)
    elif coevolution == "mie":
        pair_score = information_scorer(miEntropy)
    elif coevolution == "rcwmi":
        pair_score = weighted_information_scorer()
    elif coevolution == "cpvnmie":
        pair_score = contact_information_scorer()
    elif coevolution == "cpvn":
        pair_score = contact_scorer("CPVN")
    elif coevolution == "clm":
        pair_score = contact_scorer("CLM")
        # This method sizes its loops from the first sequence, as before.
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "vol":
        pair_score = contact_scorer("VOL")
        n_rows = len(alignment1[0])
        n_cols = len(alignment2[0])
    elif coevolution == "omes":
        pair_score = lambda i, j: covarianceOMES(columns1[i], columns2[j])
        rescale = True
    elif coevolution == "pearson":
        pair_score = substitution_scorer(pearsonsCorrelation)
    elif coevolution == "spearman":
        pair_score = substitution_scorer(spearmansCorrelation)
        rescale = True
    elif coevolution == "mcbasc":
        pair_score = substitution_scorer(mcbascCorrelation)
    elif coevolution == "quartets":
        pair_score = lambda i, j: quartetsCorrelation(columns1[i],
                                                      columns2[j])
        rescale = True
    elif coevolution == "sca":
        pair_score = lambda i, j: perturbationSCA(columns1[i], columns2[j],
                                                  j, columns2)
        rescale = True
    elif coevolution == "elsc":
        pair_score = lambda i, j: perturbationELSC(columns1[i], columns2[j],
                                                   j, columns2)
        rescale = True

    # An unrecognised method leaves ``info`` empty, so the file is empty too.
    info = dict()
    if pair_score is not None:
        info = tabulate(pair_score, n_rows, n_cols)
        if rescale:
            info = normalise(info)

    output = "./Results/" + alignment + "_" + coevolution + ".txt"
    results = open(output, "w")
    try:
        for i, j in sorted(info):
            if protein1 != [] and protein2 != []:
                first, second = protein1[i], protein2[j]
            elif protein1 != [] and protein2 == []:
                # Only the first lookup produced labels: use it for both.
                first, second = protein1[i], protein1[j]
            else:
                first, second = str(i + 1), str(j + 1)
            results.write("%s %s %s\n"
                          % (first, second, round(info[(i, j)], 4)))
    finally:
        # Close the file even if a label lookup or a write fails.
        results.close()