def mult_align(sum_dict, align_dict):
    """Build a MultipleSeqAlignment from per-position alignment data.

    One row is created for every key found in the ``pos_align_dict`` of
    position 1. Each row is then extended, position by position
    (1 .. ``len(align_dict)``), with the ``aa`` attribute stored under
    that key. Rows are added to the alignment in sorted key order, and each
    record's id is ``sum_dict[key].pdb2 + sum_dict[key].chain2``.

    A key that first appears after position 1 raises ``KeyError``,
    because rows are only seeded from the first position.
    """
    # Seed an empty sequence string for every key seen at the first position.
    sequences = dict.fromkeys(align_dict.abs(1).pos_align_dict, '')

    # Walk the positions (1-based) and append each key's residue to its row.
    for position in range(1, len(align_dict) + 1):
        column = align_dict.abs(position).pos_align_dict
        for key in column:
            sequences[key] += column[key].aa

    alpha = Alphabet.Gapped(Alphabet.IUPAC.extended_protein)
    alignment = MultipleSeqAlignment([], alphabet=alpha)
    for key in sorted(sequences):
        entry = sum_dict[key]
        record = SeqRecord(Seq(sequences[key], alpha),
                           entry.pdb2 + entry.chain2)
        alignment.append(record)
    return alignment
def get_alphabet(self):
    """Return the alphabet for this object's ``type``.

    The alphabet is looked up in ``self.alphabets`` by ``self.type``,
    falling back to ``Alphabet.generic_alphabet`` when the type is not
    present. If ``self.mol_seq`` is truthy and reports ``is_aligned``,
    the alphabet is wrapped in ``Alphabet.Gapped``; otherwise it is
    returned unchanged.
    """
    base = self.alphabets.get(self.type, Alphabet.generic_alphabet)
    aligned = self.mol_seq and self.mol_seq.is_aligned
    return Alphabet.Gapped(base) if aligned else base
# Show the consensus, then a position specific score matrix computed with
# gap characters ignored and the consensus passed as the axis sequence.
# (`consensus`, `summary` and `expected` are defined earlier, outside this
# fragment.)
print(consensus)
print("")
print(
    summary.pos_specific_score_matrix(chars_to_ignore=['-'], axis_seq=consensus))
print("")
# The alphabet is generic and has no declared gap character, so the
# expected frequencies and the characters to ignore must be provided
# explicitly.
print(
    summary.information_content(e_freq_table=expected, chars_to_ignore=['-']))
print("")
print("Trying a protein sequence with gaps and stops")
# Generic protein alphabet wrapped twice: Gapped with "-" and then
# HasStopCodon with "*", matching the symbols used in the sequences below.
alpha = Alphabet.HasStopCodon(
    Alphabet.Gapped(Alphabet.generic_protein, "-"), "*")
a = Alignment(alpha)
a.add_sequence("ID001", "MHQAIFIYQIGYP*LKSGYIQSIRSPEYDNW-")
a.add_sequence("ID002", "MH--IFIYQIGYAYLKSGYIQSIRSPEY-NW*")
a.add_sequence("ID003", "MHQAIFIYQIGYPYLKSGYIQSIRSPEYDNW*")
print(a)
# Underline the printed alignment with one "=" per alignment column.
print("=" * a.get_alignment_length())
s = SummaryInfo(a)
# Print both consensus flavours, each using "X" as the ambiguous symbol.
c = s.dumb_consensus(ambiguous="X")
print(c)
# `c` is rebound here; the gap consensus is what serves as axis_seq below.
c = s.gap_consensus(ambiguous="X")
print(c)
print("")
# Both the gap and the stop symbol are ignored for this score matrix.
print(s.pos_specific_score_matrix(chars_to_ignore=['-', '*'], axis_seq=c))