def Get_Pairwise(m1, m2):
    """Globally align the peptide sequences of two models.

    Input
        - m1, m2: the two objects handed to ``CaPPBuilder.build_peptides``
          (model1 and model2).
    Output:
        max_pair = the alignment with the highest score (element ``[2]`` of
        each alignment returned by ``pairwise2.align.globalxx``), or ``None``
        when either input yields no polypeptide or no alignment is produced.
    """
    ppb = pdb.CaPPBuilder()

    # When an input yields several polypeptides, only the sequence of the
    # last one is kept (same as the historical behaviour of this function).
    sequence_ref = None
    for polypeptide in ppb.build_peptides(m1):
        sequence_ref = polypeptide.get_sequence()

    sequence_sample = None
    for polypeptide in ppb.build_peptides(m2):
        sequence_sample = polypeptide.get_sequence()

    # No polypeptide could be built for at least one input; the original
    # author observed this when the two compared chains are of different
    # kinds. Checked explicitly instead of catching UnboundLocalError, which
    # would also mask unrelated programming errors.
    if sequence_ref is None or sequence_sample is None:
        return None

    align = pairwise2.align.globalxx(sequence_ref, sequence_sample)
    if not align:
        # max() on an empty list would raise ValueError.
        return None
    return max(align, key=lambda x: x[2])
def bonus_9_2(chain1, chain2, struct1, struct2):
    """
    Handle the case where the two chains have a different number of atoms:
    align the amino-acid sequences and keep only the CA atoms of residues
    that are paired with a residue (not a gap) in the other sequence.

    :param chain1: the first protein's chain
    :param chain2: the second protein's chain
    :param struct1: the first protein
    :param struct2: the second protein
    :return: ``(atoms1, atoms2)`` - two lists of CA atoms of equal length,
        where ``atoms1[k]`` and ``atoms2[k]`` sit in the same column of the
        first alignment returned by ``pairwise2.align.globalxx``. Two empty
        lists are returned when no alignment is produced.
    """
    ppbils = pdb.CaPPBuilder()
    peptide1, peptide2 = [], []
    # NOTE(review): filter_peptide_by_chain is defined elsewhere; it is
    # expected to fill the list passed as second argument in place.
    filter_peptide_by_chain(chain1, peptide1, ppbils, struct1)
    filter_peptide_by_chain(chain2, peptide2, ppbils, struct2)

    # converting list to peptide
    peptide1 = pdb.Polypeptide.Polypeptide(peptide1)
    peptide2 = pdb.Polypeptide.Polypeptide(peptide2)

    # converting peptide to sequence and CA list
    seq1 = peptide1.get_sequence()
    atoms1 = peptide1.get_ca_list()
    seq2 = peptide2.get_sequence()
    atoms2 = peptide2.get_ca_list()

    # align the sequences
    alignments = pairwise2.align.globalxx(seq1, seq2)
    if not alignments:
        # Nothing to pair up; indexing alignments[0] would raise IndexError.
        return [], []
    aligned1, aligned2 = alignments[0][0], alignments[0][1]

    # Walk the alignment column by column. Column indices are NOT residue
    # indices once a gap has been inserted, so each sequence keeps its own
    # cursor into its (ungapped) CA list. A pair of atoms is kept only when
    # both sequences have a residue in the column.
    paired1, paired2 = [], []
    pos1 = pos2 = 0
    for res1, res2 in zip(aligned1, aligned2):
        present1 = res1 != "-"
        present2 = res2 != "-"
        if present1 and present2:
            paired1.append(atoms1[pos1])
            paired2.append(atoms2[pos2])
        if present1:
            pos1 += 1
        if present2:
            pos2 += 1
    return paired1, paired2
def __init__(self, filepath):
    """Build a PDBFile object from one pdb file.

    Arguments :
    ------------
    filepath : string
        path to the pdb file
    """
    # -----
    # id = the 4 characters located just before the last 4 characters of
    # the path (presumably the PDB code in front of a ".pdb"-like
    # extension - verify against callers).
    self.id = filepath[-8:-4]

    # -----
    # parse the file :
    struct = PDB.PDBParser().get_structure("", filepath)

    # -----
    # copy the header entries onto the instance, in this exact order.
    # The flag tells whether the value is stored through str().
    header = struct.header
    header_fields = (
        ("keywords", False),
        ("name", False),
        ("head", False),
        ("deposition_date", False),
        ("release_date", False),
        ("structure_method", False),
        ("resolution", False),
        ("structure_reference", True),
        ("journal_reference", False),
        ("author", False),
        ("compound", True),
    )
    for field, as_text in header_fields:
        value = header[field]
        setattr(self, field, str(value) if as_text else value)

    # -----
    # Sequence and angles, one polypeptide at a time.
    builder = PDB.CaPPBuilder()
    self.seq = ""
    residue_count = 0  # running 1-based index over every polypeptide
    last = 0           # position of the last letter stored in self.seq

    # NOTE(review): polypeptides are paired with chains by position through
    # zip(); this presumably assumes one polypeptide per chain - confirm.
    # NOTE(review): self.chains and self.angles are not created in this
    # method; they must already exist on the instance or class - confirm
    # they are not shared between instances.
    for peptide, chain in zip(builder.build_peptides(struct), struct.get_chains()):
        print (peptide)
        letters = str(peptide.get_sequence())
        print (letters)
        self.seq += letters

        # Boundaries of this polypeptide inside the concatenated sequence
        # (1-based, inclusive); they do not come from the residue ids.
        # |-----------||-------------------|
        # sA         eA sB                eB
        first = last + 1
        print (first)
        last += len(letters)
        print (last)
        self.chains.append(Chain(chain.id, self.id, first, last))

        # Phi / psi angles. Some entries are None because :
        # - some atoms are missing, so the angle cannot be calculated
        # - there is no phi for the first residue
        # - there is no psi for the last residue
        phi_psi = peptide.get_phi_psi_list()
        print(phi_psi)
        for position, (phi, psi) in enumerate(phi_psi, residue_count + 1):
            self.angles.append(Angle(self.id, position, phi, psi))
        residue_count += len(phi_psi)