Exemplo n.º 1
0
def _last_peptide_sequence(builder, entity):
    """Return the sequence of the last polypeptide built from entity, or None."""
    sequence = None
    for polypeptide in builder.build_peptides(entity):
        sequence = polypeptide.get_sequence()
    return sequence


def Get_Pairwise(m1, m2):
    """Complete a pairwise global alignment of both models' sequences.

    For each model, the sequence of the LAST polypeptide produced by
    ``CaPPBuilder.build_peptides`` is used (earlier polypeptides are
    overwritten, exactly as in the previous implementation).

    Input
    -m1, m2: the two entities handed to ``CaPPBuilder.build_peptides``
    Output:
    max_pair = the alignment with the highest score (element 2 of each
               alignment), or None when either model yields no polypeptide
               or when no alignment is produced
    """
    ppb = pdb.CaPPBuilder()

    # A model without any polypeptide has nothing to align.  The original
    # note attributes this case to comparing chains of different kinds.  It
    # used to be detected by catching UnboundLocalError; an explicit check
    # no longer hides unrelated errors of that type.
    sequence_ref = _last_peptide_sequence(ppb, m1)
    if sequence_ref is None:
        return None
    sequence_sample = _last_peptide_sequence(ppb, m2)
    if sequence_sample is None:
        return None

    align = pairwise2.align.globalxx(sequence_ref, sequence_sample)
    if not align:
        # max() would raise ValueError on an empty list of alignments.
        return None
    return max(align, key=lambda alignment: alignment[2])
Exemplo n.º 2
0
def bonus_9_2(chain1, chain2, struct1, struct2):
    """
    Handle the case where the number of atoms differs: align the amino-acid
    sequences and keep only the CA atoms of residues that are aligned to a
    residue (not to a gap) in the other sequence.
    :param chain1: the first protein's chain
    :param chain2: the second protein's chain
    :param struct1: the first protein
    :param struct2: the second protein
    :return: two lists of CA atoms (atoms1, atoms2) of equal length, where
        entries at the same position belong to the same alignment column;
        both lists are empty when no alignment is produced
    """
    ppbils = pdb.CaPPBuilder()
    peptide1, peptide2 = [], []
    # filter_peptide_by_chain is expected to fill the lists in place; its
    # return value is not used.
    filter_peptide_by_chain(chain1, peptide1, ppbils, struct1)
    filter_peptide_by_chain(chain2, peptide2, ppbils, struct2)

    # converting list to peptide
    peptide1 = pdb.Polypeptide.Polypeptide(peptide1)
    peptide2 = pdb.Polypeptide.Polypeptide(peptide2)

    # converting peptide to sequence and CA list
    seq1 = peptide1.get_sequence()
    atoms1 = peptide1.get_ca_list()
    seq2 = peptide2.get_sequence()
    atoms2 = peptide2.get_ca_list()

    # align the sequences
    alignments = pairwise2.align.globalxx(seq1, seq2)
    if not alignments:
        # Nothing could be aligned, so there are no atom pairs to return.
        return [], []
    aligned1, aligned2 = alignments[0][0], alignments[0][1]

    # Walk the alignment column by column.  A column index is NOT an index
    # into the atom lists once a gap has been passed, so each sequence gets
    # its own cursor that only advances on real residues.
    kept1, kept2 = [], []
    pos1 = pos2 = 0
    for res1, res2 in zip(aligned1, aligned2):
        has1 = res1 != "-"
        has2 = res2 != "-"
        if has1 and has2:
            kept1.append(atoms1[pos1])
            kept2.append(atoms2[pos2])
        if has1:
            pos1 += 1
        if has2:
            pos2 += 1
    return kept1, kept2
Exemplo n.º 3
0
    def __init__(self, filepath):
        """Parse one pdb file and store its header, sequence and angles.

        Arguments :
        ------------
        filepath : string
            path to the pdb file; characters -8 to -5 of the path become
            the id (e.g. "1abc" for ".../1abc.pdb")
        """
        # NOTE(review): self.chains and self.angles are appended to below
        # but are not created in this method - they have to exist already
        # (presumably as class-level lists); confirm against the class body.

        # -----
        # id taken straight from the path :
        self.id = filepath[-8:-4]

        # -----
        # parse the file :
        struct = PDB.PDBParser().get_structure("", filepath)

        # -----
        # copy the header fields (two of them are stored as strings) :
        header = struct.header
        self.keywords = header['keywords']
        self.name = header['name']
        self.head = header['head']
        self.deposition_date = header['deposition_date']
        self.release_date = header['release_date']
        self.structure_method = header['structure_method']
        self.resolution = header['resolution']
        self.structure_reference = str(header['structure_reference'])
        self.journal_reference = header['journal_reference']
        self.author = header['author']
        self.compound = str(header['compound'])

        # -----
        # sequence, chain boundaries and phi/psi angles
        self.seq = ""
        residue_count = 0  # running index passed to every Angle
        end = 0            # last position used so far in self.seq (1-based)

        builder = PDB.CaPPBuilder()
        # zip pairs the i-th polypeptide with the i-th chain and stops at
        # the shorter of the two.
        # NOTE(review): this presumes one polypeptide per chain - confirm.
        peptide_chain_pairs = zip(
            builder.build_peptides(struct), struct.get_chains()
        )
        for peptide, chain in peptide_chain_pairs:
            print(peptide)

            pp_seq = str(peptide.get_sequence())
            print(pp_seq)
            self.seq += pp_seq

            # Boundaries are positions inside the concatenated self.seq,
            # not residue ids from the file:
            # |-----------||-------------------|
            # sA        eA sB                  eB
            start = end + 1
            print(start)
            end += len(pp_seq)
            print(end)

            self.chains.append(Chain(chain.id, self.id, start, end))

            # phi/psi pairs of the polypeptide; per the original notes some
            # values are None (missing atoms, no phi for the first residue,
            # no psi for the last one).
            phi_psi = peptide.get_phi_psi_list()
            print(phi_psi)

            # Reusing residue_count as the loop variable keeps the numbering
            # continuous from one polypeptide to the next.
            for residue_count, (phi, psi) in enumerate(phi_psi,
                                                       residue_count + 1):
                self.angles.append(Angle(self.id, residue_count, phi, psi))