示例#1
0
def write_backbone_angles(chain,
                          region=None,
                          offset=0,
                          outfile=sys.stdout,
                          header=False):
    """
    Write the phi/psi backbone angles of ``chain`` as tab-separated rows.

    One row is printed for every residue that belongs to a polypeptide
    built by ``PPBuilder`` and whose residue number lies inside ``region``.
    The columns are: chain id, residue number, one-letter residue code,
    residue number plus ``offset``, phi and psi. Each angle is multiplied
    by ``RAD_FACTOR``; an angle that is not available is written as ``NA``.

    :param chain: chain object to analyse
    :param region: inclusive ``(first, last)`` residue-number window;
        ``None`` keeps every residue numbered 0 or above
    :param offset: value added to the residue number for the fourth column
    :param outfile: file-like object that receives the rows
    :param header: when true, ``HEADER`` is printed before the rows
    """
    def scaled(angle):
        # Undefined angles are reported as the literal string 'NA'.
        if angle is None:
            return 'NA'
        return angle * RAD_FACTOR

    if region is None:
        region = (0, float('inf'))

    peptides = PPBuilder().build_peptides(chain)

    if header:
        print(HEADER, file=outfile)

    for peptide in peptides:
        torsions = peptide.get_phi_psi_list()
        for residue, (phi, psi) in zip(peptide, torsions):
            resnum = residue.get_id()[1]
            if not (region[0] <= resnum <= region[1]):
                continue
            row = (chain.id,
                   resnum,
                   seq1(residue.get_resname()),
                   resnum + offset,
                   scaled(phi),
                   scaled(psi))
            print(*row, sep='\t', file=outfile)
def SplitChain(PDB_objects):
    """
    Split PDB objects by chain, writing one PDB and one FASTA file per chain.

    For every chain id not yet seen in a given PDB object the chain is saved
    as ``<pdb id>_<chain id>.pdb``. When polypeptides can be built from that
    chain, the concatenated sequence of all its fragments is written to
    ``<pdb id>_<chain id>.fa`` under a ``><pdb id>_<chain id>`` header.

    Arguments:

    PDB_objects: list of PDB objects (with many chains) generated by the
        PDB parser.

    Returns the list of ``<pdb id>_<chain id>`` prefixes, one per saved chain.
    """
    File_prefix = []
    builder = PPBuilder()

    for pdb in PDB_objects:
        chain_names = set()
        io = PDBIO()

        for chain in pdb.get_chains():
            chain_id = chain.get_id()
            # The same chain id may be yielded more than once (presumably
            # once per model); only the first occurrence is exported.
            if chain_id in chain_names:
                continue
            chain_names.add(chain_id)
            prefix = pdb.get_id() + "_" + chain_id

            # Creates a PDB file for this chain of the original file.
            io.set_structure(chain)
            io.save(prefix + ".pdb")
            File_prefix.append(prefix)

            # Creates a FASTA file for this chain. Peptides are built from
            # the chain itself (not from the whole structure) and all of its
            # fragments are written in one go, so a later fragment can no
            # longer overwrite an earlier one.
            peptides = builder.build_peptides(chain)
            if peptides:
                sequence = "".join(str(pp.get_sequence()) for pp in peptides)
                with open(prefix + ".fa", "w") as fasta:
                    fasta.write(">" + prefix + "\n")
                    fasta.write(sequence)

    return File_prefix
def CreateJoinedFastas(input_PDB_objects):
    """
    Write the sequences of several PDB objects into a single FASTA file.

    The file name is every object's id followed by ``_``, concatenated, plus
    ``.fa`` (for ids ``A`` and ``B``: ``A_B_.fa``). Each object contributes a
    ``>id`` header line followed by the sequences of all its polypeptides
    written back to back; records are separated by a newline and the file
    does not end with one.

    Arguments:

    input_PDB_objects: list of PDB objects whose sequence will be added to
        the FASTA file.

    Returns the name of the file that was written.
    """
    polipeptide = PPBuilder()

    # Build the FASTA file name from all object ids.
    filename = "".join(obj.get_id() + "_" for obj in input_PDB_objects) + ".fa"

    # The context manager guarantees the handle is flushed and closed, even
    # if building a peptide raises half-way through.
    with open(filename, 'w') as joined_fasta:
        for position, obj in enumerate(input_PDB_objects):
            if position:
                # Separate this record from the previous one.
                joined_fasta.write("\n")
            joined_fasta.write(">" + obj.get_id() + "\n")
            for polipep in polipeptide.build_peptides(obj):
                joined_fasta.write(str(polipep.get_sequence()))

    return filename
示例#4
0
    def compute_secondary_structure(self, model):
        """
        Assign a secondary-structure class to every residue of one model.

        Phi/psi angles are computed per chain, converted to degrees and
        matched against the rectangles stored in ``self._ranges``. Residues
        lacking phi or psi get the symbol ``'-'``; residues matching no
        rectangle get ``None``.

        :param model: one model
        :return: list with one secondary-structure class per residue
        """
        builder = PPBuilder()
        # chain id -> [residues, phi angles (degrees), psi angles (degrees)]
        per_chain = {}
        seen = 0

        for chain in model:
            for peptide in builder.build_peptides(chain):
                torsions = peptide.get_phi_psi_list()
                for residue, (phi, psi) in zip(peptide, torsions):
                    if phi is None or psi is None:
                        # An angle is missing (e.g. first/last residue): store NaN for both
                        phi_deg = psi_deg = math.nan
                    else:
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)

                    residues, phis, psis = per_chain.setdefault(
                        chain.id, [[], [], []])
                    residues.append(residue)
                    phis.append(phi_deg)
                    psis.append(psi_deg)
                    seen += 1

        # NaN-padding under the placeholder chain 'Z' when fewer residues
        # than expected went through the angle computation
        missing = self._residues - seen
        if missing > 0:
            residues, phis, psis = per_chain.setdefault('Z', [[], [], []])
            residues.extend([None] * missing)
            phis.extend([math.nan] * missing)
            psis.extend([math.nan] * missing)

        # Compare every angle pair with the Ramachandran regions
        classes = []
        for _, phis, psis in per_chain.values():
            for phi, psi in zip(phis, psis):
                if math.isnan(phi) and math.isnan(psi):
                    # Angles not available: insert the dedicated symbol
                    classes.append('-')
                    continue
                # First region containing the point, or None when no region does
                classes.append(next(
                    (ss_c
                     for x, y, width, height, ss_c, color in self._ranges
                     if x <= phi < x + width and y <= psi < y + height),
                    None))

        return classes
def get_secondary_structure(structure):
    """
    Classify residues into Ramachandran regions from their phi/psi angles.

    Each entry of the returned list is the label of the first rectangle in
    ``rama_ss_ranges`` that contains the residue's (phi, psi) point in
    degrees, or ``""`` when an angle is missing or no rectangle matches.

    :param structure: iterable of entities accepted by
        ``PPBuilder.build_peptides``
    :return: list of ``N`` labels (``N`` is a module-level name not visible
        in this block; presumably the expected number of residues)
    """
    # Function-scope import so the block does not depend on a module-level
    # ``import math``.
    import math

    # (phi start, psi start, width, height, label, colour), all in degrees.
    rama_ss_ranges = [(-180, -180, 80, 60, 'E', 'blue'),
                      (-180, 50, 80, 130, 'E', 'blue'),
                      (-100, -180, 100, 60, 'P', 'green'),
                      (-100, 50, 100, 130, 'P', 'green'),
                      (-180, -120, 180, 170, 'H', 'red'),
                      (0, -180, 180, 360, 'L', 'yellow')]

    # Calculate PSI and PHI
    ppb = PPBuilder()  # PolyPeptideBuilder
    ss = ["" for x in range(N)]
    for chain in structure:
        for pp in ppb.build_peptides(chain):
            # [(phi_residue_1, psi_residue_1), ...]
            phi_psi = pp.get_phi_psi_list()
            # NOTE(review): ``i`` restarts at 0 for every peptide, so later
            # peptides overwrite the labels of earlier ones. Left unchanged
            # because the intended mapping onto ``N`` is not visible here.
            for i, (phi, psi) in enumerate(phi_psi):
                # Angles that are None (chain ends) keep the "" placeholder.
                if phi is None or psi is None:
                    continue
                # The angles are assumed to be in radians (see the Biopython
                # reference), while the ranges are in degrees: convert first.
                phi_deg = math.degrees(phi)
                psi_deg = math.degrees(psi)
                for x, y, w, h, ss_c, color in rama_ss_ranges:
                    if x <= phi_deg < x + w and y <= psi_deg < y + h:
                        ss[i] = ss_c
                        break
    return ss
示例#6
0
    def test_ppbuilder_torsion(self):
        """Test phi/psi angles calculated with PPBuilder."""
        peptides = PPBuilder().build_peptides(self.structure)

        # (row, column) positions checked in each phi/psi list, in order:
        # psi of residue 0, then phi and psi of residues 1 and 2.
        slots = ((0, 1), (1, 0), (1, 1), (2, 0), (2, 1))
        # One tuple of expected angles per peptide, aligned with ``slots``.
        expected = (
            (-0.46297171497725553, -1.0873937604007962, 2.1337707832637109,
             -2.4052232743651878, 2.3807316946081554),
            (-0.6810077089092923, -1.2654003477656888, -0.58689987042756309,
             -1.7467679151684763, -1.5655066256698336),
            (-0.73222884210889716, -1.1044740234566259, -0.69681334592782884,
             -1.8497413300164958, 0.34762889834809058),
        )

        for index, angles in enumerate(expected):
            phi_psi = peptides[index].get_phi_psi_list()
            # The first residue of a peptide has no phi angle.
            self.assertIsNone(phi_psi[0][0])
            for (row, col), value in zip(slots, angles):
                self.assertAlmostEqual(phi_psi[row][col], value, places=3)
示例#7
0
 def test_c_n(self):
     """Extract polypeptides using C-N."""
     peptides = PPBuilder().build_peptides(self.structure[1])
     self.assertEqual(len(peptides), 1)
     # The single peptide must run from residue number 2 to 86.
     peptide = peptides[0]
     first_number = peptide[0].get_id()[1]
     self.assertEqual(first_number, 2)
     last_number = peptide[-1].get_id()[1]
     self.assertEqual(last_number, 86)
示例#8
0
 def get_structure_sequence(struct):
     # type: (Structure) -> str
     """
     Concatenate the sequences of every polypeptide PPBuilder finds.

     :param struct: Structure object
     :return: the polypeptide sequences joined, in build order, into one string
     """
     fragments = []
     for peptide in PPBuilder().build_peptides(struct):
         fragments.append(str(peptide.get_sequence()))
     return ''.join(fragments)
示例#9
0
 def test_polypeptide(self):
     """Tests on polypetide class and methods."""
     structure = PDBParser(PERMISSIVE=True).get_structure("scr", "PDB/1A8O.pdb")

     # Expected sequences of the first three peptides (same for both builders).
     expected_sequences = (
         "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
         "TETLLVQNANPDCKTILKALGPGATLEE",
         "TACQG",
     )
     # (row, column) positions checked in each phi/psi list, in order.
     slots = ((0, 1), (1, 0), (1, 1), (2, 0), (2, 1))
     # One tuple of expected angles per peptide, aligned with ``slots``.
     expected_angles = (
         (-0.46297171497725553, -1.0873937604007962, 2.1337707832637109,
          -2.4052232743651878, 2.3807316946081554),
         (-0.6810077089092923, -1.2654003477656888, -0.58689987042756309,
          -1.7467679151684763, -1.5655066256698336),
         (-0.73222884210889716, -1.1044740234566259, -0.69681334592782884,
          -1.8497413300164958, 0.34762889834809058),
     )

     # --- C-N based builder ---
     peptides = PPBuilder().build_peptides(structure)
     for index, sequence in enumerate(expected_sequences):
         self.assertEqual(str(peptides[index].get_sequence()), sequence)
     for index, angles in enumerate(expected_angles):
         phi_psi = peptides[index].get_phi_psi_list()
         self.assertEqual(phi_psi[0][0], None)
         for (row, col), value in zip(slots, angles):
             self.assertAlmostEqual(phi_psi[row][col], value, places=3)

     # --- CA-CA based builder ---
     ca_peptides = CaPPBuilder().build_peptides(structure)
     for index, sequence in enumerate(expected_sequences):
         self.assertEqual(str(ca_peptides[index].get_sequence()), sequence)
     serials = [ca.serial_number for ca in ca_peptides[0].get_ca_list()]
     self.assertEqual(serials, [
         10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
         139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
         251, 260, 267, 276, 284
     ])
     taus = ca_peptides[1].get_tau_list()
     expected_taus = (0.3597907225123525, 0.43239284636769254,
                      0.99820157492712114)
     for index, value in enumerate(expected_taus):
         self.assertAlmostEqual(taus[index], value, places=3)
     thetas = ca_peptides[2].get_theta_list()
     expected_thetas = (1.6610069445335354, 1.7491703334817772,
                        2.0702447422720143)
     for index, value in enumerate(expected_thetas):
         self.assertAlmostEqual(thetas[index], value, places=3)
def is_protein(chain):
    """
    Tell whether at least one non-empty polypeptide can be built from a chain.

    :param chain: chain object handed to ``PPBuilder.build_peptides``
    :return: True if any built polypeptide has a sequence longer than zero,
        False otherwise
    """
    peptides = PPBuilder().build_peptides(chain)
    return any(len(peptide.get_sequence()) > 0 for peptide in peptides)
def chain_to_one_pp(chain):
    """
    Return the polypeptides of ``chain`` merged into a single polypeptide.

    When ``PPBuilder`` yields anything other than exactly one polypeptide, a
    warning is printed and every later polypeptide is appended, in order, to
    the first one, which is then returned. An ``IndexError`` is raised if no
    polypeptide could be built.
    """
    fragments = PPBuilder().build_peptides(chain)
    count = len(fragments)

    if count != 1:
        print('warning ', count,
              ' polypeptides from one chain, extending first pp')
        for extra in fragments[1:]:
            fragments[0].extend(extra)

    return fragments[0]
示例#12
0
def structure_filtered_dca_get_sequence_from_structure(structure):
    """
    Return the sequence of ``structure`` as one string without newlines.

    Polypeptides are built with ``PPBuilder(radius=10.0)`` and
    ``aa_only=False``; their sequences are concatenated in build order.
    """
    from Bio.PDB import PPBuilder

    builder = PPBuilder(radius=10.0)
    pieces = [
        '%s\n' % peptide.get_sequence()
        for peptide in builder.build_peptides(structure, aa_only=False)
    ]
    # Any newline is dropped from the joined text.
    return ''.join(pieces).replace('\n', '')
示例#13
0
def run_test():
    """
    Print a walk-through of ``PDB/a_structure.pdb``.

    For every model, chain and residue a summary line is printed, followed
    by notes about point mutations and disordered atoms. Afterwards the
    polypeptides of ``structure[1]`` are printed, built first with
    ``PPBuilder`` (C-N) and then with ``CaPPBuilder`` (CA-CA), and finally
    ``quick_neighbor_search_test()`` is run.

    Every ``print`` below receives exactly one pre-formatted string, so the
    output is the same whether ``print`` is a statement (Python 2) or a
    function (Python 3).
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        print("Model %i contains %i chains." % (model.get_id(), len(model)))
        for chain in model.get_list():
            print("\tChain '%s' contains %i residues."
                  % (chain.get_id(), len(chain)))
            for residue in chain.get_list():
                hetfield, resseq, icode = residue.get_id()
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms."
                      % (hetfield, resseq, icode, len(residue)))
                disorder = residue.is_disordered()
                # check if there is disorder due to a point mutation --- this is rare
                if disorder == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    names = "".join(
                        resname + " "
                        for resname in residue.disordered_get_id_list())
                    # [:-1] drops the trailing space before the full stop.
                    print(("\t\t\tResidues at this position are " + names)[:-1] + ".")
                # count the number of disordered atoms
                if disorder == 1:
                    disordered_count = sum(
                        1 for atom in residue.get_list()
                        if atom.is_disordered())
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms."
                              % disordered_count)

    print("Polypeptides using C-N")
    for pp in PPBuilder().build_peptides(structure[1]):
        print(pp)

    print("Polypeptides using CA-CA")
    for pp in CaPPBuilder().build_peptides(structure[1]):
        print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
示例#14
0
def run(infile, splitpdb):
    """
    Export the sequences (FASTA) and coordinates (PDB) of a structure file.

    With ``splitpdb == 0`` all polypeptide sequences go into one
    ``<prefix>.fasta`` and the whole structure is saved as
    ``<prefix>_clean.pdb``. With ``splitpdb == 1`` every polypeptide gets its
    own ``<prefix>_<chain>.<index>.fasta`` and ``.pdb`` file. Chains from
    which no polypeptide can be built are always extracted to
    ``<prefix>_<chain>.0.pdb``.

    ``<prefix>`` is the base name of ``infile`` without its extension.

    :param infile: path of the structure file to parse
    :param splitpdb: 0 to write combined files, 1 to write one file per
        polypeptide
    :return: list of the chain ids encountered, in iteration order
    :raises ValueError: if ``splitpdb`` is neither 0 nor 1
    """
    if splitpdb not in (0, 1):
        # Previously an unsupported value failed later with a NameError.
        raise ValueError('splitpdb must be 0 or 1, got %r' % (splitpdb,))

    parser = PDBParser()
    struct = parser.get_structure('mystruct', infile)
    ppb = PPBuilder()

    basename = os.path.basename(infile)
    prefix = os.path.splitext(basename)[0]

    # Shared FASTA handle, only used when the output is NOT split.
    seqfile = open(prefix + '.fasta', 'w') if splitpdb == 0 else None
    ListChains = []
    try:
        if splitpdb == 0:  # We do NOT split the PDB and fasta files!
            pdbio = PDBIO_RPL.PDBIO()
            pdbio.set_structure(struct)
            pdbio.save(prefix + '_clean.pdb')

        for model in struct:
            for chain in model:
                ListChains.append(chain.id)
                ListPpdb = ppb.build_peptides(chain)
                if ListPpdb:
                    for index, pp in enumerate(ListPpdb):
                        seq = pp.get_sequence()
                        record = '>%s %s\n%s\n' % (
                            prefix + '_chain_' + chain.id + '_' + str(index),
                            len(seq), seq)
                        if splitpdb == 1:  # We split the PDB and fasta files!
                            stem = prefix + '_' + chain.id + '.' + str(index)
                            with open(stem + '.fasta', 'w') as handle:
                                handle.write(record)
                            Dice_RPL.extract(struct, chain.id, pp[0].id[1],
                                             pp[-1].id[1], stem + '.pdb')
                        else:
                            seqfile.write(record)
                else:
                    # Also split chains that do not consist of amino acids!
                    ChainList = chain.get_list()
                    if not ChainList:
                        # Nothing to extract from an empty chain.
                        continue
                    startres = ChainList[0].id[1]
                    # Residue number of the LAST residue (the original read
                    # the last id field of the first residue instead).
                    endres = ChainList[-1].id[1]
                    # Such a chain is exported as one piece, hence index 0
                    # (the original reused a stale or undefined loop index).
                    ofile = prefix + '_' + chain.id + '.0.pdb'
                    Dice_RPL.extract(struct, chain.id, startres, endres,
                                     ofile)
    finally:
        # Close the shared FASTA file even if an export step failed.
        if seqfile is not None:
            seqfile.close()

    return ListChains
示例#15
0
def run_test():
    """
    Print a walk-through of ``PDB/a_structure.pdb``.

    A summary line is printed for every model, chain and residue, plus notes
    on point mutations and disordered atoms. Then the polypeptides of
    ``structure[1]`` are printed, built with ``PPBuilder`` (C-N) and with
    ``CaPPBuilder`` (CA-CA), and ``quick_neighbor_search_test()`` is called.

    All output goes through ``print(...)`` with a single pre-formatted
    string argument, which prints the same text under Python 2 (statement)
    and Python 3 (function).
    """
    from Bio.PDB import PDBParser, PPBuilder, CaPPBuilder

    # first make a PDB parser object
    p = PDBParser(PERMISSIVE=1)

    # get the structure, call it "example"
    structure = p.get_structure("example", "PDB/a_structure.pdb")

    # now loop over content and print some info
    for model in structure.get_list():
        model_id = model.get_id()
        print("Model %i contains %i chains." % (model_id, len(model)))
        for chain in model.get_list():
            chain_id = chain.get_id()
            print("\tChain '%s' contains %i residues." % (chain_id, len(chain)))
            for residue in chain.get_list():
                hetfield, resseq, icode = residue.get_id()
                print("\t\tResidue ('%s', %i, '%s') contains %i atoms." % (
                    hetfield, resseq, icode, len(residue)))
                disorder = residue.is_disordered()
                # check if there is disorder due to a point mutation --- this is rare
                if disorder == 2:
                    print("\t\t\tThere is a point mutation present in the crystal at this position.")
                    s = "\t\t\tResidues at this position are "
                    for resname in residue.disordered_get_id_list():
                        s = s + resname + " "
                    # s[:-1] drops the trailing space before the full stop.
                    print(s[:-1] + ".")
                # count the number of disordered atoms
                if disorder == 1:
                    disordered_count = sum(
                        1 for atom in residue.get_list()
                        if atom.is_disordered())
                    if disordered_count > 0:
                        print("\t\t\tThe residue contains %i disordered atoms." % disordered_count)

    builders = (("Polypeptides using C-N", PPBuilder),
                ("Polypeptides using CA-CA", CaPPBuilder))
    for title, builder_class in builders:
        print(title)
        for pp in builder_class().build_peptides(structure[1]):
            print(pp)

    print("NeighborSearch test")
    quick_neighbor_search_test()
示例#16
0
 def test_c_n(self):
     """Extract polypeptides using C-N."""
     expected_sequence = ("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
                          "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC")

     peptides = PPBuilder().build_peptides(self.structure[1])
     self.assertEqual(len(peptides), 1)
     peptide = peptides[0]

     # The single peptide must run from residue number 2 to 86.
     first_number = peptide[0].get_id()[1]
     self.assertEqual(first_number, 2)
     last_number = peptide[-1].get_id()[1]
     self.assertEqual(last_number, 86)

     # The sequence must be a protein Seq with the expected letters.
     sequence = peptide.get_sequence()
     self.assertTrue(isinstance(sequence, Seq))
     self.assertEqual(sequence.alphabet, generic_protein)
     self.assertEqual(expected_sequence, str(sequence))
示例#17
0
 def test_insertions(self):
     """Test file with residue insertion codes."""
     # Sequence expected for the first segment, for either builder.
     expected = "".join((
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR",
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY",
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW",
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE",
     ))

     cif_parser = MMCIFParser(QUIET=1)
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = cif_parser.get_structure("example", "PDB/4ZHL.cif")

     for builder in (PPBuilder(), CaPPBuilder()):
         # Non-standard amino acids are allowed (second argument False).
         peptides = builder.build_peptides(structure[0], False)
         self.assertEqual(len(peptides), 2)
         first_segment = peptides[0]

         # Start and end positions of the first segment only.
         self.assertEqual(first_segment[0].get_id()[1], 16)
         self.assertEqual(first_segment[-1].get_id()[1], 244)

         # Sequence type, alphabet and content.
         sequence = first_segment.get_sequence()
         self.assertIsInstance(sequence, Seq)
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(expected, str(sequence))
示例#18
0
def read_pdb_file(file_name, name=None):
    """
    Extract info from a PDB file.

        file_name: path of pdb file
        name: name given to the parsed structure (default: ``file_name``
            with its extension stripped)
        return:: (protein_name, structure, residues, sequence, atoms)

            protein_name: base name of ``file_name`` with ".pdb" removed
            structure: structure object
            residues: residues of all polypeptides, in polypeptide order
            sequence: combined sequence (for all polypeptides)
            atoms: every atom of the structure

    Polypeptides are built with ``PPBuilder``; when it finds none,
    ``CaPPBuilder`` is used instead. A ``ValueError`` is raised when
    ``len(structure)`` is not 1.
    """
    structure_name = splitext(file_name)[0] if name is None else name

    structure = PDBParser().get_structure(structure_name, file_name)
    if len(structure) != 1:
        raise ValueError("Unexpected number of structures in " +
                         structure_name)

    atoms = Selection.unfold_entities(structure, 'A')

    # Fall back to the CA-based builder when the default one finds nothing.
    polypeptides = PPBuilder().build_peptides(structure)
    if not polypeptides:
        polypeptides = CaPPBuilder().build_peptides(structure)

    fragments = []
    residues = []
    for polypeptide in polypeptides:
        fragments.append(str(polypeptide.get_sequence()))
        residues.extend(polypeptide)
    sequence = ''.join(fragments)

    protein_name = os.path.basename(file_name).replace(".pdb", "")
    return protein_name, structure, residues, sequence, atoms
示例#19
0
    def get_sequence(self, chain_id):
        """
        Return the amino acid sequence of one chain as a string.

        Input:
            self: uses the Biopython.PDB structure stored in
                ``self.structure``
            chain_id: String (usually in ['A','B', 'C' ...]. The number of
                chains depends on the specific protein and the resulting
                structure)
        Return:
            The single-letter amino acid sequence of chain ``chain_id`` in
            the first model (``self.structure[0]``), as a ``str``. The
            sequences of all polypeptide fragments built from the chain are
            concatenated in order; the result is ``''`` when no polypeptide
            can be built.
        """
        chain = self.structure[0][chain_id]
        # Join every fragment: returning inside the loop (as before) kept
        # only the first fragment and yielded a Seq object, or None for an
        # empty chain, instead of the documented string.
        return ''.join(
            str(pp.get_sequence())
            for pp in PPBuilder().build_peptides(chain))
示例#20
0
 def test_c_n(self):
     """Extract polypeptides using C-N."""
     builder = PPBuilder()
     found = builder.build_peptides(self.structure[1])
     self.assertEqual(len(found), 1)
     only_peptide = found[0]

     # Start and end residue numbers of the peptide.
     start_id = only_peptide[0].get_id()
     self.assertEqual(start_id[1], 2)
     end_id = only_peptide[-1].get_id()
     self.assertEqual(end_id[1], 86)

     # Sequence type, alphabet and letters.
     seq = only_peptide.get_sequence()
     self.assertTrue(isinstance(seq, Seq))
     self.assertEqual(seq.alphabet, generic_protein)
     wanted = ("RCGSQGGGSTCPGLRCCSIWGWCGDSEPYCGRTCENKCWSGER"
               "SDHRCGAAVGNPPCGQDRCCSVHGWCGGGNDYCSGGNCQYRC")
     self.assertEqual(wanted, str(seq))
示例#21
0
 def _pp(self, pdb_path, chain_id):
     """
     Return one polypeptide covering chain ``chain_id`` of a PDB file.

     The structure is parsed under the file's stem as id; the polypeptides
     built from the chain in the first model are merged, in order, into the
     first one. An ``IndexError`` is raised if none can be built.
     """
     structure = PDBParser().get_structure(Path(pdb_path).stem, pdb_path)
     fragments = PPBuilder().build_peptides(structure[0][chain_id])
     merged, remaining = fragments[0], fragments[1:]
     for fragment in remaining:
         merged += fragment
     return merged
示例#22
0
 def test_polypeptide(self):
     """Tests on polypetide class and methods."""
     parser = PDBParser(PERMISSIVE=True)
     structure = parser.get_structure("scr", "PDB/1A8O.pdb")

     sequences = [
         "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
         "TETLLVQNANPDCKTILKALGPGATLEE",
         "TACQG",
     ]
     # Expected torsions per peptide, listed as psi of residue 0 followed
     # by phi and psi of residues 1 and 2.
     torsions = [
         [-0.46297171497725553, -1.0873937604007962, 2.1337707832637109,
          -2.4052232743651878, 2.3807316946081554],
         [-0.6810077089092923, -1.2654003477656888, -0.58689987042756309,
          -1.7467679151684763, -1.5655066256698336],
         [-0.73222884210889716, -1.1044740234566259, -0.69681334592782884,
          -1.8497413300164958, 0.34762889834809058],
     ]
     positions = [(0, 1), (1, 0), (1, 1), (2, 0), (2, 1)]

     # Peptides from the C-N builder: sequences first, then torsions.
     cn_peptides = PPBuilder().build_peptides(structure)
     for number, text in enumerate(sequences):
         self.assertEqual(str(cn_peptides[number].get_sequence()), text)
     for number, values in enumerate(torsions):
         phi_psi = cn_peptides[number].get_phi_psi_list()
         self.assertEqual(phi_psi[0][0], None)
         for (residue_index, angle_index), value in zip(positions, values):
             self.assertAlmostEqual(phi_psi[residue_index][angle_index],
                                    value, places=3)

     # Peptides from the CA-CA builder: sequences, CA serials, tau, theta.
     ca_peptides = CaPPBuilder().build_peptides(structure)
     for number, text in enumerate(sequences):
         self.assertEqual(str(ca_peptides[number].get_sequence()), text)
     self.assertEqual(
         [ca.serial_number for ca in ca_peptides[0].get_ca_list()],
         [10, 18, 26, 37, 46, 50, 57, 66, 75, 82, 93, 104, 112, 124, 131,
          139, 150, 161, 173, 182, 189, 197, 208, 213, 222, 231, 236, 242,
          251, 260, 267, 276, 284])
     taus = ca_peptides[1].get_tau_list()
     for number, value in enumerate(
             [0.3597907225123525, 0.43239284636769254, 0.99820157492712114]):
         self.assertAlmostEqual(taus[number], value, places=3)
     thetas = ca_peptides[2].get_theta_list()
     for number, value in enumerate(
             [1.6610069445335354, 1.7491703334817772, 2.0702447422720143]):
         self.assertAlmostEqual(thetas[number], value, places=3)
示例#23
0
    def test_ppbuilder_real_nonstd(self):
        """Test PPBuilder on real PDB file allowing non-standard amino acids."""
        peptides = PPBuilder().build_peptides(self.structure, False)
        self.assertEqual(len(peptides), 1)
        peptide = peptides[0]

        # The peptide must run from residue number 151 to 220.
        first_number = peptide[0].get_id()[1]
        self.assertEqual(first_number, 151)
        last_number = peptide[-1].get_id()[1]
        self.assertEqual(last_number, 220)

        # Sequence check; non-standard MSE residues appear as M.
        sequence = peptide.get_sequence()
        self.assertIsInstance(sequence, Seq)
        expected = "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG"
        self.assertEqual(expected, sequence)
示例#24
0
	def getSeqLocation(self, seq):		# return sequence position and chain id
		"""
		Locate ``seq`` inside the polypeptides of the first model.

		The polypeptides built from ``self.__struct[0]`` are searched in
		order; the first one whose sequence contains ``seq`` wins.

		Returns ``(beg, end, chain)``: the 0-based start and inclusive end
		index of the match within that polypeptide's sequence, and the id of
		the parent of the polypeptide's first residue. When ``seq`` is not
		found, a message is sent to ``self.printerr`` and
		``(None, None, None)`` is returned.
		"""
		ppb = PPBuilder()
		for pp in ppb.build_peptides(self.__struct[0]):
			ind = str(pp.get_sequence()).find(seq)
			if ind != -1:
				# Return on the match itself instead of testing
				# ``beg == end == 0`` afterwards: that test rejected a
				# one-residue match at index 0 and failed on unbound
				# names when no polypeptide was built.
				return ind, ind + len(seq) - 1, pp[0].get_parent().get_id()
		line = '\n' + seq + ' not found in '+str(self.__struct.get_id()) + '!\n'
		self.printerr(line)
		return None, None, None
示例#25
0
def get_sequence(pdb, chain):
    """
    Export one chain of a PDB file as a FASTA file and a PDB file.

    The file ``pdb`` is parsed and the polypeptide sequences of chain
    ``chain`` in the first model are concatenated. Two files are written:

    * ``<pdb[-8:-4]>_<chain><chain>.fasta.txt`` with the header
      ``><pdb[:-4]><chain>`` followed by the sequence;
    * ``<pdb[-8:-4]>_<chain>.pdb``, saved through ``SelectChains(chain)``.

    :param pdb: path of the PDB file; the slicing above assumes it ends in
        a four-character extension such as ``.pdb``
    :param chain: id of the chain to export
    :return: None
    """
    # PERMISSIVE=0 is understood to select the parser's strict mode, in
    # which problems in the file raise instead of being tolerated; confirm
    # against the Biopython PDBParser documentation.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)
    pdb_chain = pdb_structure[0][chain]

    sequence = "".join(
        str(pp.get_sequence())
        for pp in PPBuilder().build_peptides(pdb_chain))

    output = pdb[-8:-4] + "_" + chain + ".pdb"
    # The context manager closes the FASTA file even if a write fails.
    with open(output[:-4] + chain + ".fasta.txt", "w") as out:
        out.write(">" + pdb[:-4] + chain + "\n")
        out.write(sequence + "\n")

    io = PDBIO()
    io.set_structure(pdb_structure)
    io.save(output, SelectChains(chain))
示例#26
0
def get_pp(pdb, chain, start, length, seq):
    """Retrieve the residues around ``seq`` for a given pdb file and chain.

    The chain is split into polypeptides and the first one whose sequence
    contains ``seq`` is selected; ``start`` is then replaced by the index of
    the match. When no polypeptide contains ``seq`` the caller-supplied
    ``start`` is used against the last polypeptide, as before.

    Args:
        pdb: structure identifier, also passed to ``make_filename``.
        chain: identifier of the chain to read.
        start: fallback start index used when ``seq`` is not found.
        length: number of residues wanted after ``start``.
        seq: amino-acid string to search for.

    Returns:
        The slice ``pep[start - 1:start + length + 2]`` of the selected
        polypeptide.

    Raises:
        RuntimeError: if the chain yields no polypeptide, or the requested
            window does not fit (``start`` must be > 0 and
            ``start + length + 2`` must not exceed the polypeptide length).
            The original bare ``raise`` also surfaced as ``RuntimeError`` on
            Python 3, but without a message.
    """
    f = make_filename(pdb)
    p = PDBParser(PERMISSIVE=1)
    pdb_struct = p.get_structure(pdb, f)  # Load the structure stored in file f.
    pdb_chain = pdb_struct[0][chain]  # Select the chain in the first model.
    peptides = PPBuilder().build_peptides(pdb_chain)
    if not peptides:
        raise RuntimeError(
            "no polypeptide could be built for chain %r of %r" % (chain, pdb))

    pep = peptides[-1]  # Fallback when seq is not found in any polypeptide.
    for candidate in peptides:
        index = str(candidate.get_sequence()).find(seq)
        if index != -1:
            pep, start = candidate, index
            break

    if start > 0 and (start + length + 2) <= len(pep):
        return pep[(start - 1):(start + length + 2)]
    raise RuntimeError(
        "window start=%r length=%r does not fit in a polypeptide of %d residues"
        % (start, length, len(pep)))
示例#27
0
 def test_parser(self):
     """Extract polypeptides from the 1A8O mmCIF file."""
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)

     # For each aa_only flag: the expected fragments as
     # (first residue number, last residue number, sequence).
     expectations = (
         # Non-standard amino acids allowed: one fragment, MSE shown as M.
         (False, [
             (151, 220,
              "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
              "NANPDCKTILKALGPGATLEEMMTACQG"),
         ]),
         # Standard amino acids only: MSE 151 at the start is ignored and
         # the chain breaks at MSE 185 and at MSE 214,215.
         (True, [
             (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
             (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
             (216, 220, "TACQG"),
         ]),
     )

     for builder in (PPBuilder(), CaPPBuilder()):
         # serial_num (model column) must be stored properly.
         self.assertEqual(structure[0].serial_num, 1)
         for aa_only, fragments in expectations:
             polypeptides = builder.build_peptides(structure[0], aa_only)
             self.assertEqual(len(polypeptides), len(fragments))
             for peptide, (first, last, text) in zip(polypeptides, fragments):
                 # Start and end positions of the fragment.
                 self.assertEqual(peptide[0].get_id()[1], first)
                 self.assertEqual(peptide[-1].get_id()[1], last)
                 # Sequence type, alphabet and content.
                 found = peptide.get_sequence()
                 self.assertTrue(isinstance(found, Seq))
                 self.assertEqual(found.alphabet, generic_protein)
                 self.assertEqual(text, str(found))
示例#28
0
	def getRegionsResidues(self):
		"""Fill ``self.__regions_res`` with the residues of every region.

		For each key of ``self.__regions_res`` the region sequence is read
		from the ``'tcr_region_seq'`` column of
		``self.__regions.get_group(key)`` and searched for in the
		polypeptides built from the first model. The residues of the first
		match are stored under that key.

		Returns:
			1 when every region was found; 0 (after reporting through
			``self.printerr``) as soon as one region has no match.
		"""
		peptides = PPBuilder().build_peptides(self.__struct[0])
		for key in self.__regions_res:
			# Look the region sequence up once per key, outside the peptide
			# loop: it does not depend on the peptide, and the error path
			# below needs it even when no peptide was built at all.
			reg_seq = list(self.__regions.get_group(key)['tcr_region_seq'])[0]
			res = []
			for pp in peptides:
				ind = str(pp.get_sequence()).find(reg_seq)
				if ind != -1:
					res = [pp[i] for i in range(ind, ind + len(reg_seq))]
					self.__regions_res[key] = res
					break
			if not res:
				line = '\n' + reg_seq + ' not found in '+ self.__name + '!\n'
				self.printerr('getRegionResidues(): ' + line)
				return 0
		return 1
示例#29
0
    def testModels(self):
        """Check parsing and peptide building on a file with several models."""
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        # Both parsers must see the three models.
        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 3)

        expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
        for builder in (PPBuilder(), CaPPBuilder()):
            # serial_num (model column) must be stored properly.
            for model_index in range(3):
                self.assertEqual(
                    structure[model_index].serial_num, model_index + 1
                )
            # First allow non-standard amino acids (aa_only False), then
            # the strict version; the expected result is the same for both.
            for aa_only in (False, True):
                polypeptides = builder.build_peptides(structure[0], aa_only)
                self.assertEqual(len(polypeptides), 1)
                peptide = polypeptides[0]
                # Start and end positions.
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Sequence type, alphabet and content.
                found = peptide.get_sequence()
                self.assertIsInstance(found, Seq)
                self.assertEqual(found.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(found))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
示例#30
0
def get_sequence(pdb, chain):
    """Write the sequence of one chain of a PDB file to a FASTA file.

    The sequence is the concatenation of every polypeptide that
    ``PPBuilder`` builds from the chain in the first model, with
    non-standard residues allowed (``aa_only=False``).

    Args:
        pdb: path of the input PDB file; ``pdb[0:-4]`` is used both as the
            FASTA header and as the stem of the output file name.
        chain: chain identifier; ``"%"`` is mapped to the blank id ``" "``.

    Side effects:
        Installs an ``'always'`` warnings filter for messages matching
        ``.*discontinuous at.*`` and writes ``<pdb[0:-4]>.fasta.atom``.
    """
    # Compare by value: identity (`is`) against a string literal depends on
    # interning and triggers a SyntaxWarning on recent Python versions.
    if chain == "%":
        chain = " "
    warnings.filterwarnings('always', message='.*discontinuous at.*')
    # PERMISSIVE=0 is the strict mode: problems found while building the
    # structure are raised instead of being tolerated.
    pdb_parser = PDBParser(PERMISSIVE=0, QUIET=True)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)

    pdb_chain = pdb_structure[0][chain]
    peptides = PPBuilder().build_peptides(pdb_chain, aa_only=False)
    sequence = "".join(str(pp.get_sequence()) for pp in peptides)

    stem = pdb[0:-4]
    # The context manager closes the file even when a write fails.
    with open(stem + ".fasta.atom", "w") as out:
        out.write(">" + stem + "\n")
        out.write(sequence + "\n")
示例#31
0
def get_sequence(pdb, chain, first, last, output):
    """Save a selection of one chain of ``pdb`` to the file ``output``.

    The structure is parsed from ``pdb`` and written with
    ``SelectDomain(chain, first, last)`` as the selector (presumably keeping
    residues ``first``..``last`` of ``chain`` - confirm against
    ``SelectDomain``). The chain sequence is still assembled, although
    nothing in this function reads it afterwards.
    """
    # PERMISSIVE=0 is the strict parser mode.
    structure = PDBParser(PERMISSIVE=0).get_structure(pdb, pdb)
    selected_chain = structure[0][chain]

    builder = PPBuilder()
    Sequence = ""
    for peptide in builder.build_peptides(selected_chain):
        Sequence += peptide.get_sequence()

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output, SelectDomain(chain, first, last))
示例#32
0
    def test_ppbuilder_real(self):
        """Build peptides from the real PDB file with default settings."""
        peptides = PPBuilder().build_peptides(self.structure)
        self.assertEqual(len(peptides), 3)

        # Termini: (first, last) residue number of each fragment.
        expected_termini = [(152, 184), (186, 213), (216, 220)]
        for peptide, (first, last) in zip(peptides, expected_termini):
            self.assertEqual(peptide[0].get_id()[1], first)
            self.assertEqual(peptide[-1].get_id()[1], last)

        # Sequences, fetched for all fragments before any is checked.
        sequences = [peptide.get_sequence() for peptide in peptides]
        self.assertIsInstance(sequences[0], Seq)
        expected_sequences = [
            "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW",
            "TETLLVQNANPDCKTILKALGPGATLEE",
            "TACQG",
        ]
        for found, expected in zip(sequences, expected_sequences):
            self.assertEqual(found, expected)
示例#33
0
def get_sequence(pdb, chain):
    """Renumber one chain of a PDB file so it starts at 1, then save it.

    When the first residue of the chain is not numbered 1, every residue id
    in the chain is replaced by ``(' ', number - start + 1, ' ')``. The
    result is written with ``SelectChains(chain)`` as the selector.

    Args:
        pdb: path of the input PDB file; the output is ``"renumbered_" + pdb``.
        chain: identifier of the chain to renumber and save.

    Raises:
        IndexError: if the chain contains no residue.
    """
    # PERMISSIVE=0 is the strict parser mode.
    pdb_parser = PDBParser(PERMISSIVE=0)
    pdb_structure = pdb_parser.get_structure(pdb, pdb)
    pdb_chain = pdb_structure[0][chain]

    # The sequence used to be assembled here but was never read; that dead
    # computation has been dropped.
    start = [residue.id[1] for residue in pdb_chain][0]
    # Compare by value: `is not` on an int only works by accident of small
    # integer caching and triggers a SyntaxWarning on recent Python versions.
    if start != 1:
        for residue in pdb_chain:
            residue.id = (' ', residue.id[1] - start + 1, ' ')

    io = PDBIO()
    io.set_structure(pdb_structure)
    output = "renumbered_" + pdb
    io.save(output, SelectChains(chain))
示例#34
0
    def get_ignored_res(file: str):
        """Collect phi/psi angles per residue and split off incomplete ones.

        Every chain of every model in ``file`` is split into polypeptides
        and each residue is keyed as ``"<chain id>:<resname><number>"``.
        A key seen again (for example in a later model) overwrites the
        earlier entry.

        Args:
            file: path of the PDB file to parse.

        Returns:
            ``(output, ignored, x, y)``: ``output`` maps residue key to its
            ``(phi, psi)`` tuple; ``ignored`` lists ``(key, angles)`` for
            residues missing at least one angle; ``x`` and ``y`` hold the
            phi and psi values multiplied by ``180 / pi`` for the others.
        """
        x, y, ignored, output = [], [], [], {}
        for model in PDBParser().get_structure(id=None, file=file):
            for chain in model:
                for peptide in PPBuilder().build_peptides(chain):
                    for aa, angles in zip(peptide, peptide.get_phi_psi_list()):
                        residue = chain.id + ":" + aa.resname + str(aa.id[1])
                        output[residue] = angles

        for key, value in output.items():
            phi, psi = value
            # Test against None rather than truthiness: an angle of exactly
            # 0.0 is a real value and must not be treated as missing.
            if phi is not None and psi is not None:
                x.append(phi * 180 / pi)
                y.append(psi * 180 / pi)
            else:
                ignored.append((key, value))

        return output, ignored, x, y
示例#35
0
def split_pdb_by_chain(pdb_id):
    """Write each chain of each model to its own PDB file and return sequences.

    The structure is read from ``ent_files/pdb<pdb_id lower>.ent``. Chain
    files go to ``pdb_chains/<PDB_ID>/<PDB_ID>-<model id + 1>_<chain>.pdb``
    and are only written when they do not exist yet.

    Args:
        pdb_id: PDB identifier; upper-cased for output names and lower-cased
            for the input file name.

    Returns:
        dict mapping each chain file name (without directory) to the
        concatenated sequence of the polypeptides built from that chain.
    """
    chain_dir = "pdb_chains/" + pdb_id.upper()
    # makedirs(exist_ok=True) replaces the isdir()+mkdir() pair: it has no
    # check-then-create race and also creates a missing "pdb_chains" parent.
    os.makedirs(chain_dir, exist_ok=True)
    actual_pdbfile = PDBParser().get_structure(
        pdb_id, "ent_files/pdb" + pdb_id.lower() + ".ent")
    return_dict = dict()
    for model in actual_pdbfile:
        for chain in model:
            outfilename = pdb_id.upper() + "-" + str(
                model.get_id() + 1) + "_" + str(chain.get_id()) + ".pdb"
            outpath = chain_dir + "/" + outfilename
            if not os.path.isfile(outpath):
                io = PDBIO()
                io.set_structure(chain)
                io.save(outpath)
            this_seq = Seq("", generic_protein)
            for pp in PPBuilder().build_peptides(chain):
                this_seq += pp.get_sequence()
            return_dict[outfilename] = this_seq
    return return_dict
示例#36
0
 def testModels(self):
     """Check peptide building on a file with several models."""
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1LCD.cif")
     self.assertEqual(len(structure), 3)

     expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
     for builder in (PPBuilder(), CaPPBuilder()):
         # serial_num (model column) must be stored properly.
         for model_index in range(3):
             self.assertEqual(structure[model_index].serial_num,
                              model_index + 1)
         # First allow non-standard amino acids (aa_only False), then the
         # strict version; the expected result is the same for both.
         for aa_only in (False, True):
             polypeptides = builder.build_peptides(structure[0], aa_only)
             self.assertEqual(len(polypeptides), 1)
             peptide = polypeptides[0]
             # Start and end positions.
             self.assertEqual(peptide[0].get_id()[1], 1)
             self.assertEqual(peptide[-1].get_id()[1], 51)
             # Sequence type, alphabet and content.
             found = peptide.get_sequence()
             self.assertTrue(isinstance(found, Seq))
             self.assertEqual(found.alphabet, generic_protein)
             self.assertEqual(expected_sequence, str(found))
示例#37
0
    def create_rotamers(self, structure, pdb_path):
        """Create Residue and Rotamer records for the structure's preferred chain.

        The PDB file at ``pdb_path`` is parsed to obtain the sequence of the
        preferred chain, which is locally aligned against the parent protein
        sequence. The ``ATOM`` lines of ``structure.pdb_data.pdb`` are then
        walked and grouped per residue. For every residue numbered below 2000
        that is present in the parsed chain, a ``Residue`` is saved (generic
        number and protein segment are copied from the aligned parent residue
        when one is found) and ``PdbData`` / ``Rotamer`` rows are fetched or
        created for its lines.

        Args:
            structure: object providing ``preferred_chain``,
                ``protein_conformation`` and ``pdb_data.pdb`` (the PDB text).
            pdb_path: path of the PDB file parsed to obtain the chain sequence.

        Returns:
            None.
        """
        wt_lookup = {} #used to match WT seq_number to WT residue record
        pdbseq = {} #used to keep track of pdbseq residue positions vs index in seq
        ref_positions = {} #WT postions in alignment
        mapped_seq = {} # index in contruct, tuple of AA and WT [position,AA]

        preferred_chain = structure.preferred_chain

        if len(preferred_chain.split(','))>1: #if A,B
            preferred_chain = preferred_chain.split(',')[0]


        # Three-letter to one-letter codes; residue names missing from this
        # table raise KeyError where the table is indexed below.
        AA = {'ALA':'A', 'ARG':'R', 'ASN':'N', 'ASP':'D',
     'CYS':'C', 'GLN':'Q', 'GLU':'E', 'GLY':'G',
     'HIS':'H', 'ILE':'I', 'LEU':'L', 'LYS':'K',
     'MET':'M', 'PHE':'F', 'PRO':'P', 'SER':'S',
     'THR':'T', 'TRP':'W', 'TYR':'Y', 'VAL':'V'}


        s = PDBParser(PERMISSIVE=True, QUIET=True).get_structure('ref', pdb_path)[0]
        chain = s[preferred_chain] #select only one chain (avoid n-mer receptors)
        ppb=PPBuilder()
        seq = ''
        i = 1

        # First pass: once more than 200 residues numbered below 600 have been
        # seen, detach residues numbered above 1000 from the chain.
        check_1000 = 0
        for pp in ppb.build_peptides(chain): #remove >1000 pos (fusion protein / gprotein)
            for res in pp:
                id = res.id
                if id[1]<600: 
                    check_1000 += 1
                    #need check_1000 to catch structures where they lie in 1000s (4LDE, 4LDL, 4LDO, 4N4W, 4QKX)
                if id[1]>1000 and check_1000>200: 
                    chain.detach_child(id)

        # Second pass: build the chain sequence and record, per chain id and
        # residue number, [1-based running index in seq, one-letter code].
        # Note that `chain` is rebound to the chain id string inside the loop.
        for pp in ppb.build_peptides(chain): 
            seq += str(pp.get_sequence()) #get seq from fasta (only chain A)
            for residue in pp:
                residue_id = residue.get_full_id()
                chain = residue_id[2]
                if chain not in pdbseq:
                    pdbseq[chain] = {}
                pos = residue_id[3][1]
                pdbseq[chain][pos] = [i,AA[residue.resname]]
                i += 1

        parent_seq = str(structure.protein_conformation.protein.parent.sequence)

        rs = Residue.objects.filter(protein_conformation__protein=structure.protein_conformation.protein.parent).prefetch_related('display_generic_number','generic_number','protein_segment')

        for r in rs: #required to match WT position to a record (for duplication of GN values)
            wt_lookup[r.sequence_number] = r

        #align WT with structure seq -- make gaps penalties big, so to avoid too much overfitting
        pw2 = pairwise2.align.localms(parent_seq, seq, 2, -4, -4, -.1)

        # Only the first alignment (pw2[0]) is used: element 0 is the aligned
        # parent sequence, element 1 the aligned structure sequence.
        gaps = 0
        unmapped_ref = {}
        for i, r in enumerate(pw2[0][0], 1): #loop over alignment to create lookups (track pos)
            #print(i,r,pw2[0][1][i-1]) #print alignment for sanity check
            if r == "-":
                gaps += 1
            if r != "-":
                ref_positions[i] = [i-gaps,r]
            elif r == "-":
                ref_positions[i] = [None,'-']

            if pw2[0][1][i-1]=='-':
                unmapped_ref[i-gaps] = '-'

        gaps = 0
        for i, r in enumerate(pw2[0][1], 1): #make second lookup
            if r == "-":
                gaps += 1
            if r != "-":
                mapped_seq[i-gaps] = [r,ref_positions[i]]


        pdb = structure.pdb_data.pdb
        protein_conformation=structure.protein_conformation
        temp = ''   # ATOM lines accumulated for the residue being read
        check = 0   # residue number expected on the next line (0 until first set)
        errors = 0
        mismatch_seq = 0
        match_seq = 0
        not_matched = 0
        matched_by_pos = 0
        aa_mismatch = 0

        pdblines_temp = pdb.splitlines()
        pdblines = []
        for line in pdblines_temp: #Get rid of all odd records
            if line.startswith('ATOM'):
                pdblines.append(line)
        pdblines.append('') #add a line to not "run out"

        for i,line in enumerate(pdblines):
            if line.startswith('ATOM'): 
                chain = line[21]
                if preferred_chain and chain!=preferred_chain: #If perferred is defined and is not the same as the current line, then skip
                    pass
                else:   
                    nextline = pdblines[i+1]
                    residue_number = line[22:26].strip()
                    if (check==0 or nextline[22:26].strip()==check) and nextline.startswith('TER')==False and nextline.startswith('ATOM')==True: #If this is either the begining or the same as previous line add to current rotamer
                        temp += line + "\n"
                        #print('same res',pdb.splitlines()[i+1])
                    else: #if this is a new residue
                        #print(pdb.splitlines()[i+1][22:26].strip(),check)
                        temp += line + "\n"
                        # NOTE(review): check starts as the int 0; .strip() below
                        # would fail if this branch ran before check is rebound to
                        # a string - confirm that cannot happen.
                        if int(check.strip())<2000:
                            residue = Residue()
                            residue.sequence_number = int(check.strip())
                            # residue_name is assigned at the bottom of the loop
                            # body, so here it holds the previous ATOM line's value.
                            residue.amino_acid = AA[residue_name.upper()]
                            residue.protein_conformation = protein_conformation

                            #print(residue.sequence_number,residue.amino_acid) #sanity check
                            # Residues absent from pdbseq are skipped: the buffer
                            # is reset and the loop moves on without saving.
                            try:
                                seq_num_pos = pdbseq[chain][residue.sequence_number][0]
                            except:
                                #print('failed residue',pdb_path,residue.sequence_number)
                                temp = "" #start new line for rotamer
                                check = pdblines[i+1][22:26].strip()
                                continue
                            if seq_num_pos in mapped_seq:
                                if mapped_seq[seq_num_pos][1][0]==None:
                                    #print('no match found') #sanity check
                                    #print(residue.sequence_number,residue.amino_acid) #sanity check
                                    residue.display_generic_number = None
                                    residue.generic_number = None
                                    residue.protein_segment = None
                                    not_matched +=1
                                else:
                                    wt_r = wt_lookup[mapped_seq[seq_num_pos][1][0]]
                                    if residue.sequence_number!=wt_r.sequence_number and residue.amino_acid!=wt_r.amino_acid and residue.sequence_number in wt_lookup: #if pos numbers not work -- see if the pos number might be in WT and unmapped
                                        if wt_lookup[residue.sequence_number].amino_acid==residue.amino_acid:
                                            if residue.sequence_number in unmapped_ref: #WT was not mapped, so could be it
                                               # print(residue.sequence_number,residue.amino_acid) #sanity check
                                                #print('wrongly matched, better match on pos+aa',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                                wt_r = wt_lookup[residue.sequence_number]
                                                matched_by_pos +=1
                                                match_seq += 1
                                            else:
                                                mismatch_seq += 1
                                                #print('could have been matched, but already aligned to another position',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                        else:
                                            #print('WT pos not same AA, mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                            mismatch_seq += 1
                                    elif residue.sequence_number!=wt_r.sequence_number:
                                        #print('WT pos not same pos, mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                        mismatch_seq += 1
                                    elif residue.amino_acid!=wt_r.amino_acid:
                                        #print('aa mismatch',residue.sequence_number,residue.amino_acid,wt_r.sequence_number,wt_r.amino_acid)
                                        aa_mismatch += 1

                                    else:
                                        match_seq += 1
                                    if wt_r.generic_number is not None:
                                        residue.display_generic_number = wt_r.display_generic_number
                                        residue.generic_number = wt_r.generic_number 
                                    else:
                                        residue.display_generic_number = None
                                        residue.generic_number = None
                                        #print('no GN')
                                    residue.protein_segment = wt_r.protein_segment
                            else:
                                #print('wierd error') #sanity check
                                residue.display_generic_number = None
                                residue.generic_number = None
                                residue.protein_segment = None

                            #print('inserted',residue.sequence_number) #sanity check
                            residue.save()

                            # The accumulated ATOM lines of the residue become the
                            # PdbData payload linked from the Rotamer.
                            rotamer_data, created = PdbData.objects.get_or_create(pdb=temp)
                            rotamer, created = Rotamer.objects.get_or_create(residue=residue, structure=structure, pdbdata=rotamer_data)

                        temp = "" #start new line for rotamer
                        check = pdblines[i+1][22:26].strip()
                    
                    check = pdblines[i+1][22:26].strip()
                chain = line[21]
                residue_name = line[17:20].title() #use title to get GLY to Gly so it matches
        #print(structure.pdb_code.index,'length',len(seq),len(mapped_seq),'mapped res',str(mismatch_seq+match_seq+aa_mismatch),'pos mismatch',mismatch_seq,'aa mismatch',aa_mismatch,'not mapped',not_matched,' mapping off, matched on pos,aa',matched_by_pos)
        return None
示例#38
0
from Bio.PDB import PDBParser
from Bio.PDB import PPBuilder
from Bio.PDB import Polypeptide

# Demo: parse one PDB file, build its polypeptides and print a few views of
# the first one. Single-argument print(...) calls behave the same on
# Python 2 and Python 3, unlike the print statement used before.
item = '2bnr'
structure = PDBParser().get_structure(item, '../pdbs/'+item+'.pdb')
ppb=PPBuilder()
peps = ppb.build_peptides(structure)

print(structure.get_id())
print(peps[0])
#print peps[0][1:-3]
p = peps[0][3:9]  # slice of the first polypeptide, positions 3..8
print(p)
print(peps[0][1].get_resname())
示例#39
0
文件: parser.py 项目: bolod/bioEuler
#           n += 1
#       list[n].append(atom)
#       previous = atom
#   return list

if __name__ == "__main__":

    current_path = os.path.dirname(sys.argv[0])
    pdb_path = current_path + '../pdb/'
    pdb_id = '2vb1'


    structure = get_structure(pdb_id, pdb_path)
    model = structure[0]

    ppb = PPBuilder()
    pp_list = ppb.build_peptides(model)

    # orient
    orient(pp_list)

    # first split stage
    fs = first_split(pp_list)





    for seg in fs:
        pp = Polypeptide.Polypeptide(seg)
        print pp.get_sequence()