示例#1
0
 def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
     """Build a single site directly from a list of residues.

     The residue list is first passed through ``PdbSite._cleanup_list``.
     The mmCIF file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true and with ``FastMMCIFParser`` otherwise. Every
     residue is linked to the parsed structure (when the structure is
     truthy) and added to the new site. With ``annotate`` enabled the site
     also receives the parsed structure and the parser's mmCIF dictionary,
     and ``find_ligands()`` is called on it.

     Returns the populated site, or ``None`` (after a ``RuntimeWarning``)
     when parsing raises ``TypeError`` or ``PDBConstructionException``.
     """
     # Residues with multiple function locations are reduced first
     unique_residues = PdbSite._cleanup_list(reslist)
     new_site = cls()
     new_site.parent_entry = parent_entry
     cif_data = {}
     try:
         if not annotate:
             fast_parser = FastMMCIFParser(QUIET=True)
             structure = fast_parser.get_structure('', cif_path)
         else:
             full_parser = MMCIFParser(QUIET=True)
             structure = full_parser.get_structure('', cif_path)
             cif_data = full_parser._mmcif_dict
     except (TypeError, PDBConstructionException):
         message = 'Could not build site from residue list. Check entry'
         warnings.warn(message, RuntimeWarning)
         return None
     for residue in unique_residues:
         if structure:
             residue.add_structure(structure)
         new_site.add(residue)
     if not annotate:
         return new_site
     new_site.parent_structure = structure
     new_site.mmcif_dict = cif_data
     new_site.find_ligands()
     return new_site
示例#2
0
    def test_filehandle(self):
        """Test if the parser can handle file handle as well as filename"""
        parser = MMCIFParser()

        # Parse from a path
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # Parse from an open handle; the context manager closes it so the
        # test does not leak a file descriptor (and trigger ResourceWarning).
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)
示例#3
0
    def test_filehandle(self):
        """Test if the parser can handle file handle as well as filename."""
        parser = MMCIFParser()

        # First with a plain path ...
        structure = parser.get_structure("example", "PDB/1A8O.cif")
        self.assertEqual(len(structure), 1)

        # ... then with an already-open handle. Use a context manager so the
        # handle is closed even if parsing or the assertion fails.
        with open("PDB/1A8O.cif") as handle:
            structure = parser.get_structure("example", handle)
        self.assertEqual(len(structure), 1)
示例#4
0
    def testModels(self):
        """Check parsing of mmCIF files that contain several models.

        1LCD.cif is read with both MMCIFParser and FastMMCIFParser and must
        give three models each. Polypeptides are then built from the first
        model with PPBuilder and CaPPBuilder, in permissive and strict mode.
        Finally 2OFG.cif must also parse into three models.
        """
        expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

        full_parser = MMCIFParser(QUIET=1)
        fast_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = full_parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = fast_parser.get_structure("example", "PDB/1LCD.cif")

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 3)

        for builder in (PPBuilder(), CaPPBuilder()):
            # The model column must be stored as serial_num
            for position, serial in enumerate((1, 2, 3)):
                self.assertEqual(structure[position].serial_num, serial)
            # Permissive pass first (non-standard MSE shown as M), then the
            # strict pass with standard amino acids only.
            for standard_only in (False, True):
                peptides = builder.build_peptides(structure[0], standard_only)
                self.assertEqual(len(peptides), 1)
                peptide = peptides[0]
                # Start and end positions
                self.assertEqual(peptide[0].get_id()[1], 1)
                self.assertEqual(peptide[-1].get_id()[1], 51)
                # Sequence type, alphabet and content
                sequence = peptide.get_sequence()
                self.assertIsInstance(sequence, Seq)
                self.assertEqual(sequence.alphabet, generic_protein)
                self.assertEqual(expected_sequence, str(sequence))

        # This structure contains several models with multiple lengths;
        # parsing it used to fail.
        structure = full_parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
示例#5
0
    def testModels(self):
        """Test file with multiple models.

        Parses 1LCD.cif with both MMCIFParser and FastMMCIFParser, checks
        the model count and serial numbers, and builds polypeptides from the
        first model in permissive and strict mode. 2OFG.cif is parsed last
        as a regression check for models of differing length.
        """
        parser = MMCIFParser(QUIET=1)
        f_parser = FastMMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/1LCD.cif")
            f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

        self.assertEqual(len(structure), 3)
        self.assertEqual(len(f_structure), 3)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(structure[1].serial_num, 2)
            self.assertEqual(structure[2].serial_num, 3)
            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence; assertIsInstance reports the actual type
            # on failure, unlike assertTrue(isinstance(...)).
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            # Here non-standard MSE are shown as M
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))
            # ==========================================================
            # Now try strict version with only standard amino acids
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 1)
            pp = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 1)
            self.assertEqual(pp[-1].get_id()[1], 51)
            # Check the sequence
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR",
                             str(s))

        # This structure contains several models with multiple lengths.
        # The tests were failing.
        structure = parser.get_structure("example", "PDB/2OFG.cif")
        self.assertEqual(len(structure), 3)
示例#6
0
    def test_insertions(self):
        """Test file with residue insertion codes.

        Parses 4ZHL.cif and checks, for both PPBuilder and CaPPBuilder, that
        two polypeptides are built and that the first one has the expected
        start/end residue numbers and sequence.
        """
        parser = MMCIFParser(QUIET=1)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            structure = parser.get_structure("example", "PDB/4ZHL.cif")
        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            self.assertEqual(len(polypeptides), 2)
            pp = polypeptides[0]
            # Check the start and end positions (first segment only)
            self.assertEqual(pp[0].get_id()[1], 16)
            self.assertEqual(pp[-1].get_id()[1], 244)
            # Check the sequence
            refseq = (
                "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
                "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
                "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
                "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
            )

            s = pp.get_sequence()
            # assertIsInstance reports the offending type on failure
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(refseq, str(s))
def CIF2PDB(ciffile, pdbfile, verbose=False):
    """Convert an mmCIF file into a PDB file.

    The structure is read with ``MMCIFParser``, its chains are renamed via
    ``rename_chains`` and the result is written with ``PDBIO``.

    Args:
        ciffile: Path of the mmCIF file to read.
        pdbfile: Destination passed to ``PDBIO.save``.
        verbose: When true, log every chain whose name was changed.

    Returns:
        ``pdbfile``, unchanged.

    Exits the process with status 1 (after logging an error) when
    ``rename_chains`` raises ``OutOfChainsError``.
    """
    # Local import keeps this block self-contained.
    import os

    # The parser requires a structure id. Derive it from the file name only,
    # so that a directory prefix (e.g. "data/1abc.cif") does not end up as
    # the id; fall back to a placeholder for very short names.
    basename = os.path.basename(ciffile)
    strucid = basename[:4] if len(basename) > 4 else "1xxx"

    # Read file
    parser = MMCIFParser()
    structure = parser.get_structure(strucid, ciffile)

    # Rename long chains
    try:
        chainmap = rename_chains(structure)
    except OutOfChainsError:
        logging.error("Too many chains to represent in PDB format")
        sys.exit(1)

    if verbose:
        for new, old in chainmap.items():
            if new != old:
                # Lazy %-formatting: the message is only built if emitted
                logging.info("Renaming chain %s to %s", old, new)

    # Write PDB
    io = PDBIO()
    io.set_structure(structure)
    # TODO What happens with large structures?
    io.save(pdbfile)

    return pdbfile
示例#8
0
def get_info_mmcif(file):
    """Compute per-chain barycenters and residue distances to them.

    The first model of the mmCIF file is read. Every residue contributes one
    point: the coordinates of its ``CA`` atom if present, otherwise the mean
    of the coordinates of all its atoms (or the origin if it has no atoms).

    Args:
        file: Path of the mmCIF file; the text before the first ``.`` is
            used as the structure id.

    Returns:
        A tuple ``(bary, enf)`` of dictionaries keyed by chain object.
        ``bary[chain]`` is a length-3 array holding the mean of the chain's
        points (NaN for a chain without residues). ``enf[chain]`` is a list
        with the Euclidean distance of every point to that barycenter.
    """
    parser = MMCIFParser()
    structure = parser.get_structure(file.split('.')[0], file)
    bary = {}
    enf = {}
    for chain in structure[0]:
        points = []
        for residue in chain:
            if residue.has_id('CA'):
                points.append(residue['CA'].get_coord())
                continue
            # No CA atom: use the centroid of all atoms. The previous code
            # overwrote the running value on every atom, keeping only
            # last_atom / n_atoms instead of the mean.
            atom_coords = [atom.get_coord() for atom in residue]
            if atom_coords:
                points.append(np.mean(atom_coords, axis=0))
            else:
                points.append(np.zeros(3))
        # Force an (N, 3) layout so that empty chains stay well-formed
        coords = np.asarray(points, dtype=float).reshape(-1, 3)
        if len(coords):
            # Average over residues (axis 0); indexing coords[i] as before
            # averaged the x/y/z of the first three residues instead.
            bary[chain] = coords.mean(axis=0)
        else:
            bary[chain] = np.full(3, np.nan)
        enf[chain] = list(np.linalg.norm(coords - bary[chain], axis=1))
    return bary, enf
示例#9
0
 def test_insertions(self):
     """Check polypeptide building on 4ZHL.cif, which has insertion codes.

     For each builder two polypeptides are expected; the first one must
     span residue numbers 16 to 244 and carry the reference sequence.
     """
     expected = (
         "IIGGEFTTIENQPWFAAIYRRHRGGSVTYVCGGSLISPCWVISATHCFIDYPKKEDYIVYLGR"
         "SRLNSNTQGEMKFEVENLILHKDYSADTLAYHNDIALLKIRSKEGRCAQPSRTIQTIALPSMY"
         "NDPQFGTSCEITGFGKEQSTDYLYPEQLKMTVVKLISHRECQQPHYYGSEVTTKMLCAADPQW"
         "KTDSCQGDSGGPLVCSLQGRMTLTGIVSWGRGCALKDKPGVYTRVSHFLPWIRSHTKE"
     )
     cif_parser = MMCIFParser(QUIET=1)
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         structure = cif_parser.get_structure("example", "PDB/4ZHL.cif")
     first_model = structure[0]
     for builder in (PPBuilder(), CaPPBuilder()):
         # Non-standard amino acids are allowed here
         peptides = builder.build_peptides(first_model, False)
         self.assertEqual(len(peptides), 2)
         first_segment = peptides[0]
         # Residue numbers at both ends of the first segment
         self.assertEqual(first_segment[0].get_id()[1], 16)
         self.assertEqual(first_segment[-1].get_id()[1], 244)
         # Sequence type, alphabet and content
         sequence = first_segment.get_sequence()
         self.assertIsInstance(sequence, Seq)
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(expected, str(sequence))
示例#10
0
    def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
        """Assert that an MMTF file and an mmCIF file parse to the same content.

        Atoms are compared over the whole structure, residues are compared
        chain by chain and then over the whole structure, and finally the
        number of models is compared. The atom/residue lists are stored on
        ``self`` because ``check_atoms`` and ``check_residues`` take no
        arguments.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure(mmtf_filename)
        mmcif_struct = MMCIFParser().get_structure("4CUP", cif_filename)

        # Whole-structure atom comparison
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        self.check_atoms()

        # Chain-by-chain residue comparison
        cif_chains = list(mmcif_struct.get_chains())
        tf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(cif_chains), len(tf_chains))
        for cif_chain, tf_chain in zip(cif_chains, tf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(tf_chain.get_residues())
            self.check_residues()

        # Whole-structure residue comparison
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        self.check_residues()

        # Same number of models on both sides
        cif_model_count = len(list(mmcif_struct.get_models()))
        tf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(cif_model_count, tf_model_count)
示例#11
0
 def test_mmtf(self):
     """Parse the mmCIF file PDB/1EJG.cif and print the resulting structure.

     PDBConstructionWarning is silenced while parsing. No assertion is
     made; the test only checks that parsing completes.
     """
     cif_parser = MMCIFParser()
     with warnings.catch_warnings():
         warnings.simplefilter('ignore', PDBConstructionWarning)
         parsed = cif_parser.get_structure("MICR", "PDB/1EJG.cif")
         print(parsed)
示例#12
0
    def test_with_anisotrop(self):
        """Check atom attributes and ANISOU values parsed from 4CUP.cif.

        The file is read with both MMCIFParser and FastMMCIFParser; each
        result must hold one model, the same number of atoms, and the same
        expected values for the first atoms.
        """
        full_parser = MMCIFParser()
        quick_parser = FastMMCIFParser()

        structure = full_parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = quick_parser.get_structure("example", "PDB/4CUP.cif")

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())
        self.assertEqual(len(s_atoms), len(f_atoms))

        # Reference values shared by both parsers
        expected_names = ['N', 'CA', 'C', 'O', 'CB']
        expected_coord = numpy.array([50.346, 19.287, 17.288],
                                     dtype=numpy.float32)
        first_atom_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        cb_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        for atom_list in (s_atoms, f_atoms):
            leading = atom_list[:5]
            first_atom = atom_list[0]
            self.assertSequenceEqual(
                [atom.get_name() for atom in leading], expected_names)
            self.assertSequenceEqual(
                [atom.get_id() for atom in leading], expected_names)
            self.assertSequenceEqual(
                [atom.get_fullname() for atom in leading], expected_names)
            self.assertSequenceEqual(
                [atom.get_occupancy() for atom in leading], [1.0] * 5)
            self.assertIsInstance(first_atom.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(
                first_atom.get_coord(), expected_coord)
            self.assertEqual(first_atom.get_bfactor(), 32.02)

            numpy.testing.assert_array_equal(
                first_atom.get_anisou(), first_atom_anisou)
            # CB of the residue at list index 114 in chain A, always taken
            # from the fast parser's structure
            atom_937 = list(f_structure[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(), cb_anisou)
示例#13
0
    def test_conversion(self):
        """Parse 1LCD.cif, write it as a PDB file, parse again and compare.

        The round trip must preserve the number of models as well as the
        names and elements of all atoms.
        """
        # Local import keeps this block self-contained.
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it is not leaked
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)

            # comparisons
            self.assertEqual(len(pdb_struct), len(cif_struct))

            pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
            cif_atom_names = [a.name for a in cif_struct.get_atoms()]
            self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
            self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

            pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
            cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
            self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
        finally:
            # Remove the temporary file whether or not the test passed
            os.remove(filename)
示例#14
0
    def get_structure(self, *args):
        """Parse a structure from a path, a URL or an open handle.

        Call as ``get_structure(pdbId, fileName)`` or
        ``get_structure(fileName)``; in the second form the structure id is
        ``str(fileName)``.

        ``fileName`` handling:
          * a string starting with ``http://`` or ``https://`` is downloaded
            and parsed from memory;
          * a string ending in ``.gz`` is opened with gzip in text mode;
          * anything else is handed to the parser as is.

        Parsing is attempted with ``PDBParser`` first. If that raises, the
        error is printed and ``MMCIFParser`` is tried on the same input.
        When ``self.removeHeteroDuplicated`` is set, the result is passed
        through ``self.filterOutDuplicated``.

        Raises:
            ValueError: if called with zero or more than two arguments.
            Exception: if the download does not succeed.
        """
        if len(args) == 2:
            pdbId, fileName = args
        elif len(args) == 1:
            fileName = args[0]
            pdbId = str(fileName)
        else:
            raise ValueError(
                "Error, input should be (id, fileName) or (fileName))")

        # Only strings can be URLs; re.match would raise TypeError on a
        # file handle, which the code below otherwise supports.
        if isinstance(fileName, str) and re.match("http(s?)://", fileName):
            r = requests.get(fileName)
            if r.ok:
                fileName = StringIO(r.text)
            else:
                raise Exception("Error downloading pdb")

        is_gzipped = isinstance(fileName, str) and fileName.endswith(".gz")
        try:
            if is_gzipped:
                # Text mode: gzip.open defaults to binary, but the parser
                # compares record names against str.
                with gzip.open(fileName, "rt") as f:
                    structure = PDBParser.get_structure(self, pdbId, f)
            else:
                structure = PDBParser.get_structure(self, pdbId, fileName)
        except Exception as e:
            # Deliberate best-effort fallback: report and retry as mmCIF
            print(e)
            if is_gzipped:
                # The mmCIF parser needs the decompressed text as well
                with gzip.open(fileName, "rt") as f:
                    structure = MMCIFParser.get_structure(self, pdbId, f)
            else:
                # A handle may have been partly consumed by the failed
                # attempt; rewind it when possible.
                if hasattr(fileName, "seekable") and fileName.seekable():
                    fileName.seek(0)
                structure = MMCIFParser.get_structure(self, pdbId, fileName)
        if self.removeHeteroDuplicated:
            structure = self.filterOutDuplicated(structure)
        return structure
    def test_conversion(self):
        """Parse 1LCD.cif, write it as a PDB file, parse again and compare.

        The round trip must preserve the number of models as well as the
        names and elements of all atoms.
        """
        # Local import keeps this block self-contained.
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/1LCD.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it is not leaked
        os.close(filenumber)
        try:
            pdb_writer.save(filename)

            pdb_parser = PDBParser(QUIET=1)
            pdb_struct = pdb_parser.get_structure('example_pdb', filename)

            # comparisons
            self.assertEqual(len(pdb_struct), len(cif_struct))

            # The reference lists must come from the mmCIF structure;
            # building them from pdb_struct compared the PDB structure with
            # itself and could never fail.
            pdb_atom_names = [a.name for a in pdb_struct.get_atoms()]
            cif_atom_names = [a.name for a in cif_struct.get_atoms()]
            self.assertEqual(len(pdb_atom_names), len(cif_atom_names))
            self.assertSequenceEqual(pdb_atom_names, cif_atom_names)

            pdb_atom_elems = [a.element for a in pdb_struct.get_atoms()]
            cif_atom_elems = [a.element for a in cif_struct.get_atoms()]
            self.assertSequenceEqual(pdb_atom_elems, cif_atom_elems)
        finally:
            # Remove the temporary file whether or not the test passed
            os.remove(filename)
示例#16
0
 def build_all(cls,
               reslist,
               reference_site,
               parent_entry,
               cif_path,
               annotate=True,
               redundancy_cutoff=None):
     """Build all sites, using a list of catalytic residues as input.

     The mmCIF file at ``cif_path`` is parsed with ``MMCIFParser`` when
     ``annotate`` is true and with ``FastMMCIFParser`` otherwise. The
     residue list is cleaned up, expanded via
     ``PdbSite._get_assembly_residues`` and turned into seeds; one site is
     built per seed with ``cls.build``. Redundant sites are then removed
     using ``redundancy_cutoff``, ligands and annotations are added when
     ``annotate`` is true and the structure is truthy, and unclustered
     sites are flagged.

     Returns a list of PdbSite objects. The list is empty (and a
     ``RuntimeWarning`` is issued) when parsing raises ``TypeError`` or
     ``PDBConstructionException``.
     """
     # Map structure objects in every residue
     sites = []
     mmcif_dict = dict()
     try:
         if annotate:
             parser = MMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
             mmcif_dict = parser._mmcif_dict
         else:
             parser = FastMMCIFParser(QUIET=True)
             structure = parser.get_structure('', cif_path)
     except (TypeError, PDBConstructionException):
         # The category belongs to warn(), not to str.format(); passing it
         # to format() silently downgraded this to a UserWarning.
         warnings.warn(
             'Could not parse structure {}'.format(cif_path),
             RuntimeWarning)
         return sites
     # First reduce redundant residues with multiple function locations
     reslist = PdbSite._cleanup_list(reslist)
     # We want all equivalent residues from identical assembly chains
     reslist = PdbSite._get_assembly_residues(reslist, structure)
     # Get seeds to build active sites
     seeds = PdbSite._get_seeds(reslist)
     # Build a site from each seed
     for seed in seeds:
         sites.append(cls.build(seed, reslist, reference_site,
                                parent_entry))
     # Reduce redundancy
     sites = PdbSite._remove_redundant_sites(sites,
                                             cutoff=redundancy_cutoff)
     # Add ligands and annotations
     if annotate and structure:
         for site in sites:
             site.parent_structure = structure
             site.mmcif_dict = mmcif_dict
             site.find_ligands()
     # Flag unclustered sites
     PdbSite._mark_unclustered(sites)
     return sites
示例#17
0
    def test_header(self):
        """Check the header dictionary produced by MMCIFParser.

        a_structure.cif is expected to give the default values, 1A8O.cif
        the values extracted from the file.
        """
        expectations = (
            ("PDB/a_structure.cif", {
                "idcode": "",
                "head": "",
                "deposition_date": "",
                "structure_method": "",
                "resolution": 0.0,
            }),
            ("PDB/1A8O.cif", {
                "idcode": "1A8O",
                "head": "Viral protein",
                "deposition_date": "",
                "structure_method": "X-RAY DIFFRACTION",
                "resolution": 1.7,
            }),
        )
        cif_parser = MMCIFParser()
        for cif_path, expected_header in expectations:
            structure = cif_parser.get_structure("example", cif_path)
            for field, expected_value in expected_header.items():
                self.assertEqual(expected_value, structure.header[field])
 def __init__(self, path):
     """Parse the mmCIF file at ``path`` and keep the result on the instance.

     The structure is parsed once, under the id 'PHA-L', and stored in
     ``self.structure`` so it does not have to be parsed again. An example
     file (7ahl.cif) is included in the repository.
     """
     self.structure = MMCIFParser().get_structure('PHA-L', path)
 def __init__(self, path):
     """Read a structure file in CIF format and store the parsed structure.

     Parsing happens exactly once here; the result, registered under the
     id 'PHA-L', is kept in ``self.structure`` for re-use. An example file
     (7ahl.cif) is included in the repository.
     """
     structure_reader = MMCIFParser()
     parsed = structure_reader.get_structure('PHA-L', path)
     self.structure = parsed
示例#20
0
    def test_conversion_not_preserve_numbering(self):
        """Convert mmCIF to PDB and renumber atom serials.

        Saving a_structure.cif as PDB with ``preserve_atom_numbering=False``
        must complete without raising.
        """
        # Local import keeps this block self-contained.
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it is not leaked
        os.close(filenumber)
        try:
            pdb_writer.save(filename, preserve_atom_numbering=False)
        finally:
            # Do not leave the temporary file behind
            os.remove(filename)
示例#21
0
 def __init__(self, path):
     """Load the CIF structure file at ``path`` into ``self.structure``.

     The file is parsed a single time so that later calls can re-use the
     stored structure. An example file (7ahl.cif) is included in the
     repository.
     """
     # First argument is the id under which the structure is registered
     self.structure = MMCIFParser().get_structure(
         'some structure string here, e.g. 7AHL', path)
示例#22
0
    def test_conversion_preserve_numbering(self):
        """Convert mmCIF to PDB and preserve original serial numbering.

        Saving a_structure.cif with ``preserve_atom_numbering=True`` is
        expected to raise ``ValueError``.
        """
        # Local import keeps this block self-contained.
        import os

        cif_parser = MMCIFParser(QUIET=1)
        cif_struct = cif_parser.get_structure("example", "PDB/a_structure.cif")

        pdb_writer = PDBIO()
        pdb_writer.set_structure(cif_struct)
        filenumber, filename = tempfile.mkstemp()
        # mkstemp returns an open descriptor; close it so it is not leaked
        os.close(filenumber)
        try:
            with self.assertRaises(ValueError):
                pdb_writer.save(filename, preserve_atom_numbering=True)
        finally:
            # Do not leave the temporary file behind
            os.remove(filename)
示例#23
0
    def test_compare_to_mmcif(self):
        """Compare the MMTF and mmCIF parsed structures of 4CUP."""

        def compare_atoms(case):
            """Assert that case.mmtf_atoms and case.mmcif_atoms are equivalent."""
            case.assertEqual(len(case.mmcif_atoms), len(case.mmtf_atoms))
            for mmcif_atom, mmtf_atom in zip(case.mmcif_atoms, case.mmtf_atoms):
                # e.g. "CA"; spaces are removed from the atom name
                case.assertEqual(mmtf_atom.name, mmcif_atom.name)
                # e.g. " CA "; spaces included
                case.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname)
                for axis in range(3):
                    case.assertAlmostEqual(
                        mmtf_atom.coord[axis], mmcif_atom.coord[axis], places=3)
                case.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
                case.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
                case.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
                # (structure id, model id, chain id, residue id, atom id)
                case.assertEqual(mmtf_atom.full_id, mmcif_atom.full_id)
                # the id of an atom is its name (e.g. "CA")
                case.assertEqual(mmtf_atom.id, mmcif_atom.name)
                # serial_number is not compared: the mmCIF one is None

        def compare_residues(case):
            """Assert that case.mmcif_res and case.mmtf_res are equivalent."""
            case.assertEqual(len(case.mmcif_res), len(case.mmtf_res))
            for mmcif_r, mmtf_r in zip(case.mmcif_res, case.mmtf_res):
                case.assertEqual(mmtf_r.level, mmcif_r.level)
                case.assertEqual(mmtf_r.disordered, mmcif_r.disordered)
                case.assertEqual(mmtf_r.resname, mmcif_r.resname)
                case.assertEqual(mmtf_r.segid, mmcif_r.segid)
                # Compare the atoms of this residue pair as well
                case.mmcif_atoms = list(mmcif_r.get_atom())
                case.mmtf_atoms = list(mmtf_r.get_atom())
                compare_atoms(case)

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure("PDB/4CUP.mmtf")
        mmcif_struct = MMCIFParser().get_structure("example", "PDB/4CUP.cif")

        # All atoms of the structure
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        compare_atoms(self)

        # Residues, chain by chain
        cif_chains = list(mmcif_struct.get_chains())
        tf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(cif_chains), len(tf_chains))
        for cif_chain, tf_chain in zip(cif_chains, tf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(tf_chain.get_residues())
            compare_residues(self)

        # All residues of the structure
        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        compare_residues(self)

        # Number of models
        cif_model_count = len(list(mmcif_struct.get_models()))
        tf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(cif_model_count, tf_model_count)
示例#24
0
 def __init__(self, path):
     """Initialize the parser with a path to a structure file in CIF format.

     The structure is parsed once, under the id "Structure", and stored in
     ``self.structure`` for re-use by the other methods. An example file
     (7ahl.cif) is included in the repository.
     """
     # Parser object for reading in a structure in CIF format
     CIF_PARSER = MMCIFParser()
     # Parse the structure once and re-use it in the functions below
     self.structure = CIF_PARSER.get_structure("Structure", path)
     # NOTE(review): this looks like leftover debug output; it prints the
     # result of get_number_of_water_molecules("D") on every construction.
     # Kept to preserve behavior; confirm whether it can be removed.
     print(self.get_number_of_water_molecules("D"))
示例#25
0
 def __init__(self, path):
     """Parse the CIF file at ``path`` and prepare lookup data.

     Sets ``self.structure`` (parsed once, quietly), ``self.model`` (the
     entry at index 0 of the structure) and ``self.residue_dict``, which
     maps upper-cased residue names from ``protein_letters_3to1`` to their
     one-letter codes and 'HOH' to the empty string. An example file
     (7ahl.cif) is included in the repository.
     """
     quiet_parser = MMCIFParser(QUIET=True)
     self.structure = quiet_parser.get_structure('structure', path)
     self.model = self.structure[0]

     one_letter_codes = {}
     for three_letter, one_letter in protein_letters_3to1.items():
         one_letter_codes[three_letter.upper()] = one_letter
     # Water has no one-letter code
     one_letter_codes['HOH'] = ''
     self.residue_dict = one_letter_codes
示例#26
0
def clean_pdb(file_input, file_output, chain_to_keep, parameters):
    """Read an mmCIF file, filter it and write the result as mmCIF.

    The structure id is ``file_input`` without its last four characters,
    upper-cased. The parsed structure is passed through ``remove_chains``
    (with ``chain_to_keep``) and then ``remove_extra_atoms`` (with
    ``parameters``) before being saved to ``file_output`` with ``MMCIFIO``.
    """
    cif_reader = MMCIFParser()
    structure_id = file_input[:-4].upper()
    parsed = cif_reader.get_structure(structure_id, file_input)

    # Filtering steps, applied in this order
    kept_chains = remove_chains(parsed, chain_to_keep)
    cleaned = remove_extra_atoms(kept_chains, parameters)

    writer = MMCIFIO()
    writer.set_structure(cleaned)
    writer.save(file_output)
示例#27
0
 def test_parser(self):
     """Extract polypeptides from 1A8O.

     With non-standard amino acids allowed, one polypeptide covering
     residues 151-220 is expected. In strict mode three fragments are
     expected (MSE 151 at the start is ignored and the chain breaks at
     MSE 185 and MSE 214, 215).
     """
     strict_fragments = (
         (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
         (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
         (216, 220, "TACQG"),
     )

     def verify(peptide, first_id, last_id, expected_sequence):
         """Check end residue numbers, sequence type, alphabet and text."""
         self.assertEqual(peptide[0].get_id()[1], first_id)
         self.assertEqual(peptide[-1].get_id()[1], last_id)
         sequence = peptide.get_sequence()
         self.assertTrue(isinstance(sequence, Seq))
         self.assertEqual(sequence.alphabet, generic_protein)
         self.assertEqual(expected_sequence, str(sequence))

     structure = MMCIFParser().get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     for builder in (PPBuilder(), CaPPBuilder()):
         # The model column must be stored as serial_num
         self.assertEqual(structure[0].serial_num, 1)

         # Permissive mode: non-standard MSE residues appear as M
         permissive = builder.build_peptides(structure[0], False)
         self.assertEqual(len(permissive), 1)
         verify(
             permissive[0], 151, 220,
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
             "NANPDCKTILKALGPGATLEEMMTACQG")

         # Strict mode: standard amino acids only, hence three fragments
         strict = builder.build_peptides(structure[0], True)
         self.assertEqual(len(strict), 3)
         for fragment, (first_id, last_id, text) in zip(strict,
                                                        strict_fragments):
             verify(fragment, first_id, last_id, text)
示例#28
0
 def test_parser(self):
     """Extract polypeptides from 1A8O.

     Checks the model serial number, then builds polypeptides with both
     PPBuilder and CaPPBuilder: first allowing non-standard amino acids
     (one polypeptide), then in strict mode (three fragments).
     """
     parser = MMCIFParser()
     structure = parser.get_structure("example", "PDB/1A8O.cif")
     self.assertEqual(len(structure), 1)
     for ppbuild in [PPBuilder(), CaPPBuilder()]:
         # ==========================================================
         # Check that serial_num (model column) is stored properly
         self.assertEqual(structure[0].serial_num, 1)
         # First try allowing non-standard amino acids,
         polypeptides = ppbuild.build_peptides(structure[0], False)
         self.assertEqual(len(polypeptides), 1)
         pp = polypeptides[0]
         # Check the start and end positions
         self.assertEqual(pp[0].get_id()[1], 151)
         self.assertEqual(pp[-1].get_id()[1], 220)
         # Check the sequence; assertIsInstance reports the actual type
         # on failure, unlike assertTrue(isinstance(...)).
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         # Here non-standard MSE are shown as M
         self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                          "NANPDCKTILKALGPGATLEEMMTACQG", str(s))
         # ==========================================================
         # Now try strict version with only standard amino acids
         # Should ignore MSE 151 at start, and then break the chain
         # at MSE 185, and MSE 214,215
         polypeptides = ppbuild.build_peptides(structure[0], True)
         self.assertEqual(len(polypeptides), 3)
         # First fragment
         pp = polypeptides[0]
         self.assertEqual(pp[0].get_id()[1], 152)
         self.assertEqual(pp[-1].get_id()[1], 184)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))
         # Second fragment
         pp = polypeptides[1]
         self.assertEqual(pp[0].get_id()[1], 186)
         self.assertEqual(pp[-1].get_id()[1], 213)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))
         # Third fragment
         pp = polypeptides[2]
         self.assertEqual(pp[0].get_id()[1], 216)
         self.assertEqual(pp[-1].get_id()[1], 220)
         s = pp.get_sequence()
         self.assertIsInstance(s, Seq)
         self.assertEqual(s.alphabet, generic_protein)
         self.assertEqual("TACQG", str(s))
def get_atoms(file):
    """Collect the C-alpha vector of every residue, grouped by chain.

    The mmCIF file at ``file`` is parsed and only its first model is read.
    The return value is a list with one entry per chain; each entry is a
    list of ``(residue name, CA vector)`` tuples for the residues that have
    a ``CA`` atom.  Residues without one are left out.
    """
    cif_parser = MMCIFParser()
    # Everything before the first '.' of the path serves as the structure id.
    structure = cif_parser.get_structure(file.split('.')[0], file)
    first_model = structure[0]
    return [
        [
            (residue.get_resname(), residue['CA'].get_vector())
            for residue in chain
            if residue.has_id('CA')
        ]
        for chain in first_model
    ]
示例#30
0
    def test_header(self):
        """Check the header fields the parser attaches to a structure."""
        cif_parser = MMCIFParser(QUIET=1)

        # Default values: text fields are empty strings, resolution is None.
        header = cif_parser.get_structure("example", "PDB/a_structure.cif").header
        for field in ("idcode", "head", "deposition_date", "structure_method"):
            self.assertEqual("", header[field])
        self.assertIsNone(header["resolution"])

        # Fields extracted from the 1A8O file.
        header = cif_parser.get_structure("example", "PDB/1A8O.cif").header
        expected_fields = (
            ("idcode", "1A8O"),
            ("head", "Viral protein"),
            ("deposition_date", ""),
            ("structure_method", "X-RAY DIFFRACTION"),
            ("resolution", 1.7),
        )
        for field, expected in expected_fields:
            self.assertEqual(expected, header[field])

        # The parser must not be confused by '.'
        header = cif_parser.get_structure("example", "PDB/1SSU_mod.cif").header
        self.assertIsNone(header["resolution"])
示例#31
0
    def setUp(self):
        """Load the 1LCD entry from its PDB file and from its mmCIF file.

        The parsed structures are stored on ``self.pdbo`` and ``self.cifo``.
        """
        # Silence!
        warnings.simplefilter('ignore', PDBConstructionWarning)

        pdb_reader = PDBParser(QUIET=1)
        cif_reader = MMCIFParser(QUIET=1)

        # Both input files sit in the "PDB" directory next to this module.
        data_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), "PDB")

        self.pdbo = pdb_reader.get_structure('pdb', os.path.join(data_dir, "1LCD.pdb"))
        self.cifo = cif_reader.get_structure('pdb', os.path.join(data_dir, "1LCD.cif"))
    def setUp(self):
        """Parse 1LCD.pdb into ``self.pdbo`` and 1LCD.cif into ``self.cifo``."""
        # Silence!
        warnings.simplefilter("ignore", PDBConstructionWarning)

        from_pdb = PDBParser(QUIET=1)
        from_cif = MMCIFParser(QUIET=1)

        # Resolve the inputs relative to this module, not the working directory.
        here = os.path.abspath(os.path.dirname(__file__))
        pdb_path = os.path.join(here, "PDB", "1LCD.pdb")
        cif_path = os.path.join(here, "PDB", "1LCD.cif")

        self.pdbo = from_pdb.get_structure("pdb", pdb_path)
        self.cifo = from_cif.get_structure("pdb", cif_path)
示例#33
0
 def test_write(self):
     """Write 1A8O out as MMTF and verify the decoded contents of the file."""
     struc = MMCIFParser().get_structure("1A8O", "PDB/1A8O.cif")
     writer = MMTFIO()
     writer.set_structure(struc)
     # mkstemp returns an open descriptor; close it and work with the path.
     descriptor, tmp_path = tempfile.mkstemp()
     os.close(descriptor)
     try:
         writer.save(tmp_path)
         # The file is also read back with MMTFParser; the result is not inspected.
         MMTFParser.get_structure(tmp_path)
         decoded = mmtf.parse(tmp_path)
         self.assertEqual(decoded.structure_id, "1A8O")

         # Top-level counts.
         for attr, count in (
             ("num_models", 1),
             ("num_chains", 2),
             ("num_groups", 158),
             ("num_atoms", 644),
         ):
             self.assertEqual(getattr(decoded, attr), count)

         # Every per-atom list has one entry per atom.
         for attr in (
             "x_coord_list",
             "y_coord_list",
             "z_coord_list",
             "b_factor_list",
             "occupancy_list",
         ):
             self.assertEqual(len(getattr(decoded, attr)), 644)

         self.assertEqual(decoded.x_coord_list[5], 20.022)
         self.assertEqual(set(decoded.ins_code_list), {"\x00"})
         self.assertEqual(set(decoded.alt_loc_list), {"\x00"})
         self.assertEqual(list(decoded.atom_id_list), list(range(1, 645)))
         self.assertEqual(
             list(decoded.sequence_index_list), list(range(70)) + [-1] * 88
         )

         # Chain and group bookkeeping.
         self.assertEqual(decoded.chain_id_list, ["A", "B"])
         self.assertEqual(decoded.chain_name_list, ["A", "A"])
         self.assertEqual(decoded.chains_per_model, [2])
         self.assertEqual(len(decoded.group_list), 21)
         self.assertEqual(len(decoded.group_id_list), 158)
         self.assertEqual(len(decoded.group_type_list), 158)
         self.assertEqual(decoded.groups_per_chain, [70, 88])

         # Entities: the polymer first, then the water.
         self.assertEqual(len(decoded.entity_list), 2)
         polymer = decoded.entity_list[0]
         self.assertEqual(polymer["type"], "polymer")
         self.assertEqual(polymer["chainIndexList"], [0])
         self.assertEqual(
             polymer["sequence"],
             "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
         )
         water = decoded.entity_list[1]
         self.assertEqual(water["type"], "water")
         self.assertEqual(water["chainIndexList"], [1])
         self.assertEqual(water["sequence"], "")
     finally:
         os.remove(tmp_path)
def get_descriptors(file):
    """Return one descriptor dict per residue of the first model of ``file``.

    The mmCIF file is parsed and every residue of every chain in the first
    model yields a dict, in chain order then residue order:

    * ``"name"``  -- the residue name (always present);
    * ``"coord"`` -- the vector of the ``CA`` atom (only when the residue
      has a ``CA`` atom);
    * ``"hse"``   -- the first two elements of the residue's
      ``HSExposureCB`` entry (only when the residue has a ``CA`` atom;
      presumably the HSE-up and HSE-down counts -- confirm against
      Bio.PDB.HSExposure).

    The result is a single flat list covering all chains.
    """
    parser = MMCIFParser()
    # Everything before the first '.' of the path serves as the structure id.
    structure = parser.get_structure(file.split('.')[0], file)
    model = structure[0]
    hse = HSExposureCB(model)
    pos = []
    for chain in model:
        for residue in chain:
            dic = {"name": residue.get_resname()}
            if residue.has_id('CA'):
                dic["coord"] = residue['CA'].get_vector()
                # NOTE(review): this lookup is unguarded; presumably it raises
                # KeyError when no exposure was computed for a residue that
                # still has a 'CA' atom -- confirm.
                hse_ = hse[(chain.id, residue.id)]
                dic["hse"] = (hse_[0], hse_[1])
            # Append in place: rebuilding the list with ``pos + chunk`` for
            # every chain copies all earlier entries each time.
            pos.append(dic)
    return pos
示例#35
0
def func1():
    """Extract one chain of a local mmCIF entry and save it as ``chain1.pdb``.

    The entry identifier is taken from ``sys.argv[2]`` and the chain
    identifier from ``sys.argv[3]``.  The gzipped mmCIF file is looked up
    under the hard-coded ``pathmmcif`` directory, decompressed to
    ``pdbprocess.cif`` in the current directory and parsed; the requested
    chain of the first model is then written in PDB format.

    Any ordinary failure (missing arguments, missing file, parse error,
    unknown chain, ...) is reported by printing ``FILE NOT FOUND``.
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    import sys
    import gzip
    import shutil
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)

    pathmmcif = "/Volumes/BIOINFO/mmCIF"
    filename = "pdbprocess.cif"

    try:
        pdb1 = sys.argv[2]
        c1 = sys.argv[3]
        # Files are stored in sub-directories named after characters 2-3 of
        # the identifier.
        fol = pdb1[1:3]
        pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb1)

        # Stream the archive to disk; both handles are closed even on error.
        with gzip.open(pdbfile, "rb") as tar, open(filename, "wb") as out:
            shutil.copyfileobj(tar, out)

        structure = parser.get_structure(pdb1, filename)
        chain = structure[0][c1]

        io = PDBIO()
        io.set_structure(chain)
        io.save("chain1.pdb")
    except Exception:
        print("FILE NOT FOUND")
示例#36
0
    def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
        """Parse one entry from an MMTF and an mmCIF file and compare them.

        Atoms are compared over the whole structure, residues both chain by
        chain and over the whole structure, and finally the model counts.
        """
        # Construction warnings are suppressed for the MMTF parse only.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', PDBConstructionWarning)
            mmtf_struct = MMTFParser.get_structure(mmtf_filename)
        mmcif_struct = MMCIFParser().get_structure("example", cif_filename)

        # check_atoms() reads the two atom lists from the instance.
        self.mmcif_atoms = list(mmcif_struct.get_atoms())
        self.mmtf_atoms = list(mmtf_struct.get_atoms())
        self.check_atoms()

        mmcif_chains = list(mmcif_struct.get_chains())
        mmtf_chains = list(mmtf_struct.get_chains())
        self.assertEqual(len(mmcif_chains), len(mmtf_chains))
        # Same length is guaranteed above, so pairing by position is complete.
        for cif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
            self.mmcif_res = list(cif_chain.get_residues())
            self.mmtf_res = list(mmtf_chain.get_residues())
            self.check_residues()

        self.mmcif_res = list(mmcif_struct.get_residues())
        self.mmtf_res = list(mmtf_struct.get_residues())
        self.check_residues()

        cif_model_count = len(list(mmcif_struct.get_models()))
        mmtf_model_count = len(list(mmtf_struct.get_models()))
        self.assertEqual(cif_model_count, mmtf_model_count)
示例#37
0
 def testModels(self):
     """Test a file with multiple models (1LCD)."""
     structure = MMCIFParser().get_structure("example", "PDB/1LCD.cif")
     self.assertEqual(len(structure), 3)
     expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"
     for builder in (PPBuilder(), CaPPBuilder()):
         # serial_num (the model column) must be stored as 1, 2, 3.
         for index in range(3):
             self.assertEqual(structure[index].serial_num, index + 1)
         # First allow non-standard amino acids, then use the strict mode
         # with only standard amino acids; the same result is expected.
         for aa_only in (False, True):
             polypeptides = builder.build_peptides(structure[0], aa_only)
             self.assertEqual(len(polypeptides), 1)
             peptide = polypeptides[0]
             # Start and end positions.
             self.assertEqual(peptide[0].get_id()[1], 1)
             self.assertEqual(peptide[-1].get_id()[1], 51)
             # Sequence type, alphabet and content.
             sequence = peptide.get_sequence()
             self.assertTrue(isinstance(sequence, Seq))
             self.assertEqual(sequence.alphabet, generic_protein)
             self.assertEqual(expected_sequence, str(sequence))
示例#38
0
    def test_with_anisotrop(self):
        """Check atom data, including anisou values, parsed from 4CUP.cif."""
        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/4CUP.cif")
        f_structure = fast_parser.get_structure("example", "PDB/4CUP.cif")

        for parsed in (structure, f_structure):
            self.assertEqual(len(parsed), 1)

        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())
        self.assertEqual(len(s_atoms), len(f_atoms))

        # Expected values, shared by both parsers.
        expected_names = ['N', 'CA', 'C', 'O', 'CB']
        expected_coord = numpy.array([50.346, 19.287, 17.288], dtype=numpy.float32)
        expected_first_anisou = numpy.array(
            [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
            dtype=numpy.float32)
        expected_937_anisou = numpy.array(
            [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
            dtype=numpy.float32)

        for atoms in (s_atoms, f_atoms):
            leading = atoms[:5]
            self.assertSequenceEqual([a.get_name() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_id() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_fullname() for a in leading],
                                     expected_names)
            self.assertSequenceEqual([a.get_occupancy() for a in leading],
                                     [1., 1., 1., 1., 1.])

            first_atom = atoms[0]
            self.assertIsInstance(first_atom.get_coord(), numpy.ndarray)
            numpy.testing.assert_array_equal(first_atom.get_coord(),
                                             expected_coord)
            self.assertEqual(first_atom.get_bfactor(), 32.02)
            numpy.testing.assert_array_equal(first_atom.get_anisou(),
                                             expected_first_anisou)

            # Note: this atom is looked up in f_structure on both iterations.
            atom_937 = list(f_structure[0]['A'])[114]['CB']
            numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                             expected_937_anisou)
def process_monomer_data(PDB_PROTEIN_PATH, PROCESSED_DATA_PATH):
    """Record for every structure file in a directory whether it is a monomer.

    A structure is flagged as a monomer when its first model contains
    exactly one chain.  Files that fail to parse are skipped and do not
    appear in the output.

    Parameters
    ----------
    PDB_PROTEIN_PATH : str
        location of the directory with full pdb files (with or without a
        trailing path separator)
    PROCESSED_DATA_PATH : str
        output CSV file; it gets the columns ``id`` and ``monomer``

    """

    protein_files = [
        f for f in listdir(PDB_PROTEIN_PATH)
        if isfile(join(PDB_PROTEIN_PATH, f))
    ]
    results = {'id': [], 'monomer': []}

    for protein_file in protein_files:
        # The id is the file name without its last four characters.
        protein_id = protein_file[:-4]
        # Build the path with join(), as the listing above does, so a
        # directory given without a trailing separator still works.
        pdb_protein_file = join(PDB_PROTEIN_PATH, protein_file)

        # if only 1 chain, pdb = monomer
        try:
            p = MMCIFParser(QUIET=1)
            structure = p.get_structure(protein_id, pdb_protein_file)
            is_monomer = len(structure[0]) == 1
        except Exception:
            continue

        # Record both columns together, only after everything succeeded, so
        # the two lists can never end up with different lengths.
        results['id'].append(protein_id)
        results['monomer'].append(is_monomer)

    df = pd.DataFrame(results)
    df.to_csv(PROCESSED_DATA_PATH, index=False)
示例#40
0
def get_pdb():
    """Save one chain of a local mmCIF entry as ``WT.pdb``.

    The entry identifier is taken from ``sys.argv[1]`` and the chain
    identifier from ``sys.argv[2]``.  The gzipped mmCIF file is looked up
    under the hard-coded ``pathmmcif`` directory, decompressed to
    ``pdbprocess.cif`` in the current directory and parsed; the requested
    chain of the first model is then written in PDB format.

    Errors (missing arguments, missing file, unknown chain, ...) are not
    handled here and propagate to the caller.
    """
    import gzip
    import shutil
    import sys
    from Bio.PDB.MMCIFParser import MMCIFParser
    from Bio.PDB.PDBIO import PDBIO

    parser = MMCIFParser(QUIET=True)

    pathmmcif = "/Volumes/BIOINFO/mmCIF"

    pdb = sys.argv[1]
    C = sys.argv[2]  # CHAIN

    # Files are stored in sub-directories named after characters 2-3 of the
    # identifier.
    fol = pdb[1:3]
    pdbfile = "{}/{}/{}.cif.gz".format(pathmmcif, fol, pdb)
    filename = "pdbprocess.cif"

    # Stream the archive to disk; both handles are closed even on error.
    with gzip.open(pdbfile, "rb") as tar, open(filename, "wb") as out:
        shutil.copyfileobj(tar, out)

    structure = parser.get_structure(pdb, filename)
    chain = structure[0][C]

    io = PDBIO()
    io.set_structure(chain)
    io.save("WT.pdb")
示例#41
0
    def test_parsers(self):
        """Extract polypeptides from 1A8O with both mmCIF parsers.

        The file is parsed with MMCIFParser and FastMMCIFParser.  Polypeptides
        are built with PPBuilder and CaPPBuilder and checked for position and
        sequence, then the atoms of both structures are checked.
        """

        parser = MMCIFParser()
        fast_parser = FastMMCIFParser()

        structure = parser.get_structure("example", "PDB/1A8O.cif")
        f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

        self.assertEqual(len(structure), 1)
        self.assertEqual(len(f_structure), 1)

        for ppbuild in [PPBuilder(), CaPPBuilder()]:
            # ==========================================================
            # Check that serial_num (model column) is stored properly
            self.assertEqual(structure[0].serial_num, 1)
            self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

            # First try allowing non-standard amino acids,
            polypeptides = ppbuild.build_peptides(structure[0], False)
            f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

            self.assertEqual(len(polypeptides), 1)
            self.assertEqual(len(f_polypeptides), 1)

            pp = polypeptides[0]
            f_pp = f_polypeptides[0]

            # Check the start and end positions
            self.assertEqual(pp[0].get_id()[1], 151)
            self.assertEqual(pp[-1].get_id()[1], 220)

            self.assertEqual(f_pp[0].get_id()[1], 151)
            self.assertEqual(f_pp[-1].get_id()[1], 220)

            # Check the sequence
            s = pp.get_sequence()
            f_s = f_pp.get_sequence()

            self.assertEqual(s, f_s)  # enough to test this

            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)

            # Here non-standard MSE are shown as M
            self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                             "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

            # ==========================================================
            # Now try strict version with only standard amino acids
            # Should ignore MSE 151 at start, and then break the chain
            # at MSE 185, and MSE 214,215
            # (only the MMCIFParser structure is checked in strict mode)
            polypeptides = ppbuild.build_peptides(structure[0], True)
            self.assertEqual(len(polypeptides), 3)

            # First fragment
            pp = polypeptides[0]
            self.assertEqual(pp[0].get_id()[1], 152)
            self.assertEqual(pp[-1].get_id()[1], 184)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW", str(s))

            # Second fragment
            pp = polypeptides[1]
            self.assertEqual(pp[0].get_id()[1], 186)
            self.assertEqual(pp[-1].get_id()[1], 213)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TETLLVQNANPDCKTILKALGPGATLEE", str(s))

            # Third fragment
            pp = polypeptides[2]
            self.assertEqual(pp[0].get_id()[1], 216)
            self.assertEqual(pp[-1].get_id()[1], 220)
            s = pp.get_sequence()
            self.assertTrue(isinstance(s, Seq))
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual("TACQG", str(s))

        # Atom-level checks, applied to the output of both parsers
        s_atoms = list(structure.get_atoms())
        f_atoms = list(f_structure.get_atoms())

        for atoms in [s_atoms, f_atoms]:
            self.assertEqual(len(atoms), 644)
            # Names, ids and full names of the first five atoms are all
            # expected to be the same plain strings
            atom_names = ['N', 'CA', 'C', 'O', 'CB']
            self.assertSequenceEqual([a.get_name() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_id() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]], atom_names)
            self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]], [1., 1., 1., 1., 1.])
            self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
            coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)
            numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)

            self.assertEqual(atoms[0].get_bfactor(), 18.03)
            # No atom of this entry is expected to return anisou data
            for atom in atoms:
                self.assertIsNone(atom.get_anisou())