def test_with_anisotrop(self):
    """Compare MMCIFParser and FastMMCIFParser on 4CUP, including ANISOU values."""
    cif_file = "PDB/4CUP.cif"
    slow_parser = MMCIFParser()
    quick_parser = FastMMCIFParser()
    structure = slow_parser.get_structure("example", cif_file)
    f_structure = quick_parser.get_structure("example", cif_file)

    # Both parsers must report a single model.
    for parsed in (structure, f_structure):
        self.assertEqual(len(parsed), 1)

    s_atoms = list(structure.get_atoms())
    f_atoms = list(f_structure.get_atoms())
    self.assertEqual(len(s_atoms), len(f_atoms))

    # Expected values for the leading atoms of the file.
    expected_names = ['N', 'CA', 'C', 'O', 'CB']
    expected_occupancy = [1., 1., 1., 1., 1.]
    expected_coord = numpy.array([50.346, 19.287, 17.288],
                                 dtype=numpy.float32)
    first_anisou = numpy.array(
        [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
        dtype=numpy.float32)
    atom_937_anisou = numpy.array(
        [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
        dtype=numpy.float32)

    for atoms in (s_atoms, f_atoms):
        head = atoms[:5]
        self.assertSequenceEqual([a.get_name() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_id() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_fullname() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_occupancy() for a in head],
                                 expected_occupancy)

        first = atoms[0]
        self.assertIsInstance(first.get_coord(), numpy.ndarray)
        numpy.testing.assert_array_equal(first.get_coord(), expected_coord)
        self.assertEqual(first.get_bfactor(), 32.02)
        numpy.testing.assert_array_equal(first.get_anisou(), first_anisou)

        # This lookup always goes through the fast-parser structure.
        atom_937 = list(f_structure[0]['A'])[114]['CB']
        numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                         atom_937_anisou)
def testModels(self):
    """Check model counts, serial numbers and peptides for multi-model files."""
    parser = MMCIFParser(QUIET=1)
    f_parser = FastMMCIFParser(QUIET=1)
    multi_model_cif = "PDB/1LCD.cif"
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        structure = parser.get_structure("example", multi_model_cif)
        f_structure = f_parser.get_structure("example", multi_model_cif)

    for parsed in (structure, f_structure):
        self.assertEqual(len(parsed), 3)

    expected_sequence = "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR"

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # Check that serial_num (model column) is stored properly
        for model_index in range(3):
            self.assertEqual(structure[model_index].serial_num,
                             model_index + 1)

        # First pass allows non-standard amino acids (False), the second
        # pass is the strict one (True); both expect the same single
        # peptide spanning residues 1-51 with the same sequence.
        for strict in (False, True):
            polypeptides = ppbuild.build_peptides(structure[0], strict)
            self.assertEqual(len(polypeptides), 1)
            peptide = polypeptides[0]
            # Check the start and end positions
            self.assertEqual(peptide[0].get_id()[1], 1)
            self.assertEqual(peptide[-1].get_id()[1], 51)
            # Check the sequence
            sequence = peptide.get_sequence()
            self.assertIsInstance(sequence, Seq)
            self.assertEqual(sequence.alphabet, generic_protein)
            self.assertEqual(expected_sequence, str(sequence))

    # This structure contains several models with multiple lengths.
    # The tests were failing.
    structure = parser.get_structure("example", "PDB/2OFG.cif")
    self.assertEqual(len(structure), 3)
def testModels(self):
    """Test file with multiple models.

    Parses PDB/1LCD.cif with both MMCIFParser and FastMMCIFParser, checks
    that three models are found, that the serial numbers of the models are
    1, 2 and 3, and that both peptide builders produce the expected single
    peptide from the first model. Finally parses PDB/2OFG.cif and checks
    that it also yields three models.
    """
    parser = MMCIFParser(QUIET=1)
    f_parser = FastMMCIFParser(QUIET=1)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PDBConstructionWarning)
        structure = parser.get_structure("example", "PDB/1LCD.cif")
        f_structure = f_parser.get_structure("example", "PDB/1LCD.cif")

    self.assertEqual(len(structure), 3)
    self.assertEqual(len(f_structure), 3)

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(structure[1].serial_num, 2)
        self.assertEqual(structure[2].serial_num, 3)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        # Here non-standard MSE are shown as M
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 1)
        pp = polypeptides[0]
        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 1)
        self.assertEqual(pp[-1].get_id()[1], 51)
        # Check the sequence
        s = pp.get_sequence()
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)
        self.assertEqual(
            "MKPVTLYDVAEYAGVSYQTVSRVVNQASHVSAKTREKVEAAMAELNYIPNR", str(s))

    # This structure contains several models with multiple lengths.
    # The tests were failing.
    structure = parser.get_structure("example", "PDB/2OFG.cif")
    self.assertEqual(len(structure), 3)
def __init__(self, file_path, config):
    """Parse the structure file at file_path and keep it with the config.

    The parser is chosen from the file suffix: '.pdb' and '.ent' go to
    PDBParser, '.cif' goes to FastMMCIFParser. For any other suffix no
    parsing is attempted, self.bio_struct stays None and an error is
    logged; no exception is raised for that case.

    :param file_path: path of the structure file (indexed as a string).
    :param config: configuration object stored unchanged as self.config.
    """
    self.config = config
    pdb_id = uf.parse_pdb_id_from_file(file_path)
    self.bio_struct = None
    if file_path.endswith(('.pdb', '.ent')):
        self.bio_struct = PDBParser(QUIET=True).get_structure(
            pdb_id, file_path)
    elif file_path.endswith('.cif'):
        self.bio_struct = FastMMCIFParser(QUIET=True).get_structure(
            pdb_id, file_path)
    # Identity test: "== None" would go through the parsed object's own
    # __eq__ implementation instead of simply checking for the sentinel.
    if self.bio_struct is None:
        logging.error(
            "{} Flipper unable to parse structure file: {}".format(
                pdb_id, file_path))
def from_list(cls, reslist, cif_path, parent_entry, annotate=True):
    """Construct PdbSite object directly from residue list.

    With annotate=True the file is parsed with MMCIFParser and the parser's
    mmCIF dictionary is attached to the site before find_ligands() is
    called; otherwise FastMMCIFParser is used and no annotation is done.
    Returns None, after issuing a RuntimeWarning, when parsing raises
    TypeError or PDBConstructionException.
    """
    mmcif_dict = dict()
    # First reduce redundant residues with multiple function locations
    reslist = PdbSite._cleanup_list(reslist)
    site = cls()
    site.parent_entry = parent_entry
    try:
        if annotate:
            parser = MMCIFParser(QUIET=True)
        else:
            parser = FastMMCIFParser(QUIET=True)
        structure = parser.get_structure('', cif_path)
        if annotate:
            # Only the full parser run keeps the raw mmCIF dictionary.
            mmcif_dict = parser._mmcif_dict
    except (TypeError, PDBConstructionException):
        warnings.warn(
            'Could not build site from residue list. Check entry',
            RuntimeWarning)
        return None
    for residue in reslist:
        if structure:
            residue.add_structure(structure)
        site.add(residue)
    if not annotate:
        return site
    site.parent_structure = structure
    site.mmcif_dict = mmcif_dict
    site.find_ligands()
    return site
def test_with_anisotrop(self):
    """Compare MMCIFParser and FastMMCIFParser on 4CUP, including ANISOU values."""
    cif_file = "PDB/4CUP.cif"
    slow_parser = MMCIFParser()
    quick_parser = FastMMCIFParser()
    structure = slow_parser.get_structure("example", cif_file)
    f_structure = quick_parser.get_structure("example", cif_file)

    # Both parsers must report a single model.
    for parsed in (structure, f_structure):
        self.assertEqual(len(parsed), 1)

    s_atoms = list(structure.get_atoms())
    f_atoms = list(f_structure.get_atoms())
    self.assertEqual(len(s_atoms), len(f_atoms))

    # Expected values for the leading atoms of the file.
    expected_names = ['N', 'CA', 'C', 'O', 'CB']
    expected_occupancy = [1., 1., 1., 1., 1.]
    expected_coord = numpy.array([50.346, 19.287, 17.288],
                                 dtype=numpy.float32)
    first_anisou = numpy.array(
        [0.4738, -0.0309, -0.0231, 0.4524, 0.0036, 0.2904],
        dtype=numpy.float32)
    atom_937_anisou = numpy.array(
        [1.1242, 0.2942, -0.0995, 1.1240, -0.1088, 0.8221],
        dtype=numpy.float32)

    for atoms in (s_atoms, f_atoms):
        head = atoms[:5]
        self.assertSequenceEqual([a.get_name() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_id() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_fullname() for a in head],
                                 expected_names)
        self.assertSequenceEqual([a.get_occupancy() for a in head],
                                 expected_occupancy)

        first = atoms[0]
        self.assertIsInstance(first.get_coord(), numpy.ndarray)
        numpy.testing.assert_array_equal(first.get_coord(), expected_coord)
        self.assertEqual(first.get_bfactor(), 32.02)
        numpy.testing.assert_array_equal(first.get_anisou(), first_anisou)

        # This lookup always goes through the fast-parser structure.
        atom_937 = list(f_structure[0]['A'])[114]['CB']
        numpy.testing.assert_array_equal(atom_937.get_anisou(),
                                         atom_937_anisou)
def fetchStructure(pdbid: str, custom_path='default') -> Structure:
    """Parse the mmCIF file for <pdbid> and return the resulting structure.

    The file is read from custom_path when one is given. Otherwise it is
    looked up as <STATIC_ROOT>/<PDBID>/<PDBID>.cif, where STATIC_ROOT is
    taken from the environment and PDBID is pdbid in upper case.

    :param pdbid: structure identifier; upper-cased for the default path
        and for the id handed to the parser.
    :param custom_path: explicit path to the .cif file, or the literal
        string 'default' to use the STATIC_ROOT layout.
    :returns: the object returned by FastMMCIFParser.get_structure.
    :raises FileNotFoundError: if the resolved path does not exist.
    :raises TypeError: from path.join if the default location is used
        while STATIC_ROOT is not set (os.getenv then returns None).
    """
    # NOTE(review): the local annotation in the original used
    # Structure.Structure, so the "-> Structure" return annotation may be
    # naming the module rather than the class - confirm against the imports.
    rcsb_id = pdbid.upper()
    if custom_path != 'default':
        pathToFile = custom_path
    else:
        pathToFile = path.join(
            os.getenv('STATIC_ROOT'), rcsb_id, rcsb_id + '.cif')

    if not path.exists(pathToFile):
        print(f"File does not exist at the provided path {pathToFile}")
        raise FileNotFoundError(pathToFile)

    parser = FastMMCIFParser(QUIET=True)
    return parser.get_structure(rcsb_id, pathToFile)
def build_all(cls, reslist, reference_site, parent_entry, cif_path,
              annotate=True, redundancy_cutoff=None):
    """Build all sites using a list of catalytic residues as input.

    The structure at cif_path is parsed with MMCIFParser when annotate is
    true (keeping the parser's mmCIF dictionary) and with FastMMCIFParser
    otherwise. One site is built per seed, redundant sites are removed,
    ligands and annotations are added when requested, and unclustered
    sites are flagged.

    :param reslist: catalytic residues to build sites from.
    :param reference_site: passed through to cls.build for every seed.
    :param parent_entry: passed through to cls.build for every seed.
    :param cif_path: path of the mmCIF file to parse.
    :param annotate: attach structure and mmCIF dictionary to each site and
        call find_ligands() on it.
    :param redundancy_cutoff: forwarded as cutoff to
        PdbSite._remove_redundant_sites.
    :returns: a list of PdbSite objects; an empty list (after a
        RuntimeWarning) when parsing raises TypeError or
        PDBConstructionException.
    """
    # Map structure objects in every residue
    sites = []
    mmcif_dict = dict()
    try:
        if annotate:
            parser = MMCIFParser(QUIET=True)
            structure = parser.get_structure('', cif_path)
            mmcif_dict = parser._mmcif_dict
        else:
            parser = FastMMCIFParser(QUIET=True)
            structure = parser.get_structure('', cif_path)
    except (TypeError, PDBConstructionException):
        # The category belongs to warnings.warn, not to str.format: the
        # original passed it to format(), where it was silently ignored and
        # the warning went out with the default UserWarning category.
        warnings.warn('Could not parse structure {}'.format(cif_path),
                      RuntimeWarning)
        return sites
    # First reduce redundant residues with multiple function locations
    reslist = PdbSite._cleanup_list(reslist)
    # We want all equivalent residues from identical assembly chains
    reslist = PdbSite._get_assembly_residues(reslist, structure)
    # Get seeds to build active sites
    seeds = PdbSite._get_seeds(reslist)
    # Build a site from each seed
    for seed in seeds:
        sites.append(cls.build(seed, reslist, reference_site, parent_entry))
    # Reduce redundancy
    sites = PdbSite._remove_redundant_sites(sites, cutoff=redundancy_cutoff)
    # Add ligands and annotations
    if annotate and structure:
        for site in sites:
            site.parent_structure = structure
            site.mmcif_dict = mmcif_dict
            site.find_ligands()
    # Flag unclustered sites
    PdbSite._mark_unclustered(sites)
    return sites
# Version 2.0
# It is compatible with Python 3.6
#
# Author: Monika Wiech

import gzip
import glob
import os
import sys
import time

# Parser selection: the fast mmCIF parser is the active one. The regular
# parser is kept below, commented out, as the drop-in alternative.
# from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.MMCIFParser import FastMMCIFParser

# parser = MMCIFParser()
parser = FastMMCIFParser()

# Data directory (relative to the root directory) and directory limits.
root_dir = './'
data_dir = f"{root_dir}data/"
grp_dir_limit, str_dir_limit = 100, 1000
grp_dir_count = 0

# Logging system: base name template and the extensions of its files.
log_name = 'tester__%s'
err_ext, log_ext, out_ext = '.err', '.log', '.out'
class StructureBuilder:
    """Build Flipper structures and neighbor networks from a structure file.

    The file is parsed once in the constructor; build_structure() converts
    one model of the parsed structure into a FlipperStructure and
    make_neighbors() computes the NeighborsNet for such a structure.
    """

    # constructor, sets the path of the pdb file and configuration parameters
    def __init__(self, file_path, config):
        """Parse file_path and store the result together with config.

        '.pdb' and '.ent' files go to PDBParser, '.cif' files go to
        FastMMCIFParser. For any other suffix self.bio_struct stays None
        and an error is logged; no exception is raised for that case.
        """
        self.config = config
        pdb_id = uf.parse_pdb_id_from_file(file_path)
        self.bio_struct = None
        if file_path.endswith(('.pdb', '.ent')):
            self.bio_struct = PDBParser(QUIET=True).get_structure(
                pdb_id, file_path)
        elif file_path.endswith('.cif'):
            self.bio_struct = FastMMCIFParser(QUIET=True).get_structure(
                pdb_id, file_path)
        # Identity test: "== None" would go through the parsed object's
        # own __eq__ instead of simply checking for the sentinel.
        if self.bio_struct is None:
            logging.error(
                "{} Flipper unable to parse structure file: {}".format(
                    pdb_id, file_path))

    # add a residue to the structure
    def add_residue(self, fl_chain, res):
        """Convert res to a FlipperResidue and append it to fl_chain.

        For non DNA/RNA chains the residue is appended only when it has a
        'CA' atom; DNA/RNA residues are always appended.
        """
        # create an empty residue
        fl_res = FlipperResidue()
        # extract res info
        fl_res.pdb_id = res_pdb_id(res)
        fl_res.model_id = res_model_id(res)
        fl_res.pdb_index = res_pdb_index(res)
        fl_res.pdb_insertion_code = res_insertion_code(res)
        fl_res.chain_id = fl_chain.id_label
        fl_res.name3 = res.get_resname()
        # keep the residue's default one-letter name when the three-letter
        # name has no entry in the lookup table
        one_letter = uf.aa_3to1.get(fl_res.name3)
        if one_letter:
            fl_res.name1 = one_letter

        if not fl_chain.rna_dna_chain:
            # collect the coordinates of every atom, and the alpha carbon
            # also separately
            for atom in res:
                fl_res.atoms_coord.append(atom.get_coord())
                if atom.id == 'CA':
                    fl_res.c_alpha_coord = atom.get_coord()
            if fl_res.c_alpha_coord is not None:
                # residues are inserted in order, so the position is the
                # current length of the chain
                fl_res.pos_in_chain = len(fl_chain.residues)
                # link with the previous residue unless the distance
                # between the two alpha carbons reaches the gap threshold
                if fl_chain.residues and uf.distance_3D(
                        fl_chain.residues[-1].c_alpha_coord,
                        fl_res.c_alpha_coord
                ) < self.config["open_gap_threshold"]:
                    fl_chain.residues[-1].has_next = True
                    fl_res.has_prev = True
        else:
            # DNA or RNA chain: set the uniprot identifier to "DNA-RNA"
            fl_res.uniprot_id = "DNA-RNA"

        # add residue inside the chain object
        if fl_res.c_alpha_coord is not None or fl_chain.rna_dna_chain:
            fl_chain.string_index_map[fl_res.string_index()] = len(
                fl_chain.residues)
            fl_chain.residues.append(fl_res)

    # build the structure, giving it pdb_id as identifier
    def build_structure(self, pdb_id, model_id=0):
        """Return a FlipperStructure built from model model_id.

        Every chain of the model becomes a FlipperChain under the same id;
        only residues accepted by is_good_res are added.
        """
        model = self.bio_struct[model_id]
        fl_struct = FlipperStructure(pdb_id, model.id)
        for chain in model:
            # create a Flipper chain with the same id
            fl_chain = FlipperChain(chain.id)
            # if this chain is a DNA-RNA chain, set flag
            if is_DNA(chain):
                fl_chain.rna_dna_chain = True
            for residue in chain:
                if is_good_res(residue):
                    self.add_residue(fl_chain, residue)
            fl_struct.chains[chain.id] = fl_chain
        return fl_struct

    # create the neighbors network for given structure
    def make_neighbors(self, fl_struct):
        """Return the NeighborsNet of fl_struct.

        For every residue of every non DNA/RNA chain, residues within
        config["neighbors_range"] of any of its atoms are classified as
        short range, long range (same chain) or inter chain neighbors.
        """
        nn = NeighborsNet()
        # use NeighborSearch from Bio.PDB to compute distances
        ns = NeighborSearch(list(self.bio_struct.get_atoms()))
        for fl_chain in fl_struct.get_chains():
            if fl_chain.rna_dna_chain:
                continue
            for fl_res in fl_chain.residues:
                # add a default entry
                nn.add_default(fl_res)
                # The search runs once per atom, so the same neighbor can
                # be returned many times; a set keeps the membership test
                # constant-time and makes repeated insertion harmless.
                already_have = set()
                for atom_coord in fl_res.atoms_coord:
                    for res in ns.search(atom_coord,
                                         self.config["neighbors_range"],
                                         level='R'):
                        # keep good residues of the same model only, since
                        # the search may cover all models
                        if not (is_good_res(res)
                                and fl_res.model_id == res_model_id(res)):
                            continue
                        full_id = res.get_full_id()
                        if full_id in already_have:
                            continue
                        # try to get the associated FlipperResidue
                        chain_2 = fl_struct.chains[res_chain_id(res)]
                        pos_2 = chain_2.string_index_map.get(
                            res_string_index(res))
                        if pos_2 is None:
                            continue
                        fl_res_2 = chain_2.residues[pos_2]
                        already_have.add(full_id)
                        if fl_res.chain_id == fl_res_2.chain_id:
                            if fl_res.pos_in_chain == fl_res_2.pos_in_chain:
                                # the residue itself
                                continue
                            # close in sequence and no gap in between:
                            # short range, otherwise long range
                            if abs(
                                    fl_res.pos_in_chain
                                    - fl_res_2.pos_in_chain
                            ) < self.config[
                                    "long_short_threshold"
                            ] and not fl_chain.have_gaps(fl_res, fl_res_2):
                                nn.add_short(fl_res, fl_res_2)
                            else:
                                nn.add_long(fl_res, fl_res_2)
                        else:
                            # different chain: inter chain neighbor
                            nn.add_inter(fl_res, fl_res_2)
        return nn
def test_parsers(self):
    """Extract polypeptides from 1A8O.

    Parses PDB/1A8O.cif with MMCIFParser and FastMMCIFParser and checks
    that both peptide builders give the same single peptide in the
    non-strict mode and the three expected fragments in the strict mode.
    """
    parser = MMCIFParser()
    fast_parser = FastMMCIFParser()

    structure = parser.get_structure("example", "PDB/1A8O.cif")
    f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

    self.assertEqual(len(structure), 1)
    self.assertEqual(len(f_structure), 1)

    # (first residue number, last residue number, sequence) of every
    # fragment expected from the strict run.
    strict_fragments = [
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    ]

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(f_structure[0].serial_num,
                         structure[0].serial_num)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

        self.assertEqual(len(polypeptides), 1)
        self.assertEqual(len(f_polypeptides), 1)

        pp = polypeptides[0]
        f_pp = f_polypeptides[0]

        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 151)
        self.assertEqual(pp[-1].get_id()[1], 220)

        self.assertEqual(f_pp[0].get_id()[1], 151)
        self.assertEqual(f_pp[-1].get_id()[1], 220)

        # Check the sequence
        s = pp.get_sequence()
        f_s = f_pp.get_sequence()

        self.assertEqual(s, f_s)  # enough to test this

        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)

        # Here non-standard MSE are shown as M
        self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                         "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        # Should ignore MSE 151 at start, and then break the chain
        # at MSE 185, and MSE 214,215
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)

        for pp, (first_id, last_id, expected) in zip(polypeptides,
                                                     strict_fragments):
            self.assertEqual(pp[0].get_id()[1], first_id)
            self.assertEqual(pp[-1].get_id()[1], last_id)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(expected, str(s))
def test_parsers(self):
    """Extract polypeptides from 1A8O.

    Parses PDB/1A8O.cif with MMCIFParser and FastMMCIFParser and checks
    that both peptide builders give the same single peptide in the
    non-strict mode and the three expected fragments in the strict mode.
    Afterwards the atoms produced by both parsers are checked: count,
    names, occupancies, first coordinate, first B factor and the absence
    of anisotropic data.
    """
    parser = MMCIFParser()
    fast_parser = FastMMCIFParser()

    structure = parser.get_structure("example", "PDB/1A8O.cif")
    f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

    self.assertEqual(len(structure), 1)
    self.assertEqual(len(f_structure), 1)

    # (first residue number, last residue number, sequence) of every
    # fragment expected from the strict run.
    strict_fragments = [
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    ]

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(f_structure[0].serial_num,
                         structure[0].serial_num)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

        self.assertEqual(len(polypeptides), 1)
        self.assertEqual(len(f_polypeptides), 1)

        pp = polypeptides[0]
        f_pp = f_polypeptides[0]

        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 151)
        self.assertEqual(pp[-1].get_id()[1], 220)

        self.assertEqual(f_pp[0].get_id()[1], 151)
        self.assertEqual(f_pp[-1].get_id()[1], 220)

        # Check the sequence
        s = pp.get_sequence()
        f_s = f_pp.get_sequence()

        self.assertEqual(s, f_s)  # enough to test this

        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)

        # Here non-standard MSE are shown as M
        self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                         "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        # Should ignore MSE 151 at start, and then break the chain
        # at MSE 185, and MSE 214,215
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)

        for pp, (first_id, last_id, expected) in zip(polypeptides,
                                                     strict_fragments):
            self.assertEqual(pp[0].get_id()[1], first_id)
            self.assertEqual(pp[-1].get_id()[1], last_id)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(expected, str(s))

    s_atoms = list(structure.get_atoms())
    f_atoms = list(f_structure.get_atoms())

    for atoms in [s_atoms, f_atoms]:
        self.assertEqual(len(atoms), 644)
        atom_names = ['N', 'CA', 'C', 'O', 'CB']
        self.assertSequenceEqual([a.get_name() for a in atoms[:5]],
                                 atom_names)
        self.assertSequenceEqual([a.get_id() for a in atoms[:5]],
                                 atom_names)
        self.assertSequenceEqual([a.get_fullname() for a in atoms[:5]],
                                 atom_names)
        self.assertSequenceEqual([a.get_occupancy() for a in atoms[:5]],
                                 [1., 1., 1., 1., 1.])
        self.assertIsInstance(atoms[0].get_coord(), numpy.ndarray)
        coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)
        numpy.testing.assert_array_equal(atoms[0].get_coord(), coord)

        self.assertEqual(atoms[0].get_bfactor(), 18.03)
        # This file is expected to carry no anisotropic data at all.
        for atom in atoms:
            self.assertIsNone(atom.get_anisou())
def test_parsers(self):
    """Extract polypeptides from 1A8O.

    Parses PDB/1A8O.cif with MMCIFParser and FastMMCIFParser and checks
    that both peptide builders give the same single peptide in the
    non-strict mode and the three expected fragments in the strict mode.
    """
    parser = MMCIFParser()
    fast_parser = FastMMCIFParser()

    structure = parser.get_structure("example", "PDB/1A8O.cif")
    f_structure = fast_parser.get_structure("example", "PDB/1A8O.cif")

    self.assertEqual(len(structure), 1)
    self.assertEqual(len(f_structure), 1)

    # (first residue number, last residue number, sequence) of every
    # fragment expected from the strict run.
    strict_fragments = [
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    ]

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(f_structure[0].serial_num,
                         structure[0].serial_num)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

        self.assertEqual(len(polypeptides), 1)
        self.assertEqual(len(f_polypeptides), 1)

        pp = polypeptides[0]
        f_pp = f_polypeptides[0]

        # Check the start and end positions
        self.assertEqual(pp[0].get_id()[1], 151)
        self.assertEqual(pp[-1].get_id()[1], 220)

        self.assertEqual(f_pp[0].get_id()[1], 151)
        self.assertEqual(f_pp[-1].get_id()[1], 220)

        # Check the sequence
        s = pp.get_sequence()
        f_s = f_pp.get_sequence()

        self.assertEqual(s, f_s)  # enough to test this

        # assertIsInstance reports the offending type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)

        # Here non-standard MSE are shown as M
        self.assertEqual("MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
                         "NANPDCKTILKALGPGATLEEMMTACQG", str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        # Should ignore MSE 151 at start, and then break the chain
        # at MSE 185, and MSE 214,215
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)

        for pp, (first_id, last_id, expected) in zip(polypeptides,
                                                     strict_fragments):
            self.assertEqual(pp[0].get_id()[1], first_id)
            self.assertEqual(pp[-1].get_id()[1], last_id)
            s = pp.get_sequence()
            self.assertIsInstance(s, Seq)
            self.assertEqual(s.alphabet, generic_protein)
            self.assertEqual(expected, str(s))
def test_point_mutations_fast(self):
    """Run the shared point mutation checks against a quiet FastMMCIFParser."""
    quiet_fast_parser = FastMMCIFParser(QUIET=True)
    self._run_point_mutation_tests(quiet_fast_parser)
def openStructutre(pdbid: str, cifpath: str) -> Structure:
    """Parse the mmCIF file at cifpath quietly and return it under the id pdbid."""
    cif_parser = FastMMCIFParser(QUIET=True)
    parsed = cif_parser.get_structure(pdbid, cifpath)
    return parsed
def test_parsers(self):
    """Compare peptides and atoms read from 1A8O by both mmCIF parsers."""
    cif_file = "PDB/1A8O.cif"
    parser = MMCIFParser()
    fast_parser = FastMMCIFParser()

    structure = parser.get_structure("example", cif_file)
    f_structure = fast_parser.get_structure("example", cif_file)

    for parsed in (structure, f_structure):
        self.assertEqual(len(parsed), 1)

    full_sequence = (
        "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQ"
        "NANPDCKTILKALGPGATLEEMMTACQG"
    )
    # (first residue number, last residue number, sequence) of every
    # fragment expected from the strict run.
    strict_fragments = (
        (152, 184, "DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW"),
        (186, 213, "TETLLVQNANPDCKTILKALGPGATLEE"),
        (216, 220, "TACQG"),
    )

    for ppbuild in [PPBuilder(), CaPPBuilder()]:
        # ==========================================================
        # Check that serial_num (model column) is stored properly
        self.assertEqual(structure[0].serial_num, 1)
        self.assertEqual(f_structure[0].serial_num, structure[0].serial_num)

        # First try allowing non-standard amino acids,
        polypeptides = ppbuild.build_peptides(structure[0], False)
        f_polypeptides = ppbuild.build_peptides(f_structure[0], False)

        self.assertEqual(len(polypeptides), 1)
        self.assertEqual(len(f_polypeptides), 1)

        pp = polypeptides[0]
        f_pp = f_polypeptides[0]

        # Check the start and end positions
        for peptide in (pp, f_pp):
            self.assertEqual(peptide[0].get_id()[1], 151)
            self.assertEqual(peptide[-1].get_id()[1], 220)

        # Check the sequence
        s = pp.get_sequence()
        f_s = f_pp.get_sequence()

        self.assertEqual(s, f_s)  # enough to test this

        self.assertIsInstance(s, Seq)
        self.assertEqual(s.alphabet, generic_protein)

        # Here non-standard MSE are shown as M
        self.assertEqual(full_sequence, str(s))

        # ==========================================================
        # Now try strict version with only standard amino acids
        # Should ignore MSE 151 at start, and then break the chain
        # at MSE 185, and MSE 214,215
        polypeptides = ppbuild.build_peptides(structure[0], True)
        self.assertEqual(len(polypeptides), 3)

        for fragment, (first_id, last_id, expected) in zip(
            polypeptides, strict_fragments
        ):
            self.assertEqual(fragment[0].get_id()[1], first_id)
            self.assertEqual(fragment[-1].get_id()[1], last_id)
            fragment_seq = fragment.get_sequence()
            self.assertIsInstance(fragment_seq, Seq)
            self.assertEqual(fragment_seq.alphabet, generic_protein)
            self.assertEqual(expected, str(fragment_seq))

    s_atoms = list(structure.get_atoms())
    f_atoms = list(f_structure.get_atoms())

    expected_names = ["N", "CA", "C", "O", "CB"]
    expected_coord = numpy.array([19.594, 32.367, 28.012], dtype=numpy.float32)

    for atoms in (s_atoms, f_atoms):
        self.assertEqual(len(atoms), 644)
        head = atoms[:5]
        self.assertSequenceEqual([a.get_name() for a in head], expected_names)
        self.assertSequenceEqual([a.get_id() for a in head], expected_names)
        self.assertSequenceEqual([a.get_fullname() for a in head], expected_names)
        self.assertSequenceEqual(
            [a.get_occupancy() for a in head], [1.0, 1.0, 1.0, 1.0, 1.0]
        )
        first = atoms[0]
        self.assertIsInstance(first.get_coord(), numpy.ndarray)
        numpy.testing.assert_array_equal(first.get_coord(), expected_coord)

        self.assertEqual(first.get_bfactor(), 18.03)
        # No atom of this file is expected to carry anisotropic data.
        for atom in atoms:
            self.assertIsNone(atom.get_anisou())
from asyncio import run def root_self(rootname: str = '') -> str: """Returns the rootpath for the project if it's unique in the current folder tree.""" root = os.path.abspath( __file__)[:os.path.abspath(__file__).find(rootname) + len(rootname)] sys.path.append(root) load_dotenv(os.path.join(root, '.env')) root_self('ribxz') from ciftools.Neoget import _neoget prs = FastMMCIFParser(QUIET=True) io = MMCIFIO() for pdbid in [ '1vy4', ]: pdbid = pdbid.upper() struct: Structure = prs.get_structure(f'{pdbid}', f'{pdbid}.cif') for chain in struct[0].child_list: strand_id = chain.id nomclass_result = _neoget( f"""match (r:RibosomeStructure{{rcsb_id: "{pdbid.upper()}"}})-[]-(rp:RibosomalProtein)-[]-(n:NomenclatureClass) where rp.entity_poly_strand_id = "{strand_id}" return n.class_id""") print(nomclass_result) if len(nomclass_result) > 0: