def setUpClass(self):
    """Create the shared writer, parsers, reference structure and fixture paths."""
    # Locations of the fixture files used by the individual tests.
    self.mmcif_file = "PDB/1A8O.cif"
    self.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
    self.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"

    # One writer and one parser per format, reused by every test.
    self.io = MMCIFIO()
    self.mmcif_parser = MMCIFParser()
    self.pdb_parser = PDBParser()

    # Parse the reference PDB file with construction warnings silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        reference = self.pdb_parser.get_structure("example", "PDB/1A8O.pdb")
    self.structure = reference
def _writeLowLevel(self, fileName, dict):
    """Write a dictionary to ``fileName`` as a CIF file.

    When ``fileName`` ends with ".pdb" nothing is written; a notice is
    printed instead. Otherwise the dictionary is handed to the MMCIFIO
    instance stored in ``self.ioCIF`` (created here if it is still None)
    and saved under ``fileName``.
    """
    # Guard clause: the PDB branch only reports that it is unsupported.
    if fileName.endswith(".pdb"):
        print("Low level access to PDB is not implemented")
        return

    # Create the writer lazily and keep it on the instance for later calls.
    if self.ioCIF is None:
        self.ioCIF = MMCIFIO()

    writer = self.ioCIF
    writer.set_dict(dict)
    writer.save(fileName)
def test_mmcifio_multimodel(self):
    """Write a multi-model, multi-chain mmCIF file.

    The same multi-model structure is loaded once from the PDB fixture and
    once from the mmCIF fixture; each is written with ``self.io``, parsed
    back, and checked for two models, two chains in model 1 and a known
    x coordinate.
    """
    pdb_struct = self.pdb_parser.get_structure(
        "1SSU_mod_pdb", self.mmcif_multimodel_pdb_file
    )
    mmcif_struct = self.mmcif_parser.get_structure(
        "1SSU_mod_mmcif", self.mmcif_multimodel_mmcif_file
    )

    # The label only identifies the failing input in the test report.
    for label, struct in (("pdb", pdb_struct), ("mmcif", mmcif_struct)):
        with self.subTest(source=label):
            self.io.set_structure(struct)

            filenumber, filename = tempfile.mkstemp()
            os.close(filenumber)
            try:
                self.io.save(filename)
                struct_in = self.mmcif_parser.get_structure(
                    "1SSU_mod_in", filename
                )
                self.assertEqual(len(struct_in), 2)
                self.assertEqual(len(struct_in[1]), 2)
                self.assertEqual(
                    round(float(struct_in[1]["B"][1]["N"].get_coord()[0]), 3),
                    6.259,
                )
            finally:
                # Always remove the temp file, even when an assertion fails.
                os.remove(filename)
def visualize_2DA(apo_2DA, holo_2DA, paper_apo_spans):
    """Write the superimposed holo structure to a file and print a PyMOL script.

    The printed script can be pasted directly into PyMOL. It will:
        1) automatically load both structures (superimposed holo from the
           filesystem, apo from the internet)
        2) create objects and selections for domains, and the two-domain
           arrangements
        3) color the selections by domain, apo/holo and paper/ours
            - ours with more saturation, paper faded
            - red, yellow: apo (first and second domain respectively)
            - green, blue: holo
        4) provide example usage in the last script paragraph

    Args:
        apo_2DA: two-domain arrangement of the apo structure; ``pdb_code``
            and the domains ``d1``/``d2`` (with ``chain_id`` and
            ``get_spans()``) are read from it.
        holo_2DA: the same for the holo structure.
        paper_apo_spans: ``[d1_spans, d2_spans]``; each entry is indexed as
            an array of (begin, end) pairs. NOTE(review): presumably these
            are residue ids in the paper's numbering - confirm with caller.
    """
    # load the structures
    a = parse_mmcif(apo_2DA.pdb_code)
    h = parse_mmcif(holo_2DA.pdb_code)

    apo = a.structure
    holo = h.structure

    # ---- inserted from main
    apo_mapping = a.bio_to_mmcif_mappings[0][apo_2DA.d1.chain_id]
    holo_mapping = h.bio_to_mmcif_mappings[0][holo_2DA.d1.chain_id]

    # crop polypeptides to longest common substring
    c1_common_seq, c2_common_seq = get_longest_common_polypeptide(
        a.poly_seqs[apo_mapping.entity_poly_id],
        h.poly_seqs[holo_mapping.entity_poly_id],
    )
    c1_label_seq_ids = list(c1_common_seq.keys())
    c2_label_seq_ids = list(c2_common_seq.keys())

    # Shift between holo and apo label_seq_ids at the start of the common
    # part. Computed once; used below to move the paper domains onto holo.
    label_seq_id_offset = c2_label_seq_ids[0] - c1_label_seq_ids[0]
    # ---- end of inserted part

    # get residues of the first domain, in both apo and holo structures
    apo_d1 = DomainResidues.from_domain(apo_2DA.d1, apo[0], apo_mapping)
    holo_d1 = DomainResidues.from_domain(holo_2DA.d1, holo[0], holo_mapping)

    # superimpose holo onto apo, using the first domain
    superimposed_holo_model = superimpose_structure(holo[0], holo_d1, apo_d1)

    # save the structure
    name = holo.id + f'_{holo_d1.domain_id}onto_{apo_d1.domain_id}'
    io = MMCIFIO()

    superimposed_holo = Structure(name)
    superimposed_holo.add(superimposed_holo_model)
    io.set_structure(superimposed_holo)
    sholo_file_path = Path(OUTPUT_DIR, name + '.cif')
    io.save(str(sholo_file_path), preserve_atom_numbering=True)

    def get_resi_selection(spans):
        """Return a parenthesized PyMOL ``resi a-b or resi c-d ...`` selection."""
        return '(' + ' or '.join(f'resi {from_}-{to}' for from_, to in spans) + ')'

    # convert paper spans to label seqs, so we can show them in Pymol
    def get_paper_domain(d: DomainResidueMapping, paper_spans, residue_id_mapping):
        """Translate spans to label seq ids and return a domain object."""
        spans = np.array(paper_spans)
        segment_beginnings = list(map(residue_id_mapping.find_label_seq, spans[:, 0].tolist()))
        segment_ends = list(map(residue_id_mapping.find_label_seq, spans[:, 1].tolist()))

        logger.debug(segment_beginnings)
        logger.debug(segment_ends)
        return DomainResidueMapping(d.domain_id, d.chain_id, segment_beginnings, segment_ends)

    logger.debug(paper_apo_spans)
    # [d1, d2] where d1 [(), (),...]
    paper_apo_drm1 = get_paper_domain(apo_2DA.d1, paper_apo_spans[0], apo_mapping)
    paper_apo_drm2 = get_paper_domain(apo_2DA.d2, paper_apo_spans[1], apo_mapping)

    paper_holo_drm1 = DomainResidueMapping.from_domain_on_another_chain(
        paper_apo_drm1, holo_d1.chain_id, label_seq_id_offset)
    # same chain, for now, as in d1
    paper_holo_drm2 = DomainResidueMapping.from_domain_on_another_chain(
        paper_apo_drm2, holo_d1.chain_id, label_seq_id_offset)

    # create highlight script (by the spans, or just create multiple selections)
    # copy the 2 structures to 4 (paper spans vs our spans), so we can color them differently
    # select only the domains (2), and make only them visible

    sholo = superimposed_holo

    # One PyMOL command per line: the '#' lines are PyMOL comments and must
    # not swallow the commands that follow them.
    pymol_script = f"""
fetch {apo.id}
load {sholo_file_path.absolute()}

sele apo_d1, {apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(apo_2DA.d1.get_spans())}
sele apo_d2, {apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(apo_2DA.d2.get_spans())}
sele apo_2DA, apo_d1 or apo_d2

sele holo_d1, {sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(holo_2DA.d1.get_spans())}
sele holo_d2, {sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(holo_2DA.d2.get_spans())}
sele holo_2DA, holo_d1 or holo_d2

# copy objects, so we can color them differently
copy paper_{apo.id}, {apo.id}
copy paper_{sholo.id}, {sholo.id}

sele paper_apo_d1, paper_{apo.id} and chain {apo_2DA.d1.chain_id} and {get_resi_selection(paper_apo_drm1.get_spans())}
sele paper_apo_d2, paper_{apo.id} and chain {apo_2DA.d2.chain_id} and {get_resi_selection(paper_apo_drm2.get_spans())}
sele paper_apo_2DA, paper_apo_d1 or paper_apo_d2

sele paper_holo_d1, paper_{sholo.id} and chain {holo_2DA.d1.chain_id} and {get_resi_selection(paper_holo_drm1.get_spans())}
sele paper_holo_d2, paper_{sholo.id} and chain {holo_2DA.d2.chain_id} and {get_resi_selection(paper_holo_drm2.get_spans())}
sele paper_holo_2DA, paper_holo_d1 or paper_holo_d2

color red, apo_d1
color yellow, apo_d2
color green, holo_d1
color blue, holo_d2

color salmon, paper_apo_d1
color paleyellow, paper_apo_d2
color palegreen, paper_holo_d1
color lightblue, paper_holo_d2

# example usage:
hide; show surface, apo_2DA
hide; show surface, paper_apo_2DA
hide; show surface, holo_2DA
hide; show surface, paper_holo_2DA

hide; show surface, apo_2DA or holo_2DA or paper_apo_2DA or paper_holo_2DA
"""

    print(pymol_script)
from Bio.PDB import PDBParser, MMCIFIO
from Bio.PDB import PDBIO, MMCIFParser

# Entries converted by every pass below.
test_structures = ['1r70', '1zbl', '1zir', '3wu2']

# One row per conversion pass, in execution order:
# (parser class, input extension, writer class, output name template).
# NOTE(review): the mmCIF -> PDB pass never calls save() in the visible
# code, so it is kept as "no output" (None) here - confirm whether a
# save was intended.
_CONVERSIONS = (
    (PDBParser, "pdb", MMCIFIO, "pdb2cif_{}.cif"),
    (PDBParser, "pdb", PDBIO, "pdb2pdb{}.pdb"),
    (MMCIFParser, "cif", MMCIFIO, "cif2cif_{}.cif"),
    (MMCIFParser, "cif", PDBIO, None),
)

for _parser_cls, _extension, _writer_cls, _out_template in _CONVERSIONS:
    for structure in test_structures:
        # A fresh parser and writer per file.
        p = _parser_cls()
        struc = p.get_structure("", f"../data/{structure}.{_extension}")
        io = _writer_cls()
        io.set_structure(struc)
        if _out_template is not None:
            io.save(_out_template.format(structure))
def main():
    """Benchmark parsing and writing of every gzipped file under a folder.

    Command-line arguments: the input format (``pdb`` or ``mmcif``), the
    top-level folder, ``--no-continue`` (ignore existing results) and
    ``--strict`` (``PDBParser`` with ``PERMISSIVE=0``).

    Every ``*.gz`` file found recursively is parsed, summarized, written to
    ``io.temp`` and parsed back; the two summaries must match. On success an
    ``.xml`` file with the timings and summary is written next to the input
    (via ``with_suffix``); on any exception a ``.failed`` file with the
    error and traceback is written instead. Unless ``--no-continue`` is
    given, inputs whose stem matches an existing ``.xml`` file are skipped.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument(
        'infmt',
        choices=['pdb', 'mmcif'],
        help='File format of input files.',
    )
    ap.add_argument(
        'folder',
        type=pathlib.Path,
        help='Top-level folder with input files',
    )
    ap.add_argument(
        '--no-continue',
        action='store_true',
        default=False,
        help='Parses all input files, ignoring existing results.',
    )
    ap.add_argument(
        '--strict',
        action='store_true',
        default=False,
        help='Parse with PDBParser PERMISSIVE=0',
    )
    args = ap.parse_args()

    # Setup logging
    setup_logging()

    permissive_bool = not args.strict

    if args.infmt == 'pdb':
        parser = PDBParser(PERMISSIVE=permissive_bool, QUIET=1)
        writer = PDBIO()
    elif args.infmt == 'mmcif':
        parser = MMCIFParser(QUIET=1)
        writer = MMCIFIO()

    flist = sorted(args.folder.rglob('*.gz'))
    xmllist = sorted(args.folder.rglob('*.xml'))

    if not args.no_continue and xmllist:
        logging.info(f'Found {len(xmllist)} existing result files')
        # Match inputs and results by file stem; keep inputs without a result.
        xmlset = {f.stem: f for f in xmllist}
        fset = {f.stem: f for f in flist}
        remainder = set(fset.keys()) - set(xmlset.keys())
        logging.info(f'Resuming benchmark: {len(remainder)} files left')
        flist = sorted(fset[f] for f in remainder)
    else:
        logging.info(f'Found {len(flist)} files')

    n_digits = len(str(len(flist)))  # for fmting

    for idx, fpath in enumerate(flist, start=1):
        try:
            # Parse. perf_counter is monotonic, so the measured interval is
            # not affected by system clock adjustments.
            with gzip.open(fpath, mode='rt') as handle:
                t0 = time.perf_counter()
                s = parser.get_structure(fpath.name, handle)
                t1 = time.perf_counter()
                read_time = t1 - t0

            data = summarize_structure(s)

            # Write
            writer.set_structure(s)
            t0 = time.perf_counter()
            writer.save('io.temp')
            t1 = time.perf_counter()
            write_time = t1 - t0

            # Round-trip
            s2 = parser.get_structure('new', 'io.temp')
            data2 = summarize_structure(s2)

            # Raised explicitly (not via `assert`) so the check still runs
            # when Python is started with -O.
            if data != data2:
                raise AssertionError(f'Summaries differ: {data} != {data2}')

            test_element_assignment(s)  # raises assert if failed

        except Exception as err:
            # Per-file boundary: record the failure and move on to the next.
            with fpath.with_suffix('.failed').open('w') as f:
                print(err, file=f)
                print(traceback.format_exc(), file=f)
            status = 'failed'

        else:
            # Write XML file with numbers
            root = Element('structure')
            root.set('path', fpath.name)
            root.set('parse_time', f'{read_time:5.3f}')
            root.set('write_time', f'{write_time:5.3f}')

            for key, value in data.items():
                child = SubElement(root, key)
                child.text = str(value)

            # Reparse for pretty print
            xml = minidom.parseString(tostring(root, 'utf-8'))

            # Write to file
            with fpath.with_suffix('.xml').open('w') as f:
                f.write(xml.toprettyxml(indent=' '))

            # Clear XML memory
            root.clear()
            xml.unlink()
            del root, xml

            status = 'ok'

        finally:
            # Best-effort cleanup: the temp file may not exist if the parse
            # or the write failed before it was created.
            try:
                os.remove('io.temp')
            except OSError:
                pass

        memusage = psutil.virtual_memory().percent
        logging.info(
            f'{idx:>{n_digits}d}/{len(flist)} {fpath.parent.name}/{fpath.name}: {status} | mem% = {memusage}',
        )  # to check for leaks
class WriteTest(unittest.TestCase):
    """Round-trip tests for writing structures and dictionaries with MMCIFIO."""

    @classmethod
    def setUpClass(cls):
        """Create the shared writer, parsers, reference structure and paths."""
        cls.io = MMCIFIO()
        cls.mmcif_parser = MMCIFParser()
        cls.pdb_parser = PDBParser()

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            cls.structure = cls.pdb_parser.get_structure(
                "example", "PDB/1A8O.pdb"
            )
        cls.mmcif_file = "PDB/1A8O.cif"
        cls.mmcif_multimodel_pdb_file = "PDB/1SSU_mod.pdb"
        cls.mmcif_multimodel_mmcif_file = "PDB/1SSU_mod.cif"

    def test_mmcifio_write_structure(self):
        """Write a full structure using MMCIFIO."""
        struct1 = self.structure
        # Write full model to temp file
        self.io.set_structure(struct1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(len(struct2), 1)
            self.assertEqual(nresidues, 158)
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue(self):
        """Write a single residue using MMCIFIO."""
        struct1 = self.structure
        residue1 = list(struct1.get_residues())[0]
        # Write the single residue to temp file
        self.io.set_structure(residue1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue_w_chain(self):
        """Write a single residue (chain id == X) using MMCIFIO."""
        struct1 = self.structure.copy()  # make copy so we can change it
        residue1 = list(struct1.get_residues())[0]

        # Modify parent id
        parent = residue1.parent
        parent.id = "X"

        # Write the single residue to temp file
        self.io.set_structure(residue1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain remained the same
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "X")
        finally:
            os.remove(filename)

    def test_mmcifio_write_residue_wout_chain(self):
        """Write a single orphan residue using MMCIFIO."""
        # Work on a copy: detaching the residue on the shared class-level
        # structure would leak into the other tests and make the results
        # depend on execution order.
        struct1 = self.structure.copy()
        residue1 = list(struct1.get_residues())[0]

        residue1.parent = None  # detach residue

        # Write the single residue to temp file
        self.io.set_structure(residue1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 1)

            # Assert chain is default: "A"
            chain_id = [c.id for c in struct2.get_chains()][0]
            self.assertEqual(chain_id, "A")
        finally:
            os.remove(filename)

    def test_mmcifio_write_custom_residue(self):
        """Write a chainless residue using MMCIFIO."""
        res = Residue.Residue((" ", 1, " "), "DUM", "")
        atm = Atom.Atom("CA", [0.1, 0.1, 0.1], 1.0, 1.0, " ", "CA", 1, "C")
        res.add(atm)

        # Ensure that set_structure doesn't alter parent
        parent = res.parent

        # Write the single residue to temp file
        self.io.set_structure(res)

        self.assertIs(parent, res.parent)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            struct2 = self.mmcif_parser.get_structure("res", filename)
            latoms = list(struct2.get_atoms())
            self.assertEqual(len(latoms), 1)
            self.assertEqual(latoms[0].name, "CA")
            self.assertEqual(latoms[0].parent.resname, "DUM")
            self.assertEqual(latoms[0].parent.parent.id, "A")
        finally:
            os.remove(filename)

    def test_mmcifio_select(self):
        """Write a selection of the structure using a Select subclass."""

        # Selection class to filter all alpha carbons
        class CAonly(Select):
            """Accepts only atoms named CA whose element is carbon."""

            def accept_atom(self, atom):
                return atom.name == "CA" and atom.element == "C"

        struct1 = self.structure
        # Write to temp file
        self.io.set_structure(struct1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename, CAonly())
            struct2 = self.mmcif_parser.get_structure("1a8o", filename)
            nresidues = len(list(struct2.get_residues()))
            self.assertEqual(nresidues, 70)
        finally:
            os.remove(filename)

    def test_mmcifio_write_dict(self):
        """Write an mmCIF dictionary out, read it in and compare them."""
        d1 = MMCIF2Dict(self.mmcif_file)

        # Write to temp file
        self.io.set_dict(d1)
        filenumber, filename = tempfile.mkstemp()
        os.close(filenumber)
        try:
            self.io.save(filename)
            d2 = MMCIF2Dict(filename)
            k1 = sorted(d1.keys())
            k2 = sorted(d2.keys())
            self.assertEqual(k1, k2)
            for key in k1:
                self.assertEqual(d1[key], d2[key])
        finally:
            os.remove(filename)

    def test_mmcifio_multimodel(self):
        """Write a multi-model, multi-chain mmCIF file."""
        pdb_struct = self.pdb_parser.get_structure(
            "1SSU_mod_pdb", self.mmcif_multimodel_pdb_file
        )
        mmcif_struct = self.mmcif_parser.get_structure(
            "1SSU_mod_mmcif", self.mmcif_multimodel_mmcif_file
        )

        # The label only identifies the failing input in the test report.
        for label, struct in (("pdb", pdb_struct), ("mmcif", mmcif_struct)):
            with self.subTest(source=label):
                self.io.set_structure(struct)
                filenumber, filename = tempfile.mkstemp()
                os.close(filenumber)
                try:
                    self.io.save(filename)
                    struct_in = self.mmcif_parser.get_structure(
                        "1SSU_mod_in", filename
                    )
                    self.assertEqual(len(struct_in), 2)
                    self.assertEqual(len(struct_in[1]), 2)
                    self.assertEqual(
                        round(
                            float(struct_in[1]["B"][1]["N"].get_coord()[0]), 3
                        ),
                        6.259,
                    )
                finally:
                    os.remove(filename)