def test_pdb_element_names_from_amber(self):
    # Atom names taken from AMBER_FRAGMENT: each listed name must be read
    # with the paired element (carbon or hydrogen).
    structure = gemmi.read_pdb_string(AMBER_FRAGMENT)
    amber_res = structure[0][''][0]
    name_to_symbol = [('CB', 'C'), ('HB', 'H'), ('CG1', 'C'), ('HG11', 'H')]
    for atom_name, symbol in name_to_symbol:
        self.assertEqual(amber_res.sole_atom(atom_name).element,
                         gemmi.Element(symbol))

    # In FRAGMENT_WITH_HG the atoms named HG and HG1 (first and second
    # residue of chain P) are expected to be mercury, not hydrogen.
    hg_chain = gemmi.read_pdb_string(FRAGMENT_WITH_HG)[0]['P']
    for res_idx, atom_name in enumerate(['HG', 'HG1']):
        self.assertEqual(hg_chain[res_idx].sole_atom(atom_name).element,
                         gemmi.Element('Hg'))
def test_pdb_element_names_from_amber(self):
    # Atom names taken from AMBER_FRAGMENT: each listed name must be read
    # with the paired element (carbon or hydrogen).
    structure = gemmi.read_pdb_string(AMBER_FRAGMENT)
    amber_res = structure[0][''][0]
    for atom_name, symbol in (('CB', 'C'), ('HB', 'H'),
                              ('CG1', 'C'), ('HG11', 'H')):
        self.assertEqual(amber_res.sole_atom(atom_name).element,
                         gemmi.Element(symbol))

    # padded_name() of the n-th atom must reproduce columns 13-16 of the
    # n-th input line, ignoring trailing whitespace.
    pdb_lines = AMBER_FRAGMENT.splitlines()
    for idx, atom in enumerate(amber_res):
        self.assertEqual(atom.padded_name(), pdb_lines[idx][12:16].rstrip())

    # In FRAGMENT_WITH_HG the atoms named HG and HG1 (first and second
    # residue of chain P) are expected to be mercury, not hydrogen.
    hg_chain = gemmi.read_pdb_string(FRAGMENT_WITH_HG)[0]['P']
    first_hg = hg_chain[0].sole_atom('HG')
    self.assertEqual(first_hg.element, gemmi.Element('Hg'))
    second_hg = hg_chain[1].sole_atom('HG1')
    self.assertEqual(second_hg.element, gemmi.Element('Hg'))
def test_short_ssbond(self):
    # The first regenerated header line must equal the first input line
    # extended with the suffix appended below.
    structure = gemmi.read_pdb_string(SHORT_SSBOND)
    first_written = structure.make_pdb_headers().splitlines()[0]
    first_given = SHORT_SSBOND.splitlines()[0]
    self.assertEqual(first_written, first_given + " 1555 1555 2.06 ")
def test_add_remove(self):
    # Exercises adding/removing models, residues and atoms on a structure
    # read from SSBOND_FRAGMENT.
    structure = gemmi.read_pdb_string(SSBOND_FRAGMENT)
    structure.add_model(structure[0])
    structure.renumber_models()
    seqid_310 = gemmi.SeqId('310')

    # Removing the atom from the first model must leave the residue in the
    # second (copied) model untouched.
    residue = structure[0].sole_residue('A', seqid_310)
    self.assertEqual(len(residue), 1)
    residue.remove_atom('SG', ' ')
    self.assertEqual(len(residue), 0)
    residue = structure[1].sole_residue('A', seqid_310)
    self.assertEqual(len(residue), 1)

    # Deleting a residue by index shortens chain A by one.
    self.assertEqual(len(structure[0]['A']), 7)
    del structure[0]['A'][3]
    self.assertEqual(len(structure[0]['A']), 6)

    # Deleting a model by name keeps the other model and its name until
    # the models are renumbered.
    self.assertEqual(len(structure), 2)
    self.assertEqual(structure[0].name, '1')
    del structure['1']
    self.assertEqual(len(structure), 1)
    self.assertEqual(structure[0].name, '2')
    structure.renumber_models()
    self.assertEqual(structure[0].name, '1')

    # Grow to three models, then drop all but the last with a slice.
    for _ in range(2):
        structure.add_model(structure[0])
    structure.renumber_models()
    self.assertEqual(structure[0].name, '1')
    self.assertEqual(structure[-1].name, '3')
    del structure[:-1]
    self.assertEqual(len(structure), 1)
    self.assertEqual(structure[0].name, '3')

    # Deleting the last remaining model by index empties the structure.
    del structure[0]
    self.assertEqual(len(structure), 0)
def get_reference_models(project_code):
    """Parse the bound-state PDB of every record found for a project.

    The records come from ``xcextracter(project_code)``; for each row the
    ``protein_code`` column is converted to a dtag and the matching PDB text
    is fetched through ``GetPdbData.get_bound_pdb_file``.

    :param project_code: Project identifier passed to ``xcextracter``.
    :return: Dict mapping each dtag to the structure returned by
        ``gemmi.read_pdb_string``, or to ``None`` when parsing (or storing
        the result) raised an exception; the exception is printed.
    """
    print("Project code is: {}".format(project_code))
    records = xcextracter(project_code)
    print(records)
    print("Number of records is: {}".format(len(records)))

    grabber = GetPdbData()
    models_by_dtag = {}
    for _, record in records.iterrows():
        code = record["protein_code"]
        dtag = protein_code_to_dtag(code)
        bound_pdb = grabber.get_bound_pdb_file(code)
        try:
            models_by_dtag[dtag] = gemmi.read_pdb_string(bound_pdb)
        except Exception as err:
            # Best effort: report the problem and keep a placeholder entry.
            print(err)
            models_by_dtag[dtag] = None
    return models_by_dtag
def test_pdb_misaligned_element(self):
    # The same ATOM record is parsed without a line ending, with LF and
    # with CRLF; the element of atom S must be sulfur every time.
    record = ("ATOM 7 S SUB A 7 34.489 -14.293 34.343"
              " 0.29 43.77 S")
    for eol in ('', '\n', '\r\n'):
        structure = gemmi.read_pdb_string(record + eol)
        residue = structure[0].sole_residue('A', gemmi.SeqId('7'))
        self.assertEqual(residue.sole_atom('S').element.name, 'S')
def test_pdb_fragment(self):
    record = ("HETATM 4154 MG MG A 341 1.384 19.340 11.968"
              " 1.00 67.64 MG")
    # Three variants of the record: unchanged; with the characters ' ', 'M'
    # and 'G' stripped from its ends (str.strip takes a character set); and
    # with the last two characters replaced by a space.
    variants = (record, record.strip(' MG'), record[:-2] + ' ')
    for text in variants:
        structure = gemmi.read_pdb_string(text)
        magnesium = structure[0].sole_residue('A', 341, ' ')['MG']
        self.assertEqual(magnesium.element.name, 'Mg')
        self.assertAlmostEqual(magnesium.b_iso, 67.64, delta=1e-6)
def test_pdb_element_names_from_trjconv(self):
    # Every atom of the first residue must get the listed element, and its
    # padded name must match columns 13-16 of the corresponding input line
    # (trailing whitespace ignored).
    structure = gemmi.read_pdb_string(TRJCONV_FRAGMENT)
    first_residue = structure[0][''][0]
    symbols = ['C', 'H', 'H', 'O', 'P', 'O', 'O', 'O']
    pdb_lines = TRJCONV_FRAGMENT.splitlines()
    for idx, atom in enumerate(first_residue):
        self.assertEqual(atom.element.name, symbols[idx])
        self.assertEqual(atom.padded_name(), pdb_lines[idx][12:16].rstrip())
def test_1gtv(self):
    # Searching around the first atom of residue A/85 must return exactly
    # two marks, both at (practically) zero distance from that atom.
    structure = gemmi.read_pdb_string(FRAGMENT_1GTV)
    model = structure[0]
    ref_atom = model.sole_residue('A', 85, ' ')[0]
    grid = gemmi.SubCells(model, structure.cell, 5)
    found = grid.find_atoms(ref_atom.pos, ref_atom.altloc, 3)
    self.assertEqual(len(found), 2)
    for mark in found:
        separation = grid.dist(ref_atom.pos, mark.pos())
        self.assertAlmostEqual(separation, 0, delta=5e-6)
def test_blank_chain(self):
    # Write the structure as a minimal PDB file and compare it with the
    # input text, line by line.
    structure = gemmi.read_pdb_string(BLANK_CHAIN_FRAGMENT)
    tmp_path = get_path_for_tempfile()
    structure.write_minimal_pdb(tmp_path)
    written = read_lines_and_remove(tmp_path)
    # The first line is skipped: CRYST1 differs (50.000 not 50.00 and
    # added P1). ATOM lines have added element names, hence the cut at
    # column 66.
    body = [text[:66] for text in written[1:]]
    self.assertEqual(body, BLANK_CHAIN_FRAGMENT.splitlines()[1:])
def test_remove_atom(self):
    structure = gemmi.read_pdb_string(SSBOND_FRAGMENT)

    # Deleting the only atom by name leaves residue A/310 empty.
    residue = structure[0].sole_residue('A', 310, ' ')
    self.assertEqual(len(residue), 1)
    del residue['SG']
    self.assertEqual(len(residue), 0)

    # Deleting the only model by name leaves the structure empty.
    self.assertEqual(len(structure), 1)
    self.assertEqual(structure[0].name, '1')
    del structure['1']
    self.assertEqual(len(structure), 0)
def test_pdb_element_names(self):
    record = ("HETATM 4154 MG MG A 341 1.384 19.340 11.968"
              " 1.00 67.64 MG")
    # Three variants of the record: unchanged; with the characters ' ', 'M'
    # and 'G' stripped from its ends (str.strip takes a character set); and
    # with the last two characters replaced by a space.
    variants = (record, record.strip(' MG'), record[:-2] + ' ')
    for text in variants:
        structure = gemmi.read_pdb_string(text)
        seqid = gemmi.SeqId(341, ' ')
        magnesium = structure[0].sole_residue('A', seqid).sole_atom('MG')
        self.assertEqual(magnesium.element.name, 'Mg')
        self.assertAlmostEqual(magnesium.b_iso, 67.64, delta=1e-6)
        # The element attribute is writable.
        magnesium.element = gemmi.Element('Cu')
        self.assertEqual(magnesium.element.name, 'Cu')
def test_5a11(self):
    # Two marks are expected near the first atom of residue A/37: the atom
    # itself (distance ~0) and atom SG of residue 37 in chain B, about
    # 0.13 away.
    structure = gemmi.read_pdb_string(FRAGMENT_5A11)
    model = structure[0]
    ref_atom = model.sole_residue('A', 37, ' ')[0]
    grid = gemmi.SubCells(model, structure.cell, 5)

    def dist_to_ref(mark):
        return grid.dist(ref_atom.pos, mark.pos())

    found = grid.find_atoms(ref_atom.pos, ref_atom.altloc, 3)
    nearest, runner_up = sorted(found, key=dist_to_ref)
    self.assertAlmostEqual(dist_to_ref(nearest), 0, delta=5e-6)
    self.assertAlmostEqual(dist_to_ref(runner_up), 0.13, delta=5e-3)

    located = runner_up.to_cra(model)
    self.assertEqual(located.chain.name, 'B')
    self.assertEqual(str(located.residue.seqid), '37')
    self.assertEqual(located.atom.name, 'SG')
def test_4hhh_frag(self):
    # The first three regenerated header lines must match the first three
    # lines of the input file.
    with open(full_path('4hhh_frag.pdb')) as pdb_file:
        text = pdb_file.read()
    structure = gemmi.read_pdb_string(text)
    expected = text.splitlines()
    produced = structure.make_pdb_headers().splitlines()
    self.assertEqual(expected[0], produced[0])
    # the difference 4555 vs 2555 doesn't matter for us
    normalized = produced[1].replace(' 4555 ', ' 2555 ')
    self.assertEqual(expected[1], normalized)
    self.assertEqual(expected[2], produced[2])
def test_1gtv(self):
    structure = gemmi.read_pdb_string(FRAGMENT_1GTV)
    model = structure[0]
    ref_atom = model.sole_residue('A', gemmi.SeqId(85, ' '))[0]
    search = gemmi.NeighborSearch(model, structure.cell, 5)
    search.populate()

    # find_atoms() must return exactly two marks, both at (practically)
    # zero distance from the reference atom.
    found = search.find_atoms(ref_atom.pos, ref_atom.altloc, 3)
    self.assertEqual(len(found), 2)
    for mark in found:
        separation = search.dist(ref_atom.pos, mark.pos())
        self.assertAlmostEqual(separation, 0, delta=5e-6)

    # find_neighbors() with the same atom and these arguments is expected
    # to return nothing.
    neighbors = search.find_neighbors(ref_atom, 0.1, 3)
    self.assertEqual(len(neighbors), 0)
def parsestr(instream):
    """Build a gemmi structure from PDB-format text.

    :param instream: Contents of a structure file, already read into a string.
    :type instream: str
    :return: Structure returned by ``gemmi.read_pdb_string``.
    :rtype: :obj:`gemmi.Structure`
    """
    return gemmi.read_pdb_string(instream)
def test_different_altloc_order(self):
    structure = gemmi.read_pdb_string(UNORDERED_ALTLOC_FRAGMENT)
    chain_a = structure[0]['A']

    # The CB atoms of SER 9 come back as a group of two, with serial
    # numbers 59 and 65; the group supports negative indices too.
    cb_group = chain_a['9']['SER']['CB']
    self.assertEqual([atom.serial for atom in cb_group], [59, 65])
    for position, serial in ((0, 59), (1, 65), (-1, 65), (-2, 59)):
        self.assertEqual(cb_group[position].serial, serial)

    # Removing alternative conformations reduces the atom-site count to
    # the value count_occupancies() reported beforehand.
    self.assertEqual(chain_a.count_atom_sites(), 14)
    self.assertEqual(chain_a.count_occupancies(), 8)
    structure.remove_alternative_conformations()
    self.assertEqual(chain_a.count_atom_sites(), 8)
def parse_structure3d(struct_str, filename, logger):
    """Parse structure data into a gemmi structure based on the file name.

    A trailing ``.gz`` in ``filename`` causes ``struct_str`` to be
    decompressed with ``gzip.decompress`` first; the remaining extension
    selects the parser (``.cif`` or ``.pdb``).

    :param struct_str: Contents of the structure file (gzip-compressed
        when ``filename`` ends with ``.gz``).
    :param filename: Name of the file, used only to pick the format.
    :param logger: Object whose ``info`` method receives the timing message.
    :return: Structure built by gemmi from the first CIF block or from the
        PDB text.
    :raises RnaspiderUserError: If the extension is neither ``.cif`` nor
        ``.pdb`` (after removing ``.gz``).
    """
    started = timer()

    effective_name = filename
    if filename.endswith('.gz'):
        struct_str = gzip.decompress(struct_str)
        effective_name = filename[:-3]

    if effective_name.endswith('.cif'):
        first_block = gemmi.cif.read_string(struct_str)[0]
        structure = gemmi.make_structure_from_block(first_block)
    elif effective_name.endswith('.pdb'):
        structure = gemmi.read_pdb_string(struct_str)
    else:
        # Should never reach here
        raise RnaspiderUserError('Unknown file format was provided.')

    finished = timer()
    logger.info(
        f"Finished parsing input. Elapsed time: {int((finished - started) * 1000)} ms"
    )
    return structure
def test_5a11(self, use_populate=True):
    # use_populate selects how the search grid is filled: in one call to
    # populate(), or atom by atom through add_atom() with explicit
    # chain/residue/atom indices.
    structure = gemmi.read_pdb_string(FRAGMENT_5A11)
    model = structure[0]
    ref_atom = model.sole_residue('A', gemmi.SeqId(37, ' '))[0]
    search = gemmi.NeighborSearch(model, structure.cell, 5)
    if use_populate:
        search.populate()
    else:
        for chain_idx, chain in enumerate(model):
            for res_idx, residue in enumerate(chain):
                for atom_idx, atom in enumerate(residue):
                    search.add_atom(atom, chain_idx, res_idx, atom_idx)

    def dist_to_ref(mark):
        return search.dist(ref_atom.pos, mark.pos())

    # Two marks are expected: the atom itself (distance ~0) and atom SG of
    # residue 37 in chain B, about 0.13 away.
    found = search.find_atoms(ref_atom.pos, ref_atom.altloc, 3)
    nearest, runner_up = sorted(found, key=dist_to_ref)
    self.assertAlmostEqual(dist_to_ref(nearest), 0, delta=5e-6)
    self.assertAlmostEqual(dist_to_ref(runner_up), 0.13, delta=5e-3)
    located = runner_up.to_cra(model)
    self.assertEqual(located.chain.name, 'B')
    self.assertEqual(str(located.residue.seqid), '37')
    self.assertEqual(located.atom.name, 'SG')

    # find_neighbors() must return only the second mark.
    neighbors = search.find_neighbors(ref_atom, 0.1, 3)
    self.assertEqual(len(neighbors), 1)
    self.assertEqual(neighbors[0], runner_up)
def test_ssbond_again(self):
    # Round trip through mmCIF: PDB text -> structure -> mmCIF document ->
    # structure. The PDB headers regenerated at the end must equal the
    # first three lines of the input.
    original = gemmi.read_pdb_string(SSBOND_FRAGMENT)
    first_block = original.make_mmcif_document()[0]
    restored = gemmi.make_structure_from_block(first_block)
    header_lines = restored.make_pdb_headers().splitlines()
    self.assertEqual(header_lines, SSBOND_FRAGMENT.splitlines()[:3])
def test_ssbond(self):
    # The regenerated PDB headers must equal the first three input lines.
    structure = gemmi.read_pdb_string(SSBOND_FRAGMENT)
    header_lines = structure.make_pdb_headers().splitlines()
    expected_lines = SSBOND_FRAGMENT.splitlines()[:3]
    self.assertEqual(header_lines, expected_lines)
def from_pdb_string(pdb_string: str):
    """Parse PDB text with gemmi and wrap the result in ``Structure``."""
    return Structure(gemmi.read_pdb_string(pdb_string))
def to_gemmi(self):
    """Return the gemmi structure parsed from the PDB text in ``self.string``."""
    return gemmi.read_pdb_string(self.string)