def test_from_url(self):
    """Fetch entry 4ZHL by its PDB ID and check how many atoms were parsed."""
    mmtf_parser = MMTFParser()
    with warnings.catch_warnings():
        # Construction warnings are expected noise here, so silence them.
        warnings.simplefilter('ignore', PDBConstructionWarning)
        structure = mmtf_parser.get_structure_from_url("4ZHL")
    atom_list = list(structure.get_atoms())
    self.assertEqual(len(atom_list), 2080)
def test_multi_model_write(self):
    """Write a multi-model structure to MMTF and check the decoded fields."""
    structure = PDBParser().get_structure("1SSU_mod", "PDB/1SSU_mod.pdb")
    writer = MMTFIO()
    writer.set_structure(structure)
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path)
        # The parsed structure is not inspected; the call only has to succeed.
        struc_back = MMTFParser.get_structure(path)
        decoded = mmtf.parse(path)
        # Top-level counts, checked in the same order as before.
        for expected, actual in (
            (2, decoded.num_models),
            (4, decoded.num_chains),
            (4, decoded.num_groups),
            (4, decoded.num_atoms),
        ):
            self.assertEqual(actual, expected)
        self.assertEqual(
            list(decoded.x_coord_list), [-1.058, -0.025, 7.024, 6.259]
        )
        chain_labels = ["A", "B", "A", "B"]
        self.assertEqual(decoded.chain_id_list, chain_labels)
        self.assertEqual(decoded.chain_name_list, chain_labels)
        self.assertEqual(decoded.chains_per_model, [2, 2])
        self.assertEqual(len(decoded.group_list), 1)
        self.assertEqual(len(decoded.group_id_list), 4)
        self.assertEqual(len(decoded.group_type_list), 4)
        self.assertEqual(decoded.groups_per_chain, [1, 1, 1, 1])
        self.assertEqual(len(decoded.entity_list), 4)
    finally:
        os.remove(path)
def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
    """Parse an MMTF file and a CIF file and check both give the same structure.

    The atom and residue lists are stored on ``self`` (``mmcif_atoms``,
    ``mmtf_atoms``, ``mmcif_res``, ``mmtf_res``) before ``check_atoms`` /
    ``check_residues`` are called, because those helpers read them from there.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", PDBConstructionWarning)
        from_mmtf = MMTFParser.get_structure(mmtf_filename)
    from_cif = MMCIFParser().get_structure("4CUP", cif_filename)

    # Whole-structure atom comparison.
    self.mmcif_atoms = list(from_cif.get_atoms())
    self.mmtf_atoms = list(from_mmtf.get_atoms())
    self.check_atoms()

    # Chain-by-chain residue comparison.
    cif_chains = list(from_cif.get_chains())
    mmtf_chains = list(from_mmtf.get_chains())
    self.assertEqual(len(cif_chains), len(mmtf_chains))
    for cif_chain, mmtf_chain in zip(cif_chains, mmtf_chains):
        self.mmcif_res = list(cif_chain.get_residues())
        self.mmtf_res = list(mmtf_chain.get_residues())
        self.check_residues()

    # Whole-structure residue comparison.
    self.mmcif_res = list(from_cif.get_residues())
    self.mmtf_res = list(from_mmtf.get_residues())
    self.check_residues()

    cif_model_count = sum(1 for _ in from_cif.get_models())
    mmtf_model_count = sum(1 for _ in from_mmtf.get_models())
    self.assertEqual(cif_model_count, mmtf_model_count)
def downloadUsingMmtf(pdbId, fnameOut, maxNumberOfChains=MAX_NUMBER_OF_CHAINS):
    """Fetch a structure through MMTF by its ID and save it with PDBIO.

    Returns True when the file was written and False when the structure has
    no model 0 or any other error occurred (the error is printed).
    NoValidPDBFile is raised when model 0 has more than ``maxNumberOfChains``
    chains; it is printed and then propagated to the caller.
    """
    print("downloadUsingMmtf")
    try:
        structure = MMTFParser().get_structure_from_url(pdbId)
        if 0 not in structure:
            return False
        first_model = structure[0]
        n_chains = len(first_model)
        if n_chains > maxNumberOfChains:
            raise NoValidPDBFile(
                "The maximun number of allowed chains is %d (%d) for %s" %
                (maxNumberOfChains, n_chains, pdbId))
        pdb_writer = PDBIO()
        pdb_writer.set_structure(structure)
        pdb_writer.save(fnameOut)
        return True
    except Exception as err:
        print(err)
        # The chain-limit error must reach the caller; everything else is
        # reported as a plain failure.
        if isinstance(err, NoValidPDBFile):
            raise
        return False
def test_compare_to_mmcif(self):
    """Compare the MMTF and mmCIF parsed structures."""

    def compare_atoms(case):
        """Check that case.mmtf_atoms and case.mmcif_atoms are equivalent."""
        case.assertEqual(len(case.mmcif_atoms), len(case.mmtf_atoms))
        for mmtf_atom, mmcif_atom in zip(case.mmtf_atoms, case.mmcif_atoms):
            # e.g. CA, spaces are removed from atom name
            case.assertEqual(mmtf_atom.name, mmcif_atom.name)
            # e.g. " CA ", spaces included
            case.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname)
            for axis in range(3):
                case.assertAlmostEqual(
                    mmtf_atom.coord[axis], mmcif_atom.coord[axis], places=3)
            case.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
            case.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
            case.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
            # (structure id, model id, chain id, residue id, atom id)
            case.assertEqual(mmtf_atom.full_id, mmcif_atom.full_id)
            # id of atom is the atom name (e.g. "CA")
            case.assertEqual(mmtf_atom.id, mmcif_atom.name)
            # serial_number is not compared: mmCIF serial number is none

    def compare_residues(case):
        """Check that case.mmcif_res and case.mmtf_res are equivalent."""
        case.assertEqual(len(case.mmcif_res), len(case.mmtf_res))
        for mmcif_r, mmtf_r in zip(case.mmcif_res, case.mmtf_res):
            case.assertEqual(mmtf_r.level, mmcif_r.level)
            case.assertEqual(mmtf_r.disordered, mmcif_r.disordered)
            case.assertEqual(mmtf_r.resname, mmcif_r.resname)
            case.assertEqual(mmtf_r.segid, mmcif_r.segid)
            # The atom helper reads its inputs from the test case itself.
            case.mmcif_atoms = list(mmcif_r.get_atom())
            case.mmtf_atoms = list(mmtf_r.get_atom())
            compare_atoms(case)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PDBConstructionWarning)
        mmtf_struct = MMTFParser.get_structure("PDB/4CUP.mmtf")
    mmcif_struct = MMCIFParser().get_structure("example", "PDB/4CUP.cif")

    self.mmcif_atoms = list(mmcif_struct.get_atoms())
    self.mmtf_atoms = list(mmtf_struct.get_atoms())
    compare_atoms(self)

    mmcif_chains = list(mmcif_struct.get_chains())
    mmtf_chains = list(mmtf_struct.get_chains())
    self.assertEqual(len(mmcif_chains), len(mmtf_chains))
    for mmcif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
        self.mmcif_res = list(mmcif_chain.get_residues())
        self.mmtf_res = list(mmtf_chain.get_residues())
        compare_residues(self)

    self.mmcif_res = list(mmcif_struct.get_residues())
    self.mmtf_res = list(mmtf_struct.get_residues())
    compare_residues(self)

    self.assertEqual(len(list(mmcif_struct.get_models())),
                     len(list(mmtf_struct.get_models())))
def test_selection_write(self):
    """Save an MMTF file through a Select subclass and check what was kept."""

    class CAonly(Select):
        """Accept only atoms named "CA" whose element is carbon."""

        def accept_atom(self, atom):
            # Returns 1 for accepted atoms and None (falsy) otherwise.
            is_alpha_carbon = atom.name == "CA" and atom.element == "C"
            return 1 if is_alpha_carbon else None

    structure = MMTFParser.get_structure("PDB/4CUP.mmtf")
    writer = MMTFIO()
    writer.set_structure(structure)
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path, CAonly())
        # The parsed structure is not inspected; the call only has to succeed.
        struc_back = MMTFParser.get_structure(path)
        decoded = mmtf.parse(path)
        expected_atoms = 116
        self.assertEqual(decoded.num_atoms, expected_atoms)
        self.assertEqual(len(decoded.x_coord_list), expected_atoms)
        self.assertEqual(set(decoded.alt_loc_list), {"\x00", "A", "B"})
    finally:
        os.remove(path)
def read_inputs(in_file, file_format, curr_model, chains):
    """Parse a structure file and return its coordinates and an info value.

    Parameters:
     - in_file      path of the structure file, or "-" to read it from stdin
     - file_format  "pdb", "mmcif"/"cif", "mmtf", "mae" or "maegz"
                    (case-insensitive); when falsy it is taken from the
                    extension of in_file
     - curr_model   model number requested by the caller; only used to check
                    that it does not exceed the number of coordinate sets
     - chains       passed through unchanged to the coordinate getter

    Returns (numpy array of coordinates, info), or (None, None) after
    printing a message when the input cannot be used.
    """
    # Infer file format from extension
    file_format = file_format or os.path.basename(in_file).rsplit(".", 1)[-1]

    # Handle stdin
    if in_file == "-":
        contents = sys.stdin.read()
        struct_file = StringIO(contents)
        try:
            # Redirect stdin from pipe back to terminal
            sys.stdin = open("/dev/tty", "r")
        except (IOError, OSError):
            # Only a failure to open the tty means "unsupported"; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed.
            print("Piping structures not supported on this system (no /dev/tty)")
            return None, None
    else:
        struct_file = in_file

    fmt = file_format.lower()
    uses_schrodinger = fmt in ("mae", "maegz")

    if fmt == "pdb":
        from Bio.PDB import PDBParser
        p = PDBParser()
        struc = p.get_structure("", struct_file)
    elif fmt in ("mmcif", "cif"):
        from Bio.PDB.MMCIFParser import MMCIFParser
        p = MMCIFParser()
        struc = p.get_structure("", struct_file)
    elif fmt == "mmtf":
        from Bio.PDB.mmtf import MMTFParser
        struc = MMTFParser.get_structure(struct_file)
    elif uses_schrodinger:
        from schrodinger import structure
        struc = list(structure.StructureReader(struct_file))
    else:
        print("Unrecognised file format")
        return None, None

    # Use Biopython parser by default; Maestro files need their own getter.
    get_coords = get_coords_schrodinger if uses_schrodinger else get_coords_biopython

    coords, info = get_coords(struc, chains)

    if coords is None or curr_model > len(coords):
        print("Nothing to show")
        return None, None

    return np.array(coords), info
def test_write(self):
    """Write structure 1A8O to MMTF and check the decoded fields."""
    structure = MMCIFParser().get_structure("1A8O", "PDB/1A8O.cif")
    writer = MMTFIO()
    writer.set_structure(structure)
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        writer.save(path)
        # The parsed structure is not inspected; the call only has to succeed.
        struc_back = MMTFParser.get_structure(path)
        decoded = mmtf.parse(path)

        # Header-level fields.
        self.assertEqual(decoded.structure_id, "1A8O")
        self.assertEqual(decoded.num_models, 1)
        self.assertEqual(decoded.num_chains, 2)
        self.assertEqual(decoded.num_groups, 158)
        self.assertEqual(decoded.num_atoms, 644)

        # Every per-atom list holds one entry per atom.
        for per_atom in (
            decoded.x_coord_list,
            decoded.y_coord_list,
            decoded.z_coord_list,
            decoded.b_factor_list,
            decoded.occupancy_list,
        ):
            self.assertEqual(len(per_atom), 644)
        self.assertEqual(decoded.x_coord_list[5], 20.022)
        self.assertEqual(set(decoded.ins_code_list), {"\x00"})
        self.assertEqual(set(decoded.alt_loc_list), {"\x00"})
        self.assertEqual(list(decoded.atom_id_list), list(range(1, 645)))
        self.assertEqual(
            list(decoded.sequence_index_list), list(range(70)) + [-1] * 88
        )

        # Chain and group bookkeeping.
        self.assertEqual(decoded.chain_id_list, ["A", "B"])
        self.assertEqual(decoded.chain_name_list, ["A", "A"])
        self.assertEqual(decoded.chains_per_model, [2])
        self.assertEqual(len(decoded.group_list), 21)
        self.assertEqual(len(decoded.group_id_list), 158)
        self.assertEqual(len(decoded.group_type_list), 158)
        self.assertEqual(decoded.groups_per_chain, [70, 88])

        # Entities: one polymer followed by one water entity.
        self.assertEqual(len(decoded.entity_list), 2)
        polymer, water = decoded.entity_list[0], decoded.entity_list[1]
        self.assertEqual(polymer["type"], "polymer")
        self.assertEqual(polymer["chainIndexList"], [0])
        self.assertEqual(
            polymer["sequence"],
            "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
        )
        self.assertEqual(water["type"], "water")
        self.assertEqual(water["chainIndexList"], [1])
        self.assertEqual(water["sequence"], "")
    finally:
        os.remove(path)
def loop_parsing(file_type, proteins, rep=10):
    """Parse each protein file with the parser that matches ``file_type``.

    Files are looked up as ``<cwd>/<type>/<protein>.<type>``. ``fast_cif``
    selects FastMMCIFParser but reads from the ``cif`` directory. Any other
    unrecognised ``file_type`` falls back to PDBParser. The loop stops at the
    first file that fails to parse, after printing a message. ``rep`` is
    accepted but not used by this function. Returns None.
    """
    base_dir = os.getcwd()

    if file_type == 'mmtf':
        parser = MMTFParser()
    elif file_type == 'fast_cif':
        parser = FastMMCIFParser()
    elif file_type == 'cif':
        parser = MMCIFParser()
    else:
        parser = PDBParser()

    # The fast parser reads the ordinary .cif files.
    if file_type == "fast_cif":
        file_type = "cif"
    single_argument = file_type == 'mmtf'

    for name in proteins:
        path = "%s/%s/%s.%s" % (base_dir, file_type, name, file_type)
        try:
            if single_argument:
                # MMTF parsing takes only the file path.
                parser.get_structure(path)
            else:
                # The other parsers also need a structure id; any value works.
                parser.get_structure(random.randint(0, 100), path)
        except Exception:
            print("Having trouble parsing %s" % (name))
            break
def check_mmtf_vs_cif(self, mmtf_filename, cif_filename):
    """Check that an MMTF file and a CIF file parse to equivalent structures.

    ``check_atoms`` and ``check_residues`` read their inputs from attributes
    on ``self``, so those attributes are (re)assigned before every call.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PDBConstructionWarning)
        mmtf_struct = MMTFParser.get_structure(mmtf_filename)
    cif_struct = MMCIFParser().get_structure("example", cif_filename)

    # All atoms of both structures.
    self.mmcif_atoms = list(cif_struct.get_atoms())
    self.mmtf_atoms = list(mmtf_struct.get_atoms())
    self.check_atoms()

    # Residues, one pair of chains at a time.
    cif_chain_list = list(cif_struct.get_chains())
    mmtf_chain_list = list(mmtf_struct.get_chains())
    self.assertEqual(len(cif_chain_list), len(mmtf_chain_list))
    for cif_chain, mmtf_chain in zip(cif_chain_list, mmtf_chain_list):
        self.mmcif_res = list(cif_chain.get_residues())
        self.mmtf_res = list(mmtf_chain.get_residues())
        self.check_residues()

    # All residues of both structures.
    self.mmcif_res = list(cif_struct.get_residues())
    self.mmtf_res = list(mmtf_struct.get_residues())
    self.check_residues()

    n_cif_models = len(list(cif_struct.get_models()))
    n_mmtf_models = len(list(mmtf_struct.get_models()))
    self.assertEqual(n_cif_models, n_mmtf_models)
def get_structure_from_file(filename, **kwargs):
    """Parse a structure file, trying several parsers until one succeeds.

    Parameters:
     - filename  path of the structure file
     - format    optional keyword; 'auto' (default) takes the format reported
                 by parse_structure_filename for this filename

    The format decides which parsers are tried and in what order:
    'pdb'/'auto' -> PDB then mmCIF; 'mmtf' -> MMTF only; anything else ->
    mmCIF, PDB, then MMTF. A parser that raises is skipped.

    Returns the first non-empty structure, or None after reporting through
    eprint that the file could not be read.
    """
    (file_id, filename_format, zip_flag) = parse_structure_filename(filename)

    requested = kwargs.get('format', 'auto')
    # An explicit format overrides the one derived from the file name.
    fmt = filename_format if requested == 'auto' else requested

    cif_parser = MMCIFParser()
    pdb_parser = PDBParser()
    mmtf_parser = MMTFParser()

    if fmt in ('pdb', 'auto'):
        parsers = [pdb_parser, cif_parser]
    elif fmt == 'mmtf':
        parsers = [mmtf_parser]
    else:
        parsers = [cif_parser, pdb_parser, mmtf_parser]

    open_function = opengz if zip_flag else open

    for parser in parsers:
        try:
            if parser is mmtf_parser:
                # The MMTF parser takes the file path only. Giving it
                # (id, handle) like the others raised a TypeError that was
                # silently swallowed, so the MMTF fallback never worked.
                structure = parser.get_structure(filename)
            else:
                with open_function(filename, 'r') as handle:
                    structure = parser.get_structure(file_id, handle)
        except Exception:
            # Best effort: a parser that cannot read the file is skipped.
            continue
        if structure:
            return structure

    eprint("File {} could not be read".format(filename))
    return None
def test_1A80(self):
    """Check that PDB/1A8O.mmtf parses without raising."""
    with warnings.catch_warnings():
        warnings.simplefilter(action="ignore", category=PDBConstructionWarning)
        # Only successful parsing matters; the structure is not inspected.
        structure = MMTFParser.get_structure("PDB/1A8O.mmtf")
def test_4ZHL(self):
    """Check that PDB/4ZHL.mmtf parses without raising."""
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=PDBConstructionWarning)
        # Only successful parsing matters; the structure is not inspected.
        structure = MMTFParser.get_structure("PDB/4ZHL.mmtf")
from Bio.PDB.PDBIO import PDBIO from Bio.PDB.PDBIO import Select from Bio.PDB.PDBParser import PDBParser from Bio.PDB.mmtf import MMTFParser from Bio.PDB.PDBExceptions import PDBConstructionWarning import os.path as op import logging import warnings import ssbio.utils from ssbio.biopython.bp_mmcifparser import MMCIFParserFix log = logging.getLogger(__name__) cifp = MMCIFParserFix(QUIET=True) pdbp = PDBParser(PERMISSIVE=True, QUIET=True) mmtfp = MMTFParser() def as_protein(structure, filter_residues=True): """ Exposes methods in the Bio.Struct.Protein module. Parameters: - filter_residues boolean; removes non-aa residues through Bio.PDB.Polypeptide is_aa function [Default: True] Returns a new structure object. """ from ssbio.biopython.Bio.Struct.Protein import Protein return Protein.from_structure(structure, filter_residues) class StructureIO(PDBIO):
def test_compare_to_mmcif(self):
    """Compare the MMTF and mmCIF parsed structures."""

    def atoms_match(parse_mmtf):
        """Assert the atom lists stored on the test case are equivalent."""
        mmcif_atoms = parse_mmtf.mmcif_atoms
        mmtf_atoms = parse_mmtf.mmtf_atoms
        parse_mmtf.assertEqual(len(mmcif_atoms), len(mmtf_atoms))
        for mmcif_atom, mmtf_atom in zip(mmcif_atoms, mmtf_atoms):
            # e.g. CA, spaces are removed from atom name
            parse_mmtf.assertEqual(mmtf_atom.name, mmcif_atom.name)
            # e.g. " CA ", spaces included
            parse_mmtf.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname)
            for axis in (0, 1, 2):
                parse_mmtf.assertAlmostEqual(
                    mmtf_atom.coord[axis], mmcif_atom.coord[axis], places=3)
            parse_mmtf.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
            parse_mmtf.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
            parse_mmtf.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
            # (structure id, model id, chain id, residue id, atom id)
            parse_mmtf.assertEqual(mmtf_atom.full_id, mmcif_atom.full_id)
            # id of atom is the atom name (e.g. "CA")
            parse_mmtf.assertEqual(mmtf_atom.id, mmcif_atom.name)
            # serial_number is not compared: mmCIF serial number is none

    def residues_match(parse_mmtf):
        """Assert the residue lists stored on the test case are equivalent."""
        mmcif_residues = parse_mmtf.mmcif_res
        mmtf_residues = parse_mmtf.mmtf_res
        parse_mmtf.assertEqual(len(mmcif_residues), len(mmtf_residues))
        for mmcif_r, mmtf_r in zip(mmcif_residues, mmtf_residues):
            parse_mmtf.assertEqual(mmtf_r.level, mmcif_r.level)
            parse_mmtf.assertEqual(mmtf_r.disordered, mmcif_r.disordered)
            parse_mmtf.assertEqual(mmtf_r.resname, mmcif_r.resname)
            parse_mmtf.assertEqual(mmtf_r.segid, mmcif_r.segid)
            # Hand this residue's atoms to the atom comparison.
            parse_mmtf.mmcif_atoms = list(mmcif_r.get_atom())
            parse_mmtf.mmtf_atoms = list(mmtf_r.get_atom())
            atoms_match(parse_mmtf)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', PDBConstructionWarning)
        mmtf_struct = MMTFParser.get_structure("PDB/4CUP.mmtf")
    mmcif_parser = MMCIFParser()
    mmcif_struct = mmcif_parser.get_structure("example", "PDB/4CUP.cif")

    # Structure-wide atom comparison.
    self.mmcif_atoms = list(mmcif_struct.get_atoms())
    self.mmtf_atoms = list(mmtf_struct.get_atoms())
    atoms_match(self)

    # Per-chain residue comparison.
    mmcif_chains = list(mmcif_struct.get_chains())
    mmtf_chains = list(mmtf_struct.get_chains())
    self.assertEqual(len(mmcif_chains), len(mmtf_chains))
    for mmcif_chain, mmtf_chain in zip(mmcif_chains, mmtf_chains):
        self.mmcif_res = list(mmcif_chain.get_residues())
        self.mmtf_res = list(mmtf_chain.get_residues())
        residues_match(self)

    # Structure-wide residue comparison.
    self.mmcif_res = list(mmcif_struct.get_residues())
    self.mmtf_res = list(mmtf_struct.get_residues())
    residues_match(self)

    mmcif_models = list(mmcif_struct.get_models())
    mmtf_models = list(mmtf_struct.get_models())
    self.assertEqual(len(mmcif_models), len(mmtf_models))
def test_parser():
    """Simply test that PDB/4CUP.mmtf can be parsed without raising."""
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=PDBConstructionWarning)
        # Only successful parsing matters; the structure is not inspected.
        structure = MMTFParser.get_structure("PDB/4CUP.mmtf")
def test_cif(self):
    """Parse PDB/1EJG.mmtf and print the resulting structure."""
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=PDBConstructionWarning)
        parsed = MMTFParser.get_structure("PDB/1EJG.mmtf")
        print(parsed)
def view_protein(in_file, file_format=None, curr_model=1, chains=None, box_size=100.0):
    """Draw a structure's backbone in the terminal and respond to key presses.

    Parameters:
     - in_file      path of the structure file, or "-" to read it from stdin
     - file_format  "pdb", "mmcif"/"cif" or "mmtf" (case-insensitive); when
                    None it is taken from the extension of in_file
     - curr_model   1-based number of the model to show first
     - chains       chain IDs to draw; None or an empty sequence draws all
     - box_size     side of the square bounding box; must be within 10-400

    Always returns None. Invalid arguments, unreadable input and empty
    structures are reported with print() before returning. The terminal
    settings changed for key handling are restored on exit.
    """
    if box_size < 10.0 or box_size > 400.0:
        print("Box size must be between 10 and 400")
        return

    # A None default avoids sharing one mutable list object between calls.
    if chains is None:
        chains = []

    zoom_speed = 1.1
    trans_speed = 1.0
    rot_speed = 0.1
    spin_speed = 0.01
    action_count = 500
    auto_spin = False
    cycle_models = False

    # Infer file format from extension
    if file_format is None:
        file_format = os.path.basename(in_file).rsplit(".", 1)[-1]

    # Handle stdin
    if in_file == "-":
        contents = sys.stdin.read()
        struct_file = StringIO(contents)
        try:
            # Redirect stdin from pipe back to terminal
            sys.stdin = open("/dev/tty", "r")
        except (IOError, OSError):
            # Only a failure to open the tty means "unsupported"; anything
            # else (e.g. KeyboardInterrupt) must not be swallowed.
            print("Piping structures not supported on this system (no /dev/tty)")
            return
    else:
        struct_file = in_file

    fmt = file_format.lower()
    if fmt == "pdb":
        from Bio.PDB import PDBParser
        p = PDBParser()
        struc = p.get_structure("", struct_file)
    elif fmt in ("mmcif", "cif"):
        from Bio.PDB.MMCIFParser import MMCIFParser
        p = MMCIFParser()
        struc = p.get_structure("", struct_file)
    elif fmt == "mmtf":
        from Bio.PDB.mmtf import MMTFParser
        struc = MMTFParser.get_structure(struct_file)
    else:
        print("Unrecognised file format")
        return

    # Get backbone coordinates
    coords = []
    connections = []
    atom_counter, res_counter = 0, 0
    chain_ids = []
    for mi, model in enumerate(struc):
        model_coords = []
        for chain in model:
            chain_id = chain.get_id()
            if len(chains) > 0 and chain_id not in chains:
                continue
            # Counters and connectivity are collected from the first model only.
            if mi == 0:
                chain_ids.append(chain_id)
            for res in chain:
                if mi == 0:
                    res_counter += 1
                res_n = res.get_id()[1]
                for atom in res:
                    if mi == 0:
                        atom_counter += 1
                    if atom.get_name() in (
                        "N", "CA", "C",  # Protein
                        "P", "O5'", "C5'", "C4'", "C3'", "O3'",  # Nucleic acid
                    ):
                        if mi == 0 and len(model_coords) > 0:
                            # Determine if the atom is connected to the previous atom
                            connections.append(
                                chain_id == last_chain_id
                                and (res_n == (last_res_n + 1) or res_n == last_res_n))
                        model_coords.append(atom.get_coord())
                        last_chain_id, last_res_n = chain_id, res_n
        model_coords = np.array(model_coords)
        if mi == 0:
            if model_coords.shape[0] == 0:
                print("Nothing to show")
                return
            coords_mean = model_coords.mean(0)
        model_coords -= coords_mean  # Center on origin of first model
        coords.append(model_coords)
    coords = np.array(coords)

    if curr_model > len(struc):
        print("Can't find that model")
        return

    info_str = "{} with {} models, {} chains ({}), {} residues, {} atoms".format(
        os.path.basename(in_file), len(struc), len(chain_ids),
        "".join(chain_ids), res_counter, atom_counter)

    # Make square bounding box of a set size and determine zoom
    x_min, x_max = float(coords[curr_model - 1, :, 0].min()), float(coords[curr_model - 1, :, 0].max())
    y_min, y_max = float(coords[curr_model - 1, :, 1].min()), float(coords[curr_model - 1, :, 1].max())
    x_diff, y_diff = x_max - x_min, y_max - y_min
    box_bound = float(np.max([x_diff, y_diff])) + 2.0
    zoom = box_size / box_bound
    x_min = zoom * (x_min - (box_bound - x_diff) / 2.0)
    x_max = zoom * (x_max + (box_bound - x_diff) / 2.0)
    y_min = zoom * (y_min - (box_bound - y_diff) / 2.0)
    y_max = zoom * (y_max + (box_bound - y_diff) / 2.0)

    canvas = Canvas()
    trans_x, trans_y = 0.0, 0.0
    rot_x, rot_y = 0.0, 0.0

    # See https://stackoverflow.com/questions/13207678/whats-the-simplest-way-of-detecting-keyboard-input-in-python-from-the-terminal/13207724
    fd = sys.stdin.fileno()
    oldterm = termios.tcgetattr(fd)
    oldflags = fcntl.fcntl(fd, fcntl.F_GETFL)
    newattr = termios.tcgetattr(fd)
    newattr[3] = newattr[3] & ~termios.ICANON & ~termios.ECHO

    try:
        # The terminal is reconfigured inside the try so that the finally
        # clause restores it even if switching modes fails half-way.
        termios.tcsetattr(fd, termios.TCSANOW, newattr)
        fcntl.fcntl(fd, fcntl.F_SETFL, oldflags | os.O_NONBLOCK)

        while True:
            os.system("clear")
            points = []

            # Outline of the bounding box
            for x_start, y_start, x_end, y_end in (
                (x_min, y_min, x_max, y_min),
                (x_max, y_min, x_max, y_max),
                (x_max, y_max, x_min, y_max),
                (x_min, y_max, x_min, y_min),
            ):
                for x, y in line(x_start, y_start, x_end, y_end):
                    points.append([x, y])

            rot_mat_x = np.array([
                [1.0, 0.0, 0.0],
                [0.0, np.cos(rot_x), -np.sin(rot_x)],
                [0.0, np.sin(rot_x), np.cos(rot_x)],
            ], dtype=np.float32)
            rot_mat_y = np.array([
                [np.cos(rot_y), 0.0, np.sin(rot_y)],
                [0.0, 1.0, 0.0],
                [-np.sin(rot_y), 0.0, np.cos(rot_y)],
            ], dtype=np.float32)
            trans_coords = coords[curr_model - 1] + np.array(
                [trans_x, trans_y, 0.0], dtype=np.float32)
            zoom_rot_coords = zoom * np.matmul(
                rot_mat_y, np.matmul(rot_mat_x, trans_coords.T)).T

            # Draw each backbone segment that lies fully inside the box
            for i in range(coords.shape[1] - 1):
                if connections[i]:
                    x_start, x_end = float(zoom_rot_coords[i, 0]), float(
                        zoom_rot_coords[i + 1, 0])
                    y_start, y_end = float(zoom_rot_coords[i, 1]), float(
                        zoom_rot_coords[i + 1, 1])
                    if (x_min < x_start < x_max and x_min < x_end < x_max
                            and y_min < y_start < y_max and y_min < y_end < y_max):
                        for x, y in line(x_start, y_start, x_end, y_end):
                            points.append([x, y])

            print(info_str)
            print(
                "W/A/S/D rotates, T/F/G/H moves, I/O zooms, U spins, P cycles models, Q quits"
            )
            canvas.clear()
            for x, y in points:
                canvas.set(x, y)
            print(canvas.frame())

            # Poll for a key; redraw after a key press or, when spinning or
            # cycling models, after action_count polls.
            counter = 0
            while True:
                if auto_spin or cycle_models:
                    counter += 1
                    if counter == action_count:
                        if auto_spin:
                            rot_y += spin_speed
                        if cycle_models:
                            curr_model += 1
                            if curr_model > len(struc):
                                curr_model = 1
                        break
                try:
                    k = sys.stdin.read(1)
                    if k:
                        key = k.upper()
                        if key == "O":
                            zoom /= zoom_speed
                        elif key == "I":
                            zoom *= zoom_speed
                        elif key == "F":
                            trans_x -= trans_speed
                        elif key == "H":
                            trans_x += trans_speed
                        elif key == "G":
                            trans_y -= trans_speed
                        elif key == "T":
                            trans_y += trans_speed
                        elif key == "S":
                            rot_x -= rot_speed
                        elif key == "W":
                            rot_x += rot_speed
                        elif key == "A":
                            rot_y -= rot_speed
                        elif key == "D":
                            rot_y += rot_speed
                        elif key == "U":
                            auto_spin = not auto_spin
                        elif key == "P" and len(struc) > 1:
                            cycle_models = not cycle_models
                        elif key == "Q":
                            return
                        # Any key press triggers a redraw.
                        break
                except IOError:
                    pass
    finally:
        termios.tcsetattr(fd, termios.TCSAFLUSH, oldterm)
        fcntl.fcntl(fd, fcntl.F_SETFL, oldflags)
from Bio.PDB.mmtf import MMTFParser # read structure from file structure = MMTFParser.get_structure("PDB/4CUP.mmtf") # read structure from PDB structure = MMTFParser.get_structure_from_url("4CUP")
from Bio.SeqRecord import SeqRecord from Bio.PDB import Selection from Bio.PDB import NeighborSearch from Bio.PDB.DSSP import DSSP from Bio.PDB.NACCESS import * from Bio.PDB import PDBIO thr = 6 pdb_list = open('../437_dimers_list.merge.tsv') for pdb_chi_chj in pdb_list: print(pdb_chi_chj) x = pdb_chi_chj.rstrip().split("\t") pdb = x[0] chi = x[1] chj = x[2] p = MMTFParser() structure = MMTFParser.get_structure_from_url(pdb) s = structure[0] atom_list = [atom for atom in s[chi].get_atoms() if atom.name != 'H'] atom_list.extend([atom for atom in s[chj].get_atoms() if atom.name != 'H']) RRI = NeighborSearch(atom_list).search_all(thr, 'A') MAP = {} for rri in RRI: if (rri[0].get_parent().get_id()[0][0:2] == "H_" or rri[0].get_parent().get_id()[0] == 'W'): #print(rri[0].get_parent().get_id()[0][0:2]) continue if (rri[1].get_parent().get_id()[0][0:2] == "H_" or rri[1].get_parent().get_id()[0] == 'W'):
def test_1A80(self):
    """Check that the MMTF file for 1A8O parses without raising."""
    mmtf_path = "PDB/1A8O.mmtf"
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=PDBConstructionWarning)
        # Only successful parsing matters; the structure is not inspected.
        structure = MMTFParser.get_structure(mmtf_path)
class Rebuild(unittest.TestCase):
    """Read PDB and mmCIF structures, convert to/from internal coordinates.

    The input structures are parsed once, when the class body is executed,
    and shared by all tests through class attributes.  Several files are
    loaded twice (``pdb_2XHE``/``pdb_2XHE2``, ``cif_4CUP``/``cif_4CUP2``,
    ``cif_4ZHL``/``cif_4ZHL2``); tests below modify the structures they use,
    so the second copies presumably keep those modifications from affecting
    other tests - TODO confirm.
    """

    PDB_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    CIF_parser = MMCIFParser(QUIET=True)
    MMTF_parser = MMTFParser()
    pdb_1LCD = PDB_parser.get_structure("1LCD", "PDB/1LCD.pdb")
    pdb_2XHE = PDB_parser.get_structure("2XHE", "PDB/2XHE.pdb")
    pdb_2XHE2 = PDB_parser.get_structure("2XHE", "PDB/2XHE.pdb")
    cif_3JQH = CIF_parser.get_structure("3JQH", "PDB/3JQH.cif")
    cif_4CUP = CIF_parser.get_structure("4CUP", "PDB/4CUP.cif")
    cif_4CUP2 = CIF_parser.get_structure("4CUP", "PDB/4CUP.cif")
    cif_4ZHL = CIF_parser.get_structure("4ZHL", "PDB/4ZHL.cif")
    cif_4ZHL2 = CIF_parser.get_structure("4ZHL", "PDB/4ZHL.cif")
    # record=True captures warnings into a list instead of emitting them;
    # the list is not needed, so it is not bound to a name.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always", PDBConstructionWarning)
        mmtf_1A8O = MMTF_parser.get_structure("PDB/1A8O.mmtf")

    def test_mmtf(self):
        """Build an IC_Chain from the first chain of the MMTF 1A8O structure."""
        chain = next(self.mmtf_1A8O.get_chains())
        ic_chain = IC_Chain(chain)
        self.assertEqual(len(ic_chain.ordered_aa_ic_list), 70)

    def test_rebuild_multichain_missing(self):
        """Convert multichain missing atom struct to, from internal coords."""
        # 2XHE has regions of missing chain, last residue has only N
        r = structure_rebuild_test(self.pdb_2XHE, False)
        self.assertEqual(r["residues"], 787)
        self.assertEqual(r["rCount"], 835)
        self.assertEqual(r["rMatchCount"], 835)
        self.assertEqual(r["aCount"], 6267)
        self.assertEqual(r["disAtmCount"], 0)
        self.assertEqual(r["aCoordMatchCount"], 6267)
        self.assertEqual(len(r["chains"]), 2)
        self.assertTrue(r["pass"])

    def test_rebuild_disordered_atoms_residues(self):
        """Convert disordered protein to internal coordinates and back."""
        # 3jqh has both disordered residues
        # and disordered atoms in ordered residues
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always", PDBConstructionWarning)
            r = structure_rebuild_test(self.cif_3JQH, False)
        self.assertEqual(r["residues"], 26)
        self.assertEqual(r["rCount"], 47)
        self.assertEqual(r["rMatchCount"], 47)
        self.assertEqual(r["aCount"], 217)
        self.assertEqual(r["disAtmCount"], 50)
        self.assertEqual(r["aCoordMatchCount"], 217)
        self.assertEqual(len(r["chains"]), 1)
        self.assertTrue(r["pass"])

    def test_no_crosstalk(self):
        """Deep copy, change few internal coords, test nothing else changes."""
        self.cif_4CUP.atom_to_internal_coordinates()
        # cic1 (from the untouched deep copy) is the reference that the
        # modified cic0 is compared against below.
        cpy4cup = copy.deepcopy(self.cif_4CUP)
        cic0 = self.cif_4CUP.child_list[0].child_list[0].internal_coord
        cic1 = cpy4cup.child_list[0].child_list[0].internal_coord
        alist = [
            "omg",
            "phi",
            "psi",
            "chi1",
            "chi2",
            "chi3",
            "chi4",
            "chi5",
            "tau",
        ]
        delta = 33  # degrees to change
        tdelta = delta / 10.0  # more realistic for bond angle
        targPos = 1
        for ang in alist:
            # skip by 2's with alist along original chain changing angle spec
            ricTarg = cic0.chain.child_list[targPos].internal_coord
            targPos += 2
            try:
                edr = ricTarg.pick_angle(ang)
                andx = edr.ndx
                if ang == "tau":
                    cic0.hedraAngle[andx] += tdelta
                    cic0.hAtoms_needs_update[andx] = True
                    cic0.atomArrayValid[cic0.h2aa[andx]] = False
                    cic0.hAtoms_needs_update[:] = True
                    cic0.atomArrayValid[:] = False
                    cic0.dAtoms_needs_update[:] = True
                else:
                    cic0.dihedraAngle[andx] += delta
                    if cic0.dihedraAngle[andx] > 180.0:
                        cic0.dihedraAngle[andx] -= 360.0
                    cic0.dihedraAngleRads[andx] = np.deg2rad(cic0.dihedraAngle[andx])
                    cic0.dAtoms_needs_update[andx] = True
                    cic0.atomArrayValid[cic0.d2aa[andx]] = False
                    # test Dihedron.bits()
                    pfd = IC_Residue.picFlagsDict
                    if ricTarg.rbase[2] == "P" and ang == "omg":
                        self.assertEqual(edr.bits(), (pfd["omg"] | pfd["pomg"]))
                    else:
                        self.assertEqual(edr.bits(), pfd[ang])
            except AttributeError:
                pass  # skip if residue does not have e.g. chi5

        cic0.internal_to_atom_coordinates()  # move atoms
        cic0.atom_to_internal_coordinates()  # get new internal coords

        # generate hdelta and ddelta difference arrays so can look for what
        # changed
        hdelta = cic0.hedraAngle - cic1.hedraAngle
        hdelta[np.abs(hdelta) < 0.00001] = 0.0
        ddelta = cic0.dihedraAngle - cic1.dihedraAngle
        ddelta[np.abs(ddelta) < 0.00001] = 0.0
        ddelta[ddelta < -180.0] += 360.0  # wrap around circle values
        targPos = 1
        for ang in alist:
            # same skip along original chain looking at hdelta and ddelta
            # if change is as specified, set difference to 0 then we can test
            # for any remaining (spurious) changes
            ricTarg = cic0.chain.child_list[targPos].internal_coord
            targPos += 2
            try:
                andx = ricTarg.pick_angle(ang).ndx
                if ang == "tau":
                    self.assertAlmostEqual(hdelta[andx], tdelta, places=4)
                    hdelta[andx] = 0.0
                    # some other angle has to change to accommodate tau change
                    # N-Ca-Cb is artifact of choices in ic_data
                    # expected change so clear relevant hdelta here
                    adjAngNdx = ricTarg.pick_angle("N:CA:CB").ndx
                    self.assertNotAlmostEqual(hdelta[adjAngNdx], 0.0, places=1)
                    hdelta[adjAngNdx] = 0.0
                else:
                    self.assertAlmostEqual(ddelta[andx], delta, places=4)
                    ddelta[andx] = 0.0
            except AttributeError:
                pass  # if residue does not have e.g. chi5

        # every expected change was zeroed above, so nothing should remain
        hsum = hdelta.sum()
        self.assertEqual(hsum, 0.0)
        dsum = ddelta.sum()
        self.assertEqual(dsum, 0.0)

    def test_model_change_internal_coords(self):
        """Modify tau, chi1, psi and CA:CB length in a model and check them.

        The new values are recorded per residue, atom coordinates are
        regenerated from the modified internal coordinates, internal
        coordinates are recomputed from those atoms, and the recomputed
        values are compared with the recorded ones.
        """
        mdl = self.pdb_1LCD[1]
        mdl.atom_to_internal_coordinates()
        # other tests show can build with arbitrary internal coords
        # build here so changes below trigger more complicated
        # Atoms_needs_update mask arrays
        mdl.internal_to_atom_coordinates()
        # new values keyed by str(residue), one dict per modified quantity
        nvt = {}
        nvc1 = {}
        nvpsi = {}
        nvlen = {}
        tcount = 0
        c1count = 0
        psicount = 0
        lcount = 0
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                # hedra change
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    tcount += 1
                    nv = tau + 0.5
                    ric.set_angle("tau", nv)
                    nvt[str(r)] = nv
                # sidechain dihedron change
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1count += 1
                    nv = chi1 + 90
                    if nv > 180.0:
                        nv -= 360.0
                    ric.bond_set("chi1", nv)
                    nvc1[str(r)] = nv
                # backbone dihedron change
                psi = ric.get_angle("psi")
                if psi is not None:
                    psicount += 1
                    nv = psi - 90
                    if nv < -180.0:
                        nv += 360.0
                    ric.set_angle("psi", nv)
                    nvpsi[str(r)] = nv
                leng = ric.get_length("CA:CB")
                if leng is not None:
                    lcount += 1
                    nv = leng + 0.05
                    ric.set_length("CA:CB", nv)
                    nvlen[str(r)] = nv

        mdl.internal_to_atom_coordinates()

        # prove not using stored results
        # NOTE(review): the guard inspects the chain object while the
        # deletions target chn.internal_coord; if "hedraLen" only exists on
        # internal_coord this block never runs - confirm the intended target.
        for chn in mdl.get_chains():
            if hasattr(chn, "hedraLen"):
                delattr(chn.internal_coord, "hedraLen")
                delattr(chn.internal_coord, "dihedraLen")
                delattr(chn.internal_coord, "hedraAngle")
                delattr(chn.internal_coord, "dihedraAngle")
                for r in chn.get_residues():
                    r.internal_coord.hedra = {}
                    r.internal_coord.dihedra = {}

        mdl.atom_to_internal_coordinates()

        ttcount = 0
        c1tcount = 0
        psitcount = 0
        ltcount = 0
        for r in mdl.get_residues():
            ric = r.internal_coord
            if ric:
                tau = ric.get_angle("tau")
                if ric.rprev != [] and tau is not None:
                    ttcount += 1
                    self.assertAlmostEqual(tau, nvt[str(r)], places=3)
                chi1 = ric.get_angle("chi1")
                if chi1 is not None:
                    c1tcount += 1
                    self.assertAlmostEqual(chi1, nvc1[str(r)], places=3)
                psi = ric.get_angle("psi")
                if psi is not None:
                    psitcount += 1
                    self.assertAlmostEqual(psi, nvpsi[str(r)], places=3)
                leng = ric.get_length("CA:CB")
                if leng is not None:
                    ltcount += 1
                    self.assertAlmostEqual(leng, nvlen[str(r)], places=3)

        # same number of values checked as were modified, and at least one each
        self.assertEqual(tcount, ttcount)
        self.assertEqual(c1count, c1tcount)
        self.assertEqual(psicount, psitcount)
        self.assertEqual(lcount, ltcount)
        self.assertGreater(ttcount, 0)
        self.assertGreater(c1count, 0)
        self.assertGreater(psicount, 0)
        self.assertGreater(lcount, 0)

    def test_write_SCAD(self):
        """Check SCAD output plus MaxPeptideBond and Gly CB.

        SCAD tests: scaling, transform mtx, extra bond created (allBonds)
        """
        sf = StringIO()
        write_SCAD(
            self.cif_4CUP2,
            sf,
            10.0,
            pdbid="4cup",
            backboneOnly=True,
            includeCode=False,
        )
        sf.seek(0)
        next_one = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle:
                if "// (1856_S_CB, 1856_S_CA, 1856_S_C)" in aline:
                    m = re.search(r"\[\s+(\d+\.\d+)\,", aline)
                    if m:
                        # test correctly scaled atom bond length
                        self.assertAlmostEqual(float(m.group(1)), 15.30582, places=3)
                    else:
                        self.fail("scaled atom bond length not found")
                elif '[ 1, "1857M",' in aline:
                    # the transform to check is on the line after this marker
                    next_one = True
                elif next_one:
                    next_one = False
                    # test last residue transform looks roughly correct
                    # some differences due to sorting issues on different
                    # python versions
                    target = [-12.413, -3.303, 35.771, 1.0]
                    ms = re.findall(  # last column of each row
                        r"\s+(-?\d+\.\d+)\s+\]", aline
                    )
                    if ms:
                        for i in range(3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("transform not found")

        sf.seek(0)
        # gly_Cbeta and MaxPeptideBond are set on the classes themselves, so
        # put the previous values back when the test ends; otherwise they
        # leak into whatever runs later in the same process.
        self.addCleanup(setattr, IC_Residue, "gly_Cbeta", IC_Residue.gly_Cbeta)
        self.addCleanup(setattr, IC_Chain, "MaxPeptideBond", IC_Chain.MaxPeptideBond)
        IC_Residue.gly_Cbeta = True
        IC_Chain.MaxPeptideBond = 100.0
        chn = self.pdb_2XHE2[0]["A"]
        chn.atom_to_internal_coordinates()
        rt0 = chn.internal_coord.ordered_aa_ic_list[12]
        rt1 = chn.internal_coord.ordered_aa_ic_list[16]
        rt0.set_flexible()
        rt1.set_hbond()

        write_SCAD(
            self.pdb_2XHE2[0]["A"],
            sf,
            10.0,
            pdbid="2xhe",
            includeCode=False,
            start=10,
            fin=570,
        )
        sf.seek(0)
        allBondsPass = False
        maxPeptideBondPass = False
        glyCbetaFound = False
        # start/fin flags begin True and are cleared if an out-of-range
        # residue shows up in the output
        startPass = True
        finPass = True
        flexPass = False
        hbPass = False
        with as_handle(sf, mode="r") as handle:
            for aline in handle:
                # test extra bond created in TRP (allBonds is True)
                if '"Cres", 0, 0, 1, 0, StdBond, "W", 24, "CD2CE3CZ3"' in aline:
                    allBondsPass = True
                # test 509_K-561_E long bond created
                if "509_K" in aline and "561_E" in aline:
                    maxPeptideBondPass = True
                if "(21_G_CB, 21_G_CA, 21_G_C)" in aline:
                    glyCbetaFound = True
                    target = [15.33630, 110.17513, 15.13861]
                    ms = re.findall(r"\s+(-?\d+\.\d+)", aline)
                    if ms:
                        for i in range(3):
                            self.assertAlmostEqual(float(ms[i]), target[i], places=0)
                    else:
                        self.fail("Cbeta internal coords not found")
                if "8_K_CA" in aline:
                    startPass = False
                if "572_N_CA" in aline:
                    finPass = False
                if 'FemaleJoinBond, FemaleJoinBond, "N", 13, "NCAC"' in aline:
                    flexPass = True
                if 'HBond, "R", 16, "CACO"' in aline:
                    hbPass = True

        self.assertTrue(allBondsPass, msg="missing extra ring close bonds")
        self.assertTrue(glyCbetaFound, msg="gly CB not created")
        self.assertTrue(maxPeptideBondPass, msg="ignored maxPeptideBond setting")
        self.assertTrue(startPass, msg="writeSCAD wrote residue before start")
        self.assertTrue(finPass, msg="writeSCAD wrote residue past fin")
        self.assertTrue(flexPass, msg="writeSCAD residue 12 not flexible")
        self.assertTrue(hbPass, msg="writeSCAD residue 16 no hbond")

    def test_i2a_start_fin(self):
        """Test assemble start/fin, default NCaC coordinates, IC_duplicate."""
        chn = self.pdb_1LCD[2]["A"]
        cpy = IC_duplicate(chn)[2]["A"]  # generates internal coords as needed
        cpy.internal_to_atom_coordinates(start=31, fin=45)
        cdict = compare_residues(chn, cpy, quick=True)
        self.assertFalse(cdict["pass"])

        # transform source coordinates to put res 31 tau at origin like
        # fragment
        res = chn[31]
        psi = res.internal_coord.pick_angle("psi")
        cst = np.transpose(psi.cst)
        chn.internal_coord.atomArray[:] = chn.internal_coord.atomArray.dot(cst)
        cdict = compare_residues(chn, cpy, rtol=1e-03, atol=1e-05)
        self.assertEqual(cdict["residues"], 51)
        self.assertEqual(cdict["rMatchCount"], 77)
        self.assertEqual(cdict["aCount"], 497)
        self.assertEqual(cdict["disAtmCount"], 0)
        self.assertEqual(cdict["aCoordMatchCount"], 140)
        self.assertEqual(cdict["aFullIdMatchCount"], 140)
        self.assertEqual(len(cdict["chains"]), 1)
        self.assertEqual(cdict["rCount"], 77)
        self.assertFalse(cdict["pass"])

    def test_distplot_rebuild(self):
        """Build identical structure from distplot and chirality data."""
        # load input chain
        _chn1 = next(self.cif_4ZHL.get_chains())

        # create atomArray and compute distplot and dihedral signs array
        _chn1.atom_to_internal_coordinates()
        _c1ic = _chn1.internal_coord
        atmNameNdx = AtomKey.fields.atm
        CaSelect = [
            ndx
            for k, ndx in _c1ic.atomArrayIndex.items()
            if k.akl[atmNameNdx] == "CA"
        ]
        dplot0 = _chn1.internal_coord.distance_plot(filter=CaSelect)
        self.assertAlmostEqual(
            dplot0[3, 9],
            16.296,
            places=3,
            msg="fail generate distance plot with filter",
        )
        dplot1 = _chn1.internal_coord.distance_plot()
        dsigns = _chn1.internal_coord.dihedral_signs()

        # load second copy (same again) input chain
        _chn2 = next(self.cif_4ZHL2.get_chains())

        # create internal coord structures but do not compute di/hedra
        cic2 = _chn2.internal_coord = IC_Chain(_chn2)
        cic2.init_edra()

        # load relevant interatomic distances from chn1 distance plot
        cic2.distplot_to_dh_arrays(dplot1)

        # compute di/hedra angles from dh_arrays
        cic2.distance_to_internal_coordinates(dsigns)

        # clear chn2 atom coordinates
        cic2.atomArrayValid[:] = False
        # initialize values but this is redundant to Valid=False above
        cic2.atomArray = np.zeros((cic2.AAsiz, 4), dtype=np.float64)
        cic2.atomArray[:, 3] = 1.0

        # 4zhl has chain breaks so copy initial coords of each segment
        cic2.copy_initNCaCs(_chn1.internal_coord)

        # compute chn2 atom coords from di/hedra data
        cic2.internal_to_atom_coordinates()

        # generate distance plot from second chain, confirm minimal distance
        # from original
        dp2 = cic2.distance_plot()
        dpdiff = np.abs(dplot1 - dp2)
        self.assertLess(np.amax(dpdiff), 0.000001)

    def test_seq_as_PIC(self):
        """Read protein sequence, generate default PIC data, test various."""
        seqIter = SeqIO.parse("Fasta/f001", "fasta")
        _record = next(seqIter)  # only the first record is used
        pdb_structure = read_PIC_seq(_record)
        pdb_structure.internal_to_atom_coordinates()
        _chn = next(pdb_structure.get_chains())
        cic = _chn.internal_coord
        self.assertEqual(
            len(cic.atomArrayValid), 575, msg="wrong number atoms from Fasta/f001"
        )
        cic.update_dCoordSpace()
        rt = cic.ordered_aa_ic_list[10]  # pick a residue
        chi1 = rt.pick_angle("chi1")  # chi1 coord space puts CA at origin
        rt.applyMtx(chi1.cst)
        coord = rt.residue.child_dict["CA"].coord  # Biopython API Atom coords
        self.assertTrue(
            np.allclose(coord, [0.0, 0.0, 0.0]), msg="dCoordSpace transform error"
        )

        psi = rt.pick_angle("psi")
        self.assertEqual(
            repr(psi),
            "4-11_M_N:11_M_CA:11_M_C:12_A_N MNMCAMCAN 179.0 ('gi|3318709|pdb|1A91|', 0, 'A', (' ', 11, ' '))",
            msg="dihedron __repr__ error for M11 psi",
        )

        m = "Edron rich comparison failed"
        self.assertTrue(chi1 != psi, msg=m)
        self.assertFalse(chi1 == psi, msg=m)
        self.assertTrue(psi < chi1, msg=m)
        self.assertTrue(psi <= chi1, msg=m)
        self.assertTrue(chi1 > psi, msg=m)
        self.assertTrue(chi1 >= psi, msg=m)

        tau = rt.pick_angle("tau")
        self.assertEqual(
            repr(tau),
            "3-11_M_N:11_M_CA:11_M_C MNMCAMC 1.46091 110.97184 1.52499",
            msg="hedron __repr__ error for M11 tau",
        )

        # some specific AtomKey comparisons missed in other tests
        a0, a1 = tau.aks[0], tau.aks[1]
        m = "AtomKey rich comparison failed"
        self.assertTrue(a1 > a0, msg=m)
        self.assertTrue(a1 >= a0, msg=m)
        self.assertTrue(a0 <= a1, msg=m)

    def test_angle_fns(self):
        """Test angle_dif and angle_avg across +/-180 boundaries."""
        arr1 = np.array([179.0, 90.0, 88.0, 1.0])
        arr2 = np.array([-179.0, -90.0, -91.0, -1.0])
        # unittest assertions rather than bare ``assert`` so the checks are
        # still executed under ``python -O`` and failures are reported by
        # the test runner.
        self.assertTrue(
            (
                Dihedron.angle_dif(arr1, arr2) == np.array([2.0, 180.0, -179.0, -2.0])
            ).all()
        )
        self.assertEqual(Dihedron.angle_avg(np.array([179.0, -179.0])), 180.0)
        self.assertEqual(Dihedron.angle_avg(np.array([1.0, -1.0])), 0.0)
        self.assertEqual(Dihedron.angle_avg(np.array([90.0, -90.0])), 0.0)
        self.assertEqual(Dihedron.angle_avg(np.array([91.0, -91.0])), 180.0)
# Benchmark the parsing of a MMTF file given as an argument.
#
# Usage: python <this script> MMTF_FILE
# Prints the elapsed parse time in seconds.

import sys
import time

from Bio.PDB.mmtf import MMTFParser

if len(sys.argv) < 2:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit("usage: {} MMTF_FILE".format(sys.argv[0]))

mmtf_filepath = sys.argv[1]

# perf_counter() is the clock intended for measuring short durations: it is
# monotonic and uses the highest available resolution, whereas time.time()
# can jump if the system clock is adjusted during the run.
start = time.perf_counter()
MMTFParser.get_structure(mmtf_filepath)
end = time.perf_counter()

print(end - start)