def _fix(self, atoms):
    """Repair *atoms* with PDBFixer and cache the result on this instance.

    The atoms are serialized to an in-memory PDB stream and handed to
    PDBFixer. ``missingResidues`` is set to an empty dict, then nonstandard
    residues are replaced, ``removeHeterogens(False)`` is applied, and
    missing atoms and hydrogens (using ``self._ph``) are added.

    The fixed model is written back to a PDB stream with ``keepIds=True``,
    parsed into ``self._atoms`` (which gets the original title), and the
    fixer's topology and positions are stored in ``self._topology`` and
    ``self._positions``.

    Raises
    ------
    ImportError
        If PDBFixer or OpenMM cannot be imported.
    """
    try:
        from pdbfixer import PDBFixer
        from openmm.app import PDBFile
    except ImportError:
        raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

    # Round-trip the input atoms through a PDB stream so PDBFixer can read them.
    pdb_in = createStringIO()
    original_title = atoms.getTitle()
    writePDBStream(pdb_in, atoms)
    pdb_in.seek(0)
    fixer = PDBFixer(pdbfile=pdb_in)
    pdb_in.close()

    # An empty mapping means no missing residues will be added.
    fixer.missingResidues = {}
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(self._ph)

    # Read the fixed model back through a second PDB stream.
    pdb_out = createStringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out, keepIds=True)
    pdb_out.seek(0)
    self._atoms = parsePDBStream(pdb_out)
    self._atoms.setTitle(original_title)
    pdb_out.close()

    self._topology = fixer.topology
    self._positions = fixer.positions
def to_pdb(item,
           atom_indices='all',
           structure_indices='all',
           topology_item=None,
           trajectory_item=None,
           coordinates_item=None,
           box_item=None,
           output_filename=None):
    """Serialize *item* to PDB text, optionally writing it to a file.

    The topology is taken from ``item.topology`` and the positions from the
    first entry returned by ``get_coordinates_from_system(item)``. The
    OpenMM remark in the output is extended with the MolSysMT version.

    Parameters
    ----------
    item : object
        Object exposing a ``topology`` attribute accepted by
        ``PDBFile.writeFile``.
    atom_indices, structure_indices, topology_item, trajectory_item,
    coordinates_item, box_item :
        Accepted for interface compatibility; not used by this function.
    output_filename : str
        If equal to ``'.pdb'`` the PDB text is returned instead of written.
        Otherwise it is the path of the file to write.

    Returns
    -------
    str or None
        The PDB text when ``output_filename == '.pdb'``, otherwise ``None``.
    """
    from io import StringIO
    from openmm.app import PDBFile
    from molsysmt import __version__ as msm_version
    from openmm import Platform  # the openmm version is taken from this module (see: openmm/app/pdbfile.py)

    positions = get_coordinates_from_system(item)[0]

    with StringIO() as tmp_io:
        PDBFile.writeFile(item.topology, positions, tmp_io, keepIds=True)
        # The text must be captured before the buffer is closed; the original
        # skipped this step and failed on the first use of ``filedata``.
        filedata = tmp_io.getvalue()

    openmm_version = Platform.getOpenMMVersion()
    filedata = filedata.replace(
        'WITH OPENMM ' + openmm_version,
        'WITH OPENMM ' + openmm_version + ' BY MOLSYSMT ' + msm_version)

    if output_filename == '.pdb':
        return filedata

    with open(output_filename, 'w') as file:
        file.write(filedata)
def to_file_pdb(item,
                molecular_system=None,
                atom_indices='all',
                structure_indices='all',
                output_filename=None):
    """Write *item* to a PDB file and return the file name.

    ``item.topology`` and ``item.positions`` are written with OpenMM's
    ``PDBFile.writeFile`` (``keepIds=True``). The OpenMM remark in the text
    is extended with the MolSysMT version before the file is saved.

    Parameters
    ----------
    item : object
        Object exposing ``topology`` and ``positions``.
    molecular_system : object, optional
        When given, its ``combine_with_items`` method is called with the
        output file name and the two index selections.
    atom_indices, structure_indices :
        Forwarded to ``molecular_system.combine_with_items``.
    output_filename : str
        Path of the PDB file to write.

    Returns
    -------
    tuple
        ``(output_filename, combined_system)``; the second entry is ``None``
        when *molecular_system* is ``None``.
    """
    from io import StringIO
    from openmm.app import PDBFile
    from molsysmt import __version__ as msm_version
    from openmm import Platform  # the openmm version is taken from this module (see: openmm/app/pdbfile.py)

    buffer = StringIO()
    PDBFile.writeFile(item.topology, item.positions, buffer, keepIds=True)
    pdb_text = buffer.getvalue()

    # Tag the OpenMM remark with the MolSysMT version.
    openmm_tag = 'WITH OPENMM ' + Platform.getOpenMMVersion()
    pdb_text = pdb_text.replace(openmm_tag,
                                openmm_tag + ' BY MOLSYSMT ' + msm_version)
    buffer.close()

    with open(output_filename, 'w') as file:
        file.write(pdb_text)

    if molecular_system is None:
        return output_filename, None

    combined_system = molecular_system.combine_with_items(
        output_filename,
        atom_indices=atom_indices,
        structure_indices=structure_indices)
    return output_filename, combined_system
def addHydrogens():
    """Apply the hydrogen/solvent/membrane options from the form and save.

    Reads ``request.form`` and, depending on which keys are present, asks
    the module-level ``fixer`` to add missing hydrogens, a solvent box, or
    a membrane. The resulting model is written to a temporary file as PDB
    or PDBx/mmCIF (chosen by ``session['pdbType']``), the previous upload
    is kept under ``uploadedFiles['originalFile']``, and the new file is
    stored under ``uploadedFiles['file']`` with ``-processed`` inserted
    before the file extension.

    Returns
    -------
    The value returned by ``showSimulationOptions()``.
    """
    form = request.form

    if 'addHydrogens' in form:
        pH = float(form.get('ph', '7'))
        fixer.addMissingHydrogens(pH)

    if 'addWater' in form:
        padding, boxSize, boxShape = None, None, None
        if form['boxType'] == 'geometry':
            padding = float(form['geomPadding']) * unit.nanometer
            boxShape = form['geometryDropdown']
        else:
            boxSize = (float(form['boxx']),
                       float(form['boxy']),
                       float(form['boxz'])) * unit.nanometer
        ionicStrength = float(form['ionicstrength']) * unit.molar
        positiveIon = form['positiveion'] + '+'
        negativeIon = form['negativeion'] + '-'
        fixer.addSolvent(boxSize=boxSize,
                         padding=padding,
                         boxShape=boxShape,
                         positiveIon=positiveIon,
                         negativeIon=negativeIon,
                         ionicStrength=ionicStrength)
    elif 'addMembrane' in form:
        lipidType = form['lipidType']
        padding = float(form['membranePadding']) * unit.nanometer
        ionicStrength = float(form['ionicstrength']) * unit.molar
        positiveIon = form['positiveion'] + '+'
        negativeIon = form['negativeion'] + '-'
        fixer.addMembrane(lipidType=lipidType,
                          minimumPadding=padding,
                          positiveIon=positiveIon,
                          negativeIon=negativeIon,
                          ionicStrength=ionicStrength)

    # Save the new PDB file.
    uploadedFiles['originalFile'] = uploadedFiles['file']
    pdb = StringIO()
    if session['pdbType'] == 'pdb':
        try:
            PDBFile.writeFile(fixer.topology, fixer.positions, pdb, True)
        except Exception:
            # This can happen if the ids are too large to fit in the allowed space.
            # Start from a fresh buffer so no partial output is kept, and
            # retry without preserving ids. Only ordinary errors are caught,
            # so KeyboardInterrupt/SystemExit still propagate.
            pdb = StringIO()
            PDBFile.writeFile(fixer.topology, fixer.positions, pdb, False)
    else:
        PDBxFile.writeFile(fixer.topology, fixer.positions, pdb, True)

    temp = tempfile.TemporaryFile()
    temp.write(pdb.getvalue().encode('utf-8'))

    # Insert '-processed' before the extension (if any) of the uploaded name.
    name = uploadedFiles['file'][0][1]
    dotIndex = name.rfind('.')
    if dotIndex == -1:
        prefix = name
        suffix = ''
    else:
        prefix = name[:dotIndex]
        suffix = name[dotIndex:]
    uploadedFiles['file'] = [(temp, prefix + '-processed' + suffix)]
    return showSimulationOptions()
def writePDBFixed(self):
    """Write the fixed (initial) structure to a pdb file.

    The file name is ``self.getTitle()`` with its last eight characters
    replaced by ``'fixed.pdb'``. The stored ``self._topology`` and
    ``self._positions`` are written with ``keepIds=True``.

    Raises
    ------
    ImportError
        If OpenMM cannot be imported.
    """
    try:
        from openmm.app import PDBFile
    except ImportError:
        raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

    # Use a context manager so the file is flushed and closed deterministically;
    # the original passed an anonymous handle that was never closed.
    with open(self.getTitle()[:-8] + 'fixed.pdb', 'w') as handle:
        PDBFile.writeFile(self._topology, self._positions, handle,
                          keepIds=True)
def to_openmm_PDBFile(item, atom_indices='all', coordinates=None, check=True):
    """Convert *item* into an ``openmm.app.PDBFile`` object.

    The item is first rendered to PDB text with ``to_string_pdb_text`` and
    that text is then parsed by ``PDBFile`` from an in-memory buffer.

    Parameters
    ----------
    item : object
        The source item (expected form: ``openmm.Topology``).
    atom_indices : 'all' or sequence
        Atom selection; digested with ``digest_atom_indices`` when *check*.
    coordinates : object, optional
        Coordinates; digested with ``digest_coordinates`` when *check*.
    check : bool
        When true, validate/digest the arguments before converting.

    Returns
    -------
    openmm.app.PDBFile

    Raises
    ------
    WrongFormError, WrongAtomIndicesError, WrongCoordinatesError
        When the corresponding validation step raises.
    """
    if check:
        # NOTE(review): only an exception from is_openmm_Topology is treated
        # as a failure; its return value is ignored — confirm that it raises
        # rather than returning False for a wrong form.
        try:
            is_openmm_Topology(item)
        except Exception as err:
            raise WrongFormError('openmm.Topology') from err

        try:
            atom_indices = digest_atom_indices(atom_indices)
        except Exception as err:
            raise WrongAtomIndicesError() from err

        try:
            coordinates = digest_coordinates(coordinates)
        except Exception as err:
            raise WrongCoordinatesError() from err

    from . import to_string_pdb_text
    from io import StringIO
    from openmm.app import PDBFile

    string_pdb_text = to_string_pdb_text(item,
                                         atom_indices=atom_indices,
                                         coordinates=coordinates,
                                         check=False)

    # Seed the buffer with the text (StringIO.read() takes a size, not data)
    # and let the PDBFile constructor parse it from the start of the buffer.
    with StringIO(string_pdb_text) as tmp_io:
        tmp_item = PDBFile(tmp_io)

    return tmp_item
def report(self, simulation, state):
    """Generate a report.

    On the first call (``self._nextModel == 0``) the PDB header is written
    and the simulation's topology is remembered in ``self._topology``. Every
    call then writes one model with the positions from *state* and flushes
    the output if it provides a callable ``flush``.

    Parameters
    ----------
    simulation : Simulation
        The Simulation to generate a report for
    state : State
        The current state of the simulation
    """
    topology = simulation.topology
    out = self._out

    if self._nextModel == 0:
        PDBFile.writeHeader(topology, out)
        self._topology = topology
        self._nextModel += 1

    PDBFile.writeModel(topology, state.getPositions(), out, self._nextModel)
    self._nextModel += 1

    # Not every output object is guaranteed to offer flush().
    flush = getattr(out, 'flush', None)
    if callable(flush):
        flush()
def to_string_pdb_text(item,
                       atom_indices='all',
                       coordinates=None,
                       box=None,
                       check=True):
    """Render an ``openmm.Topology`` plus coordinates as PDB text.

    Only the first structure in *coordinates* is written; a warning is
    issued when more than one is supplied. The OpenMM remark in the output
    is extended with the MolSysMT version.

    Parameters
    ----------
    item : openmm.Topology
        Topology to serialize.
    atom_indices : 'all' or sequence
        Digested when *check* is true; not otherwise used here.
    coordinates : array-like
        Indexed as ``coordinates[0]`` and converted to OpenMM units in nm.
    box : object, optional
        Digested when *check* is true; not otherwise used here.
    check : bool
        When true, run the digest helpers on the arguments first.

    Returns
    -------
    str
        The PDB text.
    """
    if check:
        digest_item(item, 'openmm.Topology')
        atom_indices = digest_atom_indices(atom_indices)
        coordinates = digest_coordinates(coordinates)
        box = digest_box(box)

    from io import StringIO
    from openmm.app import PDBFile
    from molsysmt import __version__ as msm_version
    from openmm import Platform  # the openmm version is taken from this module (see: openmm/app/pdbfile.py)
    from molsysmt import puw

    if coordinates.shape[0] > 1:
        import warnings
        warnings.warn(
            "Openmm.Topology/to_string_pdb_text got more than a single structure. Only the 0-th is taken."
        )

    buffer = StringIO()
    first_structure = puw.convert(coordinates[0], 'nm', to_form='openmm.unit')
    PDBFile.writeFile(item, first_structure, buffer, keepIds=True)
    pdb_text = buffer.getvalue()

    # Tag the OpenMM remark with the MolSysMT version.
    openmm_tag = 'WITH OPENMM ' + Platform.getOpenMMVersion()
    pdb_text = pdb_text.replace(openmm_tag,
                                openmm_tag + ' BY MOLSYSMT ' + msm_version)
    buffer.close()

    return pdb_text
def test_loaded_topology(self):
    """ Test load_rosetta against OpenMM topology"""
    init()
    loaded = load_rosetta(pose_from_sequence(3 * 'A'))
    reference = PDBFile(get_fn('ala_ala_ala.pdb'))

    # The two topologies must agree on the number of atoms, bonds and residues.
    for accessor in ('atoms', 'bonds', 'residues'):
        loaded_count = len(list(getattr(loaded.topology, accessor)()))
        reference_count = len(list(getattr(reference.topology, accessor)()))
        self.assertEqual(loaded_count, reference_count)
def __init__(self, file):
    """Load a prmtop file.

    Builds ``self.topology`` (a single chain holding every residue and
    atom), records each atom's element in ``self.elements``, adds the bonds
    reported by the loader, and sets periodic box vectors when the prmtop
    reports a box. The loader itself is kept in ``self._prmtop``.
    """
    ## The Topology read from the prmtop file
    self.topology = top = Topology()
    self.elements = []

    # Load the prmtop file
    prmtop = amber_file_parser.PrmtopLoader(file)
    self._prmtop = prmtop

    # Add atoms to the topology
    PDBFile._loadNameReplacementTables()
    residue_aliases = PDBFile._residueNameReplacements
    atom_aliases_by_residue = PDBFile._atomNameReplacements

    # Atom-name prefixes checked, in order, before falling back to the first
    # letter of the atom name.
    prefix_guesses = (
        ('CL', elem.chlorine),
        ('NA', elem.sodium),
        ('MG', elem.magnesium),
        ('ZN', elem.zinc),
    )

    chain = top.addChain()
    previous_res_number = None
    for atom_index in range(prmtop.getNumAtoms()):
        res_number = prmtop.getResidueNumber(atom_index)
        if res_number != previous_res_number:
            # A change in residue number starts a new residue.
            previous_res_number = res_number
            res_label = prmtop.getResidueLabel(iAtom=atom_index).strip()
            res_label = residue_aliases.get(res_label, res_label)
            residue = top.addResidue(res_label, chain)
            atom_aliases = atom_aliases_by_residue.get(res_label, {})

        atom_name = prmtop.getAtomName(atom_index).strip()
        atom_name = atom_aliases.get(atom_name, atom_name)

        if prmtop.has_atomic_number:
            # Get the element from the prmtop file if available
            try:
                atomic_number = int(
                    prmtop._raw_data['ATOMIC_NUMBER'][atom_index])
                element = elem.Element.getByAtomicNumber(atomic_number)
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name.
            upper_name = atom_name.upper()
            for prefix, guess in prefix_guesses:
                if upper_name.startswith(prefix):
                    element = guess
                    break
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = None

        top.addAtom(atom_name, element, residue)
        self.elements.append(element)

    # Add bonds to the topology
    atoms = list(top.atoms())
    for get_bonds in (prmtop.getBondsWithH, prmtop.getBondsNoH):
        for bond in get_bonds():
            top.addBond(atoms[bond[0]], atoms[bond[1]])

    # Set the periodic box size.
    if prmtop.getIfBox():
        box = prmtop.getBoxBetaAndDimensions()
        # Entries 1..3 followed by entry 0 repeated three times.
        box_arguments = box[1:4] + box[0:1] * 3
        top.setPeriodicBoxVectors(computePeriodicBoxVectors(*box_arguments))
def __init__(self, file):
    """Load a PDBx/mmCIF file.

    The atom positions and Topology can be retrieved by calling
    getPositions() and getTopology().

    Only the first data block of the file is used. Atoms and residues are
    defined by the rows of the first model encountered; rows belonging to
    later models only contribute coordinates and must list the atoms in the
    same order. Rows that are alternate locations of an atom already seen
    are skipped.

    Parameters
    ----------
    file : string
        the name of the file to load.  Alternatively you can pass an open
        file object. A file opened here from a name is closed after reading.

    Raises
    ------
    ValueError
        If a later model references an atom that is not in the first model,
        or lists its atoms in a different order.
    """
    top = Topology()
    ## The Topology read from the PDBx/mmCIF file
    self.topology = top
    self._positions = []
    PDBFile._loadNameReplacementTables()

    # Load the file.

    inputFile = file
    ownHandle = False
    if isinstance(file, str):
        inputFile = open(file)
        ownHandle = True
    reader = PdbxReader(inputFile)
    data = []
    reader.read(data)
    # Only close the handle if it was opened here.
    # NOTE(review): if reader.read() raises, a handle opened above is not
    # closed by this code.
    if ownHandle:
        inputFile.close()
    block = data[0]

    # Build the topology.

    # Column lookup: for atom name, residue name, residue number and chain
    # ID the 'auth_*' column is used, falling back to the 'label_*' column
    # when getAttributeIndex() returns -1.
    atomData = block.getObj('atom_site')
    atomNameCol = atomData.getAttributeIndex('auth_atom_id')
    if atomNameCol == -1:
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
    atomIdCol = atomData.getAttributeIndex('id')
    resNameCol = atomData.getAttributeIndex('auth_comp_id')
    if resNameCol == -1:
        resNameCol = atomData.getAttributeIndex('label_comp_id')
    resNumCol = atomData.getAttributeIndex('auth_seq_id')
    if resNumCol == -1:
        resNumCol = atomData.getAttributeIndex('label_seq_id')
    resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
    chainIdCol = atomData.getAttributeIndex('auth_asym_id')
    if chainIdCol == -1:
        chainIdCol = atomData.getAttributeIndex('label_asym_id')
        altChainIdCol = -1
    else:
        altChainIdCol = atomData.getAttributeIndex('label_asym_id')
    if altChainIdCol != -1:
        # Figure out which column is best to use for chain IDs.
        # The column with more distinct values becomes the primary one.

        idSet = set(row[chainIdCol] for row in atomData.getRowList())
        altIdSet = set(row[altChainIdCol] for row in atomData.getRowList())
        if len(altIdSet) > len(idSet):
            chainIdCol, altChainIdCol = (altChainIdCol, chainIdCol)
    elementCol = atomData.getAttributeIndex('type_symbol')
    altIdCol = atomData.getAttributeIndex('label_alt_id')
    modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
    xCol = atomData.getAttributeIndex('Cartn_x')
    yCol = atomData.getAttributeIndex('Cartn_y')
    zCol = atomData.getAttributeIndex('Cartn_z')
    # State carried from row to row to detect chain and residue boundaries.
    lastChainId = None
    lastAltChainId = None
    lastResId = None
    lastInsertionCode = ''
    atomTable = {}  # (residue number, chain ID, atom name) -> Atom
    atomsInResidue = set()  # atom names already added to the current residue
    models = []  # model identifiers in order of first appearance
    for row in atomData.getRowList():
        atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol]))
        # Files without a model column are treated as a single model '1'.
        model = ('1' if modelCol == -1 else row[modelCol])
        if model not in models:
            models.append(model)
            self._positions.append([])
        modelIndex = models.index(model)
        if row[altIdCol] != '.' and atomKey in atomTable and len(
                self._positions[modelIndex]) > atomTable[atomKey].index:
            # This row is an alternate position for an existing atom, so ignore it.
            # (The atom already has a position stored for this model.)

            continue
        if modelIndex == 0:
            # This row defines a new atom.

            if resInsertionCol == -1:
                insertionCode = ''
            else:
                insertionCode = row[resInsertionCol]
            # '.' and '?' are treated as "no insertion code".
            if insertionCode in ('.', '?'):
                insertionCode = ''
            if lastChainId != row[chainIdCol] or (
                    altChainIdCol != -1
                    and lastAltChainId != row[altChainIdCol]):
                # The start of a new chain.
                chain = top.addChain(row[chainIdCol])
                lastChainId = row[chainIdCol]
                # Force a new residue for the first atom of the chain.
                lastResId = None
                if altChainIdCol != -1:
                    lastAltChainId = row[altChainIdCol]
            if lastResId != row[resNumCol] or lastChainId != row[
                    chainIdCol] or lastInsertionCode != insertionCode or (
                        lastResId == '.'
                        and row[atomNameCol] in atomsInResidue):
                # The start of a new residue.
                # The last clause handles rows whose residue number is '.':
                # a repeated atom name is taken as the start of a new residue.
                resId = (None if resNumCol == -1 else row[resNumCol])
                resIC = insertionCode
                resName = row[resNameCol]
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                res = top.addResidue(resName, chain, resId, resIC)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[
                        resName]
                else:
                    atomReplacements = {}
                lastResId = row[resNumCol]
                lastInsertionCode = insertionCode
                atomsInResidue.clear()
            # Unknown element symbols leave the element as None.
            element = None
            try:
                element = elem.get_by_symbol(row[elementCol])
            except KeyError:
                pass
            atomName = row[atomNameCol]
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]
            atom = top.addAtom(atomName, element, res, row[atomIdCol])
            # atomTable is keyed by the name as it appears in the file,
            # while atomsInResidue holds the (possibly replaced) name.
            atomTable[atomKey] = atom
            atomsInResidue.add(atomName)
        else:
            # This row defines coordinates for an existing atom in one of the later models.

            try:
                atom = atomTable[atomKey]
            except KeyError:
                raise ValueError(
                    'Unknown atom %s in residue %s %s for model %s' %
                    (row[atomNameCol], row[resNameCol], row[resNumCol],
                     model))
            # The next position appended for this model must belong to the
            # atom with the matching index.
            if atom.index != len(self._positions[modelIndex]):
                raise ValueError(
                    'Atom %s for model %s does not match the order of atoms for model %s'
                    % (row[atomIdCol], model, models[0]))
        # Coordinates are scaled by 0.1 here and tagged as nanometers below
        # (presumably an Angstrom-to-nanometer conversion — confirm).
        self._positions[modelIndex].append(
            Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
    for i in range(len(self._positions)):
        self._positions[i] = self._positions[i] * nanometers
    ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
    self.positions = self._positions[0]
    self.topology.createStandardBonds()
    self._numpyPositions = None

    # Record unit cell information, if present.

    cell = block.getObj('cell')
    if cell is not None and cell.getRowCount() > 0:
        row = cell.getRow(0)
        # Lengths get the same 0.1 scaling as the coordinates; angles are
        # converted from degrees to radians.
        (a, b, c) = [
            float(row[cell.getAttributeIndex(attribute)]) * 0.1
            for attribute in ('length_a', 'length_b', 'length_c')
        ]
        (alpha, beta, gamma) = [
            float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
            for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
        ]
        self.topology.setPeriodicBoxVectors(
            computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

    # Add bonds based on struct_conn records.

    # NOTE(review): the keys built below use the 'ptnr*_label_*' columns,
    # while atomTable keys come from the atom_site columns selected above
    # (preferably 'auth_*'). When the two numbering schemes differ, the
    # lookups may not match and such bonds would be skipped — confirm.
    connectData = block.getObj('struct_conn')
    if connectData is not None:
        res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
        res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
        atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
        atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
        asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
        asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
        typeCol = connectData.getAttributeIndex('conn_type_id')
        connectBonds = []
        for row in connectData.getRowList():
            # Only the first six characters of the connection type are compared.
            type = row[typeCol][:6]
            if type in ('covale', 'disulf', 'modres'):
                key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                if key1 in atomTable and key2 in atomTable:
                    connectBonds.append((atomTable[key1], atomTable[key2]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            # Both orientations of each atom pair are checked.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
def cleanProtein(structure,
                 mutator=None,
                 regexes=None,
                 hydrogens=True,
                 run_pdb2pqr=True,
                 quiet=False,
                 remove_numerical_chain_id=False,
                 method="geobind",
                 **kwargs):
    """ Perform any operations needed to modify the structure or sequence of a protein chain.

    Parameters
    ----------
    structure : object
        Structure to clean; must provide ``name``, ``get_chains()`` and,
        for ``method="pdbfixer"``, ``save(path)``.
    mutator : ResidueMutator, optional
        Used by ``method="geobind"``; built from ``data`` when ``None``.
    regexes : mapping, optional
        Used by ``method="geobind"``; defaults to ``data.regexes``.
    hydrogens : bool
        When false, hydrogens are stripped from the result.
    run_pdb2pqr : bool
        When true, ``runPDB2PQR`` is run on the cleaned structure.
    quiet : bool
        Suppress the informational log messages of the geobind method.
    remove_numerical_chain_id : bool
        When true, numeric chain IDs are replaced by unused letters.
    method : {"geobind", "pdbfixer"}
        Cleaning strategy. Any other value skips the cleaning step.
    **kwargs
        Forwarded to ``runPDB2PQR``.

    Returns
    -------
    tuple
        ``(structure,)`` followed by the PQR file when *run_pdb2pqr* is true
        and by the chain-ID map when *remove_numerical_chain_id* is true.

    Raises
    ------
    ModuleNotFoundError
        If ``method="pdbfixer"`` and pdbfixer/openmm are not installed.
    """
    prefix = structure.name  # used for file names

    if remove_numerical_chain_id:
        # APBS and TABI-PB does not process numerical chain IDs correctly. This is a work-around
        available_ids = list(
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

        # find current chain ids
        taken_ids = set()
        for chain in structure.get_chains():
            cid = chain.get_id()
            taken_ids.add(cid)

        # iterate over chains and update
        chain_map = {}
        for chain in structure.get_chains():
            cid = chain.get_id()
            if cid.isnumeric():
                # we want to replace this chain id
                # NOTE(review): if the pool of letters runs out, new_id is
                # left as the last popped (possibly taken) id — confirm
                # whether that case needs explicit handling.
                while len(available_ids) > 0:
                    new_id = available_ids.pop()
                    if new_id in taken_ids:
                        continue
                    else:
                        break
                chain_map[cid] = new_id
                chain.id = new_id
            else:
                chain_map[cid] = cid

    if method == "geobind":
        # set up needed objects
        if regexes is None:
            regexes = data.regexes
        if mutator is None:
            mutator = ResidueMutator(data.tripeptides, data.chem_components)

        # remove non-standard residues
        for chain in structure.get_chains():
            replace = []
            remove = []
            for residue in chain:
                resn = residue.get_resname().strip()
                resid = residue.get_id()
                if resn in data.chem_components and heavyAtomCount(residue) / (
                        data.chem_components[resn]['heavy_atom_count'] -
                        1) < 0.6:
                    # too many missing atoms - replace residue
                    replace.append(resid)
                elif mutator.standard(resn):
                    if resid[0] == ' ':
                        continue
                    else:
                        remove.append(
                            (resid, "removed HETATM standard residue: %s"))
                elif resn == 'HOH' or resn == 'WAT':
                    remove.append((resid, None))
                elif regexes["SOLVENT_COMPONENTS"].search(resn):
                    continue
                elif mutator.modified(resn):
                    replace.append(resid)
                else:
                    remove.append((resid, "removed unrecognized residue: %s"))

            # Removal and replacement happen after the scan so the chain is
            # not modified while it is being iterated.
            for rid, reason in remove:
                if reason is not None and not quiet:
                    logging.info(reason, chain[rid].get_resname())
                chain.detach_child(rid)

            for rid in replace:
                replacement = mutator.mutate(chain[rid])
                if replacement:
                    if not quiet:
                        logging.info("replacing residue %s with %s",
                                     chain[rid].get_resname(),
                                     replacement.get_resname())
                    replacement.id = rid
                    idx = chain.child_list.index(chain[rid])
                    chain.child_list[idx] = replacement
                else:
                    if not quiet:
                        logging.info(
                            "could not perform replacement on %s, removing",
                            chain[rid].get_resname())
                    chain.detach_child(rid)
    elif method == "pdbfixer":
        try:
            from pdbfixer import PDBFixer
            from openmm.app import PDBFile
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "The dependencies 'pdbfixer' and 'openmm' are required with option 'method=\"pdbfixer\"'"
            )

        # create a temp file
        tmpFile1 = tempFileName(prefix, 'pdb')
        tmpFile2 = None
        try:
            structure.save(tmpFile1)

            # run pdbfixer
            fixer = PDBFixer(filename=tmpFile1)
            fixer.findMissingResidues()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.removeHeterogens(False)
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()

            # The second name is requested only after the first file exists,
            # as in the original ordering.
            tmpFile2 = tempFileName(prefix, 'pdb')
            # Close (and therefore flush) the output before reading it back;
            # the original left the handle open while re-loading the file.
            with open(tmpFile2, 'w') as handle:
                PDBFile.writeFile(fixer.topology,
                                  fixer.positions,
                                  handle,
                                  keepIds=True)

            # load new fixed structure
            structure = StructureData(tmpFile2, name=prefix)
        finally:
            # clean up, also when one of the steps above failed
            for tmpPath in (tmpFile1, tmpFile2):
                if tmpPath is not None and os.path.exists(tmpPath):
                    os.remove(tmpPath)

    # run PDB2PQR if requested
    if run_pdb2pqr:
        structure, pqrFile = runPDB2PQR(structure, **kwargs)

    # remove hydrogens if requested
    if not hydrogens:
        stripHydrogens(structure)

    # decide what to return
    rargs = [structure]
    if run_pdb2pqr:
        rargs.append(pqrFile)
    if remove_numerical_chain_id:
        rargs.append(chain_map)

    return tuple(rargs)
def getCurrentStructure():
    """Return the current ``fixer`` model as PDB-formatted text.

    The module-level ``fixer``'s topology and positions are written to an
    in-memory buffer with ``PDBFile.writeFile`` and the buffer's contents
    are returned as a string.
    """
    buffer = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, buffer)
    pdb_text = buffer.getvalue()
    return pdb_text
def reparm(ligands, base):
    """Reparameterize ligands with SMIRNOFF and rewrite the Amber files.

    Loads ``base + '.prmtop'`` / ``base + '.inpcrd'`` with ParmEd, splits the
    system into its unique pieces, and for every ligand tries to find the
    piece with the same atom count, bond count and element sequence. A
    matching piece is re-parameterized with the
    ``openff_unconstrained-2.0.0.offxml`` force field; pieces that were not
    re-parameterized are copied over unchanged. The combined system is saved
    back to ``base + '.prmtop'`` and ``base + '.inpcrd'`` (overwriting them).

    Parameters
    ----------
    ligands : iterable
        Each entry is indexable; ``ligand[0]`` is a ligand file path that is
        converted to SDF with ``obabel`` and also read with ``PDBFile``.
        ``ligand[1]`` is carried along but not used here.
    base : str
        Path prefix of the Amber topology/coordinate files.
    """
    print(
        '**Running reparameterization of ligand(s) using open force fields\'s SMIRNOFF with openff 2.0.0**'
    )
    # Load already parm'd system
    in_prmtop = base + '.prmtop'
    in_crd = base + '.inpcrd'

    # Create parmed structure
    orig_structure = parmed.amber.AmberParm(in_prmtop, in_crd)

    # Split orig_structure into unique structure instances e.g. protein, water, ligand, etc.
    pieces = orig_structure.split()
    for piece in pieces:
        # TODO: Figure out how to know which piece is which
        print(f"There are {len(piece[1])} instance(s) of {piece[0]}")

    # Generate an openff topology for the ligand
    # Openff Molecule does not support mol2 so conversion is needed
    ligs_w_sdf = []
    for ligand in ligands:
        sdf_path = util.get_base(ligand[0]) + '.sdf'
        obabel[ligand[0], '-O', sdf_path]()
        ligs_w_sdf.append((ligand[0], ligand[1], sdf_path))

    # Keep track of ligands that were successfully reparmed so we know to skip them when putting the pieces back together
    reparmed_pieces = []
    complex_structure = parmed.Structure()
    force_field = ForceField("openff_unconstrained-2.0.0.offxml")

    for lig in ligs_w_sdf:
        # Set up openff topology
        ligand_off_molecule = Molecule(lig[2])
        ligand_pdbfile = PDBFile(lig[0])
        ligand_off_topology = Topology.from_openmm(
            ligand_pdbfile.topology,
            unique_molecules=[ligand_off_molecule],
        )

        # Parameterizing the ligand
        # Find ligand "piece", reparm, add to the new structure
        for piece in pieces:
            # TODO: Figure out how to know which piece is which
            candidate = piece[0]
            if ligand_off_molecule.n_atoms != len(candidate.atoms):
                continue
            if ligand_off_molecule.n_bonds != len(candidate.bonds):
                continue
            if ([atom.atomic_number for atom in ligand_off_molecule.atoms]
                    != [atom.element for atom in candidate.atoms]):
                continue

            print('Found ligand piece', piece)
            try:
                # Since the method of matching the piece to ligand is imperfect, ligands that are isomers could mess things up.
                # So try any piece that matches and see if we get an error
                print('Reparameterizing ligand using SMIRNOFF')
                ligand_system = force_field.create_openmm_system(
                    ligand_off_topology)
                new_ligand_structure = parmed.openmm.load_topology(
                    ligand_off_topology.to_openmm(),
                    ligand_system,
                    xyz=candidate.positions,
                )

                # A quick check to make sure things were not messed up during param
                if check_discrepencies(new_ligand_structure, piece):
                    # Add the newly parameterized ligand the complex structure
                    new_ligand_structure *= len(piece[1])
                    complex_structure += parmed.amber.AmberParm.from_structure(
                        new_ligand_structure)
                    # Record the piece only once it has really been added;
                    # otherwise a failure above would drop it from the
                    # output altogether.
                    reparmed_pieces.append(piece)
                    break
            except Exception as err:
                # Deliberate best effort: a failed attempt just means this
                # piece was not the right match, so report it and move on.
                print('Reparameterization attempt failed, trying next piece:',
                      err)

    # Stick all the pieces back together
    for piece in pieces:
        if piece not in reparmed_pieces:
            curr_structure = parmed.Structure()
            curr_structure += piece[0]
            curr_structure *= len(piece[1])
            complex_structure += parmed.amber.AmberParm.from_structure(
                curr_structure)

    # Copy over the original coordinates and box vectors
    complex_structure.coordinates = orig_structure.coordinates
    complex_structure.box_vectors = orig_structure.box_vectors

    # Save the newly parameterized system
    complex_structure.save(base + ".prmtop", overwrite=True)
    complex_structure.save(base + ".inpcrd", overwrite=True)
def __del__(self):
    """Finish the output when the object is destroyed.

    If a topology was recorded (``self._topology`` is not ``None``), the PDB
    footer is written first. The output ``self._out`` is closed in all
    cases.
    """
    topology = self._topology
    out = self._out
    if topology is not None:
        PDBFile.writeFooter(topology, out)
    out.close()