def iupac_to_oemol(iupac_name):
    """Build a normalized OpenEye molecule from an IUPAC name.

    Parameters
    ----------
    iupac_name : str
        IUPAC name of the molecule to construct.

    Returns
    -------
    molecule : OEMol
        The parsed molecule after it has been passed through
        ``normalize_molecule``.

    Raises
    ------
    ImportError
        If the OEChem or OEIUPAC license check fails.
    ValueError
        If OEIUPAC reports that ``iupac_name`` could not be parsed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oeiupac = import_("openeye.oeiupac")
    if not oeiupac.OEIUPACIsLicensed():
        raise ImportError("Need License for OEIupac!")

    # Start from an empty molecule and let OEIUPAC populate it from the name.
    parsed = oechem.OEMol()
    parse_ok = oeiupac.OEParseIUPACName(parsed, iupac_name)
    if not parse_ok:
        message = "The supplied IUPAC name '%s' could not be parsed." % iupac_name
        raise ValueError(message)

    return normalize_molecule(parsed)
def iupac_to_oemol(iupac_name):
    """Build a normalized OpenEye molecule from an IUPAC name.

    Parameters
    ----------
    iupac_name : str
        IUPAC name of the molecule to construct.

    Returns
    -------
    molecule : OEMol
        The parsed molecule after it has been passed through
        ``normalize_molecule``.

    Raises
    ------
    ImportError
        If the OEChem or OEIUPAC license check fails.
    ValueError
        If OEIUPAC reports that ``iupac_name`` could not be parsed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oeiupac = import_("openeye.oeiupac")
    if not oeiupac.OEIUPACIsLicensed():
        raise ImportError("Need License for OEIupac!")

    # Start from an empty molecule and let OEIUPAC populate it from the name.
    parsed = oechem.OEMol()
    parse_ok = oeiupac.OEParseIUPACName(parsed, iupac_name)
    if not parse_ok:
        message = "The supplied IUPAC name '%s' could not be parsed." % iupac_name
        raise ValueError(message)

    return normalize_molecule(parsed)
def assignELF10charges(molecule, max_confs: int = -1, strictStereo=True):
    """Assign AM1-BCC ELF10 partial charges to a copy of an OpenEye molecule.

    Parameters
    ----------
    molecule : OEMol
        The molecule to charge. It is copied; the input is not modified here.
    max_confs : int, optional, default=-1
        Maximum number of conformers used for the charge calculation.
        A negative value selects Omega's dense sampling instead.
    strictStereo : bool, optional, default=True
        Passed to Omega's ``SetStrictStereo`` (directly for dense sampling,
        through ``generate_conformers`` otherwise).

    Returns
    -------
    mol_copy : OEMol
        Copy of the input molecule with partial charges assigned.

    Raises
    ------
    ImportError
        If the OEChem or OEQuacPac license check fails, or if dense sampling
        is requested and the OEOmega license check fails.
    RuntimeError
        If Omega, MMFF atom typing, or ``OEAssignCharges`` reports failure.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise ImportError("Need License for oequacpac!")

    oeomega = import_("openeye.oeomega")

    mol_copy = molecule.CreateCopy()

    if max_confs < 0:
        # Dense sampling runs Omega directly, so check its license here the
        # same way generate_conformers does for the bounded case.
        if not oeomega.OEOmegaIsLicensed():
            raise ImportError("Need License for OEOmega!")
        omega_options = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)
        omega = oeomega.OEOmega(omega_options)
        omega.SetStrictStereo(strictStereo)
        if not omega(mol_copy):
            # RuntimeError (still an Exception subclass) matches the other
            # failure paths in this function.
            raise RuntimeError("Omega failed.")
    elif mol_copy.GetMaxConfIdx() <= max_confs:
        # Only regenerate conformers when the molecule does not already carry
        # more than max_confs of them.
        mol_copy = generate_conformers(
            mol_copy, max_confs=max_confs, strictStereo=strictStereo
        )

    # Assign MMFF atom types before charging.
    if not oechem.OEMMFFAtomTypes(mol_copy):
        raise RuntimeError("MMFF atom type assignment returned errors")

    # ELF10 charges.
    status = oequacpac.OEAssignCharges(mol_copy, oequacpac.OEAM1BCCELF10Charges())
    if not status:
        raise RuntimeError("OEAssignCharges returned error code %d" % status)

    return mol_copy
def assignELF10charges(molecule, max_confs: int = -1, strictStereo=True):
    """Assign AM1-BCC ELF10 partial charges to a copy of an OpenEye molecule.

    Parameters
    ----------
    molecule : OEMol
        The molecule to charge. It is copied; the input is not modified here.
    max_confs : int, optional, default=-1
        Maximum number of conformers used for the charge calculation.
        A negative value selects Omega's dense sampling instead.
    strictStereo : bool, optional, default=True
        Passed to Omega's ``SetStrictStereo`` (directly for dense sampling,
        through ``generate_conformers`` otherwise).

    Returns
    -------
    mol_copy : OEMol
        Copy of the input molecule with partial charges assigned.

    Raises
    ------
    ImportError
        If the OEChem or OEQuacPac license check fails, or if dense sampling
        is requested and the OEOmega license check fails.
    RuntimeError
        If Omega, MMFF atom typing, or ``OEAssignCharges`` reports failure.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise ImportError("Need License for oequacpac!")

    oeomega = import_("openeye.oeomega")

    mol_copy = molecule.CreateCopy()

    if max_confs < 0:
        # Dense sampling runs Omega directly, so check its license here the
        # same way generate_conformers does for the bounded case.
        if not oeomega.OEOmegaIsLicensed():
            raise ImportError("Need License for OEOmega!")
        omega_options = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)
        omega = oeomega.OEOmega(omega_options)
        omega.SetStrictStereo(strictStereo)
        if not omega(mol_copy):
            # RuntimeError (still an Exception subclass) matches the other
            # failure paths in this function.
            raise RuntimeError("Omega failed.")
    elif mol_copy.GetMaxConfIdx() <= max_confs:
        # Only regenerate conformers when the molecule does not already carry
        # more than max_confs of them.
        mol_copy = generate_conformers(
            mol_copy, max_confs=max_confs, strictStereo=strictStereo
        )

    # Assign MMFF atom types before charging.
    if not oechem.OEMMFFAtomTypes(mol_copy):
        raise RuntimeError("MMFF atom type assignment returned errors")

    # ELF10 charges.
    status = oequacpac.OEAssignCharges(mol_copy, oequacpac.OEAM1BCCELF10Charges())
    if not status:
        raise RuntimeError("OEAssignCharges returned error code %d" % status)

    return mol_copy
def generate_conformers(molecule, max_confs=800, strictStereo=True, ewindow=15.0, rms_threshold=1.0, strictTypes=True):
    """Generate conformations for the supplied molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers. It is copied first.
    max_confs : int, optional, default=800
        Max number of conformers to generate. If None, Omega's own maximum
        is left untouched.
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
    ewindow : float, optional, default=15.0
        Value passed to Omega's ``SetEnergyWindow``.
    rms_threshold : float, optional, default=1.0
        Value passed to Omega's ``SetRMSThreshold``.
    strictTypes : bool, optional, default=True
        If True, requires that Omega have exact MMFF types for atoms in
        molecule; otherwise, allows the closest atom type of the same element
        to be used.

    Returns
    -------
    molcopy : OEMol
        A multi-conformer molecule with up to max_confs conformers.

    Raises
    ------
    ImportError
        If the OEChem or OEOmega license check fails.
    RuntimeError
        If Omega reports failure.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oeomega = import_("openeye.oeomega")
    if not oeomega.OEOmegaIsLicensed():
        raise ImportError("Need License for OEOmega!")

    molcopy = oechem.OEMol(molecule)
    omega = oeomega.OEOmega()

    # Only override the conformer cap when one was requested; previously an
    # unconditional SetMaxConfs(max_confs) made the documented None case
    # unusable.
    if max_confs is not None:
        omega.SetMaxConfs(max_confs)

    # The input conformer is not included. (The earlier SetIncludeInput(True)
    # was always overridden by a later SetIncludeInput(False).)
    omega.SetIncludeInput(False)
    omega.SetCanonOrder(False)

    # Word to the wise: skipping the next settings can lead to significantly
    # different charges!
    omega.SetSampleHydrogens(True)
    omega.SetEnergyWindow(ewindow)
    omega.SetRMSThreshold(rms_threshold)

    omega.SetStrictStereo(strictStereo)
    omega.SetStrictAtomTypes(strictTypes)

    status = omega(molcopy)  # generate conformations
    if not status:
        raise RuntimeError("omega returned error code %d" % status)

    return molcopy
def test_molecule(molecule_name, tripos_mol2_filename, charge_method="bcc"):
    """Parameterize a molecule two ways (ffXML and LEaP) and compare them.

    Parameters
    ----------
    molecule_name : str
        Name of the molecule.
    tripos_mol2_filename : str
        Filename of the input mol2 file.
    charge_method : str, default="bcc"
        Forwarded to ``run_antechamber``. If None, use charges in the
        existing MOL2; otherwise use a charge model when running antechamber.

    Notes
    -----
    Nothing is returned; the comparison is done by the ``SystemChecker``
    check methods.
    """
    # Generate GAFF parameters with antechamber.
    amber = import_("openmoltools.amber")
    antechamber_output = amber.run_antechamber(
        molecule_name, tripos_mol2_filename, charge_method=charge_method
    )
    gaff_mol2_filename, frcmod_filename = antechamber_output

    # Build one simulation per parameterization route from the same inputs.
    simulation_inputs = (molecule_name, gaff_mol2_filename, frcmod_filename)
    simulation_ffxml = create_ffxml_simulation(*simulation_inputs)
    simulation_leap = create_leap_simulation(*simulation_inputs)

    # Compare the two simulations; the returned values are not used further.
    checker = system_checker.SystemChecker(simulation_ffxml, simulation_leap)
    checker.check_force_parameters()
    _groups_first, _groups_second = checker.check_energy_groups()
    _energy_first, _energy_second = checker.check_energies()
def get_names_to_charges(molecule):
    """Return a dictionary of atom names and partial charges, as well as a
    string representation.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to grab charges. It is copied before being read.

    Returns
    -------
    data : dictionary
        A dictionary whose (key, val) pairs are the atom names and partial
        charges, respectively. If several atoms share a name, the last one
        encountered wins.
    molrepr : str
        One ``"<name> <charge> \\n"`` line per atom, in iteration order.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for oechem!")

    molcopy = oechem.OEMol(molecule)

    # Collect (name, charge) once, then derive both outputs from it.
    name_charge_pairs = [
        (atom.GetName(), atom.GetPartialCharge()) for atom in molcopy.GetAtoms()
    ]
    data = dict(name_charge_pairs)
    # join instead of repeated += keeps this linear in the number of atoms.
    molrepr = "".join("%s %f \n" % pair for pair in name_charge_pairs)

    return data, molrepr
def smiles_to_oemol(smiles, title='MOL'):
    """Build a normalized, titled OpenEye molecule from a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the desired molecule.
    title : str, optional, default='MOL'
        Title set on the molecule after normalization.

    Returns
    -------
    molecule : OEMol
        The parsed molecule after ``normalize_molecule``, with its title set
        to ``title``.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    ValueError
        If OEChem reports that ``smiles`` could not be parsed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    parsed = oechem.OEMol()
    parse_ok = oechem.OEParseSmiles(parsed, smiles)
    if not parse_ok:
        message = "The supplied SMILES '%s' could not be parsed." % smiles
        raise ValueError(message)

    normalized = normalize_molecule(parsed)
    # The title is applied last so it replaces whatever normalization set.
    normalized.SetTitle(title)
    return normalized
def oe_mol2_to_mol2_block(mol2_block):
    """Parse a MOL2 block, normalize its first molecule, and return it.

    The previous body referenced the undefined names ``ofs`` and
    ``out_sdf_path`` and never opened the input stream on ``mol2_block``, so
    it could only raise ``NameError``. This version reads from the supplied
    block instead.

    Parameters
    ----------
    mol2_block : str or bytes
        MOL2-formatted text holding one or more molecules.

    Returns
    -------
    molecule : OEMol
        The first molecule in the block after ``normalize_molecule``.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    ValueError
        If the block cannot be opened or yields no molecule.

    Notes
    -----
    As before, an SDF rendering of the result is printed to stdout.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    # mol2_string_IO_san hands UTF-8 bytes to openstring; mirror that here.
    # NOTE(review): assumes openstring accepts bytes as it does there - confirm.
    if isinstance(mol2_block, str):
        mol2_block = mol2_block.encode('UTF-8')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_MOL2)
    if not ifs.openstring(mol2_block):
        raise ValueError("Unable to open the supplied MOL2 block")

    # Copy each molecule out of the stream so it outlives the reader.
    mols = [oechem.OEMol(mol) for mol in ifs.GetOEGraphMols()]
    if not mols:
        raise ValueError("No molecule could be read from the supplied MOL2 block")

    molecule = normalize_molecule(mols[0])

    # Render the normalized molecule as SDF into an in-memory stream.
    oms = oechem.oemolostream()
    oms.SetFormat(oechem.OEFormat_SDF)
    oms.openstring()
    oechem.OEWriteMolecule(oms, molecule)
    molfile = oms.GetString()
    print("MOL string\n", molfile.decode('UTF-8'))

    return molecule
def mol2_string_IO_san(mol2_block_string):
    """Convert a MOL2 text block into a PDB block of normalized molecules.

    Parameters
    ----------
    mol2_block_string : str
        MOL2-formatted text; it is UTF-8 encoded before being read.

    Returns
    -------
    molfile
        Contents of the in-memory PDB output stream after every parsed
        molecule has been normalized and written to it.
        NOTE(review): presumably bytes, since sibling code decodes the same
        ``GetString()`` result as UTF-8 - confirm.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    encoded_block = mol2_block_string.encode('UTF-8')

    # Read every molecule out of the in-memory MOL2 stream, copying each one.
    reader = oechem.oemolistream()
    reader.SetFormat(oechem.OEFormat_MOL2)
    reader.openstring(encoded_block)
    parsed = [oechem.OEMol(entry) for entry in reader.GetOEMols()]

    # Write the normalized molecules to an in-memory PDB stream.
    writer = oechem.oemolostream()
    writer.SetFormat(oechem.OEFormat_PDB)
    writer.openstring()
    for entry in parsed:
        oechem.OEWriteMolecule(writer, normalize_molecule(entry))

    return writer.GetString()
def show_docked(df):
    """Display docked poses from a table in a PyMol viewer.

    Parameters
    ----------
    df : mapping of column name to sequence
        Must provide the columns ``'mol2_blocks_docked'`` (MOL2 text),
        ``'smiles'`` (template SMILES) and ``'names'`` (display names), with
        matching row order.
        NOTE(review): presumably a pandas DataFrame - confirm with callers.

    Raises
    ------
    ImportError
        If the OEChem license check fails.

    Notes
    -----
    Each pose is converted to a PDB block with ``mol2_string_IO_san``, parsed
    by RDKit, given bond orders from its SMILES template, and shown in the
    viewer under its name. The viewer is cleared first. The type of each
    templated molecule and its name are printed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    docked_blocks = list(df['mol2_blocks_docked'])
    template_smiles = list(df['smiles'])
    pose_names = list(df['names'])

    viewer = PyMol.MolViewer()
    viewer.DeleteAll()

    for row, docked_block in enumerate(docked_blocks):
        pdb_block = mol2_string_IO_san(docked_block)
        pose = Chem.MolFromPDBBlock(pdb_block)
        template = Chem.MolFromSmiles(template_smiles[row])
        templated_pose = AllChem.AssignBondOrdersFromTemplate(template, pose)
        print(type(templated_pose))

        pose_name = pose_names[row]
        print(pose_name)
        # NOTE(review): the name is stored on the untemplated pose, while the
        # molecule shown below is built from templated_pose - confirm intent.
        pose.SetProp('_Name', pose_name)

        # Round-trip through the binary form to get an independent copy.
        probe = Chem.Mol(templated_pose.ToBinary())
        viewer.ShowMol(probe, name=pose_name, showOnly=False)
def smiles_to_oemol(smiles):
    """Build a normalized OpenEye molecule from a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES representation of the desired molecule.

    Returns
    -------
    molecule : OEMol
        The parsed molecule after it has been passed through
        ``normalize_molecule``.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    ValueError
        If OEChem reports that ``smiles`` could not be parsed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    parsed = oechem.OEMol()
    parse_ok = oechem.OEParseSmiles(parsed, smiles)
    if not parse_ok:
        message = "The supplied SMILES '%s' could not be parsed." % smiles
        raise ValueError(message)

    return normalize_molecule(parsed)
def oe_sdf_to_molecule(out_sdf_path):
    """Load an SDF file and return its first molecule.

    Parameters
    ----------
    out_sdf_path : str
        Path of the SDF file to read.

    Returns
    -------
    molecule : OEMol or None
        The first molecule read from the file, or None when the file yielded
        no molecules (a message is printed in that case).

    Raises
    ------
    ImportError
        If the OEChem license check fails.

    Notes
    -----
    All molecules read are echoed to stdout as one SDF string. When the file
    cannot be opened, ``oechem.OEThrow.Fatal`` is invoked.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    reader = oechem.oemolistream()
    reader.SetFormat(oechem.OEFormat_SDF)

    # In-memory output stream used only to print what was read.
    writer = oechem.oemolostream()
    writer.SetFormat(oechem.OEFormat_SDF)
    writer.openstring()

    records = []
    if not reader.open(out_sdf_path):
        oechem.OEThrow.Fatal(f"Unable to open {out_sdf_path}")
    else:
        # Copy each molecule so it stays valid after the stream moves on.
        records.extend(oechem.OEMol(entry) for entry in reader.GetOEGraphMols())

    try:
        first_record = records[0]
        for entry in records:
            oechem.OEWriteMolecule(writer, entry)
        sdf_text = writer.GetString()
        print("MOL string\n", sdf_text.decode('UTF-8'))
        return first_record
    except IndexError:
        # Reached when no molecule was read from the file.
        print('index error, failed to generate conformers')
        return None
def smiles_to_mdtraj_ffxml(smiles_strings, base_molecule_name="lig"):
    """Generate MDTraj objects and an ffxml stream from smiles strings.

    Parameters
    ----------
    smiles_strings : list(str)
        Smiles strings to create molecules for.
    base_molecule_name : str, optional, default='lig'
        Base name of molecule to use inside parameter files. Molecule ``k``
        is named ``"<base_molecule_name>-<k>"``.

    Returns
    -------
    trajectories : list of mdtraj.Trajectory
        One MDTraj object per input smiles string, in input order.
    ffxml : StringIO
        StringIO representation of ffxml file.

    Raises
    ------
    ImportError
        If rdkit cannot be imported.
    ValueError
        If ``smiles_strings`` is empty or a smiles string cannot be parsed.

    Notes
    -----
    ffxml can be directly input to OpenMM e.g.
    `forcefield = app.ForceField(ffxml)`

    The intermediate MDL file of each molecule is left on disk in the
    system temporary directory.
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError as err:
        raise ImportError("Must install rdkit to use smiles conversion.") from err

    # Loop-invariant: resolve the amber helpers once.
    amber = import_("openmoltools.amber")

    gaff_mol2_filenames = []
    frcmod_filenames = []
    trajectories = []
    molecule_name = None

    for k, smiles_string in enumerate(smiles_strings):
        molecule_name = "%s-%d" % (base_molecule_name, k)

        m = Chem.MolFromSmiles(smiles_string)
        if m is None:
            # RDKit signals a parse failure by returning None.
            raise ValueError(
                "The supplied SMILES '%s' could not be parsed." % smiles_string
            )
        m = Chem.AddHs(m)
        AllChem.EmbedMolecule(m)
        AllChem.UFFOptimizeMolecule(m)

        # Reserve the temporary file atomically; tempfile.mktemp only returns
        # a name and is deprecated because another process can claim it first.
        with tempfile.NamedTemporaryFile(suffix=".mdl", delete=False) as handle:
            mdl_filename = handle.name
        Chem.MolToMolFile(m, mdl_filename)

        gaff_mol2_filename, frcmod_filename = amber.run_antechamber(
            molecule_name, mdl_filename, input_format='mdl'
        )
        traj = md.load(gaff_mol2_filename)
        print(gaff_mol2_filename)
        print(traj)

        for atom in traj.top.atoms:
            atom.residue.name = molecule_name

        gaff_mol2_filenames.append(gaff_mol2_filename)
        frcmod_filenames.append(frcmod_filename)
        trajectories.append(traj)

    if molecule_name is None:
        # Previously this surfaced as an unbound-variable error below.
        raise ValueError("smiles_strings must contain at least one smiles string.")

    # NOTE(review): the override uses the name of the LAST molecule for every
    # mol2 file; kept as-is for backward compatibility - confirm intent.
    ffxml = create_ffxml_file(
        gaff_mol2_filenames,
        frcmod_filenames,
        override_mol2_residue_name=molecule_name,
    )

    return trajectories, ffxml
def molecule_to_mol2(molecule, tripos_mol2_filename=None, conformer=0, residue_name="MOL", standardize=True):
    """Convert OE molecule to tripos mol2 file.

    Parameters
    ----------
    molecule : openeye.oechem.OEGraphMol
        The molecule to be converted.
    tripos_mol2_filename : str, optional, default=None
        Output filename. If None, will create a filename similar to
        name.tripos.mol2, where name is the title of the OE molecule.
    conformer : int, optional, default=0
        Index of the conformer to save. If None, save all conformers.
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so we replace it with a string of
        your choosing.
    standardize : bool, optional, default=True
        Use a high-level writer, which will standardize the molecular
        properties. Set this to false if you wish to retain things such as
        atom names. In this case, a low-level writer will be used.

    Returns
    -------
    molecule_name : str
        Title of the OE molecule.
    tripos_mol2_filename : str
        Filename of output tripos mol2 file.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for oechem!")

    # Get molecule name.
    molecule_name = molecule.GetTitle()
    logger.debug(molecule_name)

    # Write molecule as Tripos mol2.
    if tripos_mol2_filename is None:
        tripos_mol2_filename = molecule_name + '.tripos.mol2'

    # The high-level writer overrides molecular properties (atom names etc.);
    # the low-level mol2 writer keeps them.
    write_conformer = oechem.OEWriteMolecule if standardize else oechem.OEWriteMol2File

    ofs = oechem.oemolostream(tripos_mol2_filename)
    try:
        ofs.SetFormat(oechem.OEFormat_MOL2H)
        for k, mol in enumerate(molecule.GetConfs()):
            if conformer is None or k == conformer:
                write_conformer(ofs, mol)
    finally:
        # Close the stream even if writing fails, so the file is released.
        ofs.close()

    # Replace <0> substructure names with valid text.
    with open(tripos_mol2_filename, 'r') as infile:
        contents = infile.read()
    with open(tripos_mol2_filename, 'w') as outfile:
        outfile.write(contents.replace('<0>', residue_name))

    return molecule_name, tripos_mol2_filename
def approximate_volume_by_density(smiles_strings, n_molecules_list, density=1.0, box_scaleup_factor=1.1, box_buffer=2.0):
    """Generate an approximate box size based on the number and molecular
    weight of molecules present, and a target density for the final solvated
    mixture. If no density is specified, the target density is assumed to be
    1 g/ml.

    Parameters
    ----------
    smiles_strings : list(str)
        List of smiles strings for each component of mixture.
    n_molecules_list : list(int)
        The number of molecules of each mixture component.
    density : float, optional, default 1.0
        Target density for final system in g/ml.
    box_scaleup_factor : float, optional, default = 1.1
        Factor by which the estimated box size is increased.
    box_buffer : float [ANGSTROMS], optional, default 2.0.
        Currently NOT applied: the addition of this buffer is disabled in
        the code, so the argument has no effect on the result. It is kept
        for interface compatibility. (With periodic boundary conditions,
        packmol docs suggest leaving an extra 2 Angstroms buffer during
        packing.)

    Returns
    -------
    box_size : float
        The size (edge length) of the box to generate. In ANGSTROMS.

    Raises
    ------
    ValueError
        If one of the smiles strings cannot be parsed.

    Notes
    -----
    By default, boxes are only modestly large. This approach has not been
    extensively tested for stability but has been used in the Mobley lab for
    perhaps ~100 different systems without substantial problems.
    """
    oechem = import_("openeye.oechem")

    density = density * units.grams / units.milliliter

    # Running total mass of the mixture, accumulated per component.
    mass = 0.0 * units.grams / units.mole * 1. / units.AVOGADRO_CONSTANT_NA

    for idx, smi in enumerate(smiles_strings):
        mol = oechem.OEMol()
        # An unparsable SMILES used to be ignored, silently distorting the
        # mass; fail loudly instead.
        if not oechem.OEParseSmiles(mol, smi):
            raise ValueError("The supplied SMILES '%s' could not be parsed." % smi)
        weight = oechem.OECalculateMolecularWeight(mol) * units.grams / units.mole
        mass += n_molecules_list[idx] * weight * 1. / units.AVOGADRO_CONSTANT_NA

    # Estimate volume based on mass and density.
    # Density = mass/volume so volume = mass/density (volume units are ml).
    vol = mass / density

    # Convert to box length in angstroms.
    edge = vol ** (1. / 3.)

    # Compute final box size (box_buffer intentionally not added, see above).
    box_size = edge * box_scaleup_factor / units.angstroms

    return box_size
def smiles_to_mdtraj_ffxml(smiles_strings, base_molecule_name="lig"):
    """Generate MDTraj objects and an ffxml stream from smiles strings.

    Parameters
    ----------
    smiles_strings : list(str)
        Smiles strings to create molecules for.
    base_molecule_name : str, optional, default='lig'
        Base name of molecule to use inside parameter files. Molecule ``k``
        is named ``"<base_molecule_name>-<k>"``.

    Returns
    -------
    trajectories : list of mdtraj.Trajectory
        One MDTraj object per input smiles string, in input order.
    ffxml : StringIO
        StringIO representation of ffxml file.

    Raises
    ------
    ImportError
        If rdkit cannot be imported.
    ValueError
        If ``smiles_strings`` is empty or a smiles string cannot be parsed.

    Notes
    -----
    ffxml can be directly input to OpenMM e.g.
    `forcefield = app.ForceField(ffxml)`

    The intermediate MDL file of each molecule is left on disk in the
    system temporary directory.
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ImportError as err:
        raise ImportError("Must install rdkit to use smiles conversion.") from err

    # Loop-invariant: resolve the amber helpers once.
    amber = import_("openmoltools.amber")

    gaff_mol2_filenames = []
    frcmod_filenames = []
    trajectories = []
    molecule_name = None

    for k, smiles_string in enumerate(smiles_strings):
        molecule_name = "%s-%d" % (base_molecule_name, k)

        m = Chem.MolFromSmiles(smiles_string)
        if m is None:
            # RDKit signals a parse failure by returning None.
            raise ValueError(
                "The supplied SMILES '%s' could not be parsed." % smiles_string
            )
        m = Chem.AddHs(m)
        AllChem.EmbedMolecule(m)
        AllChem.UFFOptimizeMolecule(m)

        # Reserve the temporary file atomically; tempfile.mktemp only returns
        # a name and is deprecated because another process can claim it first.
        with tempfile.NamedTemporaryFile(suffix=".mdl", delete=False) as handle:
            mdl_filename = handle.name
        Chem.MolToMolFile(m, mdl_filename)

        gaff_mol2_filename, frcmod_filename = amber.run_antechamber(
            molecule_name, mdl_filename, input_format='mdl'
        )
        traj = md.load(gaff_mol2_filename)
        print(gaff_mol2_filename)
        print(traj)

        for atom in traj.top.atoms:
            atom.residue.name = molecule_name

        gaff_mol2_filenames.append(gaff_mol2_filename)
        frcmod_filenames.append(frcmod_filename)
        trajectories.append(traj)

    if molecule_name is None:
        # Previously this surfaced as an unbound-variable error below.
        raise ValueError("smiles_strings must contain at least one smiles string.")

    # NOTE(review): the override uses the name of the LAST molecule for every
    # mol2 file; kept as-is for backward compatibility - confirm intent.
    ffxml = create_ffxml_file(
        gaff_mol2_filenames,
        frcmod_filenames,
        override_mol2_residue_name=molecule_name,
    )

    return trajectories, ffxml
def create_ffxml_simulation(molecule_name, gaff_mol2_filename, frcmod_filename):
    """Process a gaff mol2 file and frcmod file using the XML conversion,
    returning an OpenMM simulation.

    Parameters
    ----------
    molecule_name : str
        The name of the molecule. The generated force field is written to
        ``<molecule_name>.ffxml`` relative to the current directory.
    gaff_mol2_filename : str
        The name of the gaff mol2 file.
    frcmod_filename : str
        The name of the gaff frcmod file.

    Returns
    -------
    simulation : openmm.app.Simulation
        A simulation on the Reference platform with positions set from the
        first frame of the mol2 file.
    """
    # Generate ffxml file. create_ffxml_file does the Amber parsing itself;
    # the separate parser run that used to precede it produced a result that
    # was never used, so it has been dropped.
    ffxml_filename = molecule_name + '.ffxml'
    create_ffxml_file([gaff_mol2_filename], [frcmod_filename], ffxml_filename)

    traj = md.load(gaff_mol2_filename)  # Read mol2 file.
    # Extract OpenMM-united positions of first (and only) trajectory frame.
    positions = traj.openmm_positions(0)
    topology = traj.top.to_openmm()

    # Create System object.
    forcefield = app.ForceField(ffxml_filename)
    system = forcefield.createSystem(
        topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        implicitSolvent=None,
    )

    # Create integrator.
    timestep = 1.0 * units.femtoseconds
    integrator = simtk.openmm.VerletIntegrator(timestep)

    # Create simulation.
    platform = simtk.openmm.Platform.getPlatformByName("Reference")
    simulation = app.Simulation(topology, system, integrator, platform=platform)
    simulation.context.setPositions(positions)

    return simulation
def approximate_volume_by_density(smiles_strings, n_molecules_list, density=1.0, box_scaleup_factor=1.1, box_buffer=2.0):
    """Generate an approximate box size based on the number and molecular
    weight of molecules present, and a target density for the final solvated
    mixture. If no density is specified, the target density is assumed to be
    1 g/ml.

    Parameters
    ----------
    smiles_strings : list(str)
        List of smiles strings for each component of mixture.
    n_molecules_list : list(int)
        The number of molecules of each mixture component.
    density : float, optional, default 1.0
        Target density for final system in g/ml.
    box_scaleup_factor : float, optional, default = 1.1
        Factor by which the estimated box size is increased.
    box_buffer : float [ANGSTROMS], optional, default 2.0.
        Currently NOT applied: the addition of this buffer is disabled in
        the code, so the argument has no effect on the result. It is kept
        for interface compatibility. (With periodic boundary conditions,
        packmol docs suggest leaving an extra 2 Angstroms buffer during
        packing.)

    Returns
    -------
    box_size : float
        The size (edge length) of the box to generate. In ANGSTROMS.

    Raises
    ------
    ValueError
        If one of the smiles strings cannot be parsed.

    Notes
    -----
    By default, boxes are only modestly large. This approach has not been
    extensively tested for stability but has been used in the Mobley lab for
    perhaps ~100 different systems without substantial problems.
    """
    oechem = import_("openeye.oechem")

    density = density * units.grams / units.milliliter

    # Running total mass of the mixture, accumulated per component.
    mass = 0.0 * units.grams / units.mole * 1. / units.AVOGADRO_CONSTANT_NA

    for idx, smi in enumerate(smiles_strings):
        mol = oechem.OEMol()
        # An unparsable SMILES used to be ignored, silently distorting the
        # mass; fail loudly instead.
        if not oechem.OEParseSmiles(mol, smi):
            raise ValueError("The supplied SMILES '%s' could not be parsed." % smi)
        weight = oechem.OECalculateMolecularWeight(mol) * units.grams / units.mole
        mass += n_molecules_list[idx] * weight * 1. / units.AVOGADRO_CONSTANT_NA

    # Estimate volume based on mass and density.
    # Density = mass/volume so volume = mass/density (volume units are ml).
    vol = mass / density

    # Convert to box length in angstroms.
    edge = vol ** (1. / 3.)

    # Compute final box size (box_buffer intentionally not added, see above).
    box_size = edge * box_scaleup_factor / units.angstroms

    return box_size
def normalize_molecule(molecule):
    """Return a normalized copy of an OpenEye molecule.

    The copy gets aromaticity flags assigned with the OpenEye aromaticity
    model and explicit hydrogens added. When OEIUPAC is licensed, its title
    is set to the generated IUPAC name. If any atom has an empty name, all
    atoms are renamed with Tripos atom names.

    Parameters
    ----------
    molecule : OEMol
        The molecule to be normalized. It is copied, not modified.

    Returns
    -------
    molcopy : OEMol
        The normalized copy.

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    oeiupac = import_("openeye.oeiupac")
    iupac_licensed = oeiupac.OEIUPACIsLicensed()

    normalized = oechem.OEMol(molecule)

    # Aromaticity first, then explicit hydrogens.
    oechem.OEAssignAromaticFlags(normalized, oechem.OEAroModelOpenEye)
    oechem.OEAddExplicitHydrogens(normalized)

    # Title becomes the IUPAC name, but only when OEIUPAC may be used.
    if iupac_licensed:
        normalized.SetTitle(oeiupac.OECreateIUPACName(normalized))

    # A single unnamed atom is enough to reassign every atom name.
    atom_names = [atom.GetName() for atom in normalized.GetAtoms()]
    if "" in atom_names:
        oechem.OETriposAtomNames(normalized)

    return normalized
def rename_water_atoms(pdb_filename, O_name='O', H1_name='H1', H2_name='H2'):
    """Rename water atoms in a specified PDB file to have target names.

    Typically used to ensure a packmol-generated box containing water has
    water atom names corresponding to what tleap expects for standard water
    models.

    Parameters
    ----------
    pdb_filename : str
        The target PDB filename to edit. The file is overwritten in place.
    O_name : str, optional, default 'O'
        Target name to set water oxygen names to.
    H1_name : str, optional, default 'H1'
        Target name to set water hydrogen names to, for first hydrogen.
    H2_name : str, optional, default 'H2'
        Target name to set water hydrogen names to, for second hydrogen.

    Returns
    -------
    None

    Notes
    -----
    Uses ParmEd to make edits. Identifies waters by reading residues from
    the target PDB file and treating any residue containing three atoms with
    names O or O#, H or H#, and H or H# (where # is a digit or sequence of
    digits) as a water molecule.
    """
    parmed = import_("parmed")

    pdb = parmed.load_file(pdb_filename)

    # Find waters and rename.
    for residue in pdb.residues:
        if len(residue) != 3:
            continue

        # Build list of atom types (PDB files don't store these) from names
        # after stripping off trailing digits.
        types = []
        for atom in residue.atoms:
            name = atom.name
            # Guard on emptiness: an empty or all-digit name used to raise
            # IndexError on name[-1] once every character was stripped.
            while name and name[-1].isdigit():
                name = name[:-1]
            types.append(name)

        # See if it's water and, if so, rename.
        if 'O' in types and types.count('H') == 2:
            hct = 0  # number of hydrogens renamed so far in this residue
            for atom in residue.atoms:
                if 'O' in atom.name:
                    atom.name = O_name
                elif 'H' in atom.name:
                    atom.name = H1_name if hct == 0 else H2_name
                    hct += 1

    # Write file.
    pdb.write_pdb(pdb_filename)
def rename_water_atoms(pdb_filename, O_name='O', H1_name='H1', H2_name='H2'):
    """Rename water atoms in a specified PDB file to have target names.

    Typically used to ensure a packmol-generated box containing water has
    water atom names corresponding to what tleap expects for standard water
    models.

    Parameters
    ----------
    pdb_filename : str
        The target PDB filename to edit. The file is overwritten in place.
    O_name : str, optional, default 'O'
        Target name to set water oxygen names to.
    H1_name : str, optional, default 'H1'
        Target name to set water hydrogen names to, for first hydrogen.
    H2_name : str, optional, default 'H2'
        Target name to set water hydrogen names to, for second hydrogen.

    Returns
    -------
    None

    Notes
    -----
    Uses ParmEd to make edits. Identifies waters by reading residues from
    the target PDB file and treating any residue containing three atoms with
    names O or O#, H or H#, and H or H# (where # is a digit or sequence of
    digits) as a water molecule.
    """
    parmed = import_("parmed")

    pdb = parmed.load_file(pdb_filename)

    # Find waters and rename.
    for residue in pdb.residues:
        if len(residue) != 3:
            continue

        # Build list of atom types (PDB files don't store these) from names
        # after stripping off trailing digits.
        types = []
        for atom in residue.atoms:
            name = atom.name
            # Guard on emptiness: an empty or all-digit name used to raise
            # IndexError on name[-1] once every character was stripped.
            while name and name[-1].isdigit():
                name = name[:-1]
            types.append(name)

        # See if it's water and, if so, rename.
        if 'O' in types and types.count('H') == 2:
            hct = 0  # number of hydrogens renamed so far in this residue
            for atom in residue.atoms:
                if 'O' in atom.name:
                    atom.name = O_name
                elif 'H' in atom.name:
                    atom.name = H1_name if hct == 0 else H2_name
                    hct += 1

    # Write file.
    pdb.write_pdb(pdb_filename)
def smiles_to_antechamber(
    smiles_string,
    gaff_mol2_filename,
    frcmod_filename,
    residue_name="MOL",
    strictStereo=False,
):
    """Build a charged molecule from a smiles string and run antechamber on
    it, producing GAFF mol2 and frcmod files.

    Charges are assigned by ``get_charges`` before antechamber runs, and
    antechamber is invoked with ``charge_method=None``. Per the original
    notes, these are the OpenEye QuacPac AM1-BCC charges.

    Parameters
    ----------
    smiles_string : str
        Smiles string of molecule to construct and charge.
    gaff_mol2_filename : str
        Destination of the antechamber mol2 output.
    frcmod_filename : str
        Destination of the antechamber frcmod output. Most likely this file
        will be almost empty, at least for typical molecules.
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so it is replaced with this string.
        Useful downstream if residue names must be unique.
    strictStereo : bool, optional, default=False
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for oechem!")

    # Resolve the destinations now: the work below happens inside a
    # temporary directory, where relative paths would point elsewhere.
    final_mol2_path = os.path.abspath(gaff_mol2_filename)
    final_frcmod_path = os.path.abspath(frcmod_filename)

    charged = get_charges(
        smiles_to_oemol(smiles_string), strictStereo=strictStereo, keep_confs=1
    )

    # Avoid dumping 50 antechamber files in local directory.
    with enter_temp_directory():
        molecule_to_mol2(charged, "./tmp.mol2", residue_name=residue_name)
        total_charge = oechem.OENetCharge(charged)
        # charge_method=None: USE OE AM1BCC charges!
        antechamber_outputs = run_antechamber(
            "tmp", "./tmp.mol2", charge_method=None, net_charge=total_charge
        )
        tmp_mol2_path, tmp_frcmod_path = antechamber_outputs
        shutil.copy(tmp_mol2_path, final_mol2_path)
        shutil.copy(tmp_frcmod_path, final_frcmod_path)
def smiles_to_antechamber(
    smiles_string,
    gaff_mol2_filename,
    frcmod_filename,
    residue_name="MOL",
    strictStereo=False,
):
    """Build a charged molecule from a smiles string and run antechamber on
    it, producing GAFF mol2 and frcmod files.

    Charges are assigned by ``get_charges`` before antechamber runs, and
    antechamber is invoked with ``charge_method=None``. Per the original
    notes, these are the OpenEye QuacPac AM1-BCC charges.

    Parameters
    ----------
    smiles_string : str
        Smiles string of molecule to construct and charge.
    gaff_mol2_filename : str
        Destination of the antechamber mol2 output.
    frcmod_filename : str
        Destination of the antechamber frcmod output. Most likely this file
        will be almost empty, at least for typical molecules.
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so it is replaced with this string.
        Useful downstream if residue names must be unique.
    strictStereo : bool, optional, default=False
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html

    Raises
    ------
    ImportError
        If the OEChem license check fails.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for oechem!")

    # Resolve the destinations now: the work below happens inside a
    # temporary directory, where relative paths would point elsewhere.
    final_mol2_path = os.path.abspath(gaff_mol2_filename)
    final_frcmod_path = os.path.abspath(frcmod_filename)

    charged = get_charges(
        smiles_to_oemol(smiles_string), strictStereo=strictStereo, keep_confs=1
    )

    # Avoid dumping 50 antechamber files in local directory.
    with enter_temp_directory():
        molecule_to_mol2(charged, "./tmp.mol2", residue_name=residue_name)
        total_charge = oechem.OENetCharge(charged)
        # charge_method=None: USE OE AM1BCC charges!
        antechamber_outputs = run_antechamber(
            "tmp", "./tmp.mol2", charge_method=None, net_charge=total_charge
        )
        tmp_mol2_path, tmp_frcmod_path = antechamber_outputs
        shutil.copy(tmp_mol2_path, final_mol2_path)
        shutil.copy(tmp_frcmod_path, final_frcmod_path)
def create_ffxml_file(gaff_mol2_filenames, frcmod_filenames, ffxml_filename=None, override_mol2_residue_name=None):
    """Process multiple gaff mol2 files and frcmod files using the XML
    conversion and optionally write the result to an XML file.

    Parameters
    ----------
    gaff_mol2_filenames : list of str
        The names of the gaff mol2 files.
    frcmod_filenames : list of str
        The names of the gaff frcmod files.
    ffxml_filename : str, optional, default=None
        Optional name of output ffxml file to generate. If None, no file
        will be generated.
    override_mol2_residue_name : str, default=None
        If given, use this name to override mol2 residue names.

    Returns
    -------
    ffxml_stream : file-like
        Stream holding the generated ffxml, containing residue entries for
        each molecule. When a file is written, the stream is rewound to the
        start before being returned.
    """
    # Generate ffxml file.
    parser = amber_parser.AmberParser(
        override_mol2_residue_name=override_mol2_residue_name
    )

    amber = import_("openmoltools.amber")
    GAFF_DAT_FILENAME = amber.find_gaff_dat()

    # Parse order: the GAFF data file, then every mol2, then every frcmod.
    filenames = [GAFF_DAT_FILENAME]
    filenames.extend(gaff_mol2_filenames)
    filenames.extend(frcmod_filenames)

    parser.parse_filenames(filenames)

    ffxml_stream = parser.generate_xml()

    if ffxml_filename is not None:
        # The context manager closes the file even if the write fails.
        with open(ffxml_filename, 'w') as outfile:
            outfile.write(ffxml_stream.read())
        # Reading consumed the stream; rewind so the caller can read it too.
        ffxml_stream.seek(0)

    return ffxml_stream
def create_ffxml_file(
    gaff_mol2_filenames,
    frcmod_filenames,
    ffxml_filename=None,
    override_mol2_residue_name=None,
):
    """Convert GAFF mol2 and frcmod files to ffxml, optionally writing it to disk.

    The GAFF parameter file located by ``openmoltools.amber.find_gaff_dat`` is
    parsed first, followed by every mol2 file and then every frcmod file.

    Parameters
    ----------
    gaff_mol2_filenames : list of str
        The names of the gaff mol2 files
    frcmod_filenames : list of str
        The names of the gaff frcmod files
    ffxml_filename : str, optional, default=None
        Optional name of output ffxml file to generate.  If None, no file
        will be generated.
    override_mol2_residue_name : str, default=None
        If given, use this name to override mol2 residue names.

    Returns
    -------
    ffxml_stream : file-like
        The stream returned by ``AmberParser.generate_xml()`` containing
        residue entries for each molecule.  When ``ffxml_filename`` is given
        the stream is read to write the file and then rewound to the start.
    """
    parser = amber_parser.AmberParser(
        override_mol2_residue_name=override_mol2_residue_name
    )

    amber = import_("openmoltools.amber")
    gaff_dat_filename = amber.find_gaff_dat()

    # Parse order: GAFF parameters, then mol2 files, then frcmod files.
    filenames = [gaff_dat_filename]
    filenames.extend(gaff_mol2_filenames)
    filenames.extend(frcmod_filenames)
    parser.parse_filenames(filenames)

    ffxml_stream = parser.generate_xml()

    if ffxml_filename is not None:
        # The context manager closes the file even if the write fails.
        with open(ffxml_filename, "w") as outfile:
            outfile.write(ffxml_stream.read())
        # Rewind so the caller can read the stream from the beginning.
        ffxml_stream.seek(0)

    return ffxml_stream
def create_ffxml_simulation(molecule_name, gaff_mol2_filename, frcmod_filename):
    """Build an OpenMM simulation of a molecule from its GAFF mol2 and frcmod files.

    An ffxml force field is generated with ``create_ffxml_file`` and written
    to ``<molecule_name>.ffxml`` in the current working directory; the system
    is then created from that file with no cutoff, no constraints and no
    implicit solvent.

    Parameters
    ----------
    molecule_name : str
        The name of the molecule; also the stem of the ffxml file written.
    gaff_mol2_filename : str
        The name of the gaff mol2 file
    frcmod_filename : str
        The name of the gaff frcmod file

    Returns
    -------
    simulation : openmm.app.Simulation
        A simulation on the "Reference" platform with positions taken from
        the first frame of the mol2 file.
    """
    # create_ffxml_file locates gaff.dat and parses all inputs itself, so no
    # separate parser pass is needed here.
    ffxml_filename = molecule_name + '.ffxml'
    create_ffxml_file([gaff_mol2_filename], [frcmod_filename], ffxml_filename)

    traj = md.load(gaff_mol2_filename)  # Read mol2 file.
    positions = traj.openmm_positions(0)  # Positions of the first (and only) frame
    topology = traj.top.to_openmm()

    # Create System object.
    forcefield = app.ForceField(ffxml_filename)
    system = forcefield.createSystem(
        topology,
        nonbondedMethod=app.NoCutoff,
        constraints=None,
        implicitSolvent=None,
    )

    # Create integrator.
    timestep = 1.0 * units.femtoseconds
    integrator = simtk.openmm.VerletIntegrator(timestep)

    # Create simulation.
    platform = simtk.openmm.Platform.getPlatformByName("Reference")
    simulation = app.Simulation(topology, system, integrator, platform=platform)
    simulation.context.setPositions(positions)

    return simulation
def get_mol2_string_from_OEMol(molecule, conformer=0, standardize=True):
    """Serialize conformer(s) of an OpenEye molecule to an in-memory MOL2 string.

    Parameters
    ----------
    molecule : OEMol
        Molecule whose conformers (``molecule.GetConfs()``) are written.
    conformer : int or None, optional, default=0
        Index of the conformer to write.  If None, every conformer is written.
    standardize : bool, optional, default=True
        If True, write with ``OEWriteMolecule``, which will override molecular
        properties (atom names etc.).  If False, write with
        ``OEWriteMol2File`` instead.

    Returns
    -------
    molfile
        Contents of the string stream (``ofs.GetString()``) in MOL2H format.

    Raises
    ------
    ImportError
        If OEChem is not licensed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")

    # Write into an in-memory string stream rather than a file.
    ofs = oechem.oemolostream()
    ofs.SetFormat(oechem.OEFormat_MOL2H)
    ofs.openstring()

    write = oechem.OEWriteMolecule if standardize else oechem.OEWriteMol2File
    for k, mol in enumerate(molecule.GetConfs()):
        if conformer is None or k == conformer:
            write(ofs, mol)

    return ofs.GetString()
def convert_via_acpype(molecule_name, in_prmtop, in_crd, out_top=None, out_gro=None, debug=False, is_sorted=False):
    """Use acpype.py (Sousa Da Silva et al., BMC Research Notes 5:367 (2012)) to convert AMBER prmtop and crd files to GROMACS format using amb2gmx mode. Writes to GROMACS 4.5 (and later) format, rather than the format for earlier GROMACS versions.

    Parameters
    ----------
    molecule_name : str
        String specifying name of molecule
    in_prmtop : str
        String specifying path to AMBER-format parameter/topology (parmtop) file
    in_crd : str
        String specifying path to AMBER-format coordinate file
    out_top : str, optional, default = None
        String specifying path to GROMACS-format topology file which will be written out. If none is provided, created based on molecule_name.
    out_gro : str, optional, default = None
        String specifying path to GROMACS-format coordinate (.gro) file which will be written out. If none is provided, created based on molecule_name.
    debug : bool, optional, default = False
        Print debug info? If not specified, do not.
    is_sorted : bool, optional, default = False
        Sort resulting topology file

    Returns
    -------
    out_top : str
        GROMACS topology file produced by acpype
    out_gro : str
        GROMACS coordinate file produced by acpype

    Raises
    ------
    ValueError
        If either output file is empty after conversion.

    Notes
    -----
    Deprecated. Please use ParmEd (especially amber_to_gromacs) instead.
    """
    print("WARNING: Use of acpype for conversion is deprecated. ParmEd is preferred; please use amber_to_gromacs instead.")

    acpype = import_('openmoltools.acpype')

    # Create output file names if needed
    if out_top is None:
        out_top = "%s.top" % molecule_name
    if out_gro is None:
        out_gro = "%s.gro" % molecule_name

    # Create temporary output dir for acpype output; removed again below.
    outdir = tempfile.mkdtemp()
    try:
        # Define basename for output
        basename = os.path.join(outdir, 'output')

        # Set up acpype
        system = acpype.MolTopol(acFileXyz=in_crd, acFileTop=in_prmtop, basename=basename, is_sorted=is_sorted, gmx45=True, disam=True)

        # Print debug info if desired
        if debug:
            print(system.printDebug('prmtop and inpcrd files parsed'))

        # Write results
        system.writeGromacsTopolFiles(amb2gmx=True)

        # Acpype names various things in the topology and coordinate file
        # after the base name of the file used as input. Replace these names
        # with an at-least-legible string while writing to desired output.
        for suffix, destination in (("_GMX.top", out_top), ("_GMX.gro", out_gro)):
            with open(basename + suffix, 'r') as source, open(destination, 'w') as target:
                for line in source:
                    target.write(line.replace(basename, molecule_name))
    finally:
        shutil.rmtree(outdir, ignore_errors=True)

    # Both outputs must be non-empty; compare the file sizes, not the stat
    # result objects themselves.
    if os.stat(out_top).st_size == 0 or os.stat(out_gro).st_size == 0:
        raise ValueError("ACPYPE conversion failed.")

    return out_top, out_gro
def find_gaff_dat():
    """Backward-compatibility shim for ``openmoltools.amber.find_gaff_dat``.

    Emits a warning that the function has moved, then returns whatever the
    relocated function returns.
    """
    warnings.warn("find_gaff_dat has been moved to openmoltools.amber.")
    return import_("openmoltools.amber").find_gaff_dat()
def molecule_to_mol2(
    molecule,
    tripos_mol2_filename=None,
    conformer=0,
    residue_name="MOL",
    standardize=True,
):
    """Convert OE molecule to tripos mol2 file.

    Parameters
    ----------
    molecule : openeye.oechem.OEGraphMol
        The molecule to be converted.
    tripos_mol2_filename : str, optional, default=None
        Output filename.  If None, will create a filename similar to
        name.tripos.mol2, where name is the name of the OE molecule.
    conformer : int, optional, default=0
        Save this frame
    residue_name : str, optional, default="MOL"
        OpenEye writes mol2 files with <0> as the residue / ligand name.
        This chokes many mol2 parsers, so we replace it with a string of
        your choosing.
    standardize: bool, optional, default=True
        Use a high-level writer, which will standardize the molecular
        properties.  Set this to false if you wish to retain things such as
        atom names.  In this case, a low-level writer will be used.

    Returns
    -------
    molecule_name : str
        Title of the molecule (``molecule.GetTitle()``).
    tripos_mol2_filename : str
        Filename of output tripos mol2 file

    Raises
    ------
    ImportError
        If OEChem is not licensed.
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for oechem!")

    # Get molecule name.
    molecule_name = molecule.GetTitle()
    logger.debug(molecule_name)

    # Write molecule as Tripos mol2.
    if tripos_mol2_filename is None:
        tripos_mol2_filename = molecule_name + ".tripos.mol2"

    # Standardizing writer overrides molecular properties (atom names etc.).
    write = oechem.OEWriteMolecule if standardize else oechem.OEWriteMol2File

    ofs = oechem.oemolostream(tripos_mol2_filename)
    try:
        ofs.SetFormat(oechem.OEFormat_MOL2H)
        for k, mol in enumerate(molecule.GetConfs()):
            if k == conformer:
                write(ofs, mol)
                break  # indices are unique, nothing further can match
    finally:
        ofs.close()

    # Replace <0> substructure names with valid text.
    with open(tripos_mol2_filename, "r") as infile:
        lines = infile.readlines()
    with open(tripos_mol2_filename, "w") as outfile:
        outfile.writelines(line.replace("<0>", residue_name) for line in lines)

    return molecule_name, tripos_mol2_filename
def run_antechamber(molecule_name, input_filename, charge_method="bcc", net_charge=None, gaff_mol2_filename=None, frcmod_filename=None):
    """Produce GAFF mol2 and frcmod files by running antechamber and parmchk2.

    Parameters
    ----------
    molecule_name : str
        Name of the molecule to be parameterized; used to derive default
        output filenames.
    input_filename : str
        The molecule to be parameterized.  Its extension must be ``mol2``.
    charge_method : str, optional
        If not None, passed to antechamber via ``-c``.
    net_charge : int, optional
        If not None, passed to antechamber via ``-nc``.  If None, antechamber
        sums up partial charges from the input file.
    gaff_mol2_filename : str, optional, default=None
        Name of GAFF mol2 filename to output.  If None, uses local directory
        and molecule_name
    frcmod_filename : str, optional, default=None
        Name of GAFF frcmod filename to output.  If None, uses local directory
        and molecule_name

    Returns
    -------
    gaff_mol2_filename : str
        Absolute path of the GAFF format mol2 file produced by antechamber
    frcmod_filename : str
        Absolute path of the frcmod file produced by parmchk2

    Raises
    ------
    ValueError
        If the input filename does not have a mol2 extension.
    """
    utils = import_("openmoltools.utils")
    extension = utils.parse_ligand_filename(input_filename)[1]
    if extension[1:] != "mol2":
        raise ValueError("Must input mol2 filename")

    if gaff_mol2_filename is None:
        gaff_mol2_filename = molecule_name + '.gaff.mol2'
    if frcmod_filename is None:
        frcmod_filename = molecule_name + '.frcmod'

    # Absolute paths stay valid after switching to the temporary directory.
    gaff_mol2_filename = os.path.abspath(gaff_mol2_filename)
    frcmod_filename = os.path.abspath(frcmod_filename)
    input_filename = os.path.abspath(input_filename)

    # Run in a temporary directory with fixed local names to avoid issues
    # with spaces in filenames, etc.
    with mdtraj.utils.enter_temp_directory():
        shutil.copy(input_filename, 'in.mol2')

        pieces = ["antechamber -i in.mol2 -fi mol2 -o out.mol2 -fo mol2 -s 2"]
        if charge_method is not None:
            pieces.append(' -c %s' % charge_method)
        if net_charge is not None:
            pieces.append(' -nc %d' % net_charge)
        antechamber_cmd = "".join(pieces)

        logger.debug(antechamber_cmd)
        antechamber_log = getoutput(antechamber_cmd)
        logger.debug(antechamber_log)

        parmchk_cmd = "parmchk2 -i out.mol2 -f mol2 -o out.frcmod"
        logger.debug(parmchk_cmd)
        parmchk_log = getoutput(parmchk_cmd)
        logger.debug(parmchk_log)

        # Only the parmchk2 output is inspected for errors.
        check_for_errors(parmchk_log)

        # Copy results back before the temporary directory goes away.
        shutil.copy('out.mol2', gaff_mol2_filename)
        shutil.copy('out.frcmod', frcmod_filename)

    return gaff_mol2_filename, frcmod_filename
def generate_conformers(
    molecule,
    max_confs=800,
    strictStereo=True,
    ewindow=15.0,
    rms_threshold=1.0,
    strictTypes=True,
):
    """Generate conformations for the supplied molecule

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers
    max_confs : int, optional, default=800
        Max number of conformers to generate.  If None, use default OE Value.
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
    ewindow : float, optional, default=15.0
        Value passed to ``omega.SetEnergyWindow``.
    rms_threshold : float, optional, default=1.0
        Value passed to ``omega.SetRMSThreshold``.
    strictTypes : bool, optional, default=True
        If True, requires that Omega have exact MMFF types for atoms in
        molecule; otherwise, allows the closest atom type of the same element
        to be used.

    Returns
    -------
    molcopy : OEMol
        A multi-conformer molecule with up to max_confs conformers.

    Raises
    ------
    ImportError
        If OEChem or OEOmega is not licensed.
    RuntimeError
        If Omega reports failure.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")
    oeomega = import_("openeye.oeomega")
    if not oeomega.OEOmegaIsLicensed():
        raise ImportError("Need License for OEOmega!")

    # Work on a copy so the caller's molecule is left untouched.
    molcopy = oechem.OEMol(molecule)
    omega = oeomega.OEOmega()

    # These parameters were chosen to match
    # http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    # Only override Omega's own limit when one was requested, so that
    # max_confs=None really falls back to the OE default.
    if max_confs is not None:
        omega.SetMaxConfs(max_confs)
    omega.SetIncludeInput(False)  # don't include input
    omega.SetCanonOrder(False)
    # Word to the wise: skipping this step can lead to significantly
    # different charges!
    omega.SetSampleHydrogens(True)
    omega.SetEnergyWindow(ewindow)
    # Word to the wise: skipping this step can lead to significantly
    # different charges!
    omega.SetRMSThreshold(rms_threshold)
    omega.SetStrictStereo(strictStereo)
    omega.SetStrictAtomTypes(strictTypes)

    status = omega(molcopy)  # generate conformation
    if not status:
        raise RuntimeError("omega returned error code %d" % status)

    return molcopy
def run_antechamber(molecule_name, input_filename, charge_method="bcc", net_charge=None, gaff_mol2_filename=None, frcmod_filename=None, input_format='mol2', resname=False, log_debug_output=False):
    """Run AmberTools antechamber and parmchk2 to create GAFF mol2 and frcmod files.

    Parameters
    ----------
    molecule_name : str
        Name of the molecule to be parameterized, will be used in output filenames.
    input_filename : str
        The molecule to be parameterized, in the format named by ``input_format``.
    charge_method : str, optional
        If not None, the charge method string will be passed to Antechamber.
    net_charge : int, optional
        If not None, net charge of the molecule to be parameterized.
        If None, Antechamber sums up partial charges from the input file.
    gaff_mol2_filename : str, optional, default=None
        Name of GAFF mol2 filename to output.  If None, uses local directory
        and molecule_name
    frcmod_filename : str, optional, default=None
        Name of GAFF frcmod filename to output.  If None, uses local directory
        and molecule_name
    input_format : str, optional, default='mol2'
        Format specifier for input file to pass to antechamber.
    resname : bool, optional, default=False
        Set the residue name used within output files to molecule_name
    log_debug_output : bool, optional, default=False
        If true, will send the commands and the output of antechamber and
        parmchk2 to the logger at debug level.

    Returns
    -------
    gaff_mol2_filename : str
        GAFF format mol2 filename produced by antechamber
    frcmod_filename : str
        Amber frcmod file produced by parmchk2

    Raises
    ------
    Exception
        If antechamber does not produce ``out.mol2`` or parmchk2 does not
        produce ``out.frcmod``; the message carries the command, its output
        and the contents of the file that was fed to the failing tool.
    """
    if gaff_mol2_filename is None:
        gaff_mol2_filename = molecule_name + '.gaff.mol2'
    if frcmod_filename is None:
        frcmod_filename = molecule_name + '.frcmod'

    # Build absolute paths for input and output files
    gaff_mol2_filename = os.path.abspath(gaff_mol2_filename)
    frcmod_filename = os.path.abspath(frcmod_filename)
    input_filename = os.path.abspath(input_filename)

    def read_file_contents(filename):
        # Context manager closes the handle even if the read fails.
        with open(filename, 'r') as infile:
            return infile.read()

    def failure_message(summary, cmd, output, tool_input_filename):
        # Shared diagnostic layout for both tools.
        rule = 8 * "----------" + '\n'
        msg = summary + "\n"
        msg += "command: %s\n" % cmd
        msg += "output:\n"
        msg += rule
        msg += output
        msg += rule
        msg += "input mol2:\n"
        msg += rule
        msg += read_file_contents(tool_input_filename)
        msg += rule
        return msg

    # Use temporary directory context to do this to avoid issues with spaces
    # in filenames, etc.
    with mdtraj.utils.enter_temp_directory():
        local_input_filename = 'in.' + input_format
        shutil.copy(input_filename, local_input_filename)

        # Run antechamber.
        cmd = "antechamber -i %s -fi %s -o out.mol2 -fo mol2 -s 2" % (local_input_filename, input_format)
        if charge_method is not None:
            cmd += ' -c %s' % charge_method
        if net_charge is not None:
            cmd += ' -nc %d' % net_charge
        if resname:
            cmd += ' -rn %s' % molecule_name

        if log_debug_output:
            logger.debug(cmd)
        output = getoutput(cmd)
        if not os.path.exists('out.mol2'):
            raise Exception(failure_message("antechamber failed to produce output mol2 file", cmd, output, local_input_filename))
        if log_debug_output:
            logger.debug(output)

        # Run parmchk.
        cmd = "parmchk2 -i out.mol2 -f mol2 -o out.frcmod"
        if log_debug_output:
            logger.debug(cmd)
        output = getoutput(cmd)
        if not os.path.exists('out.frcmod'):
            raise Exception(failure_message("parmchk2 failed to produce output frcmod file", cmd, output, 'out.mol2'))
        if log_debug_output:
            logger.debug(output)
        check_for_errors(output)

        # Copy back
        shutil.copy('out.mol2', gaff_mol2_filename)
        shutil.copy('out.frcmod', frcmod_filename)

    return gaff_mol2_filename, frcmod_filename
def run_antechamber(molecule_name, input_filename, charge_method="bcc", net_charge=None, gaff_mol2_filename=None, frcmod_filename=None):
    """Produce GAFF mol2 and frcmod files by running antechamber and parmchk2.

    Parameters
    ----------
    molecule_name : str
        Name of the molecule to be parameterized; used to derive default
        output filenames.
    input_filename : str
        The molecule to be parameterized.  Its extension must be ``mol2``.
    charge_method : str, optional
        If not None, passed to antechamber via ``-c``.
    net_charge : int, optional
        If not None, passed to antechamber via ``-nc``.  If None, antechamber
        sums up partial charges from the input file.
    gaff_mol2_filename : str, optional, default=None
        Name of GAFF mol2 filename to output.  If None, uses local directory
        and molecule_name
    frcmod_filename : str, optional, default=None
        Name of GAFF frcmod filename to output.  If None, uses local directory
        and molecule_name

    Returns
    -------
    gaff_mol2_filename : str
        Absolute path of the GAFF format mol2 file produced by antechamber
    frcmod_filename : str
        Absolute path of the frcmod file produced by parmchk2

    Raises
    ------
    ValueError
        If the input filename does not have a mol2 extension.
    """
    utils = import_("openmoltools.utils")
    extension = utils.parse_ligand_filename(input_filename)[1]
    if extension[1:] != "mol2":
        raise ValueError("Must input mol2 filename")

    if gaff_mol2_filename is None:
        gaff_mol2_filename = molecule_name + '.gaff.mol2'
    if frcmod_filename is None:
        frcmod_filename = molecule_name + '.frcmod'

    # Absolute paths stay valid after switching to the temporary directory.
    gaff_mol2_filename = os.path.abspath(gaff_mol2_filename)
    frcmod_filename = os.path.abspath(frcmod_filename)
    input_filename = os.path.abspath(input_filename)

    # Run in a temporary directory with fixed local names to avoid issues
    # with spaces in filenames, etc.
    with mdtraj.utils.enter_temp_directory():
        shutil.copy(input_filename, 'in.mol2')

        pieces = ["antechamber -i in.mol2 -fi mol2 -o out.mol2 -fo mol2 -s 2"]
        if charge_method is not None:
            pieces.append(' -c %s' % charge_method)
        if net_charge is not None:
            pieces.append(' -nc %d' % net_charge)
        antechamber_cmd = "".join(pieces)

        logger.debug(antechamber_cmd)
        antechamber_log = getoutput(antechamber_cmd)
        logger.debug(antechamber_log)

        parmchk_cmd = "parmchk2 -i out.mol2 -f mol2 -o out.frcmod"
        logger.debug(parmchk_cmd)
        parmchk_log = getoutput(parmchk_cmd)
        logger.debug(parmchk_log)

        # Only the parmchk2 output is inspected for errors.
        check_for_errors(parmchk_log)

        # Copy results back before the temporary directory goes away.
        shutil.copy('out.mol2', gaff_mol2_filename)
        shutil.copy('out.frcmod', frcmod_filename)

    return gaff_mol2_filename, frcmod_filename
def get_checkmol_descriptors(molecule_filename, executable_name='checkmol'):
    """For a specified molecule file, return a list of functional groups as assigned by checkmol for the molecule(s) present. The first entry in the list will correspond to the groups in the first molecule, the second gives groups in the second (if present) and so on. Raises an exception if checkmol is not found.

    Parameters
    ----------
    molecule_filename : str
        Specifies name of file to read
    executable_name : str, default = 'checkmol'
        Specify name (or full path) of executable for checkmol

    Returns
    -------
    descriptors : list (of lists of strings)
        Checkmol functional group assignments for each molecule(s) in the input file, where descriptors[0] gives the descriptors for the first molecule, etc.

    Raises
    ------
    ValueError
        If the checkmol executable cannot be found, or if no descriptors were collected.

    Notes
    -----
    This should properly handle single-molecule and multiple-molecule files; however, multiple-conformer files may result in each conformer appearing (rather than each molecule) appearing in the list of descriptors, which may or may not be the expected behavior.
    """
    oechem = import_("openeye.oechem")

    if find_executable(executable_name) is None:
        raise ValueError(
            "Cannot find checkmol; cannot assign checkmol descriptors without it."
        )

    # Open input file
    ifs = oechem.oemolistream(molecule_filename)
    # Input molecule
    mol = oechem.OEGraphMol()

    # Set up temporary file for molecule output. mkstemp creates the file
    # atomically (no name race); only the path is needed, so the descriptor
    # is closed right away.
    handle, fname = tempfile.mkstemp(suffix='.sdf')
    os.close(handle)

    # Storage for descriptors
    descriptors = []
    try:
        # Read/write/run checkmol
        while oechem.OEReadMolecule(ifs, mol):
            # Dump molecule out
            ofs = oechem.oemolostream(fname)
            oechem.OEWriteMolecule(ofs, mol)
            ofs.close()
            # Run checkmol and split its output into separate groups
            groups = getoutput('%s %s' % (executable_name, fname))
            descriptors.append(groups.split('\n'))
    finally:
        # Delete temporary file on every exit path.
        ifs.close()
        os.remove(fname)

    # Raise an exception if the whole list is empty.
    # NOTE(review): str.split always yields at least one element, so in
    # practice this only fires when no molecule could be read at all.
    if not any(len(elem) > 0 for elem in descriptors):
        raise ValueError(
            "checkmol only produced empty descriptors for your molecule. "
            "Something is wrong; please check your input file and checkmol installation."
        )

    return descriptors
def convert_via_acpype(molecule_name,
                       in_prmtop,
                       in_crd,
                       out_top=None,
                       out_gro=None,
                       debug=False,
                       is_sorted=False):
    """Use acpype.py (Sousa Da Silva et al., BMC Research Notes 5:367 (2012)) to convert AMBER prmtop and crd files to GROMACS format using amb2gmx mode. Writes to GROMACS 4.5 (and later) format, rather than the format for earlier GROMACS versions.

    Parameters
    ----------
    molecule_name : str
        String specifying name of molecule
    in_prmtop : str
        String specifying path to AMBER-format parameter/topology (parmtop) file
    in_crd : str
        String specifying path to AMBER-format coordinate file
    out_top : str, optional, default = None
        String specifying path to GROMACS-format topology file which will be written out. If none is provided, created based on molecule_name.
    out_gro : str, optional, default = None
        String specifying path to GROMACS-format coordinate (.gro) file which will be written out. If none is provided, created based on molecule_name.
    debug : bool, optional, default = False
        Print debug info? If not specified, do not.
    is_sorted : bool, optional, default = False
        Sort resulting topology file

    Returns
    -------
    out_top : str
        GROMACS topology file produced by acpype
    out_gro : str
        GROMACS coordinate file produced by acpype

    Raises
    ------
    ValueError
        If either output file is empty after conversion.

    Notes
    -----
    Deprecated. Please use ParmEd (especially amber_to_gromacs) instead.
    """
    print(
        "WARNING: Use of acpype for conversion is deprecated. ParmEd is preferred; please use amber_to_gromacs instead."
    )

    acpype = import_('openmoltools.acpype')

    # Create output file names if needed
    if out_top is None:
        out_top = "%s.top" % molecule_name
    if out_gro is None:
        out_gro = "%s.gro" % molecule_name

    # Create temporary output dir for acpype output; removed again below.
    outdir = tempfile.mkdtemp()
    try:
        # Define basename for output
        basename = os.path.join(outdir, 'output')

        # Set up acpype
        system = acpype.MolTopol(acFileXyz=in_crd,
                                 acFileTop=in_prmtop,
                                 basename=basename,
                                 is_sorted=is_sorted,
                                 gmx45=True,
                                 disam=True)

        # Print debug info if desired
        if debug:
            print(system.printDebug('prmtop and inpcrd files parsed'))

        # Write results
        system.writeGromacsTopolFiles(amb2gmx=True)

        # Acpype names various things in the topology and coordinate file
        # after the base name of the file used as input. Replace these names
        # with an at-least-legible string while writing to desired output.
        for suffix, destination in (("_GMX.top", out_top), ("_GMX.gro", out_gro)):
            with open(basename + suffix, 'r') as source, open(destination, 'w') as target:
                for line in source:
                    target.write(line.replace(basename, molecule_name))
    finally:
        shutil.rmtree(outdir, ignore_errors=True)

    # Both outputs must be non-empty; compare the file sizes, not the stat
    # result objects themselves.
    if os.stat(out_top).st_size == 0 or os.stat(out_gro).st_size == 0:
        raise ValueError("ACPYPE conversion failed.")

    return out_top, out_gro
def run_tleap(*args, **kwargs):
    """Backward-compatibility shim for ``openmoltools.amber.run_tleap``.

    Emits a warning that the function has moved, then forwards all positional
    and keyword arguments unchanged and returns the relocated function's
    result.
    """
    warnings.warn("run_tleap has been moved to openmoltools.amber.")
    return import_("openmoltools.amber").run_tleap(*args, **kwargs)
def find_gaff_dat():
    """Backward-compatibility shim for ``openmoltools.amber.find_gaff_dat``.

    Prints a notice that the function has moved, then returns whatever the
    relocated function returns.
    """
    print("Warning: find_gaff_dat has been moved to openmoltools.amber.")
    return import_("openmoltools.amber").find_gaff_dat()
def get_checkmol_descriptors(molecule_filename, executable_name='checkmol'):
    """For a specified molecule file, return a list of functional groups as assigned by checkmol for the molecule(s) present. The first entry in the list will correspond to the groups in the first molecule, the second gives groups in the second (if present) and so on. Raises an exception if checkmol is not found.

    Parameters
    ----------
    molecule_filename : str
        Specifies name of file to read
    executable_name : str, default = 'checkmol'
        Specify name (or full path) of executable for checkmol

    Returns
    -------
    descriptors : list (of lists of strings)
        Checkmol functional group assignments for each molecule(s) in the input file, where descriptors[0] gives the descriptors for the first molecule, etc.

    Raises
    ------
    ValueError
        If the checkmol executable cannot be found, or if no descriptors were collected.

    Notes
    -----
    This should properly handle single-molecule and multiple-molecule files; however, multiple-conformer files may result in each conformer appearing (rather than each molecule) appearing in the list of descriptors, which may or may not be the expected behavior.
    """
    oechem = import_("openeye.oechem")

    if find_executable(executable_name) is None:
        raise ValueError("Cannot find checkmol; cannot assign checkmol descriptors without it.")

    # Open input file
    ifs = oechem.oemolistream(molecule_filename)
    # Input molecule
    mol = oechem.OEGraphMol()

    # Set up temporary file for molecule output. mkstemp creates the file
    # atomically (no name race); only the path is needed, so the descriptor
    # is closed right away.
    handle, fname = tempfile.mkstemp(suffix='.sdf')
    os.close(handle)

    # Storage for descriptors
    descriptors = []
    try:
        # Read/write/run checkmol
        while oechem.OEReadMolecule(ifs, mol):
            # Dump molecule out
            ofs = oechem.oemolostream(fname)
            oechem.OEWriteMolecule(ofs, mol)
            ofs.close()
            # Run checkmol and split its output into separate groups
            groups = getoutput('%s %s' % (executable_name, fname))
            descriptors.append(groups.split('\n'))
    finally:
        # Delete temporary file on every exit path.
        ifs.close()
        os.remove(fname)

    # Raise an exception if the whole list is empty.
    # NOTE(review): str.split always yields at least one element, so in
    # practice this only fires when no molecule could be read at all.
    if not any(len(elem) > 0 for elem in descriptors):
        raise ValueError("checkmol only produced empty descriptors for your molecule. Something is wrong; please check your input file and checkmol installation.")

    return descriptors
def run_tleap(*args, **kwargs):
    """Backward-compatibility shim for ``openmoltools.amber.run_tleap``.

    Prints a notice that the function has moved, then forwards all positional
    and keyword arguments unchanged and returns the relocated function's
    result.
    """
    print("Warning: run_tleap has been moved to openmoltools.amber.")
    return import_("openmoltools.amber").run_tleap(*args, **kwargs)
def get_charges(
    molecule,
    max_confs=800,
    strictStereo=True,
    normalize=True,
    keep_confs=None,
    legacy=True,
    assign_formal_charges: bool = False,
):
    """Generate charges for an OpenEye OEMol molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers.
        Omega will be used to generate max_confs conformations.
    max_confs : int, optional, default=800
        Max number of conformers to generate
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    keep_confs : int, optional, default=None
        If None, apply the charges to the provided conformation and return
        this conformation, unless no conformation is present.
        Otherwise, return some or all of the generated conformations.
        If -1, all generated conformations are returned.
        Otherwise, keep_confs = N will return an OEMol with up to N
        generated conformations.
        Multiple conformations are still used to *determine* the charges.
    legacy : bool, default=True
        If False, uses the new OpenEye charging engine.
        See https://docs.eyesopen.com/toolkits/python/quacpactk/OEProtonFunctions/OEAssignCharges.html#
    assign_formal_charges : bool, default=False
        (Re)assign formal charges for atoms in the molecule.

    Returns
    -------
    charged_copy : OEMol
        A molecule with OpenEye's recommended AM1BCC charge selection scheme.

    Raises
    ------
    ImportError
        If OEChem or OEQuacPac is not licensed.
    RuntimeError
        If the charge assignment reports failure.
    ValueError
        If ``keep_confs`` is not None, a positive integer, or -1.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    """
    # If there is no geometry, return at least one conformation.  GetConfs()
    # returns an iterator, so the conformer count has to be queried instead.
    if molecule.NumConfs() == 0:
        keep_confs = 1

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")
    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise ImportError("Need License for oequacpac!")

    # Either way we continue with a molecule distinct from the caller's.
    if normalize:
        molecule = normalize_molecule(molecule)
    else:
        molecule = oechem.OEMol(molecule)

    if assign_formal_charges:
        # modifies molecule in place
        oechem.OEAssignFormalCharges(molecule)

    # Generate up to max_confs conformers
    charged_copy = generate_conformers(
        molecule, max_confs=max_confs, strictStereo=strictStereo
    )

    if not legacy:
        # 2017.2.1 OEToolkits new charging function
        status = oequacpac.OEAssignCharges(charged_copy, oequacpac.OEAM1BCCCharges())
        if not status:
            raise RuntimeError("OEAssignCharges failed.")
    else:
        # AM1BCCSym recommended by Chris Bayly to KAB+JDC, Oct. 20 2014.
        status = oequacpac.OEAssignPartialCharges(
            charged_copy, oequacpac.OECharges_AM1BCCSym
        )
        if not status:
            raise RuntimeError(
                "OEAssignPartialCharges returned error code %d" % status
            )

    # Determine conformations to return
    if keep_confs is None:
        # If returning original conformation
        original = molecule.GetCoords()
        # Delete conformers over 1
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > 0:
                charged_copy.DeleteConf(conf)
        # Copy coordinates to single conformer
        charged_copy.SetCoords(original)
    elif keep_confs > 0:
        logger.debug(
            "keep_confs was set to %s. Molecule positions will be reset."
            % keep_confs
        )
        # Otherwise if a number is provided, return this many confs if available
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > keep_confs - 1:
                charged_copy.DeleteConf(conf)
    elif keep_confs == -1:
        # If we want all conformations, continue
        pass
    else:
        # Not a valid option to keep_confs
        raise ValueError("Not a valid option to keep_confs in get_charges.")

    return charged_copy
def build_mixture_prmtop(
    mol2_filenames,
    frcmod_filenames,
    box_filename,
    prmtop_filename,
    inpcrd_filename,
    water_model="TIP3P",
):
    """Create a prmtop and inpcrd from a collection of mol2 and frcmod files
    as well as a single box PDB. We have used this for setting up
    simulations of neat liquids or binary mixtures.

    Parameters
    ----------
    mol2_filenames : list(str)
        Filenames of GAFF flavored mol2 files. Each must contain exactly
        ONE ligand.
    frcmod_filenames : list(str)
        Filenames of input GAFF frcmod files, one per mol2 file and in the
        same order.
    box_filename : str
        Filename of PDB containing an arbitrary box of the mol2 molecules.
    prmtop_filename : str
        output prmtop filename. Should have suffix .prmtop
    inpcrd_filename : str
        output inpcrd filename. Should have suffix .inpcrd
    water_model : str, optional. Default: "TIP3P"
        String specifying water model to be used IF water is present as a
        component of the mixture. Valid options are currently "TIP3P", "SPC",
        or None. If None is specified, flexible GAFF-water will be used as
        for any other solute (old behavior).

    Returns
    -------
    tleap_commands : str
        The string of commands piped to tleap for building the prmtop
        and inpcrd files. This will *already* have been run, but the
        output can be useful for debugging or archival purposes. However,
        this will reflect temporary file names for both input and output
        file as these are used to avoid tleap filename restrictions.

    Raises
    ------
    ValueError
        If a mol2 file holds more than one residue name, if residue names
        are not unique across the mol2 files, if the numbers of mol2 and
        frcmod files differ, or if water_model is not a valid option.

    Notes
    -----
    This can be easily broken if there are missing, duplicated, or
    inconsistent ligand residue names in your box, mol2, and frcmod files.
    You can use mdtraj to edit the residue names with something like
    this: trj.top.residue(0).name = "L1"

    When a non-None water model is requested and water is detected,
    ``packmol.rename_water_atoms`` is called on ``box_filename`` itself.
    """
    # Check for one residue name per mol2 file and uniqueness between all mol2 files
    all_names = set()
    for filename in mol2_filenames:
        traj = md.load(filename)
        names = {residue.name for residue in traj.top.residues}
        if len(names) != 1:
            raise ValueError("Must have a SINGLE residue name in each mol2 file.")
        all_names |= names

    if len(all_names) != len(mol2_filenames):
        raise ValueError("Must have UNIQUE residue names in each mol2 file.")
    if len(mol2_filenames) != len(frcmod_filenames):
        raise ValueError("Must provide an equal number of frcmod and mol2 file names.")

    # Check validity of water model options
    valid_water = ["TIP3P", "SPC", None]
    if water_model not in valid_water:
        raise ValueError("Must provide a valid water model.")

    water_present = False
    water_string = ""

    # If we are requesting a different water model, check if there is water present
    if water_model is not None:
        parmed = import_("parmed")

        # Check if each component is water by checking GAFF atom types
        is_water = []
        for filename in mol2_filenames:
            mol = parmed.load_file(filename)
            types = [atom.type for atom in mol.atoms]
            is_water.append(
                "oh" in types and types.count("ho") == 2 and len(types) == 3
            )
        water_present = any(is_water)

        if water_present:
            # Water gets its parameters from the force field files, so drop its
            # .mol2/.frcmod entries from the lists handed to tleap.
            water_mol2_filenames = [
                name for name, flag in zip(mol2_filenames, is_water) if flag
            ]
            frcmod_filenames = [
                name for name, flag in zip(frcmod_filenames, is_water) if not flag
            ]
            mol2_filenames = [
                name for name, flag in zip(mol2_filenames, is_water) if not flag
            ]

            # Translate the requested water model into AMBER nomenclature.
            # water_model was validated above, so the lookup cannot miss.
            amber_water = {"TIP3P": "TP3", "SPC": "SPC"}[water_model]

            # Compose string for loading specified water molecule
            water_names = [
                md.load(filename).top.residue(0).name
                for filename in water_mol2_filenames
            ]
            water_string = "\n"
            for name in water_names:
                water_string += "%s = %s\n" % (name, amber_water)
                # Only SPC needs an extra frcmod file to be sourced.
                if amber_water == "SPC":
                    water_string += "loadamberparams frcmod.spce\n"

            # Rename water atoms in box file to match what is expected by AMBER
            packmol = import_("openmoltools.packmol")
            packmol.rename_water_atoms(box_filename)

    # Number of (non-water) components that tleap loads from mol2/frcmod files
    nfiles = len(mol2_filenames)

    # Make temporary, hardcoded filenames for mol2 and frcmod input to avoid
    # tleap filename restrictions
    tmp_mol2_filenames = ["in%d.mol2" % n for n in range(nfiles)]
    tmp_frcmod_filenames = ["in%d.frcmod" % n for n in range(nfiles)]

    # Same for the output files and the box
    tmp_prmtop_filename = "out.prmtop"
    tmp_inpcrd_filename = "out.inpcrd"
    tmp_box_filename = "tbox.pdb"

    # Build absolute paths of input files so they can still be found once the
    # working directory has changed to the temporary directory
    infiles = mol2_filenames + frcmod_filenames + [box_filename]
    infiles = [os.path.abspath(filenm) for filenm in infiles]

    # Build absolute paths of output files so we can copy them back
    prmtop_filename = os.path.abspath(prmtop_filename)
    inpcrd_filename = os.path.abspath(inpcrd_filename)

    # Use temporary directory and do the setup
    with mdtraj.utils.enter_temp_directory():
        # Copy input files to temporary file names in target directory
        tmp_infiles = tmp_mol2_filenames + tmp_frcmod_filenames + [tmp_box_filename]
        for infile, outfile in zip(infiles, tmp_infiles):
            shutil.copy(infile, outfile)
            logger.debug("Copying input file %s to %s...\n" % (infile, outfile))

        residue_names = [
            md.load(filename).top.residue(0).name for filename in tmp_mol2_filenames
        ]
        mol2_section = "\n".join(
            "%s = loadmol2 %s" % (name, filename)
            for name, filename in zip(residue_names, tmp_mol2_filenames)
        )
        # If non-GAFF water is present, load desired parameters for that water as well.
        if water_present:
            mol2_section += water_string

        amberparams_section = "\n".join(
            "loadamberparams %s" % filename for filename in tmp_frcmod_filenames
        )

        tleap_commands = TLEAP_TEMPLATE % dict(
            mol2_section=mol2_section,
            amberparams_section=amberparams_section,
            box_filename=tmp_box_filename,
            prmtop_filename=tmp_prmtop_filename,
            inpcrd_filename=tmp_inpcrd_filename,
        )
        print(tleap_commands)

        # The context manager closes the script file even if the write fails.
        tleap_script = "tleap_commands"
        with open(tleap_script, "w") as file_handle:
            file_handle.write(tleap_commands)

        logger.debug("Running tleap in temporary directory.")
        cmd = "tleap -f %s " % tleap_script
        logger.debug(cmd)

        output = getoutput(cmd)
        logger.debug(output)
        check_for_errors(
            output,
            other_errors=["Improper number of arguments"],
            ignore_errors=["unperturbed charge of the unit", "ignoring the error"],
        )

        # Copy stuff back to right filenames
        for tfile, finalfile in zip(
            [tmp_prmtop_filename, tmp_inpcrd_filename],
            [prmtop_filename, inpcrd_filename],
        ):
            shutil.copy(tfile, finalfile)

    return tleap_commands
def get_charges(
    molecule,
    max_confs=800,
    strictStereo=True,
    normalize=True,
    keep_confs=None,
    legacy=True,
    assign_formal_charges: bool = False,
):
    """Generate charges for an OpenEye OEMol molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule for which to generate conformers.
        Omega will be used to generate max_confs conformations.
    max_confs : int, optional, default=800
        Max number of conformers to generate
    strictStereo : bool, optional, default=True
        If False, permits smiles strings with unspecified stereochemistry.
        See https://docs.eyesopen.com/omega/usage.html
    normalize : bool, optional, default=True
        If True, normalize the molecule by checking aromaticity, adding
        explicit hydrogens, and renaming by IUPAC name.
    keep_confs : int, optional, default=None
        If None, apply the charges to the provided conformation and return
        this conformation, unless no conformation is present.
        Otherwise, return some or all of the generated conformations.
        If -1, all generated conformations are returned.
        Otherwise, keep_confs = N will return an OEMol with up to N
        generated conformations. Multiple conformations are still used to
        *determine* the charges.
    legacy : bool, default=True
        If False, uses the new OpenEye charging engine.
        See https://docs.eyesopen.com/toolkits/python/quacpactk/OEProtonFunctions/OEAssignCharges.html#
    assign_formal_charges : bool, default=False
        (Re)assign formal charges for atoms in the molecule.

    Returns
    -------
    charged_copy : OEMol
        A molecule with OpenEye's recommended AM1BCC charge selection scheme.

    Raises
    ------
    ImportError
        If OEChem or QuacPac is not licensed.
    RuntimeError
        If the selected charge assignment call reports failure.
    ValueError
        If keep_confs is neither None, -1, nor a positive number.

    Notes
    -----
    Roughly follows
    http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html
    """
    # If there is no geometry, return at least one conformation.
    # GetConfs() hands back an iterator, which never compares equal to 0, so
    # the conformer count has to be queried explicitly.
    # NOTE(review): a molecule that carries a single coordinate-less conformer
    # is not covered by this check -- confirm whether it should be.
    if molecule.NumConfs() == 0:
        keep_confs = 1

    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise ImportError("Need License for OEChem!")
    oequacpac = import_("openeye.oequacpac")
    if not oequacpac.OEQuacPacIsLicensed():
        raise ImportError("Need License for oequacpac!")

    # Work on a private molecule in both branches.
    if normalize:
        molecule = normalize_molecule(molecule)
    else:
        molecule = oechem.OEMol(molecule)

    if assign_formal_charges:
        # modifies molecule in place
        oechem.OEAssignFormalCharges(molecule)

    # Blank line on stdout, kept so console output stays unchanged.
    print("")

    # Generate up to max_confs conformers
    charged_copy = generate_conformers(
        molecule, max_confs=max_confs, strictStereo=strictStereo
    )

    if not legacy:
        # 2017.2.1 OEToolkits new charging function
        status = oequacpac.OEAssignCharges(charged_copy, oequacpac.OEAM1BCCCharges())
        if not status:
            raise RuntimeError("OEAssignCharges failed.")
    else:
        # AM1BCCSym recommended by Chris Bayly to KAB+JDC, Oct. 20 2014.
        status = oequacpac.OEAssignPartialCharges(
            charged_copy, oequacpac.OECharges_AM1BCCSym
        )
        if not status:
            raise RuntimeError(
                "OEAssignPartialCharges returned error code %d" % status
            )

    # Determine conformations to return
    if keep_confs is None:
        # Returning the original conformation: remember its coordinates ...
        original = molecule.GetCoords()
        # ... drop every generated conformer except the first ...
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > 0:
                charged_copy.DeleteConf(conf)
        # ... and copy the original coordinates onto the remaining conformer.
        charged_copy.SetCoords(original)
    elif keep_confs > 0:
        logger.debug(
            "keep_confs was set to %s. Molecule positions will be reset." % keep_confs
        )
        # Keep only the first keep_confs conformers, if that many are available.
        for k, conf in enumerate(charged_copy.GetConfs()):
            if k > keep_confs - 1:
                charged_copy.DeleteConf(conf)
    elif keep_confs == -1:
        # All generated conformations are wanted; nothing to prune.
        pass
    else:
        # Not a valid option to keep_confs
        raise ValueError("Not a valid option to keep_confs in get_charges.")

    return charged_copy
def run_antechamber(
    molecule_name,
    input_filename,
    charge_method="bcc",
    net_charge=None,
    gaff_mol2_filename=None,
    frcmod_filename=None,
):
    """Run AmberTools antechamber and parmchk2 to create GAFF mol2 and frcmod files.

    Parameters
    ----------
    molecule_name : str
        Name of the molecule to be parameterized, will be used in output filenames.
    input_filename : str
        The molecule to be parameterized. Must be tripos mol2 format.
    charge_method : str, optional
        If not None, the charge method string will be passed to Antechamber.
    net_charge : int, optional
        If not None, net charge of the molecule to be parameterized.
        If None, Antechamber sums up partial charges from the input file.
    gaff_mol2_filename : str, optional, default=None
        Name of GAFF mol2 filename to output. If None, uses local directory
        and molecule_name
    frcmod_filename : str, optional, default=None
        Name of GAFF frcmod filename to output. If None, uses local directory
        and molecule_name

    Returns
    -------
    gaff_mol2_filename : str
        GAFF format mol2 filename produced by antechamber
    frcmod_filename : str
        Amber frcmod file produced by parmchk2

    Raises
    ------
    ValueError
        If the extension reported for input_filename is not "mol2".

    Notes
    -----
    Only the parmchk2 output is handed to ``check_for_errors``; the
    antechamber output is logged at debug level but not inspected.
    """
    # Local import: the commands below go through a shell, so every
    # user-supplied token is quoted. Plain names such as "lig.mol2" are
    # returned unchanged by quote(), keeping the command line identical.
    from shlex import quote

    utils = import_("openmoltools.utils")
    ext = utils.parse_ligand_filename(input_filename)[1]

    # Strip the leading character of the extension (e.g. ".mol2" -> "mol2").
    filetype = ext[1:]
    if filetype != "mol2":
        raise ValueError("Must input mol2 filename")

    if gaff_mol2_filename is None:
        gaff_mol2_filename = molecule_name + '.gaff.mol2'
    if frcmod_filename is None:
        frcmod_filename = molecule_name + '.frcmod'

    quoted_input = quote(str(input_filename))
    quoted_gaff_mol2 = quote(str(gaff_mol2_filename))
    quoted_frcmod = quote(str(frcmod_filename))

    cmd = "antechamber -i %s -fi mol2 -o %s -fo mol2 -s 2" % (
        quoted_input,
        quoted_gaff_mol2,
    )
    if charge_method is not None:
        cmd += ' -c %s' % quote(str(charge_method))
    if net_charge is not None:
        cmd += ' -nc %d' % net_charge

    logger.debug(cmd)
    output = getoutput(cmd)
    logger.debug(output)

    cmd = "parmchk2 -i %s -f mol2 -o %s" % (quoted_gaff_mol2, quoted_frcmod)
    logger.debug(cmd)
    output = getoutput(cmd)
    logger.debug(output)
    check_for_errors(output)

    return gaff_mol2_filename, frcmod_filename
def build_mixture_prmtop(
    mol2_filenames,
    frcmod_filenames,
    box_filename,
    prmtop_filename,
    inpcrd_filename,
    water_model="TIP3P",
):
    """Create a prmtop and inpcrd from a collection of mol2 and frcmod files
    as well as a single box PDB. We have used this for setting up
    simulations of neat liquids or binary mixtures.

    Parameters
    ----------
    mol2_filenames : list(str)
        Filenames of GAFF flavored mol2 files. Each must contain exactly
        ONE ligand.
    frcmod_filenames : list(str)
        Filenames of input GAFF frcmod files, one per mol2 file and in the
        same order.
    box_filename : str
        Filename of PDB containing an arbitrary box of the mol2 molecules.
    prmtop_filename : str
        output prmtop filename. Should have suffix .prmtop
    inpcrd_filename : str
        output inpcrd filename. Should have suffix .inpcrd
    water_model : str, optional. Default: "TIP3P"
        String specifying water model to be used IF water is present as a
        component of the mixture. Valid options are currently "TIP3P", "SPC",
        or None. If None is specified, flexible GAFF-water will be used as
        for any other solute (old behavior).

    Returns
    -------
    tleap_commands : str
        The string of commands piped to tleap for building the prmtop
        and inpcrd files. This will *already* have been run, but the
        output can be useful for debugging or archival purposes. However,
        this will reflect temporary file names for both input and output
        file as these are used to avoid tleap filename restrictions.

    Raises
    ------
    ValueError
        If a mol2 file holds more than one residue name, if residue names
        are not unique across the mol2 files, if the numbers of mol2 and
        frcmod files differ, or if water_model is not a valid option.

    Notes
    -----
    This can be easily broken if there are missing, duplicated, or
    inconsistent ligand residue names in your box, mol2, and frcmod files.
    You can use mdtraj to edit the residue names with something like
    this: trj.top.residue(0).name = "L1"

    When a non-None water model is requested and water is detected,
    ``packmol.rename_water_atoms`` is called on ``box_filename`` itself.
    """
    # Check for one residue name per mol2 file and uniqueness between all mol2 files
    all_names = set()
    for filename in mol2_filenames:
        traj = md.load(filename)
        names = {residue.name for residue in traj.top.residues}
        if len(names) != 1:
            raise ValueError("Must have a SINGLE residue name in each mol2 file.")
        all_names |= names

    if len(all_names) != len(mol2_filenames):
        raise ValueError("Must have UNIQUE residue names in each mol2 file.")
    if len(mol2_filenames) != len(frcmod_filenames):
        raise ValueError("Must provide an equal number of frcmod and mol2 file names.")

    # Check validity of water model options
    valid_water = ["TIP3P", "SPC", None]
    if water_model not in valid_water:
        raise ValueError("Must provide a valid water model.")

    water_present = False
    water_string = ""

    # If we are requesting a different water model, check if there is water present
    if water_model is not None:
        parmed = import_("parmed")

        # Check if each component is water by checking GAFF atom types
        is_water = []
        for filename in mol2_filenames:
            mol = parmed.load_file(filename)
            types = [atom.type for atom in mol.atoms]
            is_water.append(
                "oh" in types and types.count("ho") == 2 and len(types) == 3
            )
        water_present = any(is_water)

        if water_present:
            # Water gets its parameters from the force field files, so drop its
            # .mol2/.frcmod entries from the lists handed to tleap.
            water_mol2_filenames = [
                name for name, flag in zip(mol2_filenames, is_water) if flag
            ]
            frcmod_filenames = [
                name for name, flag in zip(frcmod_filenames, is_water) if not flag
            ]
            mol2_filenames = [
                name for name, flag in zip(mol2_filenames, is_water) if not flag
            ]

            # Translate the requested water model into AMBER nomenclature.
            # water_model was validated above, so the lookup cannot miss.
            amber_water = {"TIP3P": "TP3", "SPC": "SPC"}[water_model]

            # Compose string for loading specified water molecule
            water_names = [
                md.load(filename).top.residue(0).name
                for filename in water_mol2_filenames
            ]
            water_string = "\n"
            for name in water_names:
                water_string += "%s = %s\n" % (name, amber_water)
                # Only SPC needs an extra frcmod file to be sourced.
                if amber_water == "SPC":
                    water_string += "loadamberparams frcmod.spce\n"

            # Rename water atoms in box file to match what is expected by AMBER
            packmol = import_("openmoltools.packmol")
            packmol.rename_water_atoms(box_filename)

    # Number of (non-water) components that tleap loads from mol2/frcmod files
    nfiles = len(mol2_filenames)

    # Make temporary, hardcoded filenames for mol2 and frcmod input to avoid
    # tleap filename restrictions
    tmp_mol2_filenames = ["in%d.mol2" % n for n in range(nfiles)]
    tmp_frcmod_filenames = ["in%d.frcmod" % n for n in range(nfiles)]

    # Same for the output files and the box
    tmp_prmtop_filename = "out.prmtop"
    tmp_inpcrd_filename = "out.inpcrd"
    tmp_box_filename = "tbox.pdb"

    # Build absolute paths of input files so they can still be found once the
    # working directory has changed to the temporary directory
    infiles = mol2_filenames + frcmod_filenames + [box_filename]
    infiles = [os.path.abspath(filenm) for filenm in infiles]

    # Build absolute paths of output files so we can copy them back
    prmtop_filename = os.path.abspath(prmtop_filename)
    inpcrd_filename = os.path.abspath(inpcrd_filename)

    # Use temporary directory and do the setup
    with mdtraj.utils.enter_temp_directory():
        # Copy input files to temporary file names in target directory
        tmp_infiles = tmp_mol2_filenames + tmp_frcmod_filenames + [tmp_box_filename]
        for infile, outfile in zip(infiles, tmp_infiles):
            shutil.copy(infile, outfile)
            logger.debug("Copying input file %s to %s...\n" % (infile, outfile))

        residue_names = [
            md.load(filename).top.residue(0).name for filename in tmp_mol2_filenames
        ]
        mol2_section = "\n".join(
            "%s = loadmol2 %s" % (name, filename)
            for name, filename in zip(residue_names, tmp_mol2_filenames)
        )
        # If non-GAFF water is present, load desired parameters for that water as well.
        if water_present:
            mol2_section += water_string

        amberparams_section = "\n".join(
            "loadamberparams %s" % filename for filename in tmp_frcmod_filenames
        )

        tleap_commands = TLEAP_TEMPLATE % dict(
            mol2_section=mol2_section,
            amberparams_section=amberparams_section,
            box_filename=tmp_box_filename,
            prmtop_filename=tmp_prmtop_filename,
            inpcrd_filename=tmp_inpcrd_filename,
        )
        print(tleap_commands)

        # The context manager closes the script file even if the write fails.
        tleap_script = "tleap_commands"
        with open(tleap_script, "w") as file_handle:
            file_handle.write(tleap_commands)

        logger.debug("Running tleap in temporary directory.")
        cmd = "tleap -f %s " % tleap_script
        logger.debug(cmd)

        output = getoutput(cmd)
        logger.debug(output)
        check_for_errors(
            output,
            other_errors=["Improper number of arguments"],
            ignore_errors=["unperturbed charge of the unit", "ignoring the error"],
        )

        # Copy stuff back to right filenames
        for tfile, finalfile in zip(
            [tmp_prmtop_filename, tmp_inpcrd_filename],
            [prmtop_filename, inpcrd_filename],
        ):
            shutil.copy(tfile, finalfile)

    return tleap_commands