def gen_canonical_isomeric_smiles(oemol):
    """Build a canonically ordered OpenFF molecule and its SMILES from an OEMol.

    Parameters
    ----------
    oemol
        OpenEye molecule. It is copied before use. The SMILES written from
        the copy is used as the query that locates the tagged atoms, so it
        presumably carries atom map indices -- TODO confirm against callers.

    Returns
    -------
    tuple
        ``(molecule, canonical_isomeric_smiles, center_bond)``: the
        canonically ordered OpenFF molecule, its isomeric explicit-hydrogen
        SMILES, and the set of atom indices of the first query match.

    Raises
    ------
    IndexError
        If the query produces no match on the canonical molecule.
    """
    # 1. Create an OpenFF molecule from the OpenEye molecule, guessing the
    #    stereochemistry if needed.
    oe_molecule = oechem.OEMol(oemol)

    try:
        molecule = Molecule.from_openeye(oe_molecule)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer swallowed by the stereochemistry fallback.
        molecule = Molecule.from_openeye(oe_molecule, allow_undefined_stereo=True)
        stereoisomers = molecule.enumerate_stereoisomers(
            undefined_only=True, max_isomers=1
        )

        if stereoisomers:
            molecule = stereoisomers[0]

    # 2. Canonically order the molecule
    molecule = molecule.canonical_order_atoms()

    # 3. Figure out which atoms in the canonical molecule should be tagged.
    mapped_smiles = oechem.OEMolToSmiles(oe_molecule)
    torsion_match = molecule.chemical_environment_matches(mapped_smiles)[0]

    # 4. Generate a canonical isomeric smiles.
    molecule.properties["atom_map"] = {
        j: i + 1 for i, j in enumerate(torsion_match)
    }
    center_bond = set(molecule.properties["atom_map"].keys())

    # NOTE(review): ``mapped=False`` means the returned SMILES carries no atom
    # map even though ``atom_map`` was just set -- confirm this is intended.
    canonical_isomeric_smiles = molecule.to_smiles(
        isomeric=True, explicit_hydrogens=True, mapped=False
    )

    return molecule, canonical_isomeric_smiles, center_bond
def test_mapping_strength_levels(pairs_of_smiles=[('Cc1ccccc1','c1ccc(cc1)N'),('CC(c1ccccc1)','O=C(c1ccccc1)'),('Oc1ccccc1','Sc1ccccc1')],test=True):
    """Propose each ligand pair at every map strength and check unique-atom counts.

    Parameters
    ----------
    pairs_of_smiles : list of tuple of str
        ``(old, new)`` SMILES pairs to map.
    test : bool
        When true, render the atom mapping to ``'<index>-<strength>.png'`` and
        assert the (unique old, unique new) atom counts against the reference
        table, which is keyed by the position of the pair in the default list.
    """
    # Expected (n unique old atoms, n unique new atoms) per pair index and strength.
    expected_counts = {
        0: {'default': (3, 2), 'weak': (3, 2), 'strong': (4, 3)},
        1: {'default': (7, 3), 'weak': (6, 2), 'strong': (7, 3)},
        2: {'default': (1, 1), 'weak': (1, 1), 'strong': (2, 2)},
    }

    for strength in ('weak', 'default', 'strong'):
        for pair_index, (lig_a, lig_b) in enumerate(pairs_of_smiles):
            print(f"conducting {strength} mapping with ligands {lig_a}, {lig_b}")
            old_oemol = smiles_to_oemol(lig_a)
            new_oemol = smiles_to_oemol(lig_b)
            oemol_pair = [old_oemol, new_oemol]
            off_molecules = [Molecule.from_openeye(oemol) for oemol in oemol_pair]

            generator = SystemGenerator(
                forcefields=forcefield_files,
                barostat=barostat,
                forcefield_kwargs=forcefield_kwargs,
                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                small_molecule_forcefield='gaff-1.81',
                molecules=off_molecules,
                cache=None,
            )
            engine = SmallMoleculeSetProposalEngine([old_oemol, new_oemol], generator)
            old_system, _, old_topology = OEMol_to_omm_ff(old_oemol, generator)

            print(f"running now with map strength {strength}")
            proposal = engine.propose(old_system, old_topology, map_strength=strength)
            print(lig_a, lig_b, 'length OLD and NEW atoms',
                  len(proposal.unique_old_atoms), len(proposal.unique_new_atoms))

            if test:
                render_atom_mapping(f'{pair_index}-{strength}.png', old_oemol,
                                    new_oemol, proposal._new_to_old_atom_map)
                observed = (len(proposal.unique_old_atoms),
                            len(proposal.unique_new_atoms))
                assert observed == expected_counts[pair_index][strength], \
                    f"the mapping failed, correct results are {expected_counts[pair_index][strength]}"
                print("the mapping worked!!!")
            print()
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules.

    Runs 50 consecutive proposals over butane/pentane/hexane and, for each
    step, checks that the 'MOL' residue of the previous proposal's new
    topology canonicalizes to that proposal's ``new_chemical_state_key``.
    """
    list_of_smiles = ['CCCC', 'CCCCC', 'CCCCCC']
    list_of_mols = [smiles_to_oemol(smi) for smi in list_of_smiles]
    molecules = [Molecule.from_openeye(mol) for mol in list_of_mols]

    # Tally of how often each chemical state was proposed (informational only).
    stats_dict = defaultdict(int)

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=molecules,
        cache=None,
    )
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(
        list_of_mols, system_generator
    )
    initial_system, initial_positions, initial_topology = OEMol_to_omm_ff(
        list_of_mols[0], system_generator
    )

    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        matching_molecules = [
            res for res in proposal.new_topology.residues() if res.name == 'MOL'
        ]
        if len(matching_molecules) != 1:
            # The old message claimed "more than one" even when none were found.
            raise ValueError(
                "Expected exactly one residue named 'MOL', "
                f"found {len(matching_molecules)}"
            )

        mol_res = matching_molecules[0]
        oemol = generateOEMolFromTopologyResidue(mol_res)
        smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(
            oechem.OEMolToSmiles(oemol)
        )
        assert smiles == proposal.new_chemical_state_key
        proposal = new_proposal
def molecule_from_record(record: MoleculeESPRecord) -> Molecule:
    """Convert an ``openff-recharge`` ESP record into an Open Force Field
    molecule."""
    oe_mol = oechem.OEMol()
    oechem.OESmilesToMol(oe_mol, record.tagged_smiles)

    # Must happen before the map indices are wiped below.
    conformer_in_order = reorder_conformer(oe_mol, record.conformer)

    # Clear the record's index map.
    for oe_atom in oe_mol.GetAtoms():
        oe_atom.SetMapIdx(0)

    # Replace whatever conformers exist with the single reordered one.
    oe_mol.DeleteConfs()
    flat_coordinates = oechem.OEFloatArray(conformer_in_order.flatten())
    oe_mol.NewConf(flat_coordinates)

    with NamedTemporaryFile(suffix=".mol2") as scratch:
        # Workaround for stereochemistry being incorrectly perceived:
        # round-trip the molecule through a MOL2 file.
        molecule = Molecule.from_openeye(oe_mol, allow_undefined_stereo=True)
        molecule.to_file(scratch.name, "mol2")
        molecule = molecule.from_file(scratch.name)

    return molecule
def min_ffxml(mol, ffxml):
    """Check that an OpenMM system can be built for ``mol`` with ``ffxml``.

    Prints a success or failure message; nothing is returned and the built
    system is discarded.

    Parameters
    ----------
    mol : OpenEye molecule (copied into an OEGraphMol before use)
    ffxml : string
        name of the force field file handed to ``ForceField``
    """
    # work on a copy of the input mol
    mol_copy = oechem.OEGraphMol(mol)

    try:
        # openforcefield molecule creation ==> prone to triggering Exception
        off_molecule = Molecule.from_openeye(mol_copy)

        # force field and topology needed for the OpenMM system
        force_field = ForceField(ffxml)
        off_topology = Topology.from_molecules(molecules=[off_molecule])

        # openmm system creation ==> prone to triggering Exception
        system = force_field.create_openmm_system(off_topology)

    except Exception as err:
        sd_smiles = oechem.OEGetSDData(mol_copy, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f"'{mol_copy.GetTitle()}' '{sd_smiles}'")
        print(f"{err}\n")
        return

    print(" >>> successful OpenMM system creation for openforcefield "
          f'mol "{mol_copy.GetTitle()}"')
def process_molecule(smiles: str) -> Tuple[Optional[Molecule], Optional[str]]:
    """Build a charged, multi-conformer OpenFF molecule with WBOs from a SMILES.

    Parameters
    ----------
    smiles
        The SMILES pattern to process.

    Returns
    -------
    ``(molecule, error)``: on success the molecule and ``None``; on failure
    ``None`` and a message of the form ``"Failed to process <smiles>: <reason>"``.
    """
    error = None

    try:
        oe_molecule = smiles_to_molecule(smiles, guess_stereochemistry=True)

        # Generate a set of conformers and charges for the molecule.
        conformers = ConformerGenerator.generate(oe_molecule, ConformerSettings())
        charges = ChargeGenerator.generate(oe_molecule, conformers, ChargeSettings())

        # Add the charges and conformers to the OE object.
        for oe_atom in oe_molecule.GetAtoms():
            oe_atom.SetPartialCharge(charges[oe_atom.GetIdx()].item())

        oe_molecule.DeleteConfs()

        for conformer in conformers:
            oe_molecule.NewConf(oechem.OEFloatArray(conformer.flatten()))

        # Map to an OpenFF molecule object.
        molecule = Molecule.from_openeye(oe_molecule)

        # Compute the WBOs
        molecule.assign_fractional_bond_orders(
            "am1-wiberg", use_conformers=molecule.conformers
        )

    except Exception as e:
        # Previously caught BaseException too, which turned Ctrl-C and
        # SystemExit into an ordinary "failed molecule" result.
        molecule = None
        error = f"Failed to process {smiles}: {str(e)}"

    return molecule, error
def data_generator():
    """Yield ``(xyz, qm_force, system)`` for snapshots of 10 random records.

    For every snapshot in the trajectory of each sampled record this yields
    the geometry scaled by ``BOHR_TO_NM``, the snapshot's ``return_result``
    scaled by ``HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM`` (both as float32
    tensors) and the OpenMM system created by ``FF`` for the molecule.

    Records that fail are reported and skipped. Only ``Exception`` is
    caught: the previous bare ``except`` also swallowed ``GeneratorExit``
    raised at the ``yield``, so closing the generator early raised
    ``RuntimeError`` instead of stopping cleanly.
    """
    for record_name in random.sample(list(ds_qc.data.records), 10):
        try:
            print(record_name, flush=True)
            r = ds_qc.get_record(record_name, specification='default')
            if r is None:
                continue

            traj = r.get_trajectory()
            if traj is None:
                continue

            for snapshot in traj:
                qc_mol = snapshot.get_molecule()

                xyz = tf.convert_to_tensor(
                    qc_mol.geometry * BOHR_TO_NM,
                    dtype=tf.float32)

                qm_force = tf.convert_to_tensor(
                    snapshot.return_result
                    * HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM,
                    dtype=tf.float32)

                oe_mol = cmiles.utils.load_molecule(qc_mol.dict(encoding='json'))

                top = Topology.from_molecules(Molecule.from_openeye(oe_mol))
                system = FF.create_openmm_system(top)

                yield (xyz, qm_force, system)

        except Exception as e:
            # Best effort: one bad record must not stop the stream, but say so.
            print(f"skipping record {record_name}: {e}", flush=True)
def select_torsions(molecules_list_dict, molecule_attributes, forcefield, target_coverage=3):
    """Select torsions per SMIRKS, restricted to ``list_of_tids``.

    For each molecule, torsions whose parameter id is in the module-level
    ``list_of_tids`` are accepted until their SMIRKS has ``target_coverage``
    hits. A torsion is passed over when its two central atoms share a ring
    or when its central bond was already used for the same molecule.

    Parameters
    ----------
    molecules_list_dict : dict
        Maps molecule index to a list of QCFractal-style molecules; the first
        entry is used to build the OpenEye molecule.
    molecule_attributes : dict
        Maps molecule index to its attribute dict, which must contain
        'canonical_isomeric_explicit_hydrogen_mapped_smiles'.
    forcefield
        Force field used for torsion labelling and for the summary.
    target_coverage : int
        Maximum number of torsions accepted per SMIRKS.

    Returns
    -------
    dict
        Canonical torsion label -> {'initial_molecules', 'atom_indices',
        'attributes', 'tid'}.
    """
    selected = {}
    hits_per_smirks = Counter()

    for position, (mol_index, mol_attr) in enumerate(molecule_attributes.items()):
        # central bonds already taken for this molecule
        used_central_bonds = []
        print(f'{position:<7d}: {mol_index}')
        mapped_smiles = mol_attr['canonical_isomeric_explicit_hydrogen_mapped_smiles']

        # round trip from QCFractal molecule to OpenEye molecule then to Off Molecule
        # this is needed for now to ensure atom indices are consistent
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)

        # atom index -> ids of the rings that atom belongs to
        bond_pairs = [(bond.GetBgnIdx(), bond.GetEndIdx()) for bond in oemol.GetBonds()]
        rings = BondGraph(bond_pairs).get_rings()
        ring_membership = defaultdict(set)
        for ring_id, ring in enumerate(rings):
            for atom_idx in ring:
                ring_membership[atom_idx].add(ring_id)

        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        coverage = smirnoff_analyze_torsions(forcefield, off_mol)

        for torsion_param, torsion_idx_list in coverage.items():
            smirks = torsion_param.smirks
            for atom_indices in torsion_idx_list:
                if torsion_param.id not in list_of_tids:
                    continue

                if hits_per_smirks[smirks] >= target_coverage:
                    print(f" - torsion {atom_indices} skipped because {smirks} have {hits_per_smirks[smirks]} already")
                    continue

                _, j, k, _ = atom_indices
                if ring_membership[j] & ring_membership[k]:
                    # both central atoms sit in a common ring
                    continue
                central_bond = set([j, k])
                if central_bond in used_central_bonds:
                    continue

                hits_per_smirks[smirks] += 1
                canonical_torsion_index = cmiles.utils.to_canonical_label(mapped_smiles, atom_indices)
                selected[canonical_torsion_index] = {
                    'initial_molecules': molecules_list_dict[mol_index],
                    'atom_indices': [atom_indices],
                    'attributes': mol_attr,
                    'tid': torsion_param.id,
                }
                used_central_bonds.append(central_bond)
                print(f" - torsion {atom_indices} added for smirks {smirks}")

    print("\n## Selected Torsion Coverage ##\n" + '-'*90)
    proper_torsions = forcefield.get_parameter_handler('ProperTorsions').parameters
    n_covered = 0
    for param in proper_torsions:
        if param.id not in list_of_tids:
            continue
        count = hits_per_smirks[param.smirks]
        print(f"{param.id:5s}{param.smirks:80s} : {count:7d}")
        if count > 0:
            n_covered += 1
    print('-'*90)
    print(f'{n_covered} / {len(list_of_tids)} torsion SMIRKs covered')
    return selected
def create_openmm_system(conversion, molecules):
    """
    Create an OpenMM system for the first molecule in ``molecules`` using the
    force field named by ``conversion.ff``.

    Returns the OpenFF topology and the OpenMM system, in that order.
    """
    first_molecule = Molecule.from_openeye(molecules[0])
    off_topology = Topology.from_molecules([first_molecule])
    force_field = ForceField(conversion.ff)
    return off_topology, force_field.create_openmm_system(off_topology)
def gen_tid_molecules_list_of_interest(molecule_attributes, molecules_list_dict,
                                       forcefield, tid_list):
    """Collect, per torsion id of interest, the molecules and torsions matching it.

    Parameters
    ----------
    molecule_attributes : dict
        Maps molecule index to its attribute dict.
    molecules_list_dict : dict
        Maps molecule index to a list of molecules; the first one is loaded.
    forcefield
        Force field providing the 'ProperTorsions' parameters.
    tid_list
        Torsion parameter ids to report on.

    Returns
    -------
    dict
        tid -> list of {'mol_index', 'indices', 'covered_tids'} entries, where
        'covered_tids' are the distinct ids listed for the torsion's central bond.
    """
    # keep only the force field torsion parameters whose id was requested
    all_torsion_params = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    params_of_interest = [
        param for param in all_torsion_params if param.id in tid_list
    ]
    tid_molecules_list = {param.id: [] for param in params_of_interest}

    for mol_index, mol_attr in molecule_attributes.items():
        # looked up but not used further; kept so a missing key still fails here
        mapped_smiles = mol_attr[
            'canonical_isomeric_explicit_hydrogen_mapped_smiles']
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        torsions_coverage, center_tids = smirnoff_analysis_torsions(
            forcefield, off_mol)
        filtered_coverage = filter_torsions_coverage(torsions_coverage, oemol)

        for tid, indices_list in filtered_coverage.items():
            if tid not in tid_list:
                continue
            for indices in indices_list:
                _, j, k, _ = indices
                # distinct tids on this central bond, first-seen order
                covered_tids = []
                for center_tid in center_tids[(j, k)]:
                    if center_tid not in covered_tids:
                        covered_tids.append(center_tid)
                tid_molecules_list[tid].append({
                    'mol_index': mol_index,
                    'indices': indices,
                    'covered_tids': covered_tids
                })

    print("\n## Torsion parameter: matched molecules ##\n" + '-' * 90)
    print(
        f"{'idx':<7} {'ID':7s} {'SMIRKS Pattern':70s} {'Number of molecules matched'}"
    )
    for row, (tid, matched) in enumerate(tid_molecules_list.items()):
        torsion_param = get_torsion_definition(params_of_interest, tid)
        print(
            f'{row:<7} {torsion_param.id:7s} {torsion_param.smirks:70s} {len(matched)}'
        )
    print('-' * 90)
    return tid_molecules_list
def generate_selected_torsions(input_json):
    """Enumerate the torsions of every molecule in the input file.

    Parameters
    ----------
    input_json: str,
        JSON file name to the output json of generate.py (prepared as if for
        an OptimizationDataset). The data in the json file should be a list of
        {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    torsions_dict: dict
        Keyed by a running integer counter; each value has this structure:
        {
            'initial_molecules': [ Molecule ],
            'atom_indices': [ (0,1,2,3) ],
            'attributes': {'canonical_explicit_hydrogen_smiles': .., ..}
        }

    Note
    ----
    'atom_indices' holds a single item because only 1-D torsions are
    selected for now.
    """
    molecule_data_list = read_molecules(input_json)

    torsions_dict = {}
    n_added = 0
    for mol_index, json_mol in enumerate(molecule_data_list):
        identifiers = json_mol['cmiles_identifiers']
        mapped_smiles = identifiers['canonical_isomeric_explicit_hydrogen_mapped_smiles']
        print(f'{mol_index} : {mapped_smiles}')

        # round trip from QCFractal molecule to OpenEye molecule then to Off Molecule
        # this is needed for now to ensure atom indices are consistent
        qcjson_mol = json_mol['initial_molecules'][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        # result is unused, but the conversion itself is kept from the original
        off_mol = Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        for atom_indices in enumerate_torsions(oemol):
            torsions_dict[n_added] = {
                'initial_molecules': [qcjson_mol],
                'atom_indices': [atom_indices],
                'attributes': json_mol['cmiles_identifiers'],
            }
            print(f" - torsion {atom_indices} added")
            n_added += 1

    print(f'{n_added} torsions added')
    return torsions_dict
def _convert_to_off(mol):
    """Return an OpenFF molecule for ``mol``.

    Handles ``esp.Graph`` (its ``mol`` attribute), OpenFF molecules (returned
    as-is), RDKit molecules and OpenEye objects. Any other input falls
    through and yields ``None``.
    """
    import openforcefield

    if isinstance(mol, esp.Graph):
        return mol.mol

    if isinstance(mol, openforcefield.topology.molecule.Molecule):
        return mol

    if isinstance(mol, rdkit.Chem.rdchem.Mol):
        return Molecule.from_rdkit(mol)

    # detect OpenEye by type name because we don't want to depend on OE
    type_name = str(type(mol))
    if "openeye" in type_name:
        return Molecule.from_openeye(mol)

    return None
def smiles_to_svg(smiles: str, torsion_indices: (int, int), image_width: int = 200, image_height: int = 200) -> str:
    """Renders a 2D representation of a molecule based on its SMILES
    representation as an SVG string.

    Parameters
    ----------
    smiles
        The SMILES pattern.
    torsion_indices
        The torsion indices for the molecule. Bonds between consecutive
        indices are highlighted, but only when the molecule could be built
        through the OpenEye / OpenFF route.
    image_width
        The width to make the final SVG.
    image_height
        The height to make the final SVG.

    Returns
    -------
        The 2D SVG representation.
    """
    # Parser settings for the RDKit fallback: keep explicit hydrogens.
    smiles_parser = Chem.rdmolfiles.SmilesParserParams()
    smiles_parser.removeHs = False

    oe_conformed = False
    try:
        oe_molecule, status = smiles2oemol(smiles)
        openff_molecule = Molecule.from_openeye(oe_molecule)
        rdkit_molecule = openff_molecule.to_rdkit()
        oe_conformed = True
    except Exception:
        # Narrowed from a bare ``except`` so Ctrl-C / SystemExit propagate;
        # any ordinary failure still falls back to plain RDKit parsing.
        rdkit_molecule = Chem.MolFromSmiles(smiles, smiles_parser)

    # Generate a set of 2D coordinates.
    Chem.rdDepictor.Compute2DCoords(rdkit_molecule)

    drawer = rdMolDraw2D.MolDraw2DSVG(image_width, image_height)

    torsion_bonds = []
    if oe_conformed:
        for i in range(len(torsion_indices) - 1):
            # Look the bond up once instead of twice per pair.
            bond = rdkit_molecule.GetBondBetweenAtoms(
                torsion_indices[i], torsion_indices[i + 1]
            )
            if bond:
                torsion_bonds.append(bond.GetIdx())

    rdMolDraw2D.PrepareAndDrawMolecule(
        drawer, rdkit_molecule, highlightBonds=torsion_bonds
    )
    drawer.FinishDrawing()

    svg_content = drawer.GetDrawingText()
    return svg_content
def test_OEMol_to_omm_ff(molecule=smiles_to_oemol('CC')):
    """
    Generating openmm objects for simulation from an OEMol object

    Parameters
    ----------
    molecule : openeye.oechem.OEMol

    Returns
    -------
    system : openmm.System
        openmm system object
    positions : unit.quantity
        positions of the system
    topology : app.topology.Topology
        openmm compatible topology object
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from openmmforcefields.generators import SystemGenerator
    from openforcefield.topology import Molecule

    # default arguments for SystemGenerators
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus,
    }
    small_molecule_forcefield = 'gaff-2.11'
    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(molecule)],
        cache=None,
    )

    system, positions, topology = OEMol_to_omm_ff(molecule, system_generator)

    # isinstance avoids building a throwaway System just to compare types
    # and also accepts subclasses.
    assert isinstance(system, openmm.System), \
        "An openmm.System has not been generated from OEMol_to_omm_ff()"

    return system, positions, topology
def get_smirnoff_params(mol: oechem.OEMol) -> {"id": ["atom_indices"]}:
    """For the given molecule, finds the SMIRNOFF params and their atom indices.

    Returns a mapping from parameter id to the list of atom-index keys it was
    assigned to, or an empty dict when the topology cannot be built.
    """
    off_mol = Molecule.from_openeye(mol, allow_undefined_stereo=True)

    try:
        topology = Topology.from_molecules(off_mol)
    except Exception:
        return {}

    labelled = utilize_params_util.SMIRNOFF.label_molecules(topology)

    # only the first (and only) molecule of the topology is inspected
    params = defaultdict(list)
    for assignments in labelled[0].values():
        for atom_index, parameter in assignments.items():
            params[parameter.id].append(atom_index)
    return params
def select_torsions(molecules_list_dict,
                    molecule_attributes,
                    forcefield,
                    target_coverage=3):
    """Select up to ``target_coverage`` torsions for every SMIRKS pattern.

    Parameters
    ----------
    molecules_list_dict : dict
        Maps molecule index to a list of QCFractal-style molecules; the first
        entry is used to build the OpenEye molecule.
    molecule_attributes : dict
        Maps molecule index to its attribute dict, which must contain
        'canonical_isomeric_explicit_hydrogen_mapped_smiles'.
    forcefield
        Force field used for torsion labelling and for the summary.
    target_coverage : int
        Maximum number of torsions accepted per SMIRKS.

    Returns
    -------
    dict
        Canonical torsion label -> {'initial_molecules', 'atom_indices',
        'attributes'}.
    """
    selected = {}
    hits_per_smirks = Counter()

    for position, (mol_index, mol_attr) in enumerate(molecule_attributes.items()):
        print(f'{position:<7d}: {mol_index}')
        mapped_smiles = mol_attr[
            'canonical_isomeric_explicit_hydrogen_mapped_smiles']

        # round trip from QCFractal molecule to OpenEye molecule then to Off Molecule
        # this is needed for now to ensure atom indices are consistent
        qcjson_mol = molecules_list_dict[mol_index][0]
        oemol = cmiles.utils.load_molecule(qcjson_mol)
        off_mol = Off_Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        coverage = smirnoff_analyze_torsions(forcefield, off_mol)

        for smirks, torsion_idx_list in coverage.items():
            for atom_indices in torsion_idx_list:
                if hits_per_smirks[smirks] >= target_coverage:
                    print(
                        f" - torsion {atom_indices} skipped because {smirks} have {hits_per_smirks[smirks]} already"
                    )
                    continue

                hits_per_smirks[smirks] += 1
                label = cmiles.utils.to_canonical_label(mapped_smiles,
                                                        atom_indices)
                selected[label] = {
                    'initial_molecules': molecules_list_dict[mol_index],
                    'atom_indices': [atom_indices],
                    'attributes': mol_attr,
                }
                print(
                    f" - torsion {atom_indices} added for smirks {smirks}"
                )

    print("\n## Selected Torsion Coverage ##\n" + '-' * 90)
    proper_torsions = forcefield.get_parameter_handler(
        'ProperTorsions').parameters
    n_covered = 0
    for param in proper_torsions:
        count = hits_per_smirks[param.smirks]
        print(f"{param.smirks:80s} : {count:7d}")
        if count > 0:
            n_covered += 1
    print('-' * 90)
    print(f'{n_covered} / {len(proper_torsions)} torsion SMIRKs covered')
    return selected
def min_ffxml(mol, ofs, ffxml):
    """
    Minimize the mol with force field input from FFXML file.

    On success the minimized coordinates are stored on a copy of the
    molecule, the energy is attached as the "Energy FFXML" SD tag and the
    copy is written to ``ofs``. If system creation fails a message is
    printed and nothing is written.

    Parameters
    ----------
    mol : OpenEye single-conformer molecule
    ofs : OpenEye output filestream
    ffxml : string
        name of FFXML file

    """
    # work on a copy of the input mol
    mol_copy = oechem.OEGraphMol(mol)

    try:
        # openforcefield molecule creation ==> prone to triggering Exception
        off_molecule = Molecule.from_openeye(mol_copy)

        # force field and topology needed for the OpenMM system
        force_field = ForceField(ffxml)
        topology = Topology.from_molecules(molecules=[off_molecule])

        # openmm system creation ==> prone to triggering Exception
        system = force_field.create_openmm_system(topology)

    except Exception as err:
        sd_smiles = oechem.OEGetSDData(mol_copy, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f'{mol_copy.GetTitle()} {sd_smiles}: {err}')
        return

    start_positions = structure.extractPositionsFromOEMol(mol_copy)

    # minimize structure with ffxml
    minimized_positions, energy = run_openmm(topology, system, start_positions)

    # save geometry, save energy as tag, write mol to file
    mol_copy.SetCoords(oechem.OEFloatArray(minimized_positions))
    oechem.OESetSDData(mol_copy, "Energy FFXML", str(energy))
    oechem.OEWriteConstMolecule(ofs, mol_copy)
def calculate_mol_params(mol: oechem.OEMol) -> Dict[str, List[List[int]]]:
    """Calculates parameters of the given molecule.

    Explicit hydrogens are added to ``mol`` in place before labelling.

    Returns a dict where the keys are parameter ids and the values are lists
    of indices where the parameter occurs (each entry in the list is itself a
    list because the parameter involves multiple atoms).
    """
    oechem.OEAddExplicitHydrogens(mol)

    off_mol = Molecule.from_openeye(mol, allow_undefined_stereo=True)
    labelled = FORCE_FIELD.label_molecules(Topology.from_molecules(off_mol))

    # the topology holds a single molecule, so only entry 0 is read
    params = defaultdict(list)
    for assignments in labelled[0].values():
        for atom_indices, parameter in assignments.items():
            params[parameter.id].append(atom_indices)

    return params
def __init__(self, config_: Config):
    """Load the ligand, optionally repair the PDB, and write the combined file.

    Reads the ligand from ``config.ligand_file_name``, stores its SMILES and
    OpenFF molecule, optionally runs PDBFixer on ``config.pdb_file_name``,
    writes the fixer's structure to ``<tempdir>/inital_fixed.pdb`` (which
    becomes the new ``config.pdb_file_name``) and finally merges the ligand
    into that file as residue 'UNL' using ``cmd``.
    """
    self.config = config_
    self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)

    with self.logger("__init__") as logger:
        self.boxvec = None
        self.explicit = self.config.explicit
        self.system = None

        # read the ligand
        ligand_stream = oechem.oemolistream(self.config.ligand_file_name)
        ligand = oechem.OEMol()
        oechem.OEReadMolecule(ligand_stream, ligand)
        ligand_stream.close()

        self.inital_ligand_smiles = oechem.OEMolToSmiles(ligand)
        self.params_written = 0
        self.mol = Molecule.from_openeye(ligand, allow_undefined_stereo=True)

        # the fixer is always built; its repair steps run only on request
        fixer = PDBFixer(self.config.pdb_file_name)
        if self.config.use_pdbfixer:
            logger.log("Fixing with PDBFixer")

            fixer.findMissingResidues()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.removeHeterogens(keepWater=False)
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)

            logger.log("Found missing residues: ", fixer.missingResidues)
            logger.log("Found missing terminals residues: ", fixer.missingTerminals)
            logger.log("Found missing atoms:", fixer.missingAtoms)
            logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

        # from here on the config points at the rewritten PDB
        self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
        with open(self.config.pdb_file_name, 'w') as pdb_handle:
            app.PDBFile.writeFile(fixer.topology, fixer.positions, pdb_handle)

        # merge protein and ligand, naming the ligand residue 'UNL'
        cmd.reinitialize()
        cmd.load(self.config.pdb_file_name)
        cmd.load(self.config.ligand_file_name, "UNL")
        cmd.alter("UNL", "resn='UNL'")
        cmd.save(f"{self.config.pdb_file_name}")
def system_generator_wrapper(
        oemols,
        barostat=None,
        forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
        forcefield_kwargs={
            'removeCMMotion': False,
            'ewaldErrorTolerance': 1e-4,
            'nonbondedMethod': app.NoCutoff,
            'constraints': app.HBonds,
            'hydrogenMass': 4 * unit.amus
        },
        small_molecule_forcefield='gaff-2.11',
        **kwargs):
    """
    Build a (vacuum) system generator for a set of small molecules.

    Arguments
    ---------
    oemols : list of openeye.oechem.OEMol
        molecules to register with the generator
    barostat : openmm.MonteCarloBarostat, default None
        barostat handed to the generator
    forcefield_files : list of str
        pointers to protein forcefields and solvent
    forcefield_kwargs : dict
        dict of forcefield_kwargs
    small_molecule_forcefield : str
        pointer to small molecule forcefield to use
    **kwargs
        accepted but not used

    Returns
    -------
    system_generator : openmmforcefields.generators.SystemGenerator
    """
    from openforcefield.topology import Molecule
    from openmmforcefields.generators import SystemGenerator

    off_molecules = [Molecule.from_openeye(oemol) for oemol in oemols]

    return SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=off_molecules,
        cache=None)
def test_merge_system():
    """Test merging of a system created from AMBER and another created from SMIRNOFF."""
    from .utils import create_system_from_amber, get_amber_file_path, get_alkethoh_file_path
    # TODO:
    from openeye import oechem

    # AMBER side: system, topology and positions from prmtop/inpcrd
    prmtop_filename, inpcrd_filename = get_amber_file_path(
        'cyclohexane_ethanol_0.4_0.6')
    amber_system, amber_topology, amber_positions = create_system_from_amber(
        prmtop_filename, inpcrd_filename)

    # SMIRNOFF side: read a simple OEMol from MOL2
    mol2_path = get_alkethoh_file_path('AlkEthOH_c100')[0]
    mol2_stream = oechem.oemolistream(mol2_path)
    oe_mol = oechem.OEMol()
    mol2_flavor = (oechem.OEIFlavor_Generic_Default
                   | oechem.OEIFlavor_MOL2_Default
                   | oechem.OEIFlavor_MOL2_Forcefield)
    mol2_stream.SetFlavor(oechem.OEFormat_MOL2, mol2_flavor)
    oechem.OEReadMolecule(mol2_stream, oe_mol)
    oechem.OETriposAtomNames(oe_mol)

    # Load forcefield file.
    offxml_path = utils.get_data_file_path(
        'test_forcefields/Frosst_AlkEthOH.offxml')
    forcefield = ForceField(offxml_path)

    # Create OpenMM System and Topology.
    off_mol = Molecule.from_openeye(oe_mol, allow_undefined_stereo=True)
    off_top = Topology.from_molecules([off_mol])
    smirnoff_system = forcefield.create_openmm_system(off_top)
    smirnoff_topology = structure.generateTopologyFromOEMol(oe_mol)
    smirnoff_positions = structure.extractPositionsFromOEMol(oe_mol)

    structure.merge_system(amber_topology,
                           smirnoff_topology,
                           amber_system,
                           smirnoff_system,
                           amber_positions,
                           smirnoff_positions,
                           verbose=True)
def generateSMIRNOFFStructure(oemol):
    """
    Build a ParmEd structure for an OpenEye molecule, parameterized with the
    SMIRNOFF force field 'test_forcefields/smirnoff99Frosst.offxml'.

    Emits a PendingDeprecationWarning on every call.

    Parameters
    ----------
    oemol : openeye.oechem.OEMol
        OpenEye molecule

    Returns
    -------
    molecule_structure : parmed.Structure
        The resulting Structure
    """
    warnings.warn(DEPRECATION_WARNING_TEXT, PendingDeprecationWarning)

    import parmed
    from openforcefield.topology import Molecule, Topology
    from openforcefield.typing.engines.smirnoff import ForceField

    off_topology = Topology.from_molecules([Molecule.from_openeye(oemol)])
    force_field = ForceField('test_forcefields/smirnoff99Frosst.offxml')

    # OpenMM topology built directly from the OpenEye molecule
    openmm_topology = generateTopologyFromOEMol(oemol)

    # If it's a nonperiodic box, then we can't use default (PME) settings
    is_nonperiodic = openmm_topology.getPeriodicBoxVectors() is None
    if is_nonperiodic:
        force_field.get_parameter_handler("Electrostatics", {})._method = 'Coulomb'

    openmm_system = force_field.create_openmm_system(off_topology)

    # Convert to ParmEd structure.
    coordinates = extractPositionsFromOEMol(oemol)
    return parmed.openmm.load_topology(openmm_topology, openmm_system, xyz=coordinates)
def _openeye_parameteriser(cls, mol, **kwargs):
    """
    Creates a parameterised system from openeye molecule

    Stores the resulting ParmEd structure on ``cls.ligand_pmd`` and the path
    of a PDB file written for the molecule on ``cls.pdb_filename``.

    Parameters
    ----------
    mol : oechem.OEMol
    **kwargs
        accepted but not used

    Raises
    ------
    ValueError
        If any step of the parameterisation fails; the original exception is
        attached as ``__cause__``.
    """
    try:
        forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
        molecule = Molecule.from_openeye(
            mol, allow_undefined_stereo=cls.allow_undefined_stereo)
        from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=OpenEyeToolkitWrapper())

        topology = Topology.from_molecules(molecule)
        openmm_system = forcefield.create_openmm_system(
            topology, charge_from_molecules=[molecule])

        ligand_pmd = parmed.openmm.topsystem.load_topology(
            topology.to_openmm(), openmm_system, molecule._conformers[0])
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise ValueError("Parameterisation Failed : {}".format(e)) from e  # TODO

    ligand_pmd.title = cls.smiles

    for residue in ligand_pmd.residues:
        residue.name = 'LIG'

    tmp_dir = tempfile.mkdtemp()
    # We need all molecules as both pdb files (as packmol input)
    # and mdtraj.Trajectory for restoring bonds later.
    # NOTE(review): mktemp is deprecated; here it only picks a name inside
    # the directory mkdtemp just created.
    pdb_filename = tempfile.mktemp(suffix=".pdb", dir=tmp_dir)

    from openeye import oechem  # OpenEye Python toolkits

    # Close the stream explicitly so the file is flushed before callers read
    # it, instead of relying on garbage collection of a temporary stream.
    ofs = oechem.oemolostream(pdb_filename)
    try:
        oechem.OEWriteMolecule(ofs, mol)
    finally:
        ofs.close()

    cls.pdb_filename = pdb_filename
    cls.ligand_pmd = ligand_pmd
def generate_testsystem(smiles = 'CCCC',
                        forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                        forcefield_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus},
                        nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff},
                        periodic_forcefield_kwargs = {'nonbondedMethod': app.PME},
                        small_molecule_forcefield = 'gaff-2.11',
                        padding=9*unit.angstroms,
                        ionicStrength=0.0*unit.molar,
                        water_model = 'tip3p',
                        pressure = 1.0 * unit.atmosphere,
                        temperature = 300 * unit.kelvin,
                        barostat_period = 50,
                        **kwargs
                        ):
    """
    Build matching vacuum and solvated test systems for one small molecule.

    arguments
        smiles : str, default 'CCCC'
            smiles string of the small molecule
        forcefield_files : list, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield file names
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : None, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs
        nonperiodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.NoCutoff}
            forcefield kwargs used by the vacuum generator
        periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
            forcefield kwargs used by the solvent generator
        small_molecule_forcefield : str, default 'gaff-2.11'
            small molecule forcefield to parameterize smiles
        padding : simtk.unit.Quantity (compatible with unit.angstroms), default 9.0 * unit.angstroms
            solvent padding
        ionicStrength : simtk.unit.Quantity, default 0.0*unit.molar
            ionic strength of solvent
        water_model : str, default 'tip3p'
            water model for solvation
        pressure : simtk.unit.Quantity, default 1.0 * unit.atmosphere
            pressure of the barostat
        temperature : simtk.unit.Quantity, default 300 * unit.kelvin
            temperature of barostat
        barostat_period : int, default 50
            integer of the barostat period
        **kwargs
            accepted but not used

    returns
        vac_sys_pos_top : tuple
            tuple of the vacuum openmm.System, unit.Quantity(unit.nanometers), openmm.Topology
        sol_sys_pos_top : tuple
            tuple of the solvent openmm.System, unit.Quantity(unit.nanometers), openmm.Topology
    """
    from openforcefield.topology import Molecule
    from perses.utils.openeye import smiles_to_oemol
    from openmmforcefields.generators.system_generators import SystemGenerator
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from qmlify.utils import pull_force_by_name

    oemol = smiles_to_oemol(smiles)
    off_molecules = [Molecule.from_openeye(oemol)]

    # settings shared by the vacuum and the solvent generator
    common_generator_kwargs = dict(
        forcefields=forcefield_files,
        small_molecule_forcefield=small_molecule_forcefield,
        forcefield_kwargs=forcefield_kwargs,
        molecules=off_molecules,
    )

    vac_system_generator = SystemGenerator(
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        **common_generator_kwargs)

    barostat = openmm.MonteCarloBarostat(pressure, temperature, barostat_period)
    sol_system_generator = SystemGenerator(
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
        barostat=barostat,
        **common_generator_kwargs)

    vac_system, vac_positions, vac_topology = OEMol_to_omm_ff(oemol, vac_system_generator)

    # solvate the vacuum structure
    modeller = app.Modeller(vac_topology, vac_positions)
    modeller.addSolvent(sol_system_generator.forcefield,
                        model=water_model,
                        padding=padding,
                        ionicStrength=ionicStrength)
    raw_sol_positions = modeller.getPositions()
    sol_topology = modeller.getTopology()

    # repack the solvated positions as a numpy-backed Quantity in nanometers
    unitless_positions = raw_sol_positions.value_in_unit_system(unit.md_unit_system)
    sol_positions = unit.quantity.Quantity(
        value=np.array([list(atom_pos) for atom_pos in unitless_positions]),
        unit=unit.nanometers)
    sol_system = sol_system_generator.create_system(sol_topology)

    # a quick assertion to make sure the nonbonded forces are being treated properly
    vac_nbf = pull_force_by_name(vac_system, 'NonbondedForce')
    sol_nbf = pull_force_by_name(sol_system, 'NonbondedForce')
    assert not vac_nbf.usesPeriodicBoundaryConditions()
    assert sol_nbf.usesPeriodicBoundaryConditions()

    return (vac_system, vac_positions, vac_topology), (sol_system, sol_positions, sol_topology)
def find_smirks_parameters(smiles_list, molecule_paths):
    """Collect the force field parameters assigned to a list of molecules.

    For every entry of ``smiles_list`` the molecule file at the same position
    in ``molecule_paths`` is read, its first molecule is labelled with the
    ``smirnoff99Frosst-1.0.9.offxml`` force field, and every (atoms,
    parameter) assignment is printed and accumulated.

    Parameters
    ----------
    smiles_list: list of str
        The SMILES patterns of the target molecules. Only the length of this
        list is used; the structures themselves come from ``molecule_paths``.
    molecule_paths: list of Path
        Molecule files, one per SMILES entry. Only the first molecule of each
        file is labelled.

    Returns
    -------
    dict
        ``{force_name: {parameter_id: {"atom_indices": [...],
        "atom_names": [...], "smirks": str}}}``. ``atom_indices`` and
        ``atom_names`` hold one entry per match, accumulated over all input
        molecules, and are index-aligned with each other.

    Notes
    -----
    A file that cannot be opened, or that yields no molecules, is reported
    through ``logging.error`` and skipped.
    """
    force_field = smirnoff.ForceField('smirnoff99Frosst-1.0.9.offxml')

    smiles_by_smirks = {
        "Bonds": {},
        "Angles": {},
        "ProperTorsions": {},
        "vdW": {},
        "ImproperTorsions": {},
        "Electrostatics": {},
    }

    # Populate the dictionary using the open force field toolkit.
    for index, _smiles in enumerate(smiles_list):
        molecule_path = molecule_paths[index]

        ifs = oechem.oemolistream()
        if not ifs.open(str(molecule_path)):
            logging.error(
                f'Unable to open {molecule_path} for reading...')
            continue
        try:
            oe_mols = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]
        finally:
            ifs.close()

        if not oe_mols:
            logging.error(f'No molecules could be read from {molecule_path}')
            continue

        oe_mol = oe_mols[0]
        oechem.OE3DToAtomStereo(oe_mol)
        molecule = Molecule.from_openeye(oe_mol)
        topology = Topology.from_molecules([molecule])

        molecule_force_list = force_field.label_molecules(topology)

        for molecule_index, molecule_forces in enumerate(molecule_force_list):
            print(f'Forces for molecule {molecule_index}')
            for force_name, force_dict in molecule_forces.items():
                print(f"\n{force_name}:")
                # Tolerate handlers that were not pre-seeded above.
                parameters_for_force = smiles_by_smirks.setdefault(force_name, {})
                for (atom_indices, parameter) in force_dict.items():
                    atom_names = [
                        oe_mol.GetAtom(oechem.OEHasAtomIdx(i)).GetName()
                        for i in atom_indices
                    ]
                    print("atoms: %s parameter_id: %s smirks %s" %
                          (atom_names, parameter.id, parameter.smirks))

                    # The lists are created once per parameter id and then
                    # appended to. The previous code tested for the list keys
                    # on the per-force dict instead of the per-parameter dict,
                    # so the lists were reset on every match and only the
                    # last hit survived.
                    entry = parameters_for_force.setdefault(
                        parameter.id, {"atom_indices": [], "atom_names": []})
                    entry["atom_indices"].append(atom_indices)
                    entry["atom_names"].append(atom_names)
                    entry["smirks"] = parameter.smirks

    return smiles_by_smirks
def get_parameters(mols_dict, ffxml):
    """
    For a group of structures, call the Open Force Field function
    get_molecule_parameterIDs to identify parameter assignment, grouped by
    molecule and grouped by parameter.

    Parameters
    ----------
    mols_dict : dict of dicts
        Each value must provide a 'structure' entry holding the OpenEye
        molecule to convert. The first-level key is what ends up as the value
        in `smi_dict`.
    ffxml : string
        name of FFXML force field file, passed to ForceField

    Returns
    -------
    parameters_by_molecule : dict
        first return value of get_molecule_parameterIDs
        (presumably isosmiles -> list of parameter IDs; verify against that
        function)
    parameters_by_ID : dict
        second return value of get_molecule_parameterIDs
        (presumably parameter ID -> list of isosmiles; verify against that
        function)
    smi_dict : dict
        key is the SMILES from `Molecule.to_smiles()`; value is the
        `mols_dict` key it came from. If several entries share a SMILES, the
        last one in iteration order wins.

    """
    # load in force field
    ff = ForceField(ffxml)

    off_mols = []
    smi_dict = {}
    seen_smiles = set()

    for key, entry in mols_dict.items():
        # create openforcefield molecule from OEMol
        # note: stereo error raised even though coordinates present (todo?)
        off_mol = Molecule.from_openeye(entry['structure'],
                                        allow_undefined_stereo=True)
        iso_smiles = off_mol.to_smiles()

        # dictionary to backtrace the iso_smiles to the original molecule;
        # recorded for duplicates too, so the last key wins as before
        smi_dict[iso_smiles] = key

        # keep only the first molecule for each SMILES
        # (duplicates make get_molecule_parameterIDs raise an error)
        if iso_smiles in seen_smiles:
            continue
        seen_smiles.add(iso_smiles)
        off_mols.append(off_mol)

    # create dictionaries describing parameter assignment,
    # grouped both by molecule and by parameter
    parameters_by_molecule, parameters_by_ID = get_molecule_parameterIDs(
        off_mols, ff)

    return parameters_by_molecule, parameters_by_ID, smi_dict
def prepare_simulation(molecule, basedir, save_openmm=False):
    """
    Prepare simulation systems

    For each of the 'complex' and 'ligand' phases this reads
    ``<basedir>/docking/<title> - <phase>.pdb``, solvates it, minimizes and
    equilibrates it on the CUDA platform, and writes gromacs ``.gro``/``.top``
    files to ``<basedir>/gromacs``. A phase is skipped when its output files
    already exist.

    When `find_warheads(molecule)` reports at least one warhead, the complex
    phase additionally tracks the CYS145 SG-to-warhead distance during
    equilibration and stores its min/mean/stddev as SD data on `molecule`.

    Relies on the module-level names `find_warheads` and `oechem`.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
       The molecule to set up; its title is used to build all file names
    basedir : str
       The base directory containing docking/; gromacs/ and openmm/ are
       created inside it
    save_openmm : bool, optional, default=False
       If True, save gzipped OpenMM System, State, Integrator and PDB
       snapshots to <basedir>/openmm

    Raises
    ------
    Exception
        If the molecule has warheads but no CYS 145 SG atom is found in the
        complex topology.
    """
    # Parameters
    from simtk import unit, openmm
    # NOTE(review): water_model is not referenced below; addSolvent is called with the literal 'tip3p'
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom # used for the complex phase
    box_size = openmm.vec3.Vec3(3.4,3.4,3.4)*unit.nanometers # used for the ligand phase
    ionic_strength = 100 * unit.millimolar # 100
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    nsteps_per_iteration = 250
    iterations = 10000 # 10 ns (covalent score)

    protein_forcefield = 'amber14/protein.ff14SB.xml'
    small_molecule_forcefield = 'openff-1.1.0'
    #small_molecule_forcefield = 'gaff-2.11' # only if you really like atomtypes
    solvation_forcefield = 'amber14/tip3p.xml'

    # Create SystemGenerators
    import os
    from simtk.openmm import app
    from openforcefield.topology import Molecule
    off_molecule = Molecule.from_openeye(molecule, allow_undefined_stereo=True)
    print(off_molecule)
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # docking directory (input PDB files are read from here)
    docking_basedir = os.path.join(basedir, 'docking')

    # gromacs directory
    gromacs_basedir = os.path.join(basedir, 'gromacs')
    os.makedirs(gromacs_basedir, exist_ok=True)

    # openmm directory
    openmm_basedir = os.path.join(basedir, 'openmm')
    os.makedirs(openmm_basedir, exist_ok=True)

    # Cache file for the SystemGenerators, one per molecule title
    cache = os.path.join(openmm_basedir, f'{molecule.GetTitle()}.json')

    common_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 5e-04, 'nonbondedMethod': app.PME, 'hydrogenMass': 3.0*unit.amu}
    unconstrained_kwargs = {'constraints': None, 'rigidWater': False}
    constrained_kwargs = {'constraints': app.HBonds, 'rigidWater': True}
    forcefields = [protein_forcefield, solvation_forcefield]
    from openmmforcefields.generators import SystemGenerator
    # Unconstrained generator: used only for the system handed to ParmEd for gromacs export
    parmed_system_generator = SystemGenerator(forcefields=forcefields, molecules=[off_molecule], small_molecule_forcefield=small_molecule_forcefield, cache=cache, barostat=barostat, forcefield_kwargs={**common_kwargs, **unconstrained_kwargs})
    # Constrained generator: used for solvation, minimization and equilibration
    openmm_system_generator = SystemGenerator(forcefields=forcefields, molecules=[off_molecule], small_molecule_forcefield=small_molecule_forcefield, cache=cache, barostat=barostat, forcefield_kwargs={**common_kwargs, **constrained_kwargs})

    # Prepare phases
    import os
    print(f'Setting up simulation for {molecule.GetTitle()}...')
    for phase in ['complex', 'ligand']:
        phase_name = f'{molecule.GetTitle()} - {phase}'
        print(phase_name)

        pdb_filename = os.path.join(docking_basedir, phase_name + '.pdb')
        gro_filename = os.path.join(gromacs_basedir, phase_name + '.gro')
        top_filename = os.path.join(gromacs_basedir, phase_name + '.top')

        system_xml_filename = os.path.join(openmm_basedir, phase_name+'.system.xml.gz')
        integrator_xml_filename = os.path.join(openmm_basedir, phase_name+'.integrator.xml.gz')
        state_xml_filename = os.path.join(openmm_basedir, phase_name+'.state.xml.gz')

        # Check if we can skip setup: gromacs output must exist, and the
        # OpenMM output too when it was requested
        gromacs_files_exist = os.path.exists(gro_filename) and os.path.exists(top_filename)
        openmm_files_exist = os.path.exists(system_xml_filename) and os.path.exists(state_xml_filename) and os.path.exists(integrator_xml_filename)
        if gromacs_files_exist and (not save_openmm or openmm_files_exist):
            continue

        # Filter out UNK atoms by spruce (drops every line containing 'UNK')
        with open(pdb_filename, 'r') as infile:
            lines = [ line for line in infile if 'UNK' not in line ]
        from io import StringIO
        pdbfile_stringio = StringIO(''.join(lines))

        # Read the unsolvated system into an OpenMM Topology
        pdbfile = app.PDBFile(pdbfile_stringio)
        topology, positions = pdbfile.topology, pdbfile.positions

        # Add solvent: fixed box for the ligand phase, padding for the complex
        print('Adding solvent...')
        modeller = app.Modeller(topology, positions)
        if phase == 'ligand':
            kwargs = {'boxSize' : box_size}
        else:
            kwargs = {'padding' : solvent_padding}
        modeller.addSolvent(openmm_system_generator.forcefield, model='tip3p', ionicStrength=ionic_strength, **kwargs)

        # Create an OpenMM system
        system = openmm_system_generator.create_system(modeller.topology)

        # If monitoring covalent distance, add an unused force
        # (the CustomCVForce energy expression is '0'; it only exposes the distances)
        warheads_found = find_warheads(molecule)
        covalent = (len(warheads_found) > 0)
        if covalent and phase=='complex':

            # Find the CYS145 SG atom index in the unsolvated topology
            sulfur_atom_index = None
            for atom in topology.atoms():
                if (atom.residue.name == 'CYS') and (atom.residue.id == '145') and (atom.name == 'SG'):
                    sulfur_atom_index = atom.index
                    break
            if sulfur_atom_index is None:
                raise Exception('CYS145 SG atom cannot be found')

            print('Adding CustomCVForces...')
            custom_cv_force = openmm.CustomCVForce('0')
            # One collective variable per warhead: the SG-warhead distance 'r'
            # NOTE(review): assumes warheads_found maps warhead type -> atom index valid in the system — confirm against find_warheads
            for warhead_type, warhead_atom_index in warheads_found.items():
                distance_force = openmm.CustomBondForce('r')
                distance_force.setUsesPeriodicBoundaryConditions(True)
                distance_force.addBond(sulfur_atom_index, warhead_atom_index, [])
                custom_cv_force.addCollectiveVariable(warhead_type, distance_force)
            # Remember the index so the force can be removed before export
            force_index = system.addForce(custom_cv_force)

        # Create OpenMM Context
        platform = openmm.Platform.getPlatformByName('CUDA')
        platform.setPropertyDefaultValue('Precision', 'mixed')
        integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
        context = openmm.Context(system, integrator, platform)
        context.setPositions(modeller.positions)

        # Report initial potential energy
        state = context.getState(getEnergy=True)
        print(f'{molecule.GetTitle()} {phase} : Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Minimize
        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        # Equilibrate
        print('Equilibrating...')
        from tqdm import tqdm
        import numpy as np
        # One distance sample per iteration; stays all-zero when not monitoring
        distances = np.zeros([iterations], np.float32)
        for iteration in tqdm(range(iterations)):
            integrator.step(nsteps_per_iteration)
            if covalent and phase=='complex':
                # Get distance in Angstroms (smallest over all warheads)
                distances[iteration] = min(custom_cv_force.getCollectiveVariableValues(context)[:]) * 10

        # Retrieve state and copy the final box vectors into system and topology
        state = context.getState(getPositions=True, getVelocities=True, getEnergy=True, getForces=True)
        system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
        print(f'{molecule.GetTitle()} {phase} : Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Remove CustomCVForce and summarize the monitored distance
        if covalent and phase=='complex':
            print('Removing CustomCVForce...')
            system.removeForce(force_index)
            from pymbar.timeseries import detectEquilibration
            # Discard samples before the detected equilibration index t0
            t0, g, Neff = detectEquilibration(distances)
            distances = distances[t0:]
            distance_min = distances.min()
            distance_mean = distances.mean()
            distance_stddev = distances.std()
            oechem.OESetSDData(molecule, 'covalent_distance_min', str(distance_min))
            oechem.OESetSDData(molecule, 'covalent_distance_mean', str(distance_mean))
            oechem.OESetSDData(molecule, 'covalent_distance_stddev', str(distance_stddev))
            print(f'Covalent distance: mean {distance_mean:.3f} A : stddev {distance_stddev:.3f} A')

        # Save as OpenMM
        if save_openmm:
            print('Saving as OpenMM...')
            import gzip
            with gzip.open(integrator_xml_filename, 'wt') as f:
                f.write(openmm.XmlSerializer.serialize(integrator))
            with gzip.open(state_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(state))
            with gzip.open(system_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(system))
            # Full solvated snapshot
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-explicit.pdb.gz'), 'wt') as f:
                app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
            # Snapshot with water stripped (mdtraj 'not water' selection)
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-solute.pdb.gz'), 'wt') as f:
                import mdtraj
                mdtraj_topology = mdtraj.Topology.from_openmm(modeller.topology)
                mdtraj_trajectory = mdtraj.Trajectory([state.getPositions(asNumpy=True) / unit.nanometers], mdtraj_topology)
                selection = mdtraj_topology.select('not water')
                mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
                app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(), mdtraj_trajectory.openmm_positions(0), f)

        # Convert to gromacs via ParmEd, using a freshly created unconstrained system
        print('Saving as gromacs...')
        import parmed
        parmed_system = parmed_system_generator.create_system(modeller.topology)
        #parmed_system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        structure = parmed.openmm.load_topology(modeller.topology, parmed_system, xyz=state.getPositions(asNumpy=True))
        structure.save(gro_filename, overwrite=True)
        structure.save(top_filename, overwrite=True)
def compare_energies(mol_name="naphthalene", ref_mol_name="benzene", atom_expression=['Hybridization'], bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is
    identical, and check that the energies are also the same.

    A topology proposal from `ref_mol_name` to `mol_name` is generated, new
    positions are proposed with the geometry engine, a HybridTopologyFactory
    is built, and `validate_endstate_energies` is run on the Reference
    platform.

    Relies on the module-level names `unit` and `app`.

    Parameters
    ----------
    mol_name : str, default "naphthalene"
        IUPAC name of the proposed (new) molecule
    ref_mol_name : str, default "benzene"
        IUPAC name of the reference (old) molecule
    atom_expression : list of str, default ['Hybridization']
        passed to `generate_expression` to build the atom expression
    bond_expression : list of str, default ['Hybridization']
        passed to `generate_expression` to build the bond expression

    Returns
    -------
    factory : HybridTopologyFactory
        the hybrid factory built from the proposal
    """
    from openmmtools.constants import kB
    from openmmtools import alchemy, states
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    from openforcefield.topology import Molecule

    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol],
                                                     system_generator)
    proposal = proposal_engine.propose(system,
                                       topology,
                                       atom_expr=atom_expr,
                                       bond_expr=bond_expr)
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)

    # make a hybrid factory from the topology proposal
    factory = HybridTopologyFactory(proposal, positions, new_positions)

    # Valence energy added by the forward proposal (zero when nothing new
    # was placed). This branch used to bind a differently named variable,
    # leaving `added_valence_energy` undefined below.
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    # Valence energy subtracted by the reverse proposal (zero when nothing
    # old was removed).
    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    # The returned errors are not used here; the call is made for the
    # validation it performs.
    validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=beta,
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))

    return factory
def data_generator():
    """Yield per-molecule feature tensors built from the module-level `mols`.

    Each molecule is converted with `Molecule.from_openeye`, parameterized
    with the module-level force field `FF`, and turned into TensorFlow
    tensors. A molecule whose conversion, parameterization or featurization
    raises is logged and skipped.

    Relies on the module-level names `mols`, `FF`, `TRANSLATION`,
    `Molecule`, `Topology`, `tf` and `np`.

    Yields
    ------
    tuple
        ``(atoms, adjacency_map, angles, bonds, torsions, particle_params)``

        - atoms: ``TRANSLATION`` value of each atom's atomic number
        - adjacency_map: (n_atoms, n_atoms) matrix whose [i, j] entry (i < j)
          is the bond order of the bond between atoms i and j
        - angles, bonds, torsions, particle_params: one row per term, read
          from forces 0, 1, 3 and 2 of the OpenMM system respectively, with
          the numeric parameters shifted and scaled by fixed constants
    """
    import logging
    logger = logging.getLogger(__name__)

    for oemol in mols:
        # Only failures while *building* the features skip a molecule. The
        # yield sits outside the try so that closing the generator
        # (GeneratorExit) or interrupting it is never swallowed.
        try:
            mol = Molecule.from_openeye(oemol)
            topology = Topology.from_molecules(mol)
            system = FF.create_openmm_system(topology)

            n_atoms = topology.n_topology_atoms
            atoms = tf.convert_to_tensor(
                [TRANSLATION[atom._atomic_number] for atom in mol.atoms],
                dtype=tf.float32)

            adjacency_map = np.zeros((n_atoms, n_atoms), dtype=np.float32)
            for bond in mol.bonds:
                # Only the upper triangle is filled.
                assert bond.atom1_index < bond.atom2_index
                adjacency_map[bond.atom1_index, bond.atom2_index] = \
                    bond.bond_order
            adjacency_map = tf.convert_to_tensor(
                adjacency_map, dtype=tf.float32)

            # assumes the force order is angle, bond, nonbonded, torsion
            # (indices 0-3) — TODO confirm against FF.create_openmm_system
            forces = system.getForces()
            angle_force = forces[0]
            bond_force = forces[1]
            nonbonded_force = forces[2]
            torsion_force = forces[3]

            # The offsets/scales below are presumably dataset mean/std
            # normalization constants — TODO confirm.
            angles = tf.convert_to_tensor(
                [[x[0], x[1], x[2],
                  (x[3]._value - 1.965) / 0.237,
                  (x[4]._value - 507.28) / 396.80]
                 for x in (angle_force.getAngleParameters(idx)
                           for idx in range(angle_force.getNumAngles()))],
                dtype=tf.float32)

            bonds = tf.convert_to_tensor(
                [[x[0], x[1],
                  (x[2]._value - 0.126) / 0.0212,
                  (x[3]._value - 274856) / 12213.203]
                 for x in (bond_force.getBondParameters(idx)
                           for idx in range(bond_force.getNumBonds()))],
                dtype=tf.float32)

            torsions = tf.convert_to_tensor(
                [[x[0], x[1], x[2], x[3], x[4], x[5]._value, x[6]._value]
                 for x in (torsion_force.getTorsionParameters(idx)
                           for idx in range(torsion_force.getNumTorsions()))],
                dtype=tf.float32)

            particle_params = tf.convert_to_tensor(
                [[(x[0]._value - 0.00195) / 0.269,
                  (x[1]._value - 0.276) / 0.0654,
                  (x[2]._value - 0.280) / 0.284]
                 for x in (nonbonded_force.getParticleParameters(idx)
                           for idx in range(nonbonded_force.getNumParticles()))])
        except Exception as error:
            logger.warning("Skipping molecule that could not be featurized: %s", error)
            continue

        yield atoms, adjacency_map, angles, bonds, torsions, particle_params
def setup_fah_run(destination_path, protein_pdb_filename, oemol=None, cache=None, restrain_rmsd=False):
    """
    Prepare simulation

    Solvates the protein (plus ligand, if given), minimizes and equilibrates
    it on the OpenCL platform, and writes bz2-compressed OpenMM
    system/state/integrator XML files plus PDB snapshots into
    `destination_path`. Returns immediately if the three XML files already
    exist.

    Relies on the module-level name `oechem` when `oemol` is given.

    Parameters
    ----------
    destination_path : str
        The path to the RUN to be created
    protein_pdb_filename : str
        Path to protein PDB file
    oemol : openeye.oechem.OEMol, optional, default=None
        The molecule to parameterize, with SDData attached
        If None, don't include the small molecule
    cache : optional, default=None
        Forwarded unchanged as the `cache` argument of SystemGenerator
    restrain_rmsd : bool, optional, default=False
        If True, restrain RMSD during first equilibration phase, then
        equilibrate a second time with the restraint switched off
    """
    # Parameters
    from simtk import unit, openmm
    protein_forcefield = 'amber14/protein.ff14SB.xml'
    solvent_forcefield = 'amber14/tip3p.xml'
    small_molecule_forcefield = 'openff-1.2.0'
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    ionic_strength = 70 * unit.millimolar  # assay buffer: 20 mM HEPES pH 7.3, 1 mM TCEP, 50 mM NaCl, 0.01% Tween-20, 10% glycerol
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    iterations = 1000  # 1 ns equilibration
    nsteps_per_iteration = 250

    # Prepare phases
    import os
    system_xml_filename = os.path.join(destination_path, 'system.xml.bz2')
    integrator_xml_filename = os.path.join(destination_path, 'integrator.xml.bz2')
    state_xml_filename = os.path.join(destination_path, 'state.xml.bz2')

    # Check if we can skip setup
    openmm_files_exist = (os.path.exists(system_xml_filename)
                          and os.path.exists(state_xml_filename)
                          and os.path.exists(integrator_xml_filename))
    if openmm_files_exist:
        return

    # Create barostat
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # Create RUN directory if it does not yet exist
    os.makedirs(destination_path, exist_ok=True)

    # Load any molecule(s)
    molecule = None
    if oemol is not None:
        from openforcefield.topology import Molecule
        molecule = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        molecule.name = 'MOL'  # Ensure residue is MOL
        print([res for res in molecule.to_topology().to_openmm().residues()])

    # Create SystemGenerator
    from simtk.openmm import app
    forcefield_kwargs = {
        'removeCMMotion': False,
        'hydrogenMass': 3.0 * unit.amu,
        'constraints': app.HBonds,
        'rigidWater': True
    }
    periodic_kwargs = {
        'nonbondedMethod': app.PME,
        'ewaldErrorTolerance': 2.5e-04
    }
    forcefields = [protein_forcefield, solvent_forcefield]
    from openmmforcefields.generators import SystemGenerator
    openmm_system_generator = SystemGenerator(
        forcefields=forcefields,
        molecules=molecule,
        small_molecule_forcefield=small_molecule_forcefield,
        cache=cache,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_kwargs)

    # Read protein
    print(f'Reading protein from {protein_pdb_filename}...')
    pdbfile = app.PDBFile(protein_pdb_filename)
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)

    if oemol is not None:
        # Add small molecule to the system
        modeller.add(molecule.to_topology().to_openmm(),
                     molecule.conformers[0])
        # DEBUG : Check residue name
        with open(os.path.join(destination_path, 'initial-complex.pdb'),
                  'wt') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions,
                                  outfile)

    # Add solvent
    print('Adding solvent...')
    modeller.addSolvent(openmm_system_generator.forcefield,
                        model=water_model,
                        ionicStrength=ionic_strength,
                        padding=solvent_padding)

    # Create an OpenMM system
    print('Creating OpenMM system...')
    system = openmm_system_generator.create_system(modeller.topology)

    # Add a virtual bond between protein and ligand to make sure they are not imaged separately
    if oemol is not None:
        import mdtraj as md
        # Solvated topology; the selections below pick out protein/ligand atoms
        mdtop = md.Topology.from_openmm(modeller.topology)
        for res in mdtop.residues:
            print(res)
        protein_atom_indices = mdtop.select(
            '(protein and name CA)')  # protein CA atoms
        ligand_atom_indices = mdtop.select(
            '((resname MOL) and (mass > 1))')  # ligand heavy atoms
        protein_atom_index = int(protein_atom_indices[0])
        ligand_atom_index = int(ligand_atom_indices[0])
        # Zero-energy bond between the first CA and the first ligand heavy atom
        force = openmm.CustomBondForce('0')
        force.addBond(protein_atom_index, ligand_atom_index, [])
        system.addForce(force)

    # Add RMSD restraints if requested
    if restrain_rmsd:
        print('Adding RMSD restraint...')
        kB = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB
        kT = kB * temperature
        import mdtraj as md
        # NOTE(review): pdbfile.topology is the protein as read from disk; the
        # ligand was added to `modeller`, so the `resname MOL` clause below
        # presumably matches nothing and only CA atoms are restrained — confirm.
        mdtop = md.Topology.from_openmm(
            pdbfile.topology)  # excludes solvent and ions
        #heavy_atom_indices = mdtop.select('mass > 1') # heavy solute atoms
        rmsd_atom_indices = mdtop.select(
            '(protein and (name CA)) or ((resname MOL) and (mass > 1))'
        )  # CA atoms and ligand heavy atoms
        rmsd_atom_indices = [int(index) for index in rmsd_atom_indices]
        custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*RMSD^2')
        custom_cv_force.addGlobalParameter('K_RMSD', kT / unit.angstrom**2)
        rmsd_force = openmm.RMSDForce(modeller.positions, rmsd_atom_indices)
        custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
        # Remember the index so the restraint can be removed before export
        force_index = system.addForce(custom_cv_force)

    # Create OpenMM Context
    platform = openmm.Platform.getPlatformByName('OpenCL')
    platform.setPropertyDefaultValue('Precision', 'mixed')
    from openmmtools import integrators
    integrator = integrators.LangevinIntegrator(temperature, collision_rate,
                                                timestep)
    context = openmm.Context(system, integrator, platform)
    context.setPositions(modeller.positions)

    # Report initial potential energy
    state = context.getState(getEnergy=True)
    print(
        f'Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Store snapshots in MDTraj trajectory to examine RMSD
    import mdtraj as md
    import numpy as np
    mdtop = md.Topology.from_openmm(pdbfile.topology)
    atom_indices = mdtop.select('all')  # all solute atoms
    protein_atom_indices = mdtop.select(
        'protein and (mass > 1)')  # heavy solute atoms
    if oemol is not None:
        ligand_atom_indices = mdtop.select(
            '(resname MOL) and (mass > 1)')  # ligand heavy atoms
    trajectory = md.Trajectory(
        np.zeros([iterations + 1, len(atom_indices), 3], np.float32), mdtop)
    trajectory.xyz[0, :, :] = context.getState(getPositions=True).getPositions(
        asNumpy=True)[atom_indices] / unit.nanometers

    # Minimize
    print('Minimizing...')
    openmm.LocalEnergyMinimizer.minimize(context)

    # Equilibrate (with RMSD restraint if needed)
    from rich.progress import track
    import time
    initial_time = time.time()
    for iteration in track(range(iterations), 'Equilibrating...'):
        integrator.step(nsteps_per_iteration)
        trajectory.xyz[iteration + 1, :, :] = context.getState(
            getPositions=True).getPositions(
                asNumpy=True)[atom_indices] / unit.nanometers
    elapsed_time = (time.time() - initial_time) * unit.seconds
    ns_per_day = (context.getState().getTime() /
                  elapsed_time) / (unit.nanoseconds / unit.day)
    print(f'Performance: {ns_per_day:8.3f} ns/day')

    # Equilibrate again, unrestrained, if we restrained the RMSD
    if restrain_rmsd:
        # Disable RMSD restraint
        context.setParameter('K_RMSD', 0.0)

        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        for iteration in track(range(iterations),
                               'Equilibrating without RMSD restraint...'):
            integrator.step(nsteps_per_iteration)

    # Retrieve state and copy the final box vectors into system and topology
    state = context.getState(getPositions=True,
                             getVelocities=True,
                             getEnergy=True,
                             getForces=True)
    system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
    modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
    print(
        f'Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Drop the RMSD restraint force so it is not part of the exported system
    if restrain_rmsd:
        print('Removing RMSD restraint from system...')
        system.removeForce(force_index)

    #if oemol is not None:
    #    # Check final RMSD
    #    print('checking RMSD...')
    #    trajectory.superpose(trajectory, atom_indices=protein_atom_indices)
    #    protein_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=protein_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_protein_rmsd', f'{protein_rmsd:.2f} A')
    #    ligand_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=ligand_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_ligand_rmsd', f'{ligand_rmsd:.2f} A')
    #    print('RMSD after equilibration: protein {protein_rmsd:8.2f} A | ligand {ligand_rmsd:8.3f} A')

    # Save as OpenMM
    print('Exporting for OpenMM FAH simulation...')
    import bz2
    import gzip
    with bz2.open(integrator_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(integrator))
    with bz2.open(state_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(state))
    with bz2.open(system_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(system))
    # The file is named .pdb.gz, so it is written with gzip; it was previously
    # written through bz2, which made the content mismatch its extension.
    with gzip.open(os.path.join(destination_path, 'equilibrated-all.pdb.gz'),
                   'wt') as f:
        app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
    # Uncompressed snapshot with water stripped (mdtraj 'not water' selection)
    with open(os.path.join(destination_path, 'equilibrated-solute.pdb'),
              'wt') as f:
        mdtraj_topology = md.Topology.from_openmm(modeller.topology)
        mdtraj_trajectory = md.Trajectory(
            [state.getPositions(asNumpy=True) / unit.nanometers],
            mdtraj_topology)
        selection = mdtraj_topology.select('not water')
        mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
        app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(),
                              mdtraj_trajectory.openmm_positions(0), f)
    if oemol is not None:
        # Write molecule in each of the listed formats (SDF, mol2, SMILES, CSV)
        for extension in ['sdf', 'mol2', 'smi', 'csv']:
            filename = os.path.join(destination_path,
                                    f'molecule.{extension}')
            with oechem.oemolostream(filename) as ofs:
                oechem.OEWriteMolecule(ofs, oemol)

    # Clean up
    del context, integrator