def OEMol_to_omm_ff(molecule, data_filename='data/gaff2.xml'):
    """
    Convert an openeye.oechem.OEMol to an openmm system, positions and topology

    Parameters
    ----------
    molecule : openeye.oechem.OEMol object
        input molecule to convert
    data_filename : str, default 'data/gaff2.xml'
        path to .xml forcefield file, resolved through
        ``perses.utils.data.get_data_filename``

    Returns
    -------
    system : openmm.system
        system built by a ``SystemGenerator`` loaded with the forcefield file
    positions : openmm.positions
        positions extracted from ``molecule``
    topology : openmm.topology
        topology generated from ``molecule``

    """
    from perses.rjmc import topology_proposal
    from openmoltools import forcefield_generators
    from perses.utils.data import get_data_filename
    # Imported locally, as createSystemFromIUPAC does, so this helper does not
    # depend on a module-level import being present.
    from perses.utils.openeye import extractPositionsFromOEMol

    gaff_xml_filename = get_data_filename(data_filename)
    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    topology = forcefield_generators.generateTopologyFromOEMol(molecule)
    system = system_generator.build_system(topology)
    positions = extractPositionsFromOEMol(molecule)

    return system, positions, topology
def createSystemFromIUPAC(iupac_name):
    """
    Build an OEMol from an IUPAC name and parameterize it as an OpenMM system.

    The forcefield is initialized from the packaged ``data/gaff.xml`` and then
    extended with a residue template and parameters generated for the molecule.

    Parameters
    ----------
    iupac_name : str
        IUPAC name

    Returns
    -------
    molecule : openeye.OEMol
        OEMol molecule
    system : openmm.System object
        OpenMM system (created with ``removeCMMotion=False``)
    positions : [n,3] np.array of floats
        Positions
    topology : openmm.app.Topology object
        Topology
    """
    from perses.utils.data import get_data_filename
    from perses.utils.openeye import extractPositionsFromOEMol

    # Molecule first, then its topology.
    molecule = iupac_to_oemol(iupac_name)

    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    topology = generateTopologyFromOEMol(molecule)

    # GAFF-backed forcefield.
    # TODO: Fix path for `gaff.xml` since it is not yet distributed with OpenMM
    from simtk.openmm.app import ForceField
    forcefield = ForceField(get_data_filename('data/gaff.xml'))

    # Molecule-specific residue template plus its ffxml parameters.
    from openmoltools.forcefield_generators import generateResidueTemplate
    generated = generateResidueTemplate(molecule)
    template, ffxml = generated
    forcefield.registerResidueTemplate(template)
    forcefield.loadFile(StringIO(ffxml))

    system = forcefield.createSystem(topology, removeCMMotion=False)
    positions = extractPositionsFromOEMol(molecule)

    return (molecule, system, positions, topology)
def test_mapping_strength_levels(pairs_of_smiles=(('Cc1ccccc1', 'c1ccc(cc1)N'), ('CC(c1ccccc1)', 'O=C(c1ccccc1)'), ('Oc1ccccc1', 'Sc1ccccc1')), test=True):
    """
    Check the atom mapping produced at each ``map_strength`` level.

    For every map strength ('weak', 'default', 'strong') and every ligand pair,
    a ``SmallMoleculeSetProposalEngine`` proposal is generated from the first
    ligand and the numbers of unique old / unique new atoms are printed. The
    atom mapping is rendered to ``'{index}-{map_strength}.png'``.

    Parameters
    ----------
    pairs_of_smiles : sequence of (str, str), optional
        (old, new) SMILES pairs. The built-in expected results only cover the
        three default pairs (indices 0-2).
    test : bool, default True
        if True, assert the unique atom counts against the expected table;
        pass False when supplying custom pairs
    """
    from perses.rjmc import topology_proposal

    gaff_xml_filename = get_data_filename('data/gaff.xml')

    # Expected (n_unique_old_atoms, n_unique_new_atoms), keyed by pair index
    # and then by map strength.
    correct_results = {
        0: {'default': (1, 0), 'weak': (1, 0), 'strong': (4, 3)},
        1: {'default': (7, 3), 'weak': (5, 1), 'strong': (7, 3)},
        2: {'default': (0, 0), 'weak': (0, 0), 'strong': (2, 2)},
    }

    for map_strength in ('weak', 'default', 'strong'):
        for index, (lig_a, lig_b) in enumerate(pairs_of_smiles):
            initial_molecule = generate_initial_molecule(lig_a)
            proposed_molecule = generate_initial_molecule(lig_b)
            system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
            proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(
                [lig_a, lig_b], system_generator, map_strength=map_strength)
            initial_system, _, initial_topology = OEMol_to_omm_ff(initial_molecule)
            proposal = proposal_engine.propose(initial_system, initial_topology)

            unique_counts = (len(proposal.unique_old_atoms), len(proposal.unique_new_atoms))
            print(lig_a, lig_b, 'length OLD and NEW atoms', *unique_counts)

            if test:
                expected = correct_results[index][map_strength]
                assert unique_counts == expected, (
                    f"{lig_a} -> {lig_b} with map_strength={map_strength!r}: "
                    f"expected (unique old, unique new) = {expected}, got {unique_counts}")

            # NOTE(review): rendering is performed whether or not `test` is set;
            # confirm this matches the intended nesting.
            render_atom_mapping(f'{index}-{map_strength}.png', initial_molecule,
                                proposed_molecule, proposal._new_to_old_atom_map)
def test_get_data_filename(datafile='data/gaff2.xml'):
    """
    Verify that ``get_data_filename`` resolves ``datafile`` to an existing path.

    Parameters
    ----------
    datafile : str, default 'data/gaff2.xml'
        relative data path handed to ``get_data_filename``
    """
    import os
    from perses.utils.data import get_data_filename

    resolved_path = get_data_filename(datafile)
    path_found = os.path.exists(resolved_path)
    assert path_found, "Either path to datafile is broken, or datafile does not exist"
def test_two_molecule_proposal_engine():
    """
    Test TwoMoleculeSetProposalEngine

    Proposes butane -> pentane and checks the particle counts of the old and
    new systems as well as the sizes of the alchemical / environment atom
    collections on the resulting proposal.
    """
    from perses.rjmc import topology_proposal
    from perses.rjmc.topology_proposal import TwoMoleculeSetProposalEngine

    # Create a proposal engine for butane -> pentane
    old_mol = generate_initial_molecule('CCCC')
    new_mol = generate_initial_molecule('CCCCC')

    gaff_xml_filename = get_data_filename('data/gaff.xml')
    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    proposal_engine = TwoMoleculeSetProposalEngine(old_mol, new_mol, system_generator)

    initial_system, _, initial_topology = OEMol_to_omm_ff(old_mol)

    # Propose a transformation
    proposal = proposal_engine.propose(initial_system, initial_topology)

    # Check proposal: system sizes
    n_new_particles = proposal.new_system.getNumParticles()
    assert n_new_particles == 17, (
        "new_system (pentane) should have 17 atoms (actual: %d)" % n_new_particles)
    n_old_particles = proposal.old_system.getNumParticles()
    assert n_old_particles == 14, (
        "old_system (butane) should have 14 atoms (actual: %d)" % n_old_particles)

    # Check proposal: atom collections. The message reports the collection's
    # *length*; formatting the collection itself with %d would raise TypeError
    # and hide the real assertion failure.
    expected_sizes = (
        ('new_alchemical_atoms', 17),
        ('old_alchemical_atoms', 14),
        ('new_environment_atoms', 0),
        ('old_environment_atoms', 0),
    )
    for attribute, expected in expected_sizes:
        actual = len(getattr(proposal, attribute))
        assert actual == expected, "%s should be %d (actual: %d)" % (attribute, expected, actual)
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules

    Starting from butane, 50 proposals are drawn over the set
    {'CCCC', 'CCCCC', 'CCCCCC'}. On each iteration the 'MOL' residue of the
    current proposal's new topology is converted back to canonical SMILES and
    compared against that proposal's ``new_chemical_state_key``.

    Raises
    ------
    ValueError
        if the new topology does not contain exactly one residue named 'MOL'
    """
    from perses.rjmc import topology_proposal
    from openmoltools import forcefield_generators
    from collections import defaultdict
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    import openeye.oechem as oechem

    list_of_smiles = ['CCCC', 'CCCCC', 'CCCCCC']
    gaff_xml_filename = get_data_filename('data/gaff.xml')
    # Tally of proposed chemical states (kept for inspection; not asserted on).
    stats_dict = defaultdict(int)
    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(
        list_of_smiles, system_generator)

    initial_molecule = generate_initial_molecule('CCCC')
    initial_system, _, initial_topology = OEMol_to_omm_ff(initial_molecule)
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        matching_molecules = [
            res for res in proposal.new_topology.residues() if res.name == 'MOL'
        ]
        if len(matching_molecules) != 1:
            # Covers both the "none found" and the "several found" cases.
            raise ValueError(
                f"Expected exactly one residue named 'MOL', found {len(matching_molecules)}")
        mol_res = matching_molecules[0]
        oemol = forcefield_generators.generateOEMolFromTopologyResidue(mol_res)
        smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(
            oechem.OEMolToSmiles(oemol))
        assert smiles == proposal.new_chemical_state_key, (
            f"topology residue gives {smiles}, "
            f"proposal key is {proposal.new_chemical_state_key}")
        proposal = new_proposal
def generate_vacuum_hostguest_proposal(current_mol_name="B2", proposed_mol_name="MOL"):
    """
    Generate a test vacuum topology proposal, current positions, and new positions triplet
    for the guest of the openmmtools ``HostGuestVacuum`` test system.

    The current molecule is rebuilt from the second residue of the host-guest
    topology; the proposed molecule is always built from the SMILES
    'C1CC2(CCC1(CC2)C)C'.

    Parameters
    ----------
    current_mol_name : str, optional
        passed to the proposal engine as ``residue_name``
    proposed_mol_name : str, optional
        not used by this function; kept so existing callers keep working

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    import simtk.openmm.app as app
    from openeye import oechem
    from openmoltools import forcefield_generators
    from openmmtools import testsystems
    from perses.rjmc import geometry
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    from perses.utils.openeye import smiles_to_oemol
    from perses.utils.data import get_data_filename

    host_guest = testsystems.HostGuestVacuum()
    old_positions, top_old = host_guest.positions, host_guest.topology

    residues = list(top_old.residues())
    # guest is second residue in topology
    current_mol = forcefield_generators.generateOEMolFromTopologyResidue(residues[1])
    proposed_mol = smiles_to_oemol('C1CC2(CCC1(CC2)C)C')

    initial_smiles = oechem.OEMolToSmiles(current_mol)
    final_smiles = oechem.OEMolToSmiles(proposed_mol)

    # Resolve the GAFF file once; it feeds both the ForceField and the SystemGenerator.
    gaff_xml_filename = get_data_filename("data/gaff.xml")
    forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    # System for the unmodified host-guest topology (no solvent is added here).
    current_system = forcefield.createSystem(top_old, removeCMMotion=False)

    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs={'removeCMMotion': False, 'nonbondedMethod': app.NoCutoff})
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        [initial_smiles, final_smiles], system_generator, residue_name=current_mol_name)

    # generate topology proposal
    topology_proposal = proposal_engine.propose(
        current_system, top_old, current_mol=current_mol, proposed_mol=proposed_mol)

    # generate new positions with geometry engine
    # (`beta` is expected to be defined at module scope)
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene", current_mol_smiles=None, proposed_mol_smiles=None, vacuum=False, render_atom_mapping=False):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If they are set to None, then it will attempt to read a set of current and new smiles.
    An atom mapping image (.png) will be generated if specified.

    NOTE(review): SOURCE contains a later definition with this same name; if
    both live in the same module the later one replaces this one at import time.

    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system (solvated unless ``vacuum``)
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        if the molecules cannot be built from the given names / smiles, or if
        neither a pair of names nor a pair of smiles is supplied
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from openeye import oechem
    from openmoltools import forcefield_generators
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    def _explicit_hydrogen_smiles(oemol):
        # SMILES string that also lists hydrogens explicitly.
        return oechem.OECreateSmiString(
            oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol, new_oemol = iupac_to_oemol(current_mol_name), iupac_to_oemol(proposed_mol_name)
            old_smiles = _explicit_hydrogen_smiles(old_oemol)
            new_smiles = _explicit_hydrogen_smiles(new_oemol)
        except Exception as err:
            # Chain the cause so the underlying toolkit error stays visible.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol, new_oemol = smiles_to_oemol(current_mol_smiles), smiles_to_oemol(proposed_mol_smiles)
            old_smiles = _explicit_hydrogen_smiles(old_oemol)
            new_smiles = _explicit_hydrogen_smiles(new_oemol)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    # Rebuild both molecules from their explicit-hydrogen SMILES; only the
    # OEMols are kept, the systems/topologies are regenerated below.
    old_oemol, _, _, _ = openeye.createSystemFromSMILES(old_smiles, title="MOL")

    # correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, _, _, _ = openeye.createSystemFromSMILES(new_smiles, title="NEW")

    # Parameters are generated before both molecules are retitled 'MOL'.
    ffxml = forcefield_generators.generateForceFieldFromMolecules([old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        # `openmm` is expected to be available at module scope.
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu,
        })
    # `StringIO` is expected to be available at module scope.
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine(
        [old_smiles, new_smiles], system_generator, residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None, use_sterics=False, n_bond_divisions=1000,
        n_angle_divisions=180, n_torsion_divisions=360, verbose=True,
        storage=None, bond_softening_constant=1.0,
        angle_softening_constant=1.0, neglect_angles=False)

    if vacuum:
        current_topology, current_positions = old_topology, old_positions
    else:
        # now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol == 'H'
            and atom.residue.name not in ('MOL', 'OLD', 'NEW')
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield, model='tip3p',
                            padding=9.0 * unit.angstroms)
        current_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Repack the modeller positions into a single unit-bearing numpy array
        # (`np` is expected to be available at module scope).
        current_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos)
                for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)

    current_system = system_generator.build_system(current_topology)

    # now to create proposal
    top_proposal = proposal_engine.propose(current_system=current_system,
                                           current_topology=current_topology,
                                           current_mol=old_oemol,
                                           proposed_mol=new_oemol)
    # `beta` is expected to be defined at module scope.
    new_positions, _ = geometry_engine.propose(top_proposal, current_positions, beta)

    if render_atom_mapping:
        # Aliased so the import does not rebind the boolean parameter of the same name.
        from perses.utils.smallmolecules import render_atom_mapping as _draw_atom_mapping
        # The solvated path uses the engine's non-offset map and the vacuum
        # path uses the proposal's own map, as in the original branches.
        if vacuum:
            atom_map = top_proposal._new_to_old_atom_map
        else:
            atom_map = proposal_engine.non_offset_new_to_old_atom_map
        print(f"new_to_old: {atom_map}")
        _draw_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol, atom_map)

    return top_proposal, current_positions, new_positions
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene"):
    """
    Build a solvated topology proposal, the solvated positions of the current
    system, and geometry-proposed new positions from two IUPAC molecule names.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from openeye import oechem
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    from perses.rjmc import geometry
    from perses.utils.data import get_data_filename

    # Current molecule (with its vacuum positions/topology) and proposed molecule.
    built = createSystemFromIUPAC(current_mol_name)
    current_mol, pos_old, top_old = built[0], built[2], built[3]
    proposed_mol = iupac_to_oemol(proposed_mol_name)
    proposed_mol.SetTitle("MOL")

    smiles_pair = [oechem.OEMolToSmiles(current_mol), oechem.OEMolToSmiles(proposed_mol)]

    # Forcefield used for solvation and for the initial solvated system.
    solvation_ff = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    solvation_ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # Solvate in TIP3P.
    modeller = app.Modeller(top_old, pos_old)
    modeller.addSolvent(solvation_ff, model='tip3p', padding=9.0*unit.angstrom)
    solvated_topology = modeller.getTopology()
    solvated_positions = modeller.getPositions()

    solvated_system = solvation_ff.createSystem(
        solvated_topology,
        nonbondedMethod=app.PME,
        removeCMMotion=False,
    )
    # `openmm`, `unit` and `temperature` come from the enclosing module.
    barostat = openmm.MonteCarloBarostat(1.0*unit.atmosphere, temperature, 50)
    solvated_system.addForce(barostat)

    # The same barostat object is handed to the system generator.
    forcefield_files = [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml']
    system_generator = SystemGenerator(
        forcefield_files,
        barostat=barostat,
        forcefield_kwargs={'removeCMMotion': False},
        periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
    )
    geometry_engine = geometry.FFAllAngleGeometryEngine()

    canonicalized_smiles_list = []
    for smiles in smiles_pair:
        canonicalized_smiles_list.append(SmallMoleculeSetProposalEngine.canonicalize_smiles(smiles))
    proposal_engine = SmallMoleculeSetProposalEngine(
        canonicalized_smiles_list, system_generator, residue_name="MOL")

    # Topology proposal first, then new positions from the geometry engine
    # (`beta` comes from the enclosing module).
    topology_proposal = proposal_engine.propose(solvated_system, solvated_topology)
    geometry_result = geometry_engine.propose(topology_proposal, solvated_positions, beta)
    new_positions = geometry_result[0]

    return topology_proposal, solvated_positions, new_positions