def test_parameterize_molecules_from_creation(self):
    """Test that SystemGenerator can parameterize pre-specified molecules in vacuum

    Every molecule is parameterized twice with the same generator, and the
    repeat call is asserted to take less time than the first one.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    def timed_create(generator, topology):
        # Build a System and return it together with the Timer that wrapped the call
        with Timer() as stopwatch:
            built = generator.create_system(topology)
        return built, stopwatch

    for name, testsystem in self.testsystems.items():
        print(testsystem)
        molecules = testsystem['molecules']

        for forcefield_name in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # One generator per small molecule force field, seeded with all molecules
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=forcefield_name,
                molecules=molecules,
            )

            for molecule in molecules:
                openmm_topology = molecule.to_topology().to_openmm()

                # First pass: parameters have to be generated
                system, t1 = timed_create(generator, openmm_topology)
                assert system.getNumParticles() == molecule.n_atoms

                # Second pass: the molecule should now be cached
                system, t2 = timed_create(generator, openmm_topology)
                assert system.getNumParticles() == molecule.n_atoms

                assert (t2.interval() < t1.interval())
def test_complex(self):
    """Test parameterizing a protein:ligand complex in vacuum

    After the vacuum check, the same complex is solvated with the generator's
    force field, parameterized again, and written to 'test.pdb'.
    """
    from openmmforcefields.generators import SystemGenerator
    from simtk import unit
    from simtk.openmm import app

    # Only the first ligand of every test system is exercised
    ligand_index = 0

    for name, testsystem in self.testsystems.items():
        print(f'Testing parameterization of {name} in vacuum')
        molecules = testsystem['molecules']
        complex_structure = testsystem['complex_structures'][ligand_index]
        molecule = molecules[ligand_index]
        openmm_topology = complex_structure.topology

        # Cache file that sits next to the test system's data files
        system_directory = get_data_filename(os.path.join('perses_jacs_systems', name))
        cache = os.path.join(system_directory, 'cache.json')

        # Vacuum system: particle count must match the input structure
        generator = SystemGenerator(
            forcefields=self.amber_forcefields,
            molecules=molecules,
            cache=cache,
        )
        system = generator.create_system(openmm_topology)
        assert system.getNumParticles() == len(complex_structure.atoms)

        # Solvated system: add solvent and ions with the generator's ForceField
        modeller = app.Modeller(complex_structure.topology, complex_structure.positions)
        modeller.addSolvent(
            generator.forcefield,
            padding=0*unit.angstroms,
            ionicStrength=300*unit.millimolar,
        )
        system = generator.create_system(modeller.topology)
        n_solvated_atoms = len(list(modeller.topology.atoms()))
        assert system.getNumParticles() == n_solvated_atoms

        # Dump the solvated model into the current working directory
        with open('test.pdb', 'w') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions, outfile)
def test_add_molecules(self):
    """Test that Molecules can be added to SystemGenerator later

    The generator is built without molecules; each test system's molecules are
    registered through add_molecules() and then parameterized twice, with the
    repeat call asserted to be faster than the first.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    def timed_create(generator, topology):
        # Build a System and return it together with the Timer that wrapped the call
        with Timer() as stopwatch:
            built = generator.create_system(topology)
        return built, stopwatch

    for forcefield_name in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
        # The generator starts out knowing no molecules
        generator = SystemGenerator(
            forcefields=self.amber_forcefields,
            small_molecule_forcefield=forcefield_name,
        )

        # Feed the molecules in one test system at a time
        for name, testsystem in self.testsystems.items():
            molecules = testsystem['molecules']
            generator.add_molecules(molecules)

            for molecule in molecules:
                openmm_topology = molecule.to_topology().to_openmm()

                # First pass: parameters have to be generated
                system, t1 = timed_create(generator, openmm_topology)
                assert system.getNumParticles() == molecule.n_atoms

                # Second pass: the molecule should now be cached
                system, t2 = timed_create(generator, openmm_topology)
                assert system.getNumParticles() == molecule.n_atoms

                assert (t2.interval() < t1.interval())
def generate_parameters(molecule, basepath='json-files', small_molecule_forcefield='openff-1.1.0'):
    """
    Generate JSON parameter cache for a molecule in f'{basepath}/{molecule.name}.json'

    Nothing is done when that cache file already exists. A failure while
    creating the system is printed rather than raised, so one bad molecule
    does not abort a batch run.

    Parameters
    ----------
    molecule : openforcefield.topology.Molecule
        The molecule to parameterize
    basepath : str, optional, default='json-files'
        Directory holding the per-molecule cache files; created if missing
    small_molecule_forcefield : str, optional, default='openff-1.1.0'
        Small molecule force field name handed to SystemGenerator
    """
    import os

    cache_filename = os.path.join(basepath, f'{molecule.name}.json')

    # Skip molecules that already have a cache file.
    # (os.path.exists must be *called*; the bare function object is always truthy.)
    if os.path.exists(cache_filename):
        return

    # Make sure the cache file can be created inside basepath
    if basepath:
        os.makedirs(basepath, exist_ok=True)

    # Generate and cache parameters
    from openmmforcefields.generators import SystemGenerator
    system_generator = SystemGenerator(
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[molecule],
        cache=cache_filename,
    )
    try:
        system_generator.create_system(molecule.to_topology().to_openmm())
    except Exception as e:
        # Best-effort: report the failure and carry on
        # NOTE(review): confirm the Molecule class exposes a `smiles` attribute
        print(f'FAILED: {molecule.smiles}')
        print(e)

    # Drop the generator explicitly once the molecule has been processed
    del system_generator
def test_cache(self):
    """Test that SystemGenerator correctly manages a cache

    All force fields share one JSON cache inside a temporary directory. The
    first pass registers the molecules and parameterizes them; the second pass
    builds fresh generators on the same cache and parameterizes the molecules
    again without registering them.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    # timing[(small_molecule_forcefield, smiles)] is the time (in seconds)
    # taken to parameterize the molecule the first time
    timing = {}

    # A reduced force field list is used when running under CI
    if CI:
        forcefields_under_test = ['gaff-2.11', 'openff-1.1.0']
    else:
        forcefields_under_test = SystemGenerator.SMALL_MOLECULE_FORCEFIELDS

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Single cache file shared by every force field
        cache = os.path.join(tmpdirname, 'db.json')

        # Pass 1: parameterize all molecules for all test systems
        for small_molecule_forcefield in forcefields_under_test:
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=small_molecule_forcefield,
                cache=cache,
            )
            for name, testsystem in self.testsystems.items():
                molecules = testsystem['molecules']
                generator.add_molecules(molecules)

                for molecule in molecules:
                    openmm_topology = molecule.to_topology().to_openmm()
                    with Timer() as timer:
                        system = generator.create_system(openmm_topology)
                    assert system.getNumParticles() == molecule.n_atoms
                    # Remember how long the uncached parameterization took
                    key = (small_molecule_forcefield, molecule.to_smiles())
                    timing[key] = timer.interval()

        # Pass 2: molecules should now be served from the cache
        for small_molecule_forcefield in forcefields_under_test:
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=small_molecule_forcefield,
                cache=cache,
            )
            for name, testsystem in self.testsystems.items():
                molecules = testsystem['molecules']
                # No add_molecules() here: molecules already defined in the
                # cache do not need to be added again

                for molecule in molecules:
                    openmm_topology = molecule.to_topology().to_openmm()
                    with Timer() as timer:
                        system = generator.create_system(openmm_topology)
                    assert system.getNumParticles() == molecule.n_atoms
def test_forcefield_kwargs(self):
    """Test that forcefield_kwargs and nonbonded method specifications work correctly

    A generator is configured with different nonbonded methods for periodic
    and non-periodic topologies; each molecule is parameterized once without
    and once with periodic box vectors, and the NonbondedForce of the
    resulting System is checked for the matching method.
    """
    import copy

    import numpy as np
    from simtk import openmm
    from simtk import unit
    from simtk.openmm import app
    from openmmforcefields.generators import SystemGenerator

    forcefield_kwargs = {'hydrogenMass': 4 * unit.amu}

    def nonbonded_method_of(system):
        # Look up the System's NonbondedForce by class name and report its method
        forces = {force.__class__.__name__: force for force in system.getForces()}
        return forces['NonbondedForce'].getNonbondedMethod()

    for name, testsystem in self.testsystems.items():
        print(testsystem)
        molecules = testsystem['molecules']

        for small_molecule_forcefield in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # Create a SystemGenerator for this force field
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=small_molecule_forcefield,
                forcefield_kwargs=forcefield_kwargs,
                periodic_forcefield_kwargs={'nonbondedMethod': app.LJPME},
                nonperiodic_forcefield_kwargs={'nonbondedMethod': app.CutoffNonPeriodic},
                molecules=molecules,
            )

            for molecule in molecules:
                # Non-periodic Topology -> nonperiodic_forcefield_kwargs apply
                nonperiodic_openmm_topology = molecule.to_topology().to_openmm()
                system = generator.create_system(nonperiodic_openmm_topology)
                method = nonbonded_method_of(system)
                # f-string so the actual method shows up in the failure message
                assert method == openmm.NonbondedForce.CutoffNonPeriodic, \
                    f"Expected CutoffNonPeriodic, got {method}"

                # Periodic Topology -> periodic_forcefield_kwargs apply
                box_vectors = unit.Quantity(np.diag([30, 30, 30]), unit.angstrom)
                periodic_openmm_topology = copy.deepcopy(nonperiodic_openmm_topology)
                periodic_openmm_topology.setPeriodicBoxVectors(box_vectors)
                system = generator.create_system(periodic_openmm_topology)
                method = nonbonded_method_of(system)
                assert method == openmm.NonbondedForce.LJPME, \
                    f"Expected LJPME, got {method}"
def generate_atp(phase='vacuum'):
    """
    Modify the AlanineDipeptideVacuum test system to be parametrized with
    amber14 ff14SB in vacuum or in solvent (tip3p).

    Parameters
    ----------
    phase : str, default 'vacuum'
        Either 'vacuum' or 'solvent'

    Returns
    -------
    atp : openmmtools.testsystems.AlanineDipeptideVacuum
        The test system with `system` replaced (and, for 'solvent', `topology`
        and `positions` replaced by the solvated ones)
    system_generator : openmmforcefields.generators.SystemGenerator
        The generator that created `atp.system`

    Raises
    ------
    ValueError
        If `phase` is neither 'vacuum' nor 'solvent'
    """
    # Fail early and clearly; otherwise an unknown phase would only surface as
    # an unbound `system_generator` at the return statement.
    if phase not in ('vacuum', 'solvent'):
        raise ValueError(f"phase must be 'vacuum' or 'solvent', got {phase!r}")

    import openmmtools.testsystems as ts
    from openmmforcefields.generators import SystemGenerator

    atp = ts.AlanineDipeptideVacuum(constraints=app.HBonds, hydrogenMass=4 * unit.amus)

    forcefield_files = ['gaff.xml', 'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    common_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus,
    }

    if phase == 'vacuum':
        system_generator = SystemGenerator(
            forcefield_files,
            barostat=None,
            forcefield_kwargs=common_kwargs,
            nonperiodic_forcefield_kwargs={'nonbondedMethod': app.NoCutoff},
            small_molecule_forcefield='gaff-2.11',
            molecules=None,
            cache=None,
        )
    else:
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
        # NOTE(review): 'nonbondedMethod' is passed through forcefield_kwargs as
        # before; confirm the installed SystemGenerator accepts it there.
        system_generator = SystemGenerator(
            forcefield_files,
            barostat=barostat,
            forcefield_kwargs=dict(common_kwargs, nonbondedMethod=app.PME),
            small_molecule_forcefield='gaff-2.11',
            molecules=None,
            cache=None,
        )

        # Solvate with the generator's ForceField (public attribute, as used by
        # the other SystemGenerator call sites in this file)
        modeller = app.Modeller(atp.topology, atp.positions)
        modeller.addSolvent(
            system_generator.forcefield,
            model='tip3p',
            padding=9 * unit.angstroms,
            ionicStrength=0.15 * unit.molar,
        )
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()

        # canonicalize the solvated positions: turn tuples into np.array
        atp.positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos)
                for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers,
        )
        atp.topology = solvated_topology

    # update the parametrization scheme to amberff14sb
    atp.system = system_generator.create_system(atp.topology)

    return atp, system_generator
def omm_system(input_sdf, input_system, forcefield, input_path, ff_files=None, template_ff='gaff-2.11'):
    """Create an OpenMM System and register a GAFF template generator for extra SDF molecules.

    Parameters
    ----------
    input_sdf : iterable of str
        SDF file names, looked up relative to `input_path`; missing files are
        reported and skipped
    input_system : object
        Object whose `topology` attribute is passed to `create_system`
    forcefield : object
        ForceField on which the GAFF template generator is registered
    input_path : str
        Directory containing the SDF files
    ff_files : list of str, optional
        Force field files handed to SystemGenerator; defaults to an empty list
    template_ff : str, default 'gaff-2.11'
        Small molecule force field used for the generator and the GAFF templates

    Returns
    -------
    system
        The System created from `input_system.topology`
    forcefield
        The `forcefield` argument, with the template generator registered
    """
    from openmmforcefields.generators import SystemGenerator, GAFFTemplateGenerator
    from openff.toolkit.topology import Molecule

    # Avoid a mutable default argument shared between calls
    if ff_files is None:
        ff_files = []

    # maybe possible to add same parameters that u give forcefield.createSystem() function
    forcefield_kwargs = {
        'constraints': app.HBonds,
        'rigidWater': True,
        'removeCMMotion': False,
        'hydrogenMass': 4*amu,
    }
    system_generator = SystemGenerator(
        forcefields=ff_files,
        small_molecule_forcefield=template_ff,
        forcefield_kwargs=forcefield_kwargs,
        cache='db.json',
    )

    # Keep only the SDF files that actually exist
    input_sdfs = []
    for idx, sdf in enumerate(input_sdf, 1):
        path_sdf = f'{input_path}/{sdf}'
        if not os.path.exists(path_sdf):
            print(f'\tFile {path_sdf} not found!')
        else:
            print(f'\tAdding extra SDF file {idx} to pre-system: {path_sdf}')
            input_sdfs.append(path_sdf)

    # Molecule.from_file takes ONE path per call and may return either a single
    # Molecule or a list, so load file by file and flatten the results.
    molecules = []
    for path_sdf in input_sdfs:
        loaded = Molecule.from_file(path_sdf, file_format='sdf')
        if isinstance(loaded, list):
            molecules.extend(loaded)
        else:
            molecules.append(loaded)
    print(molecules)

    # NOTE(review): the molecules are deliberately not passed to create_system
    # here (that was commented out upstream) — confirm this is still intended.
    system = system_generator.create_system(topology=input_system.topology)

    gaff = GAFFTemplateGenerator(molecules=molecules, forcefield=template_ff)
    gaff.add_molecules(molecules)
    print(gaff)
    forcefield.registerTemplateGenerator(gaff.generator)

    print(system)
    print(forcefield)
    return system, forcefield
def test_barostat(self):
    """Test that barostat addition works correctly

    A template MonteCarloBarostat is attached to the generator, a protein
    System is created, and the barostat found in that System is compared
    against the template's pressure, temperature and frequency.
    """
    import os
    from simtk import unit
    from simtk.openmm import MonteCarloBarostat
    from simtk.openmm import app
    from simtk.openmm.app import PDBFile

    # Protein-only generator carrying a template barostat
    generator = SystemGenerator(forcefields=self.amber_forcefields)
    pressure = 0.95 * unit.atmospheres
    temperature = 301.0 * unit.kelvin
    frequency = 23
    generator.barostat = MonteCarloBarostat(pressure, temperature, frequency)

    # Load the protein structure
    relative_path = os.path.join('perses_jacs_systems', 'bace', 'Bace_protein_fixed.pdb')
    pdbfile = PDBFile(get_data_filename(relative_path))

    # Delete hydrogens from terminal protein residues
    # TODO: Fix the input files so we don't need to do this
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)
    protein_residues = []
    for residue in modeller.topology.residues():
        if residue.name != 'UNL':
            protein_residues.append(residue)
    termini_ids = [protein_residues[0].id, protein_residues[-1].id]

    terminal_hydrogens = []
    for atom in modeller.topology.atoms():
        if atom.element.symbol in ['H'] and atom.residue.id in termini_ids:
            terminal_hydrogens.append(atom)
    modeller.delete(terminal_hydrogens)
    modeller.addHydrogens()

    # Create a System
    system = generator.create_system(modeller.topology)

    # Index the System's forces by class name and make sure a barostat is there
    forces_by_class = {}
    for candidate in system.getForces():
        forces_by_class[candidate.__class__.__name__] = candidate
    assert 'MonteCarloBarostat' in forces_by_class.keys()

    # The barostat in the System must carry the template's settings
    barostat_force = forces_by_class['MonteCarloBarostat']
    assert barostat_force.getDefaultPressure() == pressure
    assert barostat_force.getDefaultTemperature() == temperature
    assert barostat_force.getFrequency() == frequency
def baseline_energy(self, g, suffix=None):
    """Evaluate the force field energy of every snapshot in `g` and store it on the graph.

    The molecule `g.mol` is parameterized with the small-molecule force field
    named by `self.forcefield`; the potential energy of each snapshot taken
    from `g.nodes["n1"].data["xyz"]` is written to
    `g.nodes["g"].data["u" + suffix]`.

    Parameters
    ----------
    g : graph object exposing `mol` and `nodes`
    suffix : str, optional
        Appended to "u" to form the data key; defaults to "_" + self.forcefield

    Returns
    -------
    g : the same graph, with the energy tensor added
    """
    if suffix is None:
        suffix = "_" + self.forcefield

    from openmmforcefields.generators import SystemGenerator

    molecule = g.mol
    # mol.assign_partial_charges("formal_charge")

    # Parameterize the molecule
    generator = SystemGenerator(small_molecule_forcefield=self.forcefield)
    system = generator.create_system(
        topology=molecule.to_topology().to_openmm(),
        molecules=molecule,
    )

    # Simulation used purely as an energy evaluator
    topology = g.mol.to_topology().to_openmm()
    integrator = openmm.LangevinIntegrator(TEMPERATURE, COLLISION_RATE, STEP_SIZE)
    simulation = Simulation(topology=topology, system=system, integrator=integrator)

    def potential_energy_at(positions):
        # Load one snapshot into the context and read back its potential energy
        simulation.context.setPositions(positions)
        state = simulation.context.getState(getEnergy=True)
        return state.getPotentialEnergy().value_in_unit(esp.units.ENERGY_UNIT)

    # Convert coordinates to nanometers and swap the first two axes so that
    # iteration runs over snapshots
    coordinates = Quantity(
        g.nodes["n1"].data["xyz"].detach().numpy(),
        esp.units.DISTANCE_UNIT,
    )
    snapshots = coordinates.value_in_unit(unit.nanometer).transpose((1, 0, 2))

    energies = [potential_energy_at(snapshot) for snapshot in snapshots]

    g.nodes["g"].data["u%s" % suffix] = torch.tensor(energies)[None, :]
    return g
def test_parameterize_molecules_specified_during_create_system(self):
    """Test that SystemGenerator can parameterize molecules specified during create_system

    The generator is created without any molecules; they are only supplied
    through the `molecules` argument of create_system(). As in the sibling
    tests, the resulting System must contain one particle per atom.
    """
    from openmmforcefields.generators import SystemGenerator

    for name, testsystem in self.testsystems.items():
        molecules = testsystem['molecules']

        for small_molecule_forcefield in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # Create a SystemGenerator for this force field
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=small_molecule_forcefield,
            )

            # Parameterize molecules
            for molecule in molecules:
                openmm_topology = molecule.to_topology().to_openmm()
                # Specify molecules during system creation
                system = generator.create_system(openmm_topology, molecules=molecules)
                # Without this check the test could only fail by raising
                assert system.getNumParticles() == molecule.n_atoms
def simulation_from_graph(self, g):
    """Create an OpenMM simulation for the molecule held by graph `g`.

    Partial charges are assigned first when `self.charge_method` is set. The
    nonbonded force of the generated system is switched to NoCutoff and every
    particle epsilon below EPSILON_MIN is raised to EPSILON_MIN.

    Returns
    -------
    simulation : Simulation running on the "Reference" platform
    """
    molecule = g.mol

    # Optional partial charge assignment
    if self.charge_method is not None:
        molecule.assign_partial_charges(self.charge_method)

    # Parameterize the molecule
    topology = molecule.to_topology().to_openmm()
    generator = SystemGenerator(
        small_molecule_forcefield=self.forcefield,
        molecules=[molecule],
    )
    system = generator.create_system(topology)

    # Adjust the nonbonded force(s): no cutoff, epsilon floored at EPSILON_MIN
    for force in system.getForces():
        if "Nonbonded" not in force.__class__.__name__:
            continue
        force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff)
        for particle_index in range(force.getNumParticles()):
            charge, sigma, epsilon = force.getParticleParameters(particle_index)
            if epsilon < EPSILON_MIN:
                force.setParticleParameters(particle_index, charge, sigma, EPSILON_MIN)

    # Langevin dynamics with the instance's settings
    integrator = openmm.LangevinIntegrator(
        self.temperature,
        self.collision_rate,
        self.step_size,
    )

    reference_platform = openmm.Platform.getPlatformByName("Reference")
    return Simulation(
        topology=topology,
        system=system,
        integrator=integrator,
        platform=reference_platform,
    )
def hmr_driver(mol, ff_name):
    """Given an OpenFF Molecule, run a short 4 fs HMR simulation.

    This function is adapted from
    https://github.com/openforcefield/openforcefields/issues/19#issuecomment-689816995

    Raises NANEnergyError when the potential energy after the run is NaN.
    """
    print(
        f"Running HMR with force field {ff_name} and molecule with SMILES {mol.to_smiles()}"
    )

    # Hydrogen mass repartitioning settings
    # (open question from the original author: does this also _subtract_
    # mass from heavy atoms?)
    hmr_kwargs = dict(
        constraints=app.HBonds,
        rigidWater=True,
        removeCMMotion=False,
        hydrogenMass=4 * unit.amu,
    )

    generator = SystemGenerator(
        small_molecule_forcefield=ff_name,
        forcefield_kwargs=hmr_kwargs,
        molecules=mol,
    )
    system = generator.create_system(mol.to_topology().to_openmm())

    # Langevin integrator with a 4 fs timestep
    integrator = openmm.LangevinIntegrator(
        300 * unit.kelvin,
        1.0 / unit.picoseconds,
        4.0 * unit.femtoseconds,
    )
    context = openmm.Context(system, integrator)

    # Start from a single generated conformer
    mol.generate_conformers(n_conformers=1)
    context.setPositions(mol.conformers[0])

    # Run for 10 ps (2500 steps of 4 fs)
    integrator.step(2500)

    # OpenMM will silently "fail" if energies aren't explicitly checked
    potential = context.getState(getEnergy=True).getPotentialEnergy()
    if np.isnan(potential / potential.unit):
        raise NANEnergyError()
periodic_forcefield_kwargs = { 'nonbondedMethod': app.LJPME, 'nonbondedCutoff': 1 * nanometer } membrane_barostat = MonteCarloMembraneBarostat( 1 * bar, 0.0 * bar * nanometer, 308 * kelvin, MonteCarloMembraneBarostat.XYIsotropic, MonteCarloMembraneBarostat.ZFree, 15) system_generator = SystemGenerator( forcefields=['amber/lipid17.xml', 'amber/tip3p_standard.xml'], small_molecule_forcefield='gaff-2.11', barostat=membrane_barostat, forcefield_kwargs=forcefield_kwargs, periodic_forcefield_kwargs=periodic_forcefield_kwargs) system = system_generator.create_system(pdb.topology, molecules=molecule) integrator = LangevinIntegrator(300 * kelvin, 1 / picosecond, 0.002 * picosecond) platform = Platform.getPlatformByName('CUDA') simulation = app.Simulation(pdb.topology, system, integrator, platform) simulation.context.setPositions(pdb.positions) simulation.loadState('parent.xml') simulation.reporters.append( StateDataReporter('seg.nfo', 5000, step=True, potentialEnergy=True, kineticEnergy=True,
def _parametrize_gaff(self, g, n_max_phases=6):
    """Copy reference bond, angle and proper-torsion parameters from an OpenMM System onto `g`.

    The molecule `g.mol` is parameterized with the small-molecule force field
    named by `self.forcefield`. Parameters are then written to the graph:

    * "n2" nodes (bonds):   "eq_ref", "k_ref"
    * "n3" nodes (angles):  "eq_ref", "k_ref"
    * "n4" nodes (torsions): "k_ref", "periodicity_ref", "phases_ref",
      each with `n_max_phases` columns

    Bonds, angles and torsions are stored on the graph in both atom orders, so
    every force term is written to its forward and its reversed entry.

    Parameters
    ----------
    g : graph object exposing `mol`, `nodes` and `heterograph`
    n_max_phases : int, default 6
        Number of torsion term slots per torsion

    Returns
    -------
    g : the same graph, with the reference parameters added
    """
    from openmmforcefields.generators import SystemGenerator

    # Parameterize the molecule
    system_generator = SystemGenerator(small_molecule_forcefield=self.forcefield)
    mol = g.mol
    # mol.assign_partial_charges("formal_charge")
    system = system_generator.create_system(
        topology=mol.to_topology().to_openmm(),
        molecules=mol,
    )

    def index_lookup(ntype):
        # Map each node's atom-index tuple to its row position for `ntype`
        return {
            tuple(idxs.detach().numpy()): position
            for position, idxs in enumerate(g.nodes[ntype].data["idxs"])
        }

    bond_lookup = index_lookup("n2")
    angle_lookup = index_lookup("n3")
    torsion_lookup = index_lookup("n4")

    n_torsions = g.heterograph.number_of_nodes("n4")
    torsion_phases = torch.zeros(n_torsions, n_max_phases)
    torsion_periodicities = torch.zeros(n_torsions, n_max_phases)
    torsion_ks = torch.zeros(n_torsions, n_max_phases)

    for force in system.getForces():
        name = force.__class__.__name__

        if "HarmonicBondForce" in name:
            n_bonds = force.getNumBonds()
            # Each bond must appear twice on the graph (once per direction)
            assert (n_bonds * 2 == g.heterograph.number_of_nodes("n2"))

            g.nodes["n2"].data["eq_ref"] = torch.zeros(n_bonds * 2, 1)
            g.nodes["n2"].data["k_ref"] = torch.zeros(n_bonds * 2, 1)

            for idx in range(n_bonds):
                idx0, idx1, eq, k = force.getBondParameters(idx)
                eq_value = eq.value_in_unit(esp.units.DISTANCE_UNIT)
                k_value = k.value_in_unit(esp.units.FORCE_CONSTANT_UNIT)
                for key in ((idx0, idx1), (idx1, idx0)):
                    position = bond_lookup[key]
                    g.nodes["n2"].data["eq_ref"][position] = eq_value
                    g.nodes["n2"].data["k_ref"][position] = k_value

        if "HarmonicAngleForce" in name:
            n_angles = force.getNumAngles()
            # Each angle must appear twice on the graph (once per direction)
            assert (n_angles * 2 == g.heterograph.number_of_nodes("n3"))

            g.nodes["n3"].data["eq_ref"] = torch.zeros(n_angles * 2, 1)
            g.nodes["n3"].data["k_ref"] = torch.zeros(n_angles * 2, 1)

            for idx in range(n_angles):
                idx0, idx1, idx2, eq, k = force.getAngleParameters(idx)
                eq_value = eq.value_in_unit(esp.units.ANGLE_UNIT)
                k_value = k.value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT)
                for key in ((idx0, idx1, idx2), (idx2, idx1, idx0)):
                    position = angle_lookup[key]
                    g.nodes["n3"].data["eq_ref"][position] = eq_value
                    g.nodes["n3"].data["k_ref"][position] = k_value

        if "PeriodicTorsionForce" in name:
            for idx in range(force.getNumTorsions()):
                (
                    idx0,
                    idx1,
                    idx2,
                    idx3,
                    periodicity,
                    phase,
                    k,
                ) = force.getTorsionParameters(idx)

                # Terms whose atom order is not an "n4" node are skipped
                if (idx0, idx1, idx2, idx3) not in torsion_lookup:
                    continue

                forward = torsion_lookup[(idx0, idx1, idx2, idx3)]
                for sub_idx in range(n_max_phases):
                    # A slot counts as free while its force constant is zero
                    if torsion_ks[forward, sub_idx] == 0:
                        k_value = 0.5 * k.value_in_unit(esp.units.ENERGY_UNIT)
                        phase_value = phase.value_in_unit(esp.units.ANGLE_UNIT)

                        torsion_ks[forward, sub_idx] = k_value
                        torsion_phases[forward, sub_idx] = phase_value
                        torsion_periodicities[forward, sub_idx] = periodicity

                        # Mirror the term onto the reversed torsion
                        backward = torsion_lookup[(idx3, idx2, idx1, idx0)]
                        torsion_ks[backward, sub_idx] = k_value
                        torsion_phases[backward, sub_idx] = phase_value
                        torsion_periodicities[backward, sub_idx] = periodicity
                        break

    # Publish the collected torsion terms on the "n4" nodes
    g.heterograph.apply_nodes(
        lambda nodes: {
            "k_ref": torsion_ks,
            "periodicity_ref": torsion_periodicities,
            "phases_ref": torsion_phases,
        },
        ntype="n4",
    )

    return g
forcefield_kwargs = { 'removeCMMotion': False, 'ewaldErrorTolerance': 5e-04, 'nonbondedMethod': app.PME, 'constraints': app.HBonds, 'hydrogenMass': hydrogen_mass } system_generator = SystemGenerator(forcefields=ffxml_filenames, barostat=barostat, forcefield_kwargs=forcefield_kwargs, molecules=[ligand], small_molecule_forcefield='gaff-2.11') # Create the OpenMM System print("Creating system for complex...") system_complex = system_generator.create_system(complex_topology) # Solvate print('Adding solvent...') modeller = app.Modeller(complex_topology, complex_positions) modeller.addSolvent(system_generator.forcefield, model='tip3p', padding=10.0 * unit.angstroms, ionicStrength=0.15 * unit.molar) print('System has %d atoms' % modeller.topology.getNumAtoms()) # Write initial model print('Writing initial solvated system to %s' % solvated_pdb_filename) with open(output_prefix + solvated_pdb_filename, 'w') as outfile: app.PDBFile.writeFile(modeller.topology, modeller.positions,
def subtract_nonbonded_force_except_14(
    g,
    forcefield="gaff-1.81",
):
    """Subtract the energy left after switching off bonded terms from the reference energy on `g`.

    The molecule `g.mol` is parameterized with `forcefield`. Bond, angle and
    torsion force constants are set to zero and the epsilon of every
    nonbonded exception is scaled by 1e-8. The energy of each snapshot under
    the modified system is subtracted from "u_ref" on the "g" nodes; when
    "u_ref_prime" exists on the "n1" nodes, the forces reported by OpenMM are
    subtracted from it as well.

    Returns
    -------
    g : the same graph, with the adjusted reference data
    """
    # Parameterize the molecule
    topology = g.mol.to_topology().to_openmm()
    generator = SystemGenerator(
        small_molecule_forcefield=forcefield,
        molecules=[g.mol],
    )
    system = generator.create_system(topology)

    # The integrator is only needed to construct the simulation
    integrator = openmm.LangevinIntegrator(TEMPERATURE, COLLISION_RATE, STEP_SIZE)
    simulation = Simulation(topology=topology, system=system, integrator=integrator)

    for force in list(system.getForces()):
        kind = force.__class__.__name__

        if "Angle" in kind:
            # Switch off angle terms
            for index in range(force.getNumAngles()):
                a, b, c, theta, _ = force.getAngleParameters(index)
                force.setAngleParameters(index, a, b, c, theta, 0.0)
        elif "Bond" in kind:
            # Switch off bond terms
            for index in range(force.getNumBonds()):
                a, b, length, _ = force.getBondParameters(index)
                force.setBondParameters(index, a, b, length, 0.0)
        elif "Torsion" in kind:
            # Switch off torsion terms
            for index in range(force.getNumTorsions()):
                a, b, c, d, periodicity, phase, _ = force.getTorsionParameters(index)
                force.setTorsionParameters(index, a, b, c, d, periodicity, phase, 0.0)
        elif "Nonbonded" in kind:
            # Scale down the epsilon of every exception; charge product and
            # sigma are left untouched
            for index in range(force.getNumExceptions()):
                p1, p2, chargeprod, sigma, epsilon = force.getExceptionParameters(index)
                force.setExceptionParameters(
                    index, p1, p2, chargeprod, sigma, 1e-8 * epsilon
                )
        else:
            continue

        # Push the modified parameters into the live context
        force.updateParametersInContext(simulation.context)

    # Snapshots: coordinates in nanometers with the first two axes swapped
    coordinates = Quantity(
        g.nodes["n1"].data["xyz"].detach().numpy(),
        esp.units.DISTANCE_UNIT,
    )
    snapshots = coordinates.value_in_unit(unit.nanometer).transpose((1, 0, 2))

    # Evaluate energy and forces for every snapshot
    energy_list = []
    force_list = []
    for snapshot in snapshots:
        simulation.context.setPositions(snapshot)
        state = simulation.context.getState(
            getEnergy=True,
            getParameters=True,
            getForces=True,
        )
        energy_list.append(
            state.getPotentialEnergy().value_in_unit(esp.units.ENERGY_UNIT)
        )
        force_list.append(
            state.getForces(asNumpy=True).value_in_unit(esp.units.FORCE_UNIT)
        )

    # Pack the results into tensors of the default dtype
    default_dtype = torch.get_default_dtype()
    energies = torch.tensor(energy_list, dtype=default_dtype).flatten()[None, :]
    derivatives = torch.tensor(np.stack(force_list, axis=1), dtype=default_dtype)

    def remove_energy(node):
        return {"u_ref": node.data["u_ref"] - energies}

    def remove_derivative(node):
        return {"u_ref_prime": node.data["u_ref_prime"] - derivatives}

    # Subtract the energies, and the forces when a reference for them exists
    g.heterograph.apply_nodes(remove_energy, ntype="g")
    if "u_ref_prime" in g.nodes["n1"].data:
        g.heterograph.apply_nodes(remove_derivative, ntype="n1")

    return g
def create_systems(topologies_dict, positions_dict, output_directory, project_prefix, solvate=True):
    """
    Generate the systems ready for equilibrium simulations from a dictionary of topologies and positions

    One file "{output_directory}/{project_prefix}_{i}_initial.npy" is written
    per entry, holding (positions, mdtraj topology, system, smiles). When
    solvating, the first entry determines `num_added`, which is passed on to
    solvate_system() for all later entries.

    Parameters
    ----------
    topologies_dict : dict of str: topology
        A dictionary of the topologies to prepare, indexed by SMILES strings.
        The solvated path calls `to_openmm()` and reads `n_residues` on the
        entries; the unsolvated path also accepts topologies that are already
        OpenMM topologies.
    positions_dict : dict of str: unit.Quantity array
        A dictionary of positions for the corresponding topologies, indexed by SMILES strings
    output_directory : str
        Location of output files
    project_prefix : str
        What to prepend to the names of files for this run
    solvate : bool, default True
        Whether to solvate the systems
    """
    barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
    system_generator = SystemGenerator(
        [
            'amber14/protein.ff14SB.xml', 'gaff.xml', 'amber14/tip3p.xml',
            'MCL1_ligands.xml'
        ],
        barostat=barostat,
        forcefield_kwargs={
            'constraints': app.HBonds,
            'hydrogenMass': 4 * unit.amus
        },
        periodic_forcefield_kwargs={'nonbondedMethod': app.PME})

    def as_openmm(topology):
        # Convert when the topology offers to_openmm(); otherwise use it as is
        return topology.to_openmm() if hasattr(topology, 'to_openmm') else topology

    def save(index, positions, openmm_topology, system, smiles):
        # Persist one prepared system next to its mdtraj topology and SMILES
        np.save(
            "{}/{}_{}_initial.npy".format(output_directory, project_prefix, index),
            (positions, md.Topology.from_openmm(openmm_topology), system, smiles))

    list_of_smiles = list(topologies_dict.keys())

    # First entry: when solvating, it fixes how many residues get added
    initial_smiles = list_of_smiles[0]
    initial_topology = topologies_dict[initial_smiles]
    initial_positions = positions_dict[initial_smiles]

    num_added = None
    if solvate:
        solvated_initial_positions, solvated_topology, solvated_system = solvate_system(
            initial_topology.to_openmm(), initial_positions, system_generator)
        md_topology = md.Topology.from_openmm(solvated_topology)
        num_added = md_topology.n_residues - initial_topology.n_residues
    else:
        solvated_initial_positions = initial_positions
        solvated_topology = as_openmm(initial_topology)
        solvated_system = system_generator.create_system(solvated_topology)

    os.makedirs(output_directory, exist_ok=True)
    save(0, solvated_initial_positions, solvated_topology, solvated_system, initial_smiles)

    # Remaining entries
    for i in tqdm.trange(1, len(list_of_smiles)):
        smiles = list_of_smiles[i]
        topology = topologies_dict[smiles]
        positions = positions_dict[smiles]

        if solvate:
            solvated_positions, solvated_topology, solvated_system = solvate_system(
                topology.to_openmm(),
                positions,
                system_generator,
                padding=None,
                num_added=num_added)
        else:
            # Use THIS entry's data; the initial entry was previously re-saved
            # under every SMILES string here
            solvated_positions = positions
            solvated_topology = as_openmm(topology)
            solvated_system = system_generator.create_system(solvated_topology)

        save(i, solvated_positions, solvated_topology, solvated_system, smiles)
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False,
                                           atom_expression=['Hybridization'],
                                           bond_expression=['Hybridization']):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal
    (either vacuum or solvated) given a set of input iupacs or smiles.

    The function will (by default) read the iupac names first. If either of them is None, it will attempt to
    read a set of current and new smiles instead. An atom mapping image will be rendered if specified.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name
    atom_expression : list(str), optional
        list of atom mapping criteria
    bond_expression : list(str), optional
        list of bond mapping criteria

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system (solvated positions unless ``vacuum`` is True)
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        If the names/SMILES cannot be converted to molecules, or if neither a complete pair
        of names nor a complete pair of SMILES is supplied.

    Notes
    -----
    ``openmm``, ``np`` and ``beta`` are not defined in this function; they are read from the
    enclosing module.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openforcefield.topology import Molecule

    def _smiles_with_explicit_hydrogens(oemol):
        # SMILES string that keeps explicit hydrogens.
        return oechem.OECreateSmiString(
            oemol, oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol, new_oemol = iupac_to_oemol(current_mol_name), iupac_to_oemol(proposed_mol_name)
            old_smiles = _smiles_with_explicit_hydrogens(old_oemol)
            new_smiles = _smiles_with_explicit_hydrogens(new_oemol)
        except Exception as err:
            # `except Exception` (not a bare except) so Ctrl-C / SystemExit propagate;
            # chain the cause so the underlying toolkit error is not lost.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol, new_oemol = smiles_to_oemol(current_mol_smiles), smiles_to_oemol(proposed_mol_smiles)
            old_smiles = _smiles_with_explicit_hydrogens(old_oemol)
            new_smiles = _smiles_with_explicit_hydrogens(new_oemol)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    # Only the molecule is kept from each call; the system/positions/topology it also
    # returns are replaced below.
    old_oemol, _, _, _ = openeye.createSystemFromSMILES(old_smiles, title="MOL")

    # correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, _, _, _ = openeye.createSystemFromSMILES(new_smiles, title="NEW")

    # NOTE(review): the returned ffxml is never used below; the call is kept in case it is
    # relied on for validation or side effects -- confirm whether it can be dropped.
    forcefield_generators.generateForceFieldFromMolecules([old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    # Built for parity with the old molecule; not referenced again in this function.
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }
    periodic_forcefield_kwargs = {'nonbondedMethod': nonbonded_method}
    small_molecule_forcefield = 'gaff-2.11'

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(mol) for mol in [old_oemol, new_oemol]],
        cache=None)

    proposal_engine = SmallMoleculeSetProposalEngine([old_oemol, new_oemol],
                                                     system_generator,
                                                     residue_name='MOL',
                                                     atom_expr=atom_expr,
                                                     bond_expr=bond_expr,
                                                     allow_ring_breaking=True)
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        # now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        # Strip hydrogens from everything except the small-molecule residues, then let
        # the force field re-add them.
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H'] and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator.forcefield)
        modeller.addSolvent(system_generator.forcefield, model='tip3p', padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Re-wrap the positions as a single numpy-backed Quantity in nanometers.
        solvated_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos)
                for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)
        solvated_system = system_generator.create_system(solvated_topology)

        # now to create proposal
        top_proposal = proposal_engine.propose(current_system=solvated_system,
                                               current_topology=solvated_topology,
                                               current_mol_id=0,
                                               proposed_mol_id=1)
        new_positions, _ = geometry_engine.propose(top_proposal, solvated_positions, beta)

        if render_atom_mapping:
            # Imported under an alias so the boolean parameter is not rebound.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
            print(f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}")
            _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol,
                                 proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    else:
        vacuum_system = system_generator.create_system(old_topology)
        top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                               current_topology=old_topology,
                                               current_mol_id=0,
                                               proposed_mol_id=1)
        new_positions, _ = geometry_engine.propose(top_proposal, old_positions, beta)

        if render_atom_mapping:
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
            print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
            _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol,
                                 top_proposal._new_to_old_atom_map)

        return top_proposal, old_positions, new_positions
class PDBLigandSystemBuilder:
    """Build an OpenMM system for a protein PDB file plus a ligand file.

    The constructor reads the ligand, optionally repairs the protein with PDBFixer and
    writes a combined protein+ligand PDB (ligand residue named ``UNL``). ``get_system``
    and ``reload_system`` then parameterise that structure through one of three private
    routes: explicit solvent via ``SystemGenerator``, explicit solvent via the AMBER
    command-line tools, or an unsolvated AMBER build.

    NOTE(review): ``cmd`` / ``stored`` look like PyMOL's scripting API and hold global
    session state (``cmd.reinitialize()`` is called repeatedly) -- confirm before using
    this class from more than one thread or alongside other code sharing that session.
    """

    class Config(Config):
        """Configuration for :class:`PDBLigandSystemBuilder`, populated from a dict."""

        __slots__ = ['pdb_file_name', 'ligand_file_name']

        def update(self, k, v):
            """Set attribute ``k`` to ``v`` by writing directly into ``__dict__``."""
            self.__dict__[k] = v

        def __init__(self, config_dict):
            """Copy the required keys of ``config_dict`` onto the instance.

            Raises ``KeyError`` if any of the keys read below is missing.
            """
            self.relax_ligand = config_dict['relax_ligand']
            self.use_pdbfixer = config_dict['use_pdbfixer']
            # Starts as None, but the builder later calls ``self.config.tempdir(...)``,
            # so a callable must be assigned before the builder is constructed.
            self.tempdir = None
            self.method = config_dict['method']
            self.pdb_file_name = config_dict['pdb_file_name']
            self.ligand_file_name = config_dict['ligand_file_name']
            self.explicit = config_dict['explicit']
            self.config_dict = config_dict

        def get_obj(self):
            """Return a new :class:`PDBLigandSystemBuilder` built from this config."""
            return PDBLigandSystemBuilder(self)

    def __init__(self, config_: Config):
        """Load the ligand, optionally fix the protein, and write the combined PDB.

        Side effects: ``self.config.pdb_file_name`` is replaced by the path of the newly
        written ``inital_fixed.pdb`` inside ``self.config.tempdir(main_context=True)``.
        """
        self.config = config_
        # NOTE(review): ``verbose`` is not set by Config.__init__ above; presumably it is
        # provided by the base Config class or assigned by the caller -- confirm.
        self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
        with self.logger("__init__") as logger:
            self.boxvec = None
            self.explicit = self.config.explicit
            self.system = None

            # Read the first molecule from the ligand file.
            ofs = oechem.oemolistream(self.config.ligand_file_name)
            oemol = oechem.OEMol()
            oechem.OEReadMolecule(ofs, oemol)
            ofs.close()

            self.inital_ligand_smiles = oechem.OEMolToSmiles(oemol)
            # Counter used to suffix the AMBER parameter files copied by the setup methods.
            self.params_written = 0

            self.mol = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
            fixer = PDBFixer(self.config.pdb_file_name)

            if self.config.use_pdbfixer:
                logger.log("Fixing with PDBFixer")

                fixer.findMissingResidues()
                fixer.findNonstandardResidues()
                fixer.replaceNonstandardResidues()
                fixer.removeHeterogens(keepWater=False)
                fixer.findMissingAtoms()
                fixer.addMissingAtoms()
                fixer.addMissingHydrogens(7.0)

                logger.log("Found missing residues: ", fixer.missingResidues)
                logger.log("Found missing terminals residues: ", fixer.missingTerminals)
                logger.log("Found missing atoms:", fixer.missingAtoms)
                logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

            # The structure is written out whether or not the PDBFixer repairs ran.
            self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
            with open(self.config.pdb_file_name, 'w') as f:
                app.PDBFile.writeFile(fixer.topology, fixer.positions, f)

            # Merge the ligand into the written PDB under residue name UNL.
            cmd.reinitialize()
            cmd.load(self.config.pdb_file_name)
            cmd.load(self.config.ligand_file_name, "UNL")
            cmd.alter("UNL", "resn='UNL'")
            cmd.save("{}".format(self.config.pdb_file_name))

    def get_mobile(self):
        """Return the number of positions in the currently loaded PDB (``self.pdb``)."""
        return len(self.pdb.positions)

    def __setup_system_ex_mm(self):
        """Solvate the current structure and build the system with ``SystemGenerator``.

        Requires ``self.params``, ``self.topology`` and ``self.positions`` to be set
        (``get_system`` / ``reload_system`` do this). Rewrites ``self.config.pdb_file_name``
        with the solvated structure and reloads it into ``self.pdb``.

        Returns
        -------
        tuple
            ``(self.system, self.topology, self.positions)``
        """
        with self.logger("__setup_system_ex_mm") as logger:
            # Create the generator on first use; afterwards only register the molecule.
            if "openmm_system_generator" not in self.__dict__:
                amber_forcefields = ['amber/protein.ff14SB.xml', 'amber/phosaa10', 'amber/tip3p_standard.xml']
                small_molecule_forcefield = 'openff-1.1.0'
                # small_molecule_forcefield = 'gaff-2.11'
                self.openmm_system_generator = SystemGenerator(forcefields=amber_forcefields,
                                                               forcefield_kwargs=self.params,
                                                               molecules=[self.mol],
                                                               small_molecule_forcefield=small_molecule_forcefield,
                                                               )
            else:
                self.openmm_system_generator.add_molecules([self.mol])

            self.modeller = app.Modeller(self.topology, self.positions)
            self.modeller.addSolvent(self.openmm_system_generator.forcefield, model='tip3p',
                                     ionicStrength=100 * unit.millimolar, padding=1.0 * unit.nanometers)
            self.boxvec = self.modeller.getTopology().getPeriodicBoxVectors()
            self.topology, self.positions = self.modeller.getTopology(), self.modeller.getPositions()

            self.system = self.openmm_system_generator.create_system(self.topology)
            self.system.setDefaultPeriodicBoxVectors(*self.modeller.getTopology().getPeriodicBoxVectors())

            # Round-trip through a PDB file so self.pdb matches the solvated structure.
            with open("{}".format(self.config.pdb_file_name), 'w') as f:
                app.PDBFile.writeFile(self.topology, self.positions, file=f, keepIds=True)
                logger.log("wrote ", "{}".format(self.config.pdb_file_name))
            with open("{}".format(self.config.pdb_file_name), 'r') as f:
                self.pdb = app.PDBFile(f)

            return self.system, self.topology, self.positions

    def __setup_system_ex_amber(self, pdbfile: str = None):
        """Build an explicit-solvent system by shelling out to the AMBER tools.

        Splits ``pdbfile`` into ligand and apo protein, charges the ligand (AM1-BCC),
        runs ``antechamber``, ``parmchk2``, ``pdb4amber`` and ``tleap`` in
        ``self.config.tempdir()``, then loads the resulting ``com.prmtop`` /
        ``com.inpcrd``.

        Returns
        -------
        tuple or None
            ``(self.system, self.topology, self.positions)`` on success.
            NOTE(review): any exception is logged and swallowed, so the method then
            returns ``None`` and callers that unpack three values will fail with a
            ``TypeError`` -- confirm this is intended.
        """
        with self.logger("__setup_system_ex_amber") as logger:
            try:
                # NOTE(review): the TemporaryDirectory path is immediately overwritten,
                # so the temporary directory itself is never used.
                with tempfile.TemporaryDirectory() as dirpath:
                    dirpath = self.config.tempdir()

                    # Move initial file over to new system.
                    shutil.copy(pdbfile, f"{dirpath}/init.pdb")

                    # Assign charges and extract new ligand
                    cmd.reinitialize()
                    cmd.load(f'{dirpath}/init.pdb')
                    cmd.remove("polymer")
                    cmd.remove("resn HOH or resn Cl or resn Na")
                    cmd.save(f'{dirpath}/lig.pdb')
                    cmd.save(f'{dirpath}/lig.mol2')
                    ifs = oechem.oemolistream(f'{dirpath}/lig.pdb')
                    oemol = oechem.OEMol()
                    oechem.OEReadMolecule(ifs, oemol)
                    ifs.close()
                    ofs = oechem.oemolostream()
                    oemol.SetTitle("UNL")
                    oechem.OEAddExplicitHydrogens(oemol)
                    oequacpac.OEAssignCharges(oemol, oequacpac.OEAM1BCCCharges())
                    if ofs.open(f'{dirpath}/charged.mol2'):
                        oechem.OEWriteMolecule(ofs, oemol)
                    ofs.close()

                    # remove hydrogens and ligand from PDB
                    cmd.reinitialize()
                    cmd.load(f'{dirpath}/init.pdb')
                    cmd.remove("not polymer")
                    cmd.remove("hydrogens")
                    cmd.save(f'{dirpath}/apo.pdb')

                    # The external tools below use relative file names, so they run
                    # with dirpath as the working directory.
                    with working_directory(dirpath):
                        subprocess.run(
                            f'antechamber -i lig.pdb -fi pdb -o lig.mol2 -fo mol2 -pf y -an y -a charged.mol2 -fa mol2 -ao crg'.split(
                                " "), check=True, capture_output=True)
                        subprocess.run(f'parmchk2 -i lig.mol2 -f mol2 -o lig.frcmod'.split(" "), check=True,
                                       capture_output=True)
                        try:
                            subprocess.run('pdb4amber -i apo.pdb -o apo_new.pdb --reduce --dry'.split(" "),
                                           check=True, capture_output=True)
                        except subprocess.CalledProcessError as e:
                            # Deliberately non-fatal: a non-zero exit is logged and ignored.
                            logger.error("Known bug, pdb4amber returns error when there was no error", e.stdout,
                                         e.stderr)
                            pass

                        # Wrap tleap
                        with open('leap.in', 'w+') as leap:
                            leap.write("source leaprc.protein.ff14SB\n")
                            leap.write("source leaprc.water.tip4pew\n")
                            leap.write("source leaprc.phosaa10\n")
                            leap.write("source leaprc.gaff2\n")
                            leap.write("set default PBRadii mbondi3\n")
                            leap.write("rec = loadPDB apo_new.pdb # May need full filepath?\n")
                            leap.write("saveAmberParm rec apo.prmtop apo.inpcrd\n")
                            leap.write("lig = loadmol2 lig.mol2\n")
                            leap.write("loadAmberParams lig.frcmod\n")
                            leap.write("saveAmberParm lig lig.prmtop lig.inpcrd\n")
                            leap.write("com = combine {rec lig}\n")
                            leap.write("saveAmberParm com us_com.prmtop us_com.inpcrd\n")
                            leap.write("solvateBox com TIP4PEWBOX 12\n")
                            leap.write("addions com Na+ 5\n")
                            leap.write("addions com Cl- 5\n")
                            leap.write("saveAmberParm com com.prmtop com.inpcrd\n")
                            leap.write("quit\n")
                        try:
                            subprocess.run('tleap -f leap.in'.split(" "), check=True, capture_output=True)
                        except subprocess.CalledProcessError as e:
                            logger.error("tleap error", e.output.decode("UTF-8"))
                            # NOTE(review): exit() raises SystemExit, which the outer
                            # `except Exception` does not catch, so this ends the process.
                            exit()

                        prmtop = app.AmberPrmtopFile(f'com.prmtop')
                        inpcrd = app.AmberInpcrdFile(f'com.inpcrd')

                    # Keep a numbered copy of every parameter/coordinate file.
                    for comp in ['us_com', 'com', 'apo', 'lig']:
                        for ext in ['prmtop', 'inpcrd']:
                            shutil.copy(f'{dirpath}/{comp}.{ext}',
                                        f"{self.config.tempdir()}/{comp}_{self.params_written}.{ext}")

                    self.system = prmtop.createSystem(**self.params)

                    if self.config.relax_ligand:
                        # Second system with constraints disabled, for ligand relaxation.
                        mod_parms = copy.deepcopy(self.params)
                        mod_parms['constraints'] = None
                        self._unconstrained_system = prmtop.createSystem(**mod_parms)

                    self.boxvec = self.system.getDefaultPeriodicBoxVectors()
                    self.topology, self.positions = prmtop.topology, inpcrd.positions
                    with open("{}".format(self.config.pdb_file_name), 'w') as f:
                        app.PDBFile.writeFile(self.topology, self.positions, file=f, keepIds=True)
                        logger.log("wrote ", "{}".format(self.config.pdb_file_name))
                    with open("{}".format(self.config.pdb_file_name), 'r') as f:
                        self.pdb = app.PDBFile(f)
                    self.params_written += 1

                    return self.system, self.topology, self.positions
            except Exception as e:
                logger.error("EXCEPTION CAUGHT BAD SPOT", e)

    def __setup_system_im(self, pdbfile: str = None):
        """Build an unsolvated system by shelling out to the AMBER tools.

        Same pipeline as ``__setup_system_ex_amber`` but the tleap script loads
        ``leaprc.protein.ff14SBonlysc``, loads no water model, and neither solvates nor
        adds ions.

        Returns
        -------
        tuple or None
            ``(self.system, self.topology, self.positions)`` on success.
            NOTE(review): exceptions are logged and swallowed, giving an implicit
            ``None`` return that callers cannot unpack -- confirm this is intended.
        """
        with self.logger("__setup_system_im") as logger:
            try:
                # NOTE(review): the TemporaryDirectory path is immediately overwritten,
                # so the temporary directory itself is never used.
                with tempfile.TemporaryDirectory() as dirpath:
                    dirpath = self.config.tempdir()

                    # Move initial file over to new system.
                    shutil.copy(pdbfile, f"{dirpath}/init.pdb")

                    # Assign charges and extract new ligand
                    cmd.reinitialize()
                    cmd.load(f'{dirpath}/init.pdb')
                    cmd.remove("polymer")
                    cmd.remove("resn HOH or resn Cl or resn Na")
                    cmd.save(f'{dirpath}/lig.pdb')
                    cmd.save(f'{dirpath}/lig.mol2')
                    ifs = oechem.oemolistream(f'{dirpath}/lig.pdb')
                    oemol = oechem.OEMol()
                    oechem.OEReadMolecule(ifs, oemol)
                    ifs.close()
                    ofs = oechem.oemolostream()
                    oemol.SetTitle("UNL")
                    oechem.OEAddExplicitHydrogens(oemol)
                    oequacpac.OEAssignCharges(oemol, oequacpac.OEAM1BCCCharges())
                    if ofs.open(f'{dirpath}/charged.mol2'):
                        oechem.OEWriteMolecule(ofs, oemol)
                    ofs.close()

                    # remove hydrogens and ligand from PDB
                    cmd.reinitialize()
                    cmd.load(f'{dirpath}/init.pdb')
                    cmd.remove("not polymer")
                    cmd.remove("hydrogens")
                    cmd.save(f'{dirpath}/apo.pdb')

                    # The external tools below use relative file names, so they run
                    # with dirpath as the working directory.
                    with working_directory(dirpath):
                        subprocess.run(
                            f'antechamber -i lig.pdb -fi pdb -o lig.mol2 -fo mol2 -pf y -an y -a charged.mol2 -fa mol2 -ao crg'.split(
                                " "), check=True, capture_output=True)
                        subprocess.run(f'parmchk2 -i lig.mol2 -f mol2 -o lig.frcmod'.split(" "), check=True,
                                       capture_output=True)
                        try:
                            subprocess.run('pdb4amber -i apo.pdb -o apo_new.pdb --reduce --dry'.split(" "),
                                           check=True, capture_output=True)
                        except subprocess.CalledProcessError as e:
                            # Deliberately non-fatal: a non-zero exit is logged and ignored.
                            logger.error("Known bug, pdb4amber returns error when there was no error", e.stdout,
                                         e.stderr)
                            pass

                        # Wrap tleap
                        with open('leap.in', 'w+') as leap:
                            leap.write("source leaprc.protein.ff14SBonlysc\n")
                            leap.write("source leaprc.phosaa10\n")
                            leap.write("source leaprc.gaff2\n")
                            leap.write("set default PBRadii mbondi3\n")
                            leap.write("rec = loadPDB apo_new.pdb # May need full filepath?\n")
                            leap.write("saveAmberParm rec apo.prmtop apo.inpcrd\n")
                            leap.write("lig = loadmol2 lig.mol2\n")
                            leap.write("loadAmberParams lig.frcmod\n")
                            leap.write("saveAmberParm lig lig.prmtop lig.inpcrd\n")
                            leap.write("com = combine {rec lig}\n")
                            leap.write("saveAmberParm com com.prmtop com.inpcrd\n")
                            leap.write("quit\n")
                        try:
                            subprocess.run('tleap -f leap.in'.split(" "), check=True, capture_output=True)
                        except subprocess.CalledProcessError as e:
                            logger.error("tleap error", e.output.decode("UTF-8"))
                            # NOTE(review): exit() raises SystemExit, which the outer
                            # `except Exception` does not catch, so this ends the process.
                            exit()

                        prmtop = app.AmberPrmtopFile(f'com.prmtop')
                        inpcrd = app.AmberInpcrdFile(f'com.inpcrd')

                    # Keep a numbered copy of every parameter/coordinate file.
                    for comp in ['com', 'apo', 'lig']:
                        for ext in ['prmtop', 'inpcrd']:
                            shutil.copy(f'{dirpath}/{comp}.{ext}',
                                        f"{self.config.tempdir()}/{comp}_{self.params_written}.{ext}")

                    self.system = prmtop.createSystem(**self.params)

                    if self.config.relax_ligand:
                        # Second system with constraints disabled, for ligand relaxation.
                        mod_parms = copy.deepcopy(self.params)
                        mod_parms['constraints'] = None
                        self._unconstrained_system = prmtop.createSystem(**mod_parms)

                    self.boxvec = self.system.getDefaultPeriodicBoxVectors()
                    self.topology, self.positions = prmtop.topology, inpcrd.positions
                    with open("{}".format(self.config.pdb_file_name), 'w') as f:
                        app.PDBFile.writeFile(self.topology, self.positions, file=f, keepIds=True)
                        logger.log("wrote ", "{}".format(self.config.pdb_file_name))
                    with open("{}".format(self.config.pdb_file_name), 'r') as f:
                        self.pdb = app.PDBFile(f)
                    self.params_written += 1

                    return self.system, self.topology, self.positions
            except Exception as e:
                logger.error("EXCEPTION CAUGHT BAD SPOT", e)

    def get_system(self, params):
        """Load the configured PDB and build the system with the configured method.

        :param params: keyword arguments for system creation; stored as ``self.params``
            and forwarded to ``prmtop.createSystem`` or used as ``forcefield_kwargs``.
        :return: the created system (also stored as ``self.system``).
        """
        with self.logger("get_system") as logger:
            self.params = params

            logger.log("Loading inital system", self.config.pdb_file_name)
            self.pdb = app.PDBFile(self.config.pdb_file_name)
            self.topology, self.positions = self.pdb.topology, self.pdb.positions

            # explicit + 'amber' -> AMBER tools with solvent; explicit otherwise ->
            # SystemGenerator with solvent; not explicit -> AMBER tools without solvent.
            if self.config.explicit and self.config.method == 'amber':
                self.system, self.topology, self.positions = self.__setup_system_ex_amber(
                    pdbfile=self.config.pdb_file_name)
            elif self.config.explicit:
                self.system, self.topology, self.positions = self.__setup_system_ex_mm()
            else:
                self.system, self.topology, self.positions = self.__setup_system_im(pdbfile=self.config.pdb_file_name)

        return self.system

    def reload_system(self, ln: str, smis: oechem.OEMol, old_pdb: str, is_oe_already: bool = False):
        """Swap a new ligand into ``old_pdb`` and rebuild the system.

        :param ln: label for the new ligand; only used in the log message.
        :param smis: the new ligand molecule, written out as mol2 and loaded as ``UNL``.
        :param old_pdb: path of the structure whose polymer part is kept.
        :param is_oe_already: not used by this method.
        :return: the rebuilt system (also stored as ``self.system``).
        """
        with self.logger("reload_system") as logger:
            logger.log("Loading {} with new smiles {}".format(old_pdb, ln))
            with tempfile.TemporaryDirectory() as dirpath:
                ofs = oechem.oemolostream("{}/newlig.mol2".format(dirpath))
                oechem.OEWriteMolecule(ofs, smis)
                ofs.close()
                cmd.reinitialize()
                cmd.load(old_pdb)
                cmd.remove("not polymer")
                cmd.load("{}/newlig.mol2".format(dirpath), "UNL")
                cmd.alter("UNL", "resn='UNL'")
                cmd.alter("UNL", "chain='A'")
                # NOTE(review): plain concatenation with no separator, unlike the
                # f"{tempdir()}/..." paths used elsewhere in this class -- confirm that
                # tempdir() returns a path ending in '/'.
                self.config.pdb_file_name = self.config.tempdir() + "reloaded.pdb"
                cmd.save(self.config.pdb_file_name)
                cmd.save(self.config.tempdir() + "apo.pdb")

                with open(self.config.pdb_file_name, 'r') as f:
                    self.pdb = app.PDBFile(f)
            self.positions, self.topology = self.pdb.getPositions(), self.pdb.getTopology()

            # Same dispatch as get_system; relies on self.params set by an earlier
            # get_system call.
            if self.config.explicit and self.config.method == 'amber':
                self.system, self.topology, self.positions = self.__setup_system_ex_amber(
                    pdbfile=self.config.pdb_file_name)
            elif self.config.explicit:
                self.system, self.topology, self.positions = self.__setup_system_ex_mm()
            else:
                self.system, self.topology, self.positions = self.__setup_system_im(
                    pdbfile=self.config.pdb_file_name)

        return self.system

    def get_selection_ids(self, select_cmd):
        """Return the atom ``ID`` values, each minus one, matched by ``select_cmd``.

        The current topology and positions are written to a temporary PDB, loaded
        through ``cmd``, and the selection expression is evaluated there.
        """
        with tempfile.TemporaryDirectory() as dirname:
            with open(f'{dirname}/get_selection_ids.pdb', 'w') as f:
                app.PDBFile.writeFile(self.get_topology(),
                                      self.get_positions(),
                                      file=f)
            cmd.reinitialize()
            cmd.load(f'{dirname}/get_selection_ids.pdb', format='pdb')
            cmd.select("sele", select_cmd)
            stored.ids = list()
            cmd.iterate("sele", expression="stored.ids.append(ID)")
            # Shift each collected ID down by one.
            ids = [int(i - 1) for i in list(stored.ids)]
        return ids

    def get_selection_solvent(self):
        """Return indices for everything that is neither polymer nor the ``UNL`` ligand.

        Returns an empty list when nothing matches; reports a failure through the
        logger when an index falls outside ``[0, len(self.positions))``.
        """
        # NOTE(review): the extra "- 2" offset is applied only to this selection and its
        # reason is not visible here -- confirm.
        ids = [i - 2 for i in self.get_selection_ids("not polymer and not (resn UNL)")]
        if len(ids) == 0:
            return []
        if not ((min(ids) >= 0) and (max(ids) < len(self.positions))):
            self.logger.static_failure("get_selection_solvent", min(ids), max(ids), len(self.positions), exit_all=True)
        return ids

    def get_selection_ligand(self):
        """Return indices of the ``UNL`` ligand atoms (empty list when none match).

        Reports a failure through the logger when an index falls outside
        ``[0, len(self.positions))``.
        """
        ids = [i for i in self.get_selection_ids("resn UNL")]
        if len(ids) == 0:
            return []
        if not ((min(ids) >= 0) and (max(ids) < len(self.positions))):
            self.logger.static_failure("get_selection_ligand", min(ids), max(ids), len(self.positions), exit_all=True)
        return ids

    def get_selection_protein(self):
        """Return indices of the polymer atoms (empty list when none match).

        Reports a failure through the logger when an index falls outside
        ``[0, len(self.positions))``.
        """
        ids = self.get_selection_ids("polymer")
        if len(ids) == 0:
            return []
        if not ((min(ids) >= 0) and (max(ids) < len(self.positions))):
            self.logger.static_failure("get_selection_protein", min(ids), max(ids), len(self.positions), exit_all=True)
        return ids

    def get_topology(self):
        """Return the current topology."""
        return self.topology

    def get_positions(self):
        """Return the current positions."""
        return self.positions
def prepare_simulation(molecule, basedir, save_openmm=False):
    """
    Prepare simulation systems

    For each of the 'complex' and 'ligand' phases this reads the docked PDB from
    ``basedir/docking``, solvates it, minimises and equilibrates it on the CUDA platform,
    and writes gromacs ``.gro``/``.top`` files to ``basedir/gromacs``. A phase is skipped
    when its output files already exist.

    When ``find_warheads(molecule)`` reports warheads, the distance between each warhead
    atom and the CYS145 SG atom is monitored during equilibration of the complex, and the
    min/mean/stddev (in Angstroms) are stored as SD data on ``molecule``.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to set up
    basedir : str
        The base directory for docking/ and fah/ directories
    save_openmm : bool, optional, default=False
        If True, save gzipped OpenMM System, State, Integrator

    Raises
    ------
    Exception
        If the molecule is covalent and the CYS145 SG atom cannot be found in the complex.

    Notes
    -----
    ``oechem`` and ``find_warheads`` are not defined in this function; they are read from
    the enclosing module.
    """
    # Parameters
    from simtk import unit, openmm
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    box_size = openmm.vec3.Vec3(3.4,3.4,3.4)*unit.nanometers
    ionic_strength = 100 * unit.millimolar # 100
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    nsteps_per_iteration = 250
    iterations = 10000 # 10 ns (covalent score)

    protein_forcefield = 'amber14/protein.ff14SB.xml'
    small_molecule_forcefield = 'openff-1.1.0'
    #small_molecule_forcefield = 'gaff-2.11' # only if you really like atomtypes
    solvation_forcefield = 'amber14/tip3p.xml'

    # Create SystemGenerators
    import os
    from simtk.openmm import app
    from openforcefield.topology import Molecule
    off_molecule = Molecule.from_openeye(molecule, allow_undefined_stereo=True)
    print(off_molecule)
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # docking directory
    docking_basedir = os.path.join(basedir, 'docking')

    # gromacs directory
    gromacs_basedir = os.path.join(basedir, 'gromacs')
    os.makedirs(gromacs_basedir, exist_ok=True)

    # openmm directory
    openmm_basedir = os.path.join(basedir, 'openmm')
    os.makedirs(openmm_basedir, exist_ok=True)

    # Cache directory
    cache = os.path.join(openmm_basedir, f'{molecule.GetTitle()}.json')

    common_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 5e-04, 'nonbondedMethod': app.PME, 'hydrogenMass': 3.0*unit.amu}
    unconstrained_kwargs = {'constraints': None, 'rigidWater': False}
    constrained_kwargs = {'constraints': app.HBonds, 'rigidWater': True}
    forcefields = [protein_forcefield, solvation_forcefield]
    from openmmforcefields.generators import SystemGenerator
    # Unconstrained system: only used for the ParmEd -> gromacs export.
    parmed_system_generator = SystemGenerator(forcefields=forcefields,
                                              molecules=[off_molecule],
                                              small_molecule_forcefield=small_molecule_forcefield,
                                              cache=cache,
                                              barostat=barostat,
                                              forcefield_kwargs={**common_kwargs, **unconstrained_kwargs})
    # Constrained system: used for solvation, minimisation and equilibration.
    openmm_system_generator = SystemGenerator(forcefields=forcefields,
                                              molecules=[off_molecule],
                                              small_molecule_forcefield=small_molecule_forcefield,
                                              cache=cache,
                                              barostat=barostat,
                                              forcefield_kwargs={**common_kwargs, **constrained_kwargs})

    # Prepare phases
    print(f'Setting up simulation for {molecule.GetTitle()}...')
    for phase in ['complex', 'ligand']:
        phase_name = f'{molecule.GetTitle()} - {phase}'
        print(phase_name)

        pdb_filename = os.path.join(docking_basedir, phase_name + '.pdb')
        gro_filename = os.path.join(gromacs_basedir, phase_name + '.gro')
        top_filename = os.path.join(gromacs_basedir, phase_name + '.top')

        system_xml_filename = os.path.join(openmm_basedir, phase_name+'.system.xml.gz')
        integrator_xml_filename = os.path.join(openmm_basedir, phase_name+'.integrator.xml.gz')
        state_xml_filename = os.path.join(openmm_basedir, phase_name+'.state.xml.gz')

        # Check if we can skip setup
        gromacs_files_exist = os.path.exists(gro_filename) and os.path.exists(top_filename)
        openmm_files_exist = os.path.exists(system_xml_filename) and os.path.exists(state_xml_filename) and os.path.exists(integrator_xml_filename)
        if gromacs_files_exist and (not save_openmm or openmm_files_exist):
            continue

        # Filter out UNK atoms by spruce
        with open(pdb_filename, 'r') as infile:
            lines = [ line for line in infile if 'UNK' not in line ]
        from io import StringIO
        pdbfile_stringio = StringIO(''.join(lines))

        # Read the unsolvated system into an OpenMM Topology
        pdbfile = app.PDBFile(pdbfile_stringio)
        topology, positions = pdbfile.topology, pdbfile.positions

        # Add solvent
        print('Adding solvent...')
        modeller = app.Modeller(topology, positions)
        # The ligand phase uses a fixed box; the complex is padded instead.
        if phase == 'ligand':
            kwargs = {'boxSize' : box_size}
        else:
            kwargs = {'padding' : solvent_padding}
        # Use the declared water_model rather than a second hard-coded 'tip3p' literal.
        modeller.addSolvent(openmm_system_generator.forcefield, model=water_model, ionicStrength=ionic_strength, **kwargs)

        # Create an OpenMM system
        system = openmm_system_generator.create_system(modeller.topology)

        # If monitoring covalent distance, add an unused force
        warheads_found = find_warheads(molecule)
        covalent = (len(warheads_found) > 0)
        if covalent and phase=='complex':

            # Find warhead atom indices
            sulfur_atom_index = None
            for atom in topology.atoms():
                if (atom.residue.name == 'CYS') and (atom.residue.id == '145') and (atom.name == 'SG'):
                    sulfur_atom_index = atom.index
                    break
            if sulfur_atom_index is None:
                raise Exception('CYS145 SG atom cannot be found')

            print('Adding CustomCVForces...')
            # Energy expression '0': the force only tracks the collective variables.
            custom_cv_force = openmm.CustomCVForce('0')
            for warhead_type, warhead_atom_index in warheads_found.items():
                distance_force = openmm.CustomBondForce('r')
                distance_force.setUsesPeriodicBoundaryConditions(True)
                distance_force.addBond(sulfur_atom_index, warhead_atom_index, [])
                custom_cv_force.addCollectiveVariable(warhead_type, distance_force)
            force_index = system.addForce(custom_cv_force)

        # Create OpenMM Context
        platform = openmm.Platform.getPlatformByName('CUDA')
        platform.setPropertyDefaultValue('Precision', 'mixed')
        integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep)
        context = openmm.Context(system, integrator, platform)
        context.setPositions(modeller.positions)

        # Report initial potential energy
        state = context.getState(getEnergy=True)
        print(f'{molecule.GetTitle()} {phase} : Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Minimize
        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        # Equilibrate
        print('Equilibrating...')
        from tqdm import tqdm
        import numpy as np
        distances = np.zeros([iterations], np.float32)
        for iteration in tqdm(range(iterations)):
            integrator.step(nsteps_per_iteration)
            if covalent and phase=='complex':
                # Get distance in Angstroms
                distances[iteration] = min(custom_cv_force.getCollectiveVariableValues(context)[:]) * 10

        # Retrieve state
        state = context.getState(getPositions=True, getVelocities=True, getEnergy=True, getForces=True)
        system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
        print(f'{molecule.GetTitle()} {phase} : Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

        # Remove CustomCVForce
        if covalent and phase=='complex':
            print('Removing CustomCVForce...')
            system.removeForce(force_index)
            from pymbar.timeseries import detectEquilibration
            # Only the equilibration start index is needed to discard the transient.
            t0, _, _ = detectEquilibration(distances)
            distances = distances[t0:]
            distance_min = distances.min()
            distance_mean = distances.mean()
            distance_stddev = distances.std()
            oechem.OESetSDData(molecule, 'covalent_distance_min', str(distance_min))
            oechem.OESetSDData(molecule, 'covalent_distance_mean', str(distance_mean))
            oechem.OESetSDData(molecule, 'covalent_distance_stddev', str(distance_stddev))
            print(f'Covalent distance: mean {distance_mean:.3f} A : stddev {distance_stddev:.3f} A')

        # Save as OpenMM
        if save_openmm:
            print('Saving as OpenMM...')
            import gzip
            with gzip.open(integrator_xml_filename, 'wt') as f:
                f.write(openmm.XmlSerializer.serialize(integrator))
            with gzip.open(state_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(state))
            with gzip.open(system_xml_filename,'wt') as f:
                f.write(openmm.XmlSerializer.serialize(system))
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-explicit.pdb.gz'), 'wt') as f:
                app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
            with gzip.open(os.path.join(openmm_basedir, phase_name+'-solute.pdb.gz'), 'wt') as f:
                import mdtraj
                mdtraj_topology = mdtraj.Topology.from_openmm(modeller.topology)
                mdtraj_trajectory = mdtraj.Trajectory([state.getPositions(asNumpy=True) / unit.nanometers], mdtraj_topology)
                selection = mdtraj_topology.select('not water')
                mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
                app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(), mdtraj_trajectory.openmm_positions(0), f)

        # Convert to gromacs via ParmEd
        print('Saving as gromacs...')
        import parmed
        parmed_system = parmed_system_generator.create_system(modeller.topology)
        #parmed_system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
        structure = parmed.openmm.load_topology(modeller.topology, parmed_system, xyz=state.getPositions(asNumpy=True))
        structure.save(gro_filename, overwrite=True)
        structure.save(top_filename, overwrite=True)
# Repair the input structure with PDBFixer, write it out, then energy-minimise it
# and write the minimised coordinates.
print('Processing', pdb_in, 'to', pdb_out)

fixer = PDBFixer(filename=pdb_in)

# Locate problems first so they can be reported before anything is modified.
for locate in (fixer.findMissingResidues,
               fixer.findMissingAtoms,
               fixer.findNonstandardResidues):
    locate()

for label, found in (('Residues:', fixer.missingResidues),
                     ('Atoms:', fixer.missingAtoms),
                     ('Terminals:', fixer.missingTerminals),
                     ('Non-standard:', fixer.nonstandardResidues)):
    print(label, found)

# Apply the repairs: heavy atoms, hydrogens at pH 7.4, then drop heterogens
# (water included).
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.4)
fixer.removeHeterogens(False)

fixed_path = pdb_out + '_fixed.pdb'
with open(fixed_path, 'w') as fixed_file:
    PDBFile.writeFile(fixer.topology,
                      fixer.positions,
                      file=fixed_file,
                      keepIds=True)

# Parameterise the repaired structure and set up a Langevin simulation for it.
system_generator = SystemGenerator(forcefields=['amber/ff14SB.xml'])
system = system_generator.create_system(fixer.topology)
integrator = LangevinIntegrator(300 * unit.kelvin,
                                1 / unit.picosecond,
                                0.002 * unit.picoseconds)
simulation = Simulation(fixer.topology, system, integrator)
simulation.context.setPositions(fixer.positions)

print('Minimising')
simulation.minimizeEnergy()

# write out the minimised PDB
minimised_path = pdb_out + '_minimised.pdb'
with open(minimised_path, 'w') as minimised_file:
    minimised_state = simulation.context.getState(getPositions=True,
                                                  enforcePeriodicBox=False)
    PDBFile.writeFile(fixer.topology,
                      minimised_state.getPositions(),
                      file=minimised_file,
                      keepIds=True)

print('Done')
class PointMutationExecutor(object):
    """
    Simple, stripped-down class to create a protein-ligand system and allow a
    mutation of a protein. This supports the creation of _two_ relative free
    energy calculations:
        1. 'wildtype' - 'point mutant' complex hybrid.
        2. 'wildtype' - 'point mutant' receptor hybrid (i.e. with ligand of
           interest unbound)

    Both hybrids are built in the constructor and stored as
    ``self.complex_htf`` and ``self.apo_htf``; the ``get_*_htf`` accessors
    return deep copies of them.

    Example (create full point mutation executor and run parallel tempering on
    both complex and apo phases):
        receptor_path = 'data/perses_jacs_systems/thrombin/Thrombin_protein.pdb'
        ligands_path = 'data/perses_jacs_systems/thrombin/Thrombin_ligands.sdf'
        receptor_filename = resource_filename('openmmforcefields', receptor_path)
        ligand_filename = resource_filename('openmmforcefields', ligands_path)

        pm_delivery = PointMutationExecutor(receptor_filename = receptor_filename,
                                            ligand_filename = ligand_filename,
                                            mutation_chain_id = '2',
                                            mutation_residue_id = '198',
                                            proposed_residue = 'THR',
                                            phase = 'complex',
                                            conduct_endstate_validation = False,
                                            ligand_index = 0,
                                            forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                                            barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                                            forcefield_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.PME, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                                            small_molecule_forcefields = 'gaff-2.11')

        complex_htf = pm_delivery.get_complex_htf()
        apo_htf = pm_delivery.get_apo_htf()

        #now we can build the hybrid repex samplers
        from perses.annihilation.lambda_protocol import LambdaProtocol
        from openmmtools.multistate import MultiStateReporter
        from perses.samplers.multistate import HybridRepexSampler
        from openmmtools import mcmc

        suffix = 'run'; selection = 'not water'; checkpoint_interval = 10; n_states = 11; n_cycles = 5000

        for phase_name, htf in [('complex', complex_htf), ('apo', apo_htf)]:
            lambda_protocol = LambdaProtocol(functions='default')
            reporter_file = phase_name + '_' + suffix + '.nc'
            reporter = MultiStateReporter(reporter_file, analysis_particle_indices = htf.hybrid_topology.select(selection), checkpoint_interval = checkpoint_interval)
            hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(timestep= 4.0 * unit.femtoseconds,
                                                                                  collision_rate=5.0 / unit.picosecond,
                                                                                  n_steps=250,
                                                                                  reassign_velocities=False,
                                                                                  n_restart_attempts=20,
                                                                                  splitting="V R R R O R R R V",
                                                                                  constraint_tolerance=1e-06),
                                     hybrid_factory=htf, online_analysis_interval=10)
            hss.setup(n_states=n_states, temperature=300*unit.kelvin, storage_file = reporter, lambda_protocol = lambda_protocol, endstates=False)
            hss.extend(n_cycles)
    """

    # NOTE: the list/dict/barostat defaults below are evaluated once at class
    # definition time and therefore shared between calls. This class only
    # forwards them to SystemGenerator and never mutates them itself.
    def __init__(self,
                 receptor_filename,
                 ligand_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=False,
                 ligand_index=0,
                 forcefield_files=[
                     'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'
                 ],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                                    temperature, 50),
                 forcefield_kwargs={
                     'removeCMMotion': False,
                     'ewaldErrorTolerance': 1e-4,
                     'nonbondedMethod': app.PME,
                     'constraints': app.HBonds,
                     'hydrogenMass': 4 * unit.amus
                 },
                 small_molecule_forcefields='gaff-2.11',
                 **kwargs):
        """
        Build the complex and apo hybrid topology factories.

        arguments
            receptor_filename : str
                path to receptor; .pdb
            ligand_filename : str
                path to ligand of interest; passed to ``createOEMolFromSDF``
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then neither the complex nor the apo
                system will be solvated with water; else, both will be
                solvated with tip3p
            conduct_endstate_validation : bool, default False
                whether to conduct an endstate validation of the hybrid
                topology factory
            ligand_index : int, default 0
                which ligand to use
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
                barostat to use (``temperature`` is the module-level value)
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.PME, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            **kwargs
                accepted for call compatibility; not used by this constructor

        TODO : allow argument for separate apo structure if it exists separately
               allow argument for specator ligands besides the 'ligand_filename'
        """
        from openforcefield.topology import Molecule
        from openmmforcefields.generators import SystemGenerator

        # first thing to do is make a complex and apo...
        # A context manager guarantees the handle is closed even if parsing
        # the PDB raises.
        with open(receptor_filename, 'r') as pdbfile:
            pdb = app.PDBFile(pdbfile)
        receptor_positions = pdb.positions
        receptor_md_topology = md.Topology.from_openmm(pdb.topology)
        # Round-trip through mdtraj so the receptor topology is built the same
        # way as the joined complex topology below.
        receptor_topology = receptor_md_topology.to_openmm()
        receptor_n_atoms = receptor_md_topology.n_atoms

        ligand_mol = createOEMolFromSDF(ligand_filename, index=ligand_index)
        ligand_mol = generate_unique_atom_names(ligand_mol)
        molecules = [
            Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False)
        ]
        ligand_positions = extractPositionsFromOEMol(ligand_mol)
        ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            ligand_mol)
        ligand_md_topology = md.Topology.from_openmm(ligand_topology)
        ligand_n_atoms = ligand_md_topology.n_atoms

        # now create a complex: receptor atoms first, ligand atoms appended
        complex_md_topology = receptor_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros(
            [receptor_n_atoms + ligand_n_atoms, 3]),
                                          unit=unit.nanometers)
        complex_positions[:receptor_n_atoms, :] = receptor_positions
        complex_positions[receptor_n_atoms:, :] = ligand_positions

        # now for a system_generator
        self.system_generator = SystemGenerator(
            forcefields=forcefield_files,
            barostat=barostat,
            forcefield_kwargs=forcefield_kwargs,
            small_molecule_forcefield=small_molecule_forcefields,
            molecules=molecules,
            cache=None)

        # create complex and apo inputs...
        complex_topology, complex_positions, complex_system = self._solvate(
            complex_topology, complex_positions, 'tip3p', phase=phase)
        # FIX: the requested phase is forwarded here. Previously the literal
        # string 'phase' was passed, so the apo system was solvated even when
        # a vacuum calculation was requested.
        apo_topology, apo_positions, apo_system = self._solvate(
            receptor_topology, receptor_positions, 'tip3p', phase=phase)

        geometry_engine = FFAllAngleGeometryEngine(
            metadata=None,
            use_sterics=False,
            n_bond_divisions=100,
            n_angle_divisions=180,
            n_torsion_divisions=360,
            verbose=True,
            storage=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            neglect_angles=False,
            use_14_nonbondeds=True)

        # run pipeline... (complex first, apo second)
        htfs = []
        phase_inputs = zip([complex_topology, apo_topology],
                           [complex_positions, apo_positions],
                           [complex_system, apo_system])
        # The loop variable is named `system` (not `sys`) so it does not
        # shadow the standard-library module name.
        for top, pos, system in phase_inputs:
            point_mutation_engine = PointMutationEngine(
                wildtype_topology=top,
                system_generator=self.system_generator,
                # the chain id allowed to mutate (it's always a string)
                chain_id=mutation_chain_id,
                max_point_mutants=1,
                # the residue ids allowed to mutate
                residues_allowed_to_mutate=[mutation_residue_id],
                # residue id paired with the three-letter code to change to
                allowed_mutations=[(mutation_residue_id, proposed_residue)],
                aggregate=True)  # always allow aggregation
            topology_proposal = point_mutation_engine.propose(system, top)

            new_positions, _logp_proposal = geometry_engine.propose(
                topology_proposal, pos, beta)
            # The return value is unused, but the call is kept: the reverse_*
            # reduced potentials read below are attributes of the geometry
            # engine and are presumably populated by this reverse pass.
            geometry_engine.logp_reverse(topology_proposal, new_positions,
                                         pos, beta)

            forward_htf = HybridTopologyFactory(
                topology_proposal=topology_proposal,
                current_positions=pos,
                new_positions=new_positions,
                use_dispersion_correction=False,
                functions=None,
                softcore_alpha=None,
                bond_softening_constant=1.0,
                angle_softening_constant=1.0,
                soften_only_new=False,
                neglected_new_angle_terms=[],
                neglected_old_angle_terms=[],
                softcore_LJ_v2=True,
                softcore_electrostatics=True,
                softcore_LJ_v2_alpha=0.85,
                softcore_electrostatics_alpha=0.3,
                softcore_sigma_Q=1.0,
                interpolate_old_and_new_14s=False,
                omitted_terms=None)

            # FIX: the assertion messages now format the local
            # `geometry_engine`; they used to reference a nonexistent
            # `self._geometry_engine`, turning a failed assertion into an
            # AttributeError.
            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
                # FIX: this used to assign `vacuum_added_valence_energy`,
                # leaving `added_valence_energy` undefined for the validation
                # call below.
                added_valence_energy = 0.0
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

            if conduct_endstate_validation:
                # The returned errors are not inspected here; the call is made
                # for whatever checks validate_endstate_energies performs.
                zero_state_error, one_state_error = validate_endstate_energies(
                    forward_htf._topology_proposal,
                    forward_htf,
                    added_valence_energy,
                    subtracted_valence_energy,
                    beta=beta,
                    ENERGY_THRESHOLD=ENERGY_THRESHOLD)

            htfs.append(forward_htf)

        self.complex_htf = htfs[0]
        self.apo_htf = htfs[1]

    def get_complex_htf(self):
        """Return a deep copy of the complex-phase hybrid topology factory."""
        return copy.deepcopy(self.complex_htf)

    def get_apo_htf(self):
        """Return a deep copy of the apo-phase hybrid topology factory."""
        return copy.deepcopy(self.apo_htf)

    def _solvate(self, topology, positions, model, phase):
        """
        Generate a solvated topology, positions, and system for a given input
        topology and positions. For generating the system, the forcefield
        files provided in the constructor will be used.

        Parameters
        ----------
        topology : app.Topology
            Topology of the system to solvate
        positions : [n, 3] ndarray of Quantity nm
            the positions of the unsolvated system
        model : str
            solvent model to use for solvation (e.g. 'tip3p')
        phase : str
            if 'vacuum', no solvent is added and the input is only
            parameterized; any other value triggers solvation

        Returns
        -------
        solvated_topology : app.Topology
            Topology of the system with added waters
        solvated_positions : [n + 3(n_waters), 3] ndarray of Quantity nm
            Solvated positions
        solvated_system : openmm.System
            The parameterized system, containing a barostat if one was
            specified.
        """
        modeller = app.Modeller(topology, positions)

        # Solvent is added with a 1 nm padding and 0.15 M ionic strength
        # unless a vacuum phase was requested.
        if phase != 'vacuum':
            modeller.addSolvent(self.system_generator.forcefield,
                                model=model,
                                padding=1.0 * unit.nanometers,
                                ionicStrength=0.15 * unit.molar)

        solvated_topology = modeller.getTopology()
        raw_positions = modeller.getPositions()

        # canonicalize the solvated positions: turn tuples into np.array
        solvated_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                raw_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)

        solvated_system = self.system_generator.create_system(
            solvated_topology)
        return solvated_topology, solvated_positions, solvated_system
print('System has %d atoms' % modeller.topology.getNumAtoms()) # Solvate print('Adding solvent...') # we use the 'padding' option to define the periodic box. The PDB file does not contain any # unit cell information so we just create a box that has a 10A padding around the complex. modeller.addSolvent(system_generator.forcefield, model='tip3p', padding=10.0 * unit.angstroms) print('System has %d atoms' % modeller.topology.getNumAtoms()) with open(output_complex, 'w') as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) # Create the system using the SystemGenerator system = system_generator.create_system(modeller.topology, molecules=ligand_mol) integrator = LangevinIntegrator(temperature, 1 / unit.picosecond, 0.002 * unit.picoseconds) system.addForce( openmm.MonteCarloBarostat(1 * unit.atmospheres, temperature, 25)) print('Default Periodic box:', system.getDefaultPeriodicBoxVectors()) simulation = Simulation(modeller.topology, system, integrator, platform=platform) context = simulation.context context.setPositions(modeller.positions) print('Minimising ...') simulation.minimizeEnergy()
def _generate_openmm_system(self,
                            molecule: "offtop.Molecule",
                            method: str,
                            keywords: Dict = None) -> "openmm.System":
    """
    Generate an OpenMM System object from the input molecule method and basis.

    The result is memoised through ``self._CACHE`` / ``self._get_cache`` /
    ``self._cache_it`` under a SHA-256 key built from the mapped SMILES of
    ``molecule``, the ``method`` string and the stringified ``keywords``
    values (in dict order).

    Parameters
    ----------
    molecule
        Molecule to parameterize; must provide ``to_smiles`` and
        ``to_topology``.
    method
        Name of the small molecule force field handed to ``SystemGenerator``.
    keywords
        Optional settings. Recognised keys are ``"constraints"`` (one of
        ``hbonds``, ``allbonds``, ``hangles``) and ``"nonbondedMethod"`` (one
        of ``ljpme``, ``pme``, ``ewald``, ``nocutoff``,
        ``cutoffnonperiodic``); both are matched case-insensitively. ``None``
        is treated as an empty dict.

    Returns
    -------
    The cached or freshly created system.

    Raises
    ------
    ValueError
        If the constraint or nonbonded method is not one of the supported
        names.
    """
    # FIX: the signature defaults `keywords` to None, but the body used to
    # call `.values()` and `in` on it unconditionally, so the default call
    # crashed. Treat None as "no options".
    if keywords is None:
        keywords = {}

    # create a hash based on the input options
    hashstring = molecule.to_smiles(
        isomeric=True, explicit_hydrogens=True, mapped=True) + method
    hashstring += "".join(str(value) for value in keywords.values())
    key = hashlib.sha256(hashstring.encode()).hexdigest()

    # now look for the system? A cache hit needs none of the OpenMM imports
    # below, so return before paying for them.
    if key in self._CACHE:
        return self._get_cache(key)

    from openmmforcefields.generators import SystemGenerator
    from simtk.openmm import app

    # make the system from the inputs
    # set up available options for openmm
    _constraint_types = {
        "hbonds": app.HBonds,
        "allbonds": app.AllBonds,
        "hangles": app.HAngles
    }
    _periodic_nonbond_types = {
        "ljpme": app.LJPME,
        "pme": app.PME,
        "ewald": app.Ewald
    }
    _non_periodic_nonbond_types = {
        "nocutoff": app.NoCutoff,
        "cutoffnonperiodic": app.CutoffNonPeriodic
    }

    forcefield_kwargs = None
    if "constraints" in keywords:
        constraints = keywords["constraints"]
        try:
            forcefield_kwargs = {
                "constraints": _constraint_types[constraints.lower()]
            }
        # AttributeError covers non-string values that have no .lower()
        except (KeyError, AttributeError):
            raise ValueError(
                f"constraint '{constraints}' not supported, valid constraints are {_constraint_types.keys()}"
            )

    # The nonbonded method decides which of the two kwargs dicts is filled.
    periodic_forcefield_kwargs = None
    nonperiodic_forcefield_kwargs = None
    nonbondedmethod = keywords.get("nonbondedMethod", None)
    if nonbondedmethod is not None:
        method_name = nonbondedmethod.lower()
        if method_name in _periodic_nonbond_types:
            periodic_forcefield_kwargs = {
                "nonbondedMethod": _periodic_nonbond_types[method_name]
            }
        elif method_name in _non_periodic_nonbond_types:
            nonperiodic_forcefield_kwargs = {
                "nonbondedMethod": _non_periodic_nonbond_types[method_name]
            }
        else:
            raise ValueError(
                f"nonbondedmethod '{nonbondedmethod}' not supported, valid nonbonded methods are periodic: {_periodic_nonbond_types.keys()}"
                f" or non_periodic: {_non_periodic_nonbond_types.keys()}."
            )

    # now start the system generator
    system_generator = SystemGenerator(
        small_molecule_forcefield=method,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
    )
    topology = molecule.to_topology()

    system = system_generator.create_system(topology=topology.to_openmm(),
                                            molecules=[molecule])
    self._cache_it(key, system)
    return system
def setup_fah_run(destination_path,
                  protein_pdb_filename,
                  oemol=None,
                  cache=None,
                  restrain_rmsd=False):
    """
    Prepare simulation

    Builds a solvated protein (optionally protein:ligand) system, minimizes
    and equilibrates it, and writes the serialized system, state and
    integrator plus PDB snapshots into ``destination_path``. Returns early
    (without doing anything) if the three serialized XML files already exist.

    Parameters
    ----------
    destination_path : str
        The path to the RUN to be created
    protein_pdb_filename : str
        Path to protein PDB file
    oemol : openeye.oechem.OEMol, optional, default=None
        The molecule to parameterize, with SDData attached
        If None, don't include the small molecule
    cache : optional, default=None
        Passed unchanged as the ``cache`` argument of ``SystemGenerator``
    restrain_rmsd : bool, optional, default=False
        If True, restrain RMSD during first equilibration phase

    """
    # Parameters
    from simtk import unit, openmm
    protein_forcefield = 'amber14/protein.ff14SB.xml'
    solvent_forcefield = 'amber14/tip3p.xml'
    small_molecule_forcefield = 'openff-1.2.0'
    # NOTE(review): water_model is never read; addSolvent below hard-codes
    # model='tip3p' instead.
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    ionic_strength = 70 * unit.millimolar  # assay buffer: 20 mM HEPES pH 7.3, 1 mM TCEP, 50 mM NaCl, 0.01% Tween-20, 10% glycerol
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    iterations = 1000  # 1 ns equilibration (1000 iterations x 250 steps x 4 fs)
    nsteps_per_iteration = 250

    # Prepare phases
    import os
    system_xml_filename = os.path.join(destination_path, 'system.xml.bz2')
    integrator_xml_filename = os.path.join(destination_path,
                                           'integrator.xml.bz2')
    state_xml_filename = os.path.join(destination_path, 'state.xml.bz2')

    # Check if we can skip setup
    # Only the three serialized XML files are checked; the PDB and molecule
    # files written at the end are not part of this test.
    openmm_files_exist = os.path.exists(
        system_xml_filename) and os.path.exists(
            state_xml_filename) and os.path.exists(integrator_xml_filename)
    if openmm_files_exist:
        return

    # Create barostat
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # Create RUN directory if it does not yet exist
    os.makedirs(destination_path, exist_ok=True)

    # Load any molecule(s)
    molecule = None
    if oemol is not None:
        from openforcefield.topology import Molecule
        molecule = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        molecule.name = 'MOL'  # Ensure residue is MOL
        # Debug output: list the residues of the ligand topology.
        print([res for res in molecule.to_topology().to_openmm().residues()])

    # Create SystemGenerator
    import os
    from simtk.openmm import app
    forcefield_kwargs = {
        'removeCMMotion': False,
        'hydrogenMass': 3.0 * unit.amu,
        'constraints': app.HBonds,
        'rigidWater': True
    }
    periodic_kwargs = {
        'nonbondedMethod': app.PME,
        'ewaldErrorTolerance': 2.5e-04
    }
    forcefields = [protein_forcefield, solvent_forcefield]
    from openmmforcefields.generators import SystemGenerator
    # `molecule` is None when no ligand was supplied.
    openmm_system_generator = SystemGenerator(
        forcefields=forcefields,
        molecules=molecule,
        small_molecule_forcefield=small_molecule_forcefield,
        cache=cache,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_kwargs)

    # Read protein
    print(f'Reading protein from {protein_pdb_filename}...')
    pdbfile = app.PDBFile(protein_pdb_filename)
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)

    if oemol is not None:
        # Add small molecule to the system
        # (appended after the protein, using the first conformer's coordinates)
        modeller.add(molecule.to_topology().to_openmm(),
                     molecule.conformers[0])
        # DEBUG : Check residue name
        with open(os.path.join(destination_path, 'initial-complex.pdb'),
                  'wt') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions,
                                  outfile)

    # Add solvent
    print('Adding solvent...')
    kwargs = {'padding': solvent_padding}
    modeller.addSolvent(openmm_system_generator.forcefield,
                        model='tip3p',
                        ionicStrength=ionic_strength,
                        **kwargs)

    # Create an OpenMM system
    print('Creating OpenMM system...')
    system = openmm_system_generator.create_system(modeller.topology)

    # Add a virtual bond between protein and ligand to make sure they are not imaged separately
    if oemol is not None:
        import mdtraj as md
        # This topology is taken after addSolvent, so it includes solvent and
        # ions; the selections below pick out protein and ligand atoms only.
        mdtop = md.Topology.from_openmm(
            modeller.topology)
        for res in mdtop.residues:
            print(res)
        protein_atom_indices = mdtop.select(
            '(protein and name CA)')  # protein CA atoms
        ligand_atom_indices = mdtop.select(
            '((resname MOL) and (mass > 1))')  # ligand heavy atoms
        # Bond the first CA atom to the first ligand heavy atom.
        protein_atom_index = int(protein_atom_indices[0])
        ligand_atom_index = int(ligand_atom_indices[0])
        # The energy expression is the constant '0', so the bond contributes
        # no energy.
        force = openmm.CustomBondForce('0')
        force.addBond(protein_atom_index, ligand_atom_index, [])
        system.addForce(force)

    # Add RMSD restraints if requested
    if restrain_rmsd:
        print('Adding RMSD restraint...')
        kB = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB
        kT = kB * temperature
        import mdtraj as md
        # NOTE(review): this topology comes from the protein PDB only, so the
        # `resname MOL` clause below presumably matches nothing unless the PDB
        # itself contains a MOL residue -- confirm whether the ligand is meant
        # to be restrained.
        mdtop = md.Topology.from_openmm(
            pdbfile.topology)  # excludes solvent and ions
        #heavy_atom_indices = mdtop.select('mass > 1') # heavy solute atoms
        rmsd_atom_indices = mdtop.select(
            '(protein and (name CA)) or ((resname MOL) and (mass > 1))'
        )  # CA atoms and ligand heavy atoms
        rmsd_atom_indices = [int(index) for index in rmsd_atom_indices]
        # Harmonic restraint on the RMSD with force constant kT / A^2, measured
        # against the pre-minimization modeller positions.
        custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*RMSD^2')
        custom_cv_force.addGlobalParameter('K_RMSD', kT / unit.angstrom**2)
        rmsd_force = openmm.RMSDForce(modeller.positions, rmsd_atom_indices)
        custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
        # Remember the index so the force can be removed before export.
        force_index = system.addForce(custom_cv_force)

    # Create OpenMM Context
    # The OpenCL platform is hard-coded here.
    platform = openmm.Platform.getPlatformByName('OpenCL')
    platform.setPropertyDefaultValue('Precision', 'mixed')
    from openmmtools import integrators
    integrator = integrators.LangevinIntegrator(temperature, collision_rate,
                                                timestep)
    context = openmm.Context(system, integrator, platform)
    context.setPositions(modeller.positions)

    # Report initial potential energy
    state = context.getState(getEnergy=True)
    print(
        f'Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Store snapshots in MDTraj trajectory to examine RMSD
    import mdtraj as md
    import numpy as np
    # Built from the protein PDB topology, so only those atoms are tracked.
    mdtop = md.Topology.from_openmm(pdbfile.topology)
    atom_indices = mdtop.select('all')  # every atom of the protein PDB topology
    # NOTE(review): protein_atom_indices and ligand_atom_indices are only
    # referenced by the commented-out RMSD check further down.
    protein_atom_indices = mdtop.select(
        'protein and (mass > 1)')  # heavy solute atoms
    if oemol is not None:
        ligand_atom_indices = mdtop.select(
            '(resname MOL) and (mass > 1)')  # ligand heavy atoms
    # One frame for the initial positions plus one per equilibration iteration.
    trajectory = md.Trajectory(
        np.zeros([iterations + 1, len(atom_indices), 3], np.float32), mdtop)
    trajectory.xyz[0, :, :] = context.getState(getPositions=True).getPositions(
        asNumpy=True)[atom_indices] / unit.nanometers

    # Minimize
    print('Minimizing...')
    openmm.LocalEnergyMinimizer.minimize(context)

    # Equilibrate (with RMSD restraint if needed)
    import numpy as np
    from rich.progress import track
    import time
    initial_time = time.time()
    for iteration in track(range(iterations), 'Equilibrating...'):
        integrator.step(nsteps_per_iteration)
        trajectory.xyz[iteration + 1, :, :] = context.getState(
            getPositions=True).getPositions(
                asNumpy=True)[atom_indices] / unit.nanometers
    # Throughput: simulated time divided by wall-clock time, in ns/day.
    elapsed_time = (time.time() - initial_time) * unit.seconds
    ns_per_day = (context.getState().getTime() /
                  elapsed_time) / (unit.nanoseconds / unit.day)
    print(f'Performance: {ns_per_day:8.3f} ns/day')

    if restrain_rmsd:
        # Disable RMSD restraint
        # (the force constant is set to zero in the context; the force object
        # itself is removed from the system further down)
        context.setParameter('K_RMSD', 0.0)

        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        # Second equilibration pass; no trajectory frames are stored here.
        for iteration in track(range(iterations),
                               'Equilibrating without RMSD restraint...'):
            integrator.step(nsteps_per_iteration)

    # Retrieve state
    state = context.getState(getPositions=True,
                             getVelocities=True,
                             getEnergy=True,
                             getForces=True)
    # Copy the final box vectors into both the system and the topology so the
    # exported files carry them.
    system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
    modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
    print(
        f'Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol'
    )

    # Drop the RMSD restraint force from the system before it is serialized
    if restrain_rmsd:
        print('Removing RMSD restraint from system...')
        system.removeForce(force_index)

    #if oemol is not None:
    #    # Check final RMSD
    #    print('checking RMSD...')
    #    trajectory.superpose(trajectory, atom_indices=protein_atom_indices)
    #    protein_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=protein_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_protein_rmsd', f'{protein_rmsd:.2f} A')
    #    ligand_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=ligand_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_ligand_rmsd', f'{ligand_rmsd:.2f} A')
    #    print('RMSD after equilibration: protein {protein_rmsd:8.2f} A | ligand {ligand_rmsd:8.3f} A')

    # Save as OpenMM
    print('Exporting for OpenMM FAH simulation...')
    import bz2
    with bz2.open(integrator_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(integrator))
    with bz2.open(state_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(state))
    with bz2.open(system_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(system))
    # NOTE(review): this file is written through bz2 but named '.pdb.gz' --
    # confirm the intended compression/extension with downstream consumers.
    with bz2.open(os.path.join(destination_path, 'equilibrated-all.pdb.gz'),
                  'wt') as f:
        app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
    # Uncompressed snapshot with water stripped ('not water' selection).
    with open(os.path.join(destination_path, 'equilibrated-solute.pdb'),
              'wt') as f:
        import mdtraj
        mdtraj_topology = mdtraj.Topology.from_openmm(modeller.topology)
        mdtraj_trajectory = mdtraj.Trajectory(
            [state.getPositions(asNumpy=True) / unit.nanometers],
            mdtraj_topology)
        selection = mdtraj_topology.select('not water')
        mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
        app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(),
                              mdtraj_trajectory.openmm_positions(0), f)
    if oemol is not None:
        # Write molecule as SDF, mol2, SMILES, and CSV
        # (`oechem` is not imported in this function; it must come from the
        # enclosing module)
        for extension in ['sdf', 'mol2', 'smi', 'csv']:
            filename = os.path.join(destination_path, f'molecule.{extension}')
            with oechem.oemolostream(filename) as ofs:
                oechem.OEWriteMolecule(ofs, oemol)

    # Clean up
    del context, integrator
class PointMutationExecutor(object):
    """
    Simple, stripped-down class to create a protein-ligand system and allow a mutation of a protein.
    this will allow support for the creation of _two_ relative free energy calculations:
        1. 'wildtype' - 'point mutant' complex hybrid.
        2. 'wildtype' - 'point mutant' protein hybrid (i.e. with ligand of interest unbound)

    Example (create full point mutation executor and run parallel tempering on both complex and apo phases):
        from pkg_resources import resource_filename

        protein_path = 'data/perses_jacs_systems/thrombin/Thrombin_protein.pdb'
        ligands_path = 'data/perses_jacs_systems/thrombin/Thrombin_ligands.sdf'
        protein_filename = resource_filename('openmmforcefields', protein_path)
        ligand_input = resource_filename('openmmforcefields', ligands_path)

        pm_delivery = PointMutationExecutor(protein_filename=protein_filename,
                                            mutation_chain_id='2',
                                            mutation_residue_id='198',
                                            proposed_residue='THR',
                                            phase='complex',
                                            conduct_endstate_validation=False,
                                            ligand_input=ligand_input,
                                            ligand_index=0,
                                            forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                                            barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                                            forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.PME, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                                            small_molecule_forcefields='gaff-2.11')

        complex_htf = pm_delivery.get_complex_htf()
        apo_htf = pm_delivery.get_apo_htf()

        # Now we can build the hybrid repex samplers
        from perses.annihilation.lambda_protocol import LambdaProtocol
        from openmmtools.multistate import MultiStateReporter
        from perses.samplers.multistate import HybridRepexSampler
        from openmmtools import mcmc

        suffix = 'run'; selection = 'not water'; checkpoint_interval = 10; n_states = 11; n_cycles = 5000

        for htf in [complex_htf, apo_htf]:
            lambda_protocol = LambdaProtocol(functions='default')
            reporter_file = 'reporter.nc'
            reporter = MultiStateReporter(reporter_file, analysis_particle_indices = htf.hybrid_topology.select(selection), checkpoint_interval = checkpoint_interval)
            hss = HybridRepexSampler(mcmc_moves=mcmc.LangevinSplittingDynamicsMove(timestep= 4.0 * unit.femtoseconds,
                                                                                  collision_rate=5.0 / unit.picosecond,
                                                                                  n_steps=250,
                                                                                  reassign_velocities=False,
                                                                                  n_restart_attempts=20,
                                                                                  splitting="V R R R O R R R V",
                                                                                  constraint_tolerance=1e-06),
                                     hybrid_factory=htf,
                                     online_analysis_interval=10)
            hss.setup(n_states=n_states, temperature=300*unit.kelvin, storage_file=reporter, lambda_protocol=lambda_protocol, endstates=False)
            hss.extend(n_cycles)
    """

    def __init__(self,
                 protein_filename,
                 mutation_chain_id,
                 mutation_residue_id,
                 proposed_residue,
                 phase='complex',
                 conduct_endstate_validation=True,
                 ligand_input=None,
                 ligand_index=0,
                 water_model='tip3p',
                 ionic_strength=0.15 * unit.molar,
                 forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                 barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
                 forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                 periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
                 nonperiodic_forcefield_kwargs=None,
                 small_molecule_forcefields='gaff-2.11',
                 complex_box_dimensions=None,
                 apo_box_dimensions=None,
                 flatten_torsions=False,
                 flatten_exceptions=False,
                 repartitioned_endstate=None,
                 **kwargs):
        """
        Build the apo (and, if a ligand is given, complex) hybrid topology factories for a point mutation.

        NOTE(review): the list/dict/barostat defaults are created once at definition time and are
        shared between calls; this constructor only forwards them to SystemGenerator and does not
        modify them itself. They are kept as-is because ``None`` is a meaningful value for several
        of these arguments.

        arguments
            protein_filename : str
                path to protein (to mutate); .pdb
            mutation_chain_id : str
                name of the chain to be mutated
            mutation_residue_id : str
                residue id to change
            proposed_residue : str
                three letter code of the residue to mutate to
            phase : str, default complex
                if phase == vacuum, then the system will not be solvated;
                else, it will be solvated using ``water_model``
            conduct_endstate_validation : bool, default True
                whether to conduct an endstate validation of the HybridTopologyFactory. If using the
                RepartitionedHybridTopologyFactory (i.e. ``repartitioned_endstate`` is not None),
                endstate validation cannot and will not be conducted.
            ligand_input : str or oechem.OEMol, default None
                ligand of interest (i.e. small molecule or protein): either a path ending in
                '.sdf' or 'pdb', or an OEMol object. If the input is not recognised, a warning
                is logged and the constructor returns early without building any factories.
            ligand_index : int, default 0
                which ligand to use (index into the .sdf file)
            water_model : str, default 'tip3p'
                solvent model to use for solvation
            ionic_strength : float * unit.molar, default 0.15 * unit.molar
                the total concentration of ions (both positive and negative) to add using Modeller.
                This does not include ions that are added to neutralize the system.
                Note that only monovalent ions are currently supported.
            forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
                forcefield files for proteins and solvent
            barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
                barostat to use (``temperature`` is the module-level temperature)
            forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
                forcefield kwargs for system parametrization
            periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
                periodic forcefield kwargs for system parametrization
            nonperiodic_forcefield_kwargs : dict, default None
                non-periodic forcefield kwargs for system parametrization
            small_molecule_forcefields : str, default 'gaff-2.11'
                the forcefield string for small molecule parametrization
            complex_box_dimensions : Vec3, default None
                define box dimensions of complex phase;
                if None, padding is 0.9 nm
            apo_box_dimensions : Vec3, default None
                define box dimensions of apo phase;
                if None, padding is 0.9 nm
            flatten_torsions : bool, default False
                in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
            flatten_exceptions : bool, default False
                in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
                (forwarded to the factory as ``interpolate_old_and_new_14s``)
            repartitioned_endstate : int, default None
                the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
                meaning a vanilla HybridTopologyFactory will be built.
            **kwargs
                accepted but not used by this constructor

        raises
            ValueError
                if ``repartitioned_endstate`` is not None, 0, or 1

        TODO : allow argument for spectator ligands besides the 'ligand_input'
        """
        # First thing to do is load the apo protein to mutate...
        with open(protein_filename, 'r') as protein_pdbfile:
            protein_pdb = app.PDBFile(protein_pdbfile)
        protein_positions = protein_pdb.positions
        protein_md_topology = md.Topology.from_openmm(protein_pdb.topology)
        # Round-trip through mdtraj so the apo topology is built the same way as the complex topology below
        protein_topology = protein_md_topology.to_openmm()
        protein_n_atoms = protein_md_topology.n_atoms

        # Load the ligand, if present
        molecules = []
        if ligand_input:
            ligand_mol = None  # set when the ligand is a small molecule (OEMol)
            ligand_md_topology = None  # stays None if the input is not recognised
            if isinstance(ligand_input, str):
                if ligand_input.endswith('.sdf'):  # small molecule
                    ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                elif ligand_input.endswith('pdb'):  # protein
                    with open(ligand_input, 'r') as ligand_pdbfile:
                        ligand_pdb = app.PDBFile(ligand_pdbfile)
                    ligand_positions = ligand_pdb.positions
                    ligand_md_topology = md.Topology.from_openmm(ligand_pdb.topology)
            elif isinstance(ligand_input, oechem.OEMol):  # oemol object
                ligand_mol = ligand_input

            if ligand_mol is not None:
                # Small molecules must be registered with the SystemGenerator so they can be parameterized
                molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                ligand_positions = extractPositionsFromOEMol(ligand_mol)
                ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)

            if ligand_md_topology is None:
                # Covers both unsupported object types and string paths with an unsupported extension
                _logger.warning('ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
                return

            ligand_n_atoms = ligand_md_topology.n_atoms

            # Now create a complex
            complex_md_topology = protein_md_topology.join(ligand_md_topology)
            complex_topology = complex_md_topology.to_openmm()
            complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
            complex_positions[:protein_n_atoms, :] = protein_positions
            complex_positions[protein_n_atoms:, :] = ligand_positions

        # Now for a system_generator
        self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                                barostat=barostat,
                                                forcefield_kwargs=forcefield_kwargs,
                                                periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                                small_molecule_forcefield=small_molecule_forcefields,
                                                molecules=molecules,
                                                cache=None)

        # Solvate apo and complex...
        apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
        inputs = [apo_input]
        if ligand_input:
            inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

        geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                                   use_sterics=False,
                                                   n_bond_divisions=100,
                                                   n_angle_divisions=180,
                                                   n_torsion_divisions=360,
                                                   verbose=True,
                                                   storage=None,
                                                   bond_softening_constant=1.0,
                                                   angle_softening_constant=1.0,
                                                   neglect_angles = False,
                                                   use_14_nonbondeds = True)

        # The factory class does not depend on the phase, so pick it once and fail clearly on bad input
        if repartitioned_endstate is None:
            factory = HybridTopologyFactory
        elif repartitioned_endstate in [0, 1]:
            factory = RepartitionedHybridTopologyFactory
        else:
            raise ValueError(f"repartitioned_endstate must be None, 0, or 1; got {repartitioned_endstate!r}")

        # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
        proposed_has_ring = proposed_residue in ring_amino_acids

        # Run pipeline...
        htfs = []
        for (top, pos, system) in inputs:
            point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                        system_generator=self.system_generator,
                                                        chain_id=mutation_chain_id,  # Denote the chain id allowed to mutate (it's always a string variable)
                                                        max_point_mutants=1,
                                                        residues_allowed_to_mutate=[mutation_residue_id],  # The residue ids allowed to mutate
                                                        allowed_mutations=[(mutation_residue_id, proposed_residue)],  # The residue ids allowed to mutate with the three-letter code allowed to change
                                                        aggregate=True)  # Always allow aggregation

            topology_proposal = point_mutation_engine.propose(system, top)

            # NOTE(review): the residue is looked up by id only (first match across all chains) -- confirm this is intended
            old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
            validate_bool = not (old_res.name in ring_amino_acids or proposed_has_ring)

            # Both calls are needed: they populate the forward/reverse reduced potentials read below
            new_positions, _ = geometry_engine.propose(topology_proposal, pos, beta, validate_energy_bookkeeping=validate_bool)
            geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta, validate_energy_bookkeeping=validate_bool)

            forward_htf = factory(topology_proposal=topology_proposal,
                                  current_positions=pos,
                                  new_positions=new_positions,
                                  use_dispersion_correction=False,
                                  functions=None,
                                  softcore_alpha=None,
                                  bond_softening_constant=1.0,
                                  angle_softening_constant=1.0,
                                  soften_only_new=False,
                                  neglected_new_angle_terms=[],
                                  neglected_old_angle_terms=[],
                                  softcore_LJ_v2=True,
                                  softcore_electrostatics=True,
                                  softcore_LJ_v2_alpha=0.85,
                                  softcore_electrostatics_alpha=0.3,
                                  softcore_sigma_Q=1.0,
                                  interpolate_old_and_new_14s=flatten_exceptions,
                                  omitted_terms=None,
                                  endstate=repartitioned_endstate,
                                  flatten_torsions=flatten_torsions)

            if not topology_proposal.unique_new_atoms:
                assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
                assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
                # Nothing was added, so there is no valence energy contribution
                # (previously left unassigned, which broke or mis-fed the validation below)
                added_valence_energy = 0.0
            else:
                added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

            if not topology_proposal.unique_old_atoms:
                assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
                assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
                subtracted_valence_energy = 0.0
            else:
                subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

            # Endstate validation is skipped for the repartitioned factory
            if conduct_endstate_validation and repartitioned_endstate is None:
                zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal,
                                                                               forward_htf,
                                                                               added_valence_energy,
                                                                               subtracted_valence_energy,
                                                                               beta=beta,
                                                                               ENERGY_THRESHOLD=ENERGY_THRESHOLD)
                if zero_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
                if one_state_error > ENERGY_THRESHOLD:
                    _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")

            htfs.append(forward_htf)

        # The apo phase is always processed first; the complex phase only exists when a ligand was given
        self.apo_htf = htfs[0]
        self.complex_htf = htfs[1] if ligand_input else None

    def get_complex_htf(self):
        """Return the hybrid topology factory of the complex phase (None if no ligand was provided)."""
        return self.complex_htf

    def get_apo_htf(self):
        """Return the hybrid topology factory of the apo phase."""
        return self.apo_htf

    def _solvate(self,
                 topology,
                 positions,
                 water_model,
                 phase,
                 ionic_strength,
                 box_dimensions=None):
        """
        Generate a solvated topology, positions, and system for a given input topology and positions.
        For generating the system, the SystemGenerator built in the constructor will be used.

        Parameters
        ----------
        topology : app.Topology
            Topology of the system to solvate
        positions : [n, 3] ndarray of Quantity nm
            the positions of the unsolvated system
        water_model : str
            solvent model to use for solvation
        phase : str
            if phase == vacuum, then no solvent is added; else, solvent is added using ``water_model``
        ionic_strength : float * unit.molar
            the total concentration of ions (both positive and negative) to add using Modeller.
            This does not include ions that are added to neutralize the system.
            Note that only monovalent ions are currently supported.
        box_dimensions : Vec3, default None
            box size to solvate into; if not given, a padding of 0.9 nm is used instead.
            When given, it is also set as the unit cell dimensions of the returned topology.

        Returns
        -------
        solvated_topology : app.Topology
            Topology of the system with added waters
        solvated_positions : [n + 3(n_waters), 3] ndarray of Quantity nm
            Solvated positions
        solvated_system : openmm.System
            The parameterized system, containing a barostat if one was specified.
        """
        modeller = app.Modeller(topology, positions)

        # Now we have to add missing atoms
        if phase != 'vacuum':
            _logger.info(f"solvating at {ionic_strength} using {water_model}")
            if not box_dimensions:
                modeller.addSolvent(self.system_generator.forcefield, model=water_model, padding=0.9 * unit.nanometers, ionicStrength=ionic_strength)
            else:
                modeller.addSolvent(self.system_generator.forcefield, model=water_model, boxSize=box_dimensions, ionicStrength=ionic_strength)

        solvated_topology = modeller.getTopology()
        if box_dimensions:
            solvated_topology.setUnitCellDimensions(box_dimensions)
        solvated_positions = modeller.getPositions()

        # Canonicalize the solvated positions: turn tuples into np.array
        solvated_positions = unit.quantity.Quantity(
            value=np.array([list(atom_pos) for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)]),
            unit=unit.nanometers)
        solvated_system = self.system_generator.create_system(solvated_topology)

        return solvated_topology, solvated_positions, solvated_system
def _evaluate_snapshots(simulation, xs):
    """Evaluate every snapshot in ``xs`` with ``simulation`` and pack the results into tensors.

    Parameters
    ----------
    simulation : Simulation
        simulation whose context is used for the evaluation; its positions are overwritten
    xs : iterable of position arrays
        one set of positions per snapshot, passed as-is to ``context.setPositions``

    Returns
    -------
    energies : torch.Tensor
        potential energies in ``esp.units.ENERGY_UNIT``, flattened and given a leading axis of size 1
    derivatives : torch.Tensor
        values returned by ``state.getForces`` in ``esp.units.FORCE_UNIT``,
        stacked over snapshots along axis 1
    """
    energies = []
    derivatives = []
    for x in xs:
        simulation.context.setPositions(x)
        state = simulation.context.getState(
            getEnergy=True,
            getParameters=True,
            getForces=True,
        )
        energies.append(
            state.getPotentialEnergy().value_in_unit(esp.units.ENERGY_UNIT)
        )
        derivatives.append(
            state.getForces(asNumpy=True).value_in_unit(esp.units.FORCE_UNIT)
        )

    # put energies and forces to tensors
    energies = torch.tensor(
        energies,
        dtype=torch.get_default_dtype(),
    ).flatten()[None, :]
    derivatives = torch.tensor(
        np.stack(derivatives, axis=1),
        dtype=torch.get_default_dtype(),
    )
    return energies, derivatives


def get_coulomb_force(
    g,
    forcefield="gaff-1.81",
):
    """Compute the change in energies and forces caused by zeroing all charges.

    The molecule ``g.mol`` is parameterized with ``forcefield``, every snapshot stored in
    ``g.nodes["n1"].data["xyz"]`` is evaluated, then all particle charges and exception
    charge products of the nonbonded force(s) are set to zero and the snapshots are
    evaluated again. The difference (with charges minus without charges) is returned.

    Parameters
    ----------
    g : graph object
        must provide ``g.mol`` and ``g.nodes["n1"].data["xyz"]``
        (presumably an espaloma graph -- verify against caller)
    forcefield : str, default "gaff-1.81"
        small molecule force field name passed to ``SystemGenerator``

    Returns
    -------
    energies : torch.Tensor
        energy difference per snapshot, in ``esp.units.ENERGY_UNIT``
    derivatives : torch.Tensor
        difference of the ``getForces`` values per snapshot, in ``esp.units.FORCE_UNIT``
    """
    # parameterize topology
    topology = g.mol.to_topology().to_openmm()

    generator = SystemGenerator(
        small_molecule_forcefield=forcefield,
        molecules=[g.mol],
        forcefield_kwargs={
            "constraints": None,
            "removeCMMotion": False
        },
    )

    # create openmm system
    system = generator.create_system(topology)

    # switch every nonbonded force to NoCutoff before the context is created
    nonbonded_forces = [
        force for force in system.getForces()
        if "Nonbonded" in force.__class__.__name__
    ]
    for force in nonbonded_forces:
        force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff)

    # a zero-timestep Verlet integrator: no dynamics is run, it is only
    # required to construct the simulation
    integrator = openmm.VerletIntegrator(0.0)

    # create simulation
    simulation = Simulation(topology=topology,
                            system=system,
                            integrator=integrator)

    # the snapshots (computed once and reused for both evaluations)
    xs = (Quantity(
        g.nodes["n1"].data["xyz"].detach().numpy(),
        esp.units.DISTANCE_UNIT,
    ).value_in_unit(unit.nanometer).transpose((1, 0, 2)))

    # reference evaluation with the original charges
    energies, derivatives = _evaluate_snapshots(simulation, xs)

    # zero out all charges; the nonbonded method is already NoCutoff from above
    for force in nonbonded_forces:
        for idx in range(force.getNumParticles()):
            _, sigma, epsilon = force.getParticleParameters(idx)
            force.setParticleParameters(idx, 0.0, sigma, epsilon)

        for idx in range(force.getNumExceptions()):
            idx0, idx1, _, sigma, epsilon = force.getExceptionParameters(idx)
            force.setExceptionParameters(idx, idx0, idx1, 0.0, sigma, epsilon)

        # push the modified parameters into the existing context
        force.updateParametersInContext(simulation.context)

    # evaluation without charges
    new_energies, new_derivatives = _evaluate_snapshots(simulation, xs)

    return energies - new_energies, derivatives - new_derivatives
} system_generator = SystemGenerator( forcefields=[protein_forcefield, solvation_forcefield], barostat=barostat, forcefield_kwargs=forcefield_kwargs, molecules=[ligand], small_molecule_forcefield=small_molecule_forcefield) modeller = app.Modeller(complex_structure.topology, complex_structure.positions) modeller.addSolvent(system_generator.forcefield, model='tip3p', padding=solvent_padding, ionicStrength=ionic_strength) system = system_generator.create_system(modeller.topology) solvated_structure = parmed.openmm.load_topology(modeller.topology, system, xyz=modeller.positions) integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) with open(f'{output_prefix}/integrator.xml', 'w') as f: f.write(openmm.XmlSerializer.serialize(integrator)) # minimize and equilibrate print('Minimizing...') platform = openmm.Platform.getPlatformByName('CUDA') platform.setPropertyDefaultValue('CudaDeviceIndex', '0') context = openmm.Context(system, integrator, platform) context.setPositions(modeller.positions)
def compare_energies(mol_name="naphthalene",
                     ref_mol_name="benzene",
                     atom_expression=['Hybridization'],
                     bond_expression=['Hybridization']):
    """
    Make an atom map where the molecule at either lambda endpoint is identical, and check that the energies are also the same.

    A vacuum system is built for ``ref_mol_name``, a transformation to ``mol_name`` is proposed,
    a HybridTopologyFactory is constructed from the proposal, and ``validate_endstate_energies``
    is run on the Reference platform.

    Parameters
    ----------
    mol_name : str, default "naphthalene"
        IUPAC name of the proposed (new) molecule
    ref_mol_name : str, default "benzene"
        IUPAC name of the reference (old) molecule
    atom_expression : list of str, default ['Hybridization']
        passed to ``generate_expression`` to build the atom expression (not modified here)
    bond_expression : list of str, default ['Hybridization']
        passed to ``generate_expression`` to build the bond expression (not modified here)

    Returns
    -------
    factory : HybridTopologyFactory
        the hybrid factory built from the proposal
    """
    from openmmtools.constants import kB
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies
    from openforcefield.topology import Molecule

    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(forcefields=forcefield_files,
                                       barostat=barostat,
                                       forcefield_kwargs=forcefield_kwargs,
                                       small_molecule_forcefield='gaff-2.11',
                                       molecules=molecules,
                                       cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol], system_generator)
    proposal = proposal_engine.propose(system, topology, atom_expr=atom_expr, bond_expr=bond_expr)

    # Both geometry calls are needed: they populate the forward/reverse reduced potentials read below
    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    geometry_engine.logp_reverse(proposal, new_positions, positions, beta)

    # make a hybrid factory with the appropriate data:
    factory = HybridTopologyFactory(proposal, positions, new_positions)

    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
        # Must be bound under the name used by the validation call below
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    # The returned (zero_state_error, one_state_error) pair is not inspected here;
    # presumably the helper itself enforces ENERGY_THRESHOLD -- TODO confirm.
    validate_endstate_energies(factory._topology_proposal,
                               factory,
                               added_valence_energy,
                               subtracted_valence_energy,
                               beta=beta,
                               ENERGY_THRESHOLD=ENERGY_THRESHOLD,
                               platform=openmm.Platform.getPlatformByName('Reference'))

    return factory