def generate_vacuum_hybrid_topology(mol_name="naphthalene", ref_mol_name="benzene"):
    """
    Build a topology proposal and proposed positions for a small-molecule pair.

    The system for ``mol_name`` is created from its IUPAC name, a proposal
    between the SMILES of ``mol_name`` and ``ref_mol_name`` is requested, and
    the geometry engine is asked for positions of the proposed molecule.

    Parameters
    ----------
    mol_name : str, optional, default="naphthalene"
        IUPAC name of the starting molecule; also used as the residue name
        handed to the proposal engine.
    ref_mol_name : str, optional, default="benzene"
        IUPAC name of the molecule to propose.

    Returns
    -------
    topology_proposal
        Object returned by ``SmallMoleculeSetProposalEngine.propose``.
    pos_old
        Positions returned by ``createSystemFromIUPAC`` for ``mol_name``.
    new_positions
        Positions returned by the geometry engine for the proposal.

    Notes
    -----
    ``oechem``, ``SystemGenerator``, ``FFAllAngleGeometryEngine`` and ``beta``
    are not defined in this function; they are presumably provided by the
    enclosing module -- TODO confirm.
    """
    from topology_proposal import SmallMoleculeSetProposalEngine, TopologyProposal
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # Starting molecule: OEMol, (unused) system, positions and topology.
    old_oemol, _old_system_unused, pos_old, old_topology = createSystemFromIUPAC(mol_name)
    reference_oemol = createOEMolFromIUPAC(ref_mol_name)

    smiles_pair = [
        oechem.OEMolToSmiles(old_oemol),
        oechem.OEMolToSmiles(reference_oemol),
    ]

    # Parametrize the starting topology with GAFF via the template generator.
    ff_gaff_path = get_data_filename("data/gaff.xml")
    gaff_forcefield = app.ForceField(ff_gaff_path, 'tip3p.xml')
    gaff_forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    current_system = gaff_forcefield.createSystem(old_topology)

    # Engines used to propose the new chemical state and its geometry.
    generator_gaff_path = get_data_filename('data/gaff.xml')
    system_generator = SystemGenerator([generator_gaff_path, 'amber99sbildn.xml', 'tip3p.xml'])
    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=mol_name)

    # Generate the topology proposal, then positions for the proposed molecule.
    topology_proposal = proposal_engine.propose(current_system, old_topology)
    new_positions, _ = geometry_engine.propose(topology_proposal, pos_old, beta)

    return topology_proposal, pos_old, new_positions
def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, box_vectors, temperature=300.0*unit.kelvin):
    """
    Run repeated geometry + NCMC proposals from randomly chosen trajectory frames.

    Parameters
    ----------
    top_prop
        Topology proposal; its ``old_system`` and ``new_system`` define the
        two thermodynamic states.
    configuration_traj
        Trajectory providing ``n_frames`` starting configurations.
    use_sterics : bool
        Forwarded to ``FFAllAngleGeometryEngine``.
    ncmc_nsteps : int
        Forwarded to ``NCMCEngine`` as ``nsteps``.
    n_replicates : int
        Number of independent proposals to run.
    box_vectors
        Forwarded to ``traj_frame_to_sampler_state``.
    temperature : Quantity, optional, default=300 K
        Temperature of both thermodynamic states; also defines ``beta``.

    Returns
    -------
    results : np.ndarray, shape (n_replicates, 7)
        Per replicate, in column order: initial logP, reverse geometry logP,
        final logP, NCMC work logP, initial hybrid logP, final hybrid logP,
        forward geometry logP.
    """
    switching_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics)

    old_thermo_state = states.ThermodynamicState(
        top_prop.old_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    new_thermo_state = states.ThermodynamicState(
        top_prop.new_system, temperature=temperature, pressure=1.0*unit.atmosphere)

    candidate_frames = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 7])
    beta = 1.0 / (temperature * constants.kB)

    for replicate in tqdm.trange(n_replicates):
        # Start from a uniformly chosen frame of the equilibrium trajectory.
        chosen_frame = np.random.choice(candidate_frames)
        start_state = traj_frame_to_sampler_state(configuration_traj, chosen_frame,box_vectors)
        initial_logP = - compute_reduced_potential(old_thermo_state, start_state)

        # Forward geometry proposal for the new atoms.
        proposed_positions, logP_geometry_forward = geometry_engine.propose(
            top_prop, start_state.positions, beta)
        proposed_state = states.SamplerState(
            proposed_positions, box_vectors=start_state.box_vectors)

        # NCMC switching between the old and proposed states.
        (end_old_state, end_new_state, logP_work,
         initial_hybrid_logP, final_hybrid_logP) = switching_engine.integrate(
            top_prop, start_state, proposed_state)

        final_logP = - compute_reduced_potential(new_thermo_state, end_new_state)
        logP_reverse = geometry_engine.logp_reverse(
            top_prop, end_new_state.positions, end_old_state.positions, beta)

        row = (initial_logP, logP_reverse, final_logP, logP_work,
               initial_hybrid_logP, final_hybrid_logP, logP_geometry_forward)
        for column, value in enumerate(row):
            results[replicate, column] = value

    return results
def run_rj_proposals(top_prop, configuration_traj, use_sterics, ncmc_nsteps, n_replicates, bond_softening_constant=1.0, angle_softening_constant=1.0):
    """
    Run repeated geometry + NCMC proposals and record logP differences.

    Parameters
    ----------
    top_prop
        Topology proposal; its ``old_system`` and ``new_system`` define the
        two thermodynamic states.
    configuration_traj
        Trajectory providing ``n_frames`` starting configurations.
    use_sterics : bool
        Forwarded to ``FFAllAngleGeometryEngine``.
    ncmc_nsteps : int
        Forwarded to ``NCMCEngine`` as ``nsteps``.
    n_replicates : int
        Number of independent proposals to run.
    bond_softening_constant : float, optional, default=1.0
        Forwarded to both the NCMC engine and the geometry engine.
    angle_softening_constant : float, optional, default=1.0
        Forwarded to both the NCMC engine and the geometry engine.

    Returns
    -------
    results : np.ndarray, shape (n_replicates, 4)
        Per replicate, in column order:
        ``initial_hybrid_logP - initial_logP``,
        ``logP_reverse - logP_geometry_forward``,
        ``final_logP - final_hybrid_logP``,
        ``logP_work``.

    Notes
    -----
    ``temperature`` and ``beta`` are not defined in this function; they are
    presumably module-level names -- TODO confirm.
    """
    softening = dict(bond_softening_constant=bond_softening_constant,
                     angle_softening_constant=angle_softening_constant)
    switching_engine = NCMCEngine(nsteps=ncmc_nsteps, pressure=1.0*unit.atmosphere, **softening)
    geometry_engine = FFAllAngleGeometryEngine(use_sterics=use_sterics, **softening)

    old_thermo_state = states.ThermodynamicState(
        top_prop.old_system, temperature=temperature, pressure=1.0*unit.atmosphere)
    new_thermo_state = states.ThermodynamicState(
        top_prop.new_system, temperature=temperature, pressure=1.0*unit.atmosphere)

    candidate_frames = np.arange(0, configuration_traj.n_frames)
    results = np.zeros([n_replicates, 4])

    for replicate in tqdm.trange(n_replicates):
        # Start from a uniformly chosen frame of the equilibrium trajectory.
        chosen_frame = np.random.choice(candidate_frames)
        start_state = traj_frame_to_sampler_state(configuration_traj, chosen_frame)
        initial_logP = - compute_reduced_potential(old_thermo_state, start_state)

        # Forward geometry proposal for the new atoms.
        proposed_positions, logP_geometry_forward = geometry_engine.propose(
            top_prop, start_state.positions, beta)
        proposed_state = states.SamplerState(
            proposed_positions, box_vectors=start_state.box_vectors)

        # NCMC switching between the old and proposed states.
        (end_old_state, end_new_state, logP_work,
         initial_hybrid_logP, final_hybrid_logP) = switching_engine.integrate(
            top_prop, start_state, proposed_state)

        final_logP = - compute_reduced_potential(new_thermo_state, end_new_state)
        logP_reverse = geometry_engine.logp_reverse(
            top_prop, end_new_state.positions, end_old_state.positions, beta)

        results[replicate, 0] = initial_hybrid_logP - initial_logP
        results[replicate, 1] = logP_reverse - logP_geometry_forward
        results[replicate, 2] = final_logP - final_hybrid_logP
        results[replicate, 3] = logP_work

    return results
def generate_top_pos_sys(topology, old_oemol, new_oemol, system, positions, system_generator, map_strength):
    """
    Generate a topology proposal, a forward geometry proposal and a hybrid factory.

    Parameters
    ----------
    topology
        Topology of the current system, passed to the proposal engine.
    old_oemol
        Current molecule, passed to the proposal engine.
    new_oemol
        Proposed molecule, passed to the proposal engine.
    system
        Current system, passed to the proposal engine.
    positions
        Current positions; used for the geometry proposal and as
        ``current_positions`` of the hybrid factory.
    system_generator
        System generator handed to ``SmallMoleculeSetProposalEngine``.
    map_strength
        Forwarded to ``SmallMoleculeSetProposalEngine`` as ``map_strength``.

    Returns
    -------
    topology_proposal
        Result of ``proposal_engine.propose``.
    forward_new_positions
        Positions returned by the geometry engine.
    forward_htf : HybridTopologyFactory
        Hybrid factory built from the proposal and both sets of positions.

    Notes
    -----
    ``beta`` is not defined in this function; it is presumably a module-level
    name -- TODO confirm.
    """
    # create the proposal engine (the SMILES list is fixed to butanol/butanethiol)
    print("generating point mutation engine")
    proposal_engine = SmallMoleculeSetProposalEngine(
        ['CCCCO', 'CCCCS'],
        system_generator,
        map_strength=map_strength,
        residue_name='MOL')

    # create a geometry engine
    print("generating geometry engine")
    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=100,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=False,
        use_14_nonbondeds=False)

    # create a topology proposal
    print("making topology proposal")
    topology_proposal = proposal_engine.propose(system, topology, old_oemol, new_oemol)

    # make a forward geometry proposal (the log-probability is not needed here)
    print("making geometry proposal")
    forward_new_positions, _ = geometry_engine.propose(
        topology_proposal, positions, beta)

    # create a hybrid topology factory
    # The original statement was a bare string with no print(), so this
    # progress message was silently dropped.
    print("making forward hybridtopologyfactory")
    forward_htf = HybridTopologyFactory(
        topology_proposal=topology_proposal,
        current_positions=positions,
        new_positions=forward_new_positions,
        use_dispersion_correction=False,
        functions=None,
        softcore_alpha=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        soften_only_new=False,
        neglected_new_angle_terms=[],
        neglected_old_angle_terms=[],
        softcore_LJ_v2=True,
        softcore_electrostatics=True,
        softcore_LJ_v2_alpha=0.85,
        softcore_electrostatics_alpha=0.3,
        softcore_sigma_Q=1.0,
        interpolate_old_and_new_14s=False,
        omitted_terms=None)

    return topology_proposal, forward_new_positions, forward_htf
def run_rj_simple_system(self, configurations_initial, topology_proposal, n_replicates):
    """
    Run forward and reverse geometry proposals over a set of configurations.

    Arguments
    ---------
    configurations_initial: openmm.Quantity
        frames of the initial system; frame ``i`` is read as
        ``configurations_initial[i, :, :]`` for replicate ``i``
    topology_proposal: perses topology proposal
        proposal describing the old and new systems
    n_replicates: int
        number of replicates to run

    Returns
    -------
    logPs: numpy ndarray
        shape = (n_replicates, 4); column 1 holds the forward log proposal
        probability and column 2 the reverse one.  Columns 0 and 3 hold
        reduced potentials read from the geometry engine; which attributes
        are used depends on whether the proposal has unique old and/or
        unique new atoms.
    final_positions: list
        proposed positions, one entry per replicate

    Notes
    -----
    ``beta`` and ``np`` are not defined in this method; they are presumably
    module-level names -- TODO confirm.
    """
    import tqdm
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    final_positions = []
    logPs = np.zeros([n_replicates, 4])

    engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=1000,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles = True)

    for replicate in tqdm.trange(n_replicates):
        old_positions = configurations_initial[replicate, :, :]
        new_positions, logp_forward = engine.propose(topology_proposal, old_positions, beta)
        logp_backward = engine.logp_reverse(topology_proposal, new_positions, old_positions, beta)
        # Result is not used below; the call is kept as in the original.
        _initial_rp = self.compute_rp(topology_proposal.old_system, old_positions)

        if not topology_proposal.unique_old_atoms:
            # the geometry engine doesn't run the backward proposal
            logPs[replicate, 0] = engine.forward_atoms_with_positions_reduced_potential
            logPs[replicate, 3] = engine.forward_final_context_reduced_potential
        elif not topology_proposal.unique_new_atoms:
            # the geometry engine doesn't run forward
            logPs[replicate, 0] = engine.reverse_final_context_reduced_potential
            logPs[replicate, 3] = engine.reverse_atoms_with_positions_reduced_potential
        else:
            logPs[replicate, 0] = engine.reverse_final_context_reduced_potential
            logPs[replicate, 3] = engine.forward_final_context_reduced_potential

        logPs[replicate, 1] = logp_forward
        logPs[replicate, 2] = logp_backward

        # Result is not used below; the call is kept as in the original.
        final_rp = self.compute_rp(topology_proposal.new_system, new_positions)
        final_positions.append(new_positions)

    return logPs, final_positions
def __init__(self, sampler, topology, state_key, proposal_engine, log_weights=None, scheme='ncmc-geometry-ncmc', options=None, platform=None):
    r"""
    Create an expanded ensemble sampler.

    p(x,k) \propto \exp[-u_k(x) + g_k]

    where g_k is the log weight.

    Parameters
    ----------
    sampler : MCMCSampler
        MCMCSampler initialized with current SamplerState
    topology : simtk.openmm.app.Topology
        Current topology
    state_key : hashable object
        Current chemical state
    proposal_engine : ProposalEngine
        ProposalEngine to use for proposing new chemical states
    log_weights : dict of object : float, optional, default=None
        Log weights to use for expanded ensemble biases.
        If `None`, an empty dict is used.
    scheme : str, optional, default='ncmc-geometry-ncmc'
        Update scheme. One of ['ncmc-geometry-ncmc', 'geometry-ncmc-geometry', 'geometry-ncmc']
    options : dict, optional, default=None
        Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC.
        Missing keys (or `None`) are treated as `None` values. The dict is only read, never modified.
    platform : simtk.openmm.Platform, optional, default=None
        Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.

    """
    # Keep copies of initializing arguments.
    # TODO: Make deep copies?
    self.sampler = sampler
    self.topology = topology
    self.state_key = state_key
    self.proposal_engine = proposal_engine
    self.log_weights = dict() if log_weights is None else log_weights
    self.scheme = scheme

    # Initialize
    self.iteration = 0

    # Read the NCMC options without touching the caller's dict.  The previous
    # implementation filled missing keys into `options` in place, which also
    # mutated the shared default argument across instances.
    provided_options = options if options is not None else {}
    ncmc_options = {
        option_name: (provided_options[option_name] if option_name in provided_options else None)
        for option_name in ('timestep', 'nsteps', 'functions')
    }

    from perses.annihilation.ncmc_switching import NCMCEngine
    self.ncmc_engine = NCMCEngine(
        temperature=self.sampler.thermodynamic_state.temperature,
        timestep=ncmc_options['timestep'],
        nsteps=ncmc_options['nsteps'],
        functions=ncmc_options['functions'],
        platform=platform)

    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    self.geometry_engine = FFAllAngleGeometryEngine({'data': 0})

    self.naccepted = 0
    self.nrejected = 0
    self.number_of_state_visits = dict()
    self.verbose = False
    self.pdbfile = None  # if not None, write PDB file
    self.geometry_pdbfile = None  # if not None, write PDB file of geometry proposals
    self.accept_everything = False  # if True, will accept anything that doesn't lead to NaNs
def run_proposals(proposal_list):
    """
    Run geometry proposals and report any that yield NaN energies or NaN logp.

    For each entry a geometry proposal is made, the potential energy of the
    proposed positions is evaluated on the Reference platform, and a message
    is printed if the energy or the proposal log-probability is NaN.

    Parameters
    ----------
    proposal_list : list of namedtuple
        Each entry must expose ``topology_proposal`` and ``current_positions``.

    Notes
    -----
    ``beta``, ``openmm``, ``unit`` and ``np`` are not defined in this
    function; they are presumably module-level names -- TODO confirm.
    """
    import logging
    logging.basicConfig(level=logging.DEBUG)
    import time
    start_time = time.time()
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    engine = FFAllAngleGeometryEngine()

    for entry in proposal_list:
        # Timestamp taken per proposal; only useful when timing output is enabled.
        current_time = time.time()

        proposed_topology = entry.topology_proposal
        proposed_positions, logp = engine.propose(
            proposed_topology, entry.current_positions, beta)

        # Evaluate the energy of the proposed geometry in the new system.
        integrator = openmm.VerletIntegrator(1*unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName("Reference")
        context = openmm.Context(proposed_topology.new_system, integrator, platform)
        context.setPositions(proposed_positions)
        energy = context.getState(getEnergy=True).getPotentialEnergy()
        unitless_energy = energy / energy.unit

        if np.isnan(unitless_energy):
            print("NanN potential!")
        if np.isnan(logp):
            print("logp is nan")

        # Drop the context and integrator before the next proposal.
        del context, integrator
def compare_energies(mol_name="naphthalene", ref_mol_name="benzene", atom_expression=['Hybridization'], bond_expression=['Hybridization']):
    """
    Build a hybrid topology factory for a molecule pair and validate its endstate energies.

    Make an atom map where the molecule at either lambda endpoint is identical,
    and check that the energies are also the same.

    Parameters
    ----------
    mol_name : str, optional, default="naphthalene"
        IUPAC name of the proposed molecule.
    ref_mol_name : str, optional, default="benzene"
        IUPAC name of the reference (current) molecule; its topology, system
        and positions are used as the starting state.
    atom_expression : list of str, optional, default=['Hybridization']
        Passed to ``generate_expression`` to build the atom expression.
    bond_expression : list of str, optional, default=['Hybridization']
        Passed to ``generate_expression`` to build the bond expression.

    Returns
    -------
    factory : HybridTopologyFactory
        The hybrid factory built from the proposal and both sets of positions.

    Notes
    -----
    ``app`` and ``unit`` are not imported in this function; they are
    presumably module-level names -- TODO confirm.
    """
    from openmmtools.constants import kB
    from openmmtools import alchemy, states
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.annihilation.relative import HybridTopologyFactory
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import simtk.openmm as openmm
    from perses.utils.openeye import iupac_to_oemol, extractPositionsFromOEMol, generate_conformers
    from perses.utils.openeye import generate_expression
    from openmmforcefields.generators import SystemGenerator
    from openmoltools.forcefield_generators import generateTopologyFromOEMol
    from perses.tests.utils import validate_endstate_energies

    temperature = 300 * unit.kelvin
    # Compute kT and inverse temperature.
    kT = kB * temperature
    beta = 1.0 / kT
    ENERGY_THRESHOLD = 1e-6

    atom_expr = generate_expression(atom_expression)
    bond_expr = generate_expression(bond_expression)

    mol = iupac_to_oemol(mol_name)
    mol = generate_conformers(mol, max_confs=1)

    refmol = iupac_to_oemol(ref_mol_name)
    refmol = generate_conformers(refmol, max_confs=1)

    from openforcefield.topology import Molecule
    molecules = [Molecule.from_openeye(oemol) for oemol in [refmol, mol]]
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus
    }

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield='gaff-2.11',
        molecules=molecules,
        cache=None)

    topology = generateTopologyFromOEMol(refmol)
    system = system_generator.create_system(topology)
    positions = extractPositionsFromOEMol(refmol)

    proposal_engine = SmallMoleculeSetProposalEngine([refmol, mol], system_generator)
    proposal = proposal_engine.propose(
        system, topology, atom_expr=atom_expr, bond_expr=bond_expr)

    geometry_engine = FFAllAngleGeometryEngine()
    new_positions, _ = geometry_engine.propose(
        proposal, positions, beta=beta, validate_energy_bookkeeping=False)
    # The return value is unused; the reverse_* attributes read below are
    # presumably populated by this call.
    _ = geometry_engine.logp_reverse(proposal, new_positions, positions, beta)

    # make a hybrid topology factory with the appropriate data:
    factory = HybridTopologyFactory(proposal, positions, new_positions)

    # The assertion messages previously referenced `self._geometry_engine`,
    # which does not exist in this function and would raise NameError instead
    # of the intended AssertionError.
    if not proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
        # Previously assigned to `vacuum_added_valence_energy`, leaving
        # `added_valence_energy` unbound for the validation call below.
        added_valence_energy = 0.0
    else:
        added_valence_energy = (
            geometry_engine.forward_final_context_reduced_potential
            - geometry_engine.forward_atoms_with_positions_reduced_potential)

    if not proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = (
            geometry_engine.reverse_final_context_reduced_potential
            - geometry_engine.reverse_atoms_with_positions_reduced_potential)

    # The returned errors are not inspected here.
    zero_state_error, one_state_error = validate_endstate_energies(
        factory._topology_proposal,
        factory,
        added_valence_energy,
        subtracted_valence_energy,
        beta=beta,
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))

    return factory
def __init__(
        self,
        receptor_filename,
        ligand_filename,
        mutation_chain_id,
        mutation_residue_id,
        proposed_residue,
        phase='complex',
        conduct_endstate_validation=False,
        ligand_index=0,
        forcefield_files=[
            'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'
        ],
        barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                           temperature, 50),
        forcefield_kwargs={
            'removeCMMotion': False,
            'ewaldErrorTolerance': 1e-4,
            'nonbondedMethod': app.PME,
            'constraints': app.HBonds,
            'hydrogenMass': 4 * unit.amus
        },
        small_molecule_forcefields='gaff-2.11',
        **kwargs):
    """
    Build hybrid topology factories for a point mutation in the complex and apo receptor.

    arguments
        receptor_filename : str
            path to receptor; .pdb
        ligand_filename : str
            path to ligand of interest; .sdf or .pdb
        mutation_chain_id : str
            name of the chain to be mutated
        mutation_residue_id : str
            residue id to change
        proposed_residue : str
            three letter code of the residue to mutate to
        phase : str, default complex
            forwarded to ``self._solvate`` for both the complex and the apo
            receptor; per the original documentation, 'vacuum' means no
            water is added, anything else solvates with tip3p
        conduct_endstate_validation : bool, default False
            whether to conduct an endstate validation of the hybrid topology factory
        ligand_index : int, default 0
            which ligand to use
        forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield files for proteins and solvent
        barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
            barostat to use; the default is built once, when the function is
            defined, from the module-level ``temperature``
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'nonbondedMethod': app.PME, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs for system parametrization
        small_molecule_forcefields : str, default 'gaff-2.11'
            the forcefield string for small molecule parametrization

    attributes set
        system_generator, complex_htf, apo_htf

    TODO : allow argument for separate apo structure if it exists separately
           allow argument for specator ligands besides the 'ligand_filename'

    NOTE(review): ``beta`` and ``ENERGY_THRESHOLD`` are not defined in this
    method; they are presumably module-level names -- confirm.
    """
    from openforcefield.topology import Molecule
    from openmmforcefields.generators import SystemGenerator

    # first thing to do is make a complex and apo...
    # `with` guarantees the handle is closed even if PDB parsing raises.
    with open(receptor_filename, 'r') as pdbfile:
        pdb = app.PDBFile(pdbfile)
    receptor_positions = pdb.positions
    receptor_md_topology = md.Topology.from_openmm(pdb.topology)
    # round-trip through mdtraj, as in the original implementation
    receptor_topology = receptor_md_topology.to_openmm()
    receptor_n_atoms = receptor_md_topology.n_atoms

    molecules = []
    ligand_mol = createOEMolFromSDF(ligand_filename, index=ligand_index)
    ligand_mol = generate_unique_atom_names(ligand_mol)
    molecules.append(
        Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
    ligand_positions = extractPositionsFromOEMol(ligand_mol)
    ligand_topology = forcefield_generators.generateTopologyFromOEMol(
        ligand_mol)
    ligand_md_topology = md.Topology.from_openmm(ligand_topology)
    ligand_n_atoms = ligand_md_topology.n_atoms

    # now create a complex: receptor atoms first, ligand atoms appended
    complex_md_topology = receptor_md_topology.join(ligand_md_topology)
    complex_topology = complex_md_topology.to_openmm()
    complex_positions = unit.Quantity(
        np.zeros([receptor_n_atoms + ligand_n_atoms, 3]),
        unit=unit.nanometers)
    complex_positions[:receptor_n_atoms, :] = receptor_positions
    complex_positions[receptor_n_atoms:, :] = ligand_positions

    # now for a system_generator
    self.system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefields,
        molecules=molecules,
        cache=None)

    # create complex and apo inputs...
    complex_topology, complex_positions, complex_system = self._solvate(
        complex_topology, complex_positions, 'tip3p', phase=phase)
    # The apo call previously passed the literal string 'phase' rather than
    # the `phase` argument, so the requested phase never reached it.
    apo_topology, apo_positions, apo_system = self._solvate(
        receptor_topology, receptor_positions, 'tip3p', phase=phase)

    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=100,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=False,
        use_14_nonbondeds=True)

    # run pipeline...
    htfs = []
    phase_inputs = zip([complex_topology, apo_topology],
                       [complex_positions, apo_positions],
                       [complex_system, apo_system])
    # `phase_system` rather than `sys`, to avoid shadowing the stdlib module name
    for top, pos, phase_system in phase_inputs:
        point_mutation_engine = PointMutationEngine(
            wildtype_topology=top,
            system_generator=self.system_generator,
            # the chain id allowed to mutate (it's always a string variable)
            chain_id=mutation_chain_id,
            max_point_mutants=1,
            # the residue ids allowed to mutate
            residues_allowed_to_mutate=[mutation_residue_id],
            # the residue ids allowed to mutate with the three-letter code allowed to change
            allowed_mutations=[(mutation_residue_id, proposed_residue)],
            # always allow aggregation
            aggregate=True)
        topology_proposal = point_mutation_engine.propose(phase_system, top)

        new_positions, logp_proposal = geometry_engine.propose(
            topology_proposal, pos, beta)
        # The return value is unused; the reverse_* attributes read below
        # are presumably populated by this call.
        logp_reverse = geometry_engine.logp_reverse(
            topology_proposal, new_positions, pos, beta)

        forward_htf = HybridTopologyFactory(
            topology_proposal=topology_proposal,
            current_positions=pos,
            new_positions=new_positions,
            use_dispersion_correction=False,
            functions=None,
            softcore_alpha=None,
            bond_softening_constant=1.0,
            angle_softening_constant=1.0,
            soften_only_new=False,
            neglected_new_angle_terms=[],
            neglected_old_angle_terms=[],
            softcore_LJ_v2=True,
            softcore_electrostatics=True,
            softcore_LJ_v2_alpha=0.85,
            softcore_electrostatics_alpha=0.3,
            softcore_sigma_Q=1.0,
            interpolate_old_and_new_14s=False,
            omitted_terms=None)

        # The assertion messages previously referenced
        # `self._geometry_engine`, which this method never sets; the local
        # `geometry_engine` is the object being checked.
        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.forward_atoms_with_positions_reduced_potential})"
            # Previously assigned to `vacuum_added_valence_energy`, leaving
            # `added_valence_energy` unbound for the validation call below.
            added_valence_energy = 0.0
        else:
            added_valence_energy = (
                geometry_engine.forward_final_context_reduced_potential
                - geometry_engine.forward_atoms_with_positions_reduced_potential)

        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. { geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = (
                geometry_engine.reverse_final_context_reduced_potential
                - geometry_engine.reverse_atoms_with_positions_reduced_potential)

        if conduct_endstate_validation:
            # The returned errors are not inspected here.
            zero_state_error, one_state_error = validate_endstate_energies(
                forward_htf._topology_proposal,
                forward_htf,
                added_valence_energy,
                subtracted_valence_energy,
                beta=beta,
                ENERGY_THRESHOLD=ENERGY_THRESHOLD)

        htfs.append(forward_htf)

    # zip order above is (complex, apo)
    self.complex_htf = htfs[0]
    self.apo_htf = htfs[1]
def generate_topology_proposal(old_mol_iupac="pentane", new_mol_iupac="butane"):
    """
    Utility function to generate a topology proposal for tests.

    Parameters
    ----------
    old_mol_iupac : str, optional
        IUPAC name of the old molecule, default pentane
    new_mol_iupac : str, optional
        IUPAC name of the new molecule, default butane

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal.TopologyProposal
        the topology proposal corresponding to the given transformation
    old_positions
        positions of the old molecule as returned by ``createSystemFromIUPAC``
    new_positions_geometry
        positions of the new molecule as proposed by the geometry engine

    Notes
    -----
    ``unit`` is not imported in this function; it is presumably a
    module-level name -- TODO confirm.
    NOTE(review): the residue name given to the proposal engine is the fixed
    string "pentane", independent of ``old_mol_iupac`` -- confirm intended.
    """
    from perses.rjmc.topology_proposal import TwoMoleculeSetProposalEngine, SystemGenerator
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    from perses.tests.utils import createSystemFromIUPAC, get_data_filename
    import openmoltools.forcefield_generators as forcefield_generators
    from io import StringIO
    from openmmtools.constants import kB

    # Inverse temperature at 300 K.
    temperature = 300.0 * unit.kelvin
    kT = kB * temperature
    beta = 1.0 / kT

    ffxml_paths = [get_data_filename("data/gaff.xml"), 'amber99sbildn.xml']

    # Generate systems and topologies; only the OEMol of the new molecule is used.
    old_oemol, old_system, old_positions, old_topology = createSystemFromIUPAC(
        old_mol_iupac)
    new_oemol, _new_system, _new_positions, _new_topology = createSystemFromIUPAC(
        new_mol_iupac)

    # Both molecules get the same title.
    for oemol in (old_oemol, new_oemol):
        oemol.SetTitle("MOL")

    # Build the engines for the chemical-state and geometry proposals.
    system_generator = SystemGenerator(
        ffxml_paths, forcefield_kwargs={'removeCMMotion': False})
    proposal_engine = TwoMoleculeSetProposalEngine(
        old_oemol, new_oemol, system_generator, residue_name="pentane")
    geometry_engine = FFAllAngleGeometryEngine()

    # Create the TopologyProposal and positions for the new molecule.
    topology_proposal = proposal_engine.propose(old_system, old_topology)
    new_positions_geometry, _ = geometry_engine.propose(
        topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions_geometry
def HybridTopologyFactory_energies(
        current_mol='toluene',
        proposed_mol='1,2-bis(trifluoromethyl) benzene'):
    """
    Check hybrid endstate energies against the nonalchemical endstates.

    Test whether the difference in the nonalchemical zero and alchemical zero
    states is the forward valence energy.  Also test for the one states.

    Parameters
    ----------
    current_mol : str, optional, default='toluene'
        Name of the current molecule, passed to
        ``generate_solvated_hybrid_test_topology``.
    proposed_mol : str, optional, default='1,2-bis(trifluoromethyl) benzene'
        Name of the proposed molecule.

    Raises
    ------
    AssertionError
        If either endstate discrepancy is not below ``ENERGY_THRESHOLD``.

    Notes
    -----
    ``beta``, ``ENERGY_THRESHOLD``, ``FFAllAngleGeometryEngine``,
    ``HybridTopologyFactory`` and ``SamplerState`` are not defined in this
    function; they are presumably module-level names -- TODO confirm.
    """
    from perses.tests.utils import generate_solvated_hybrid_test_topology, generate_endpoint_thermodynamic_states
    import openmmtools.cache as cache

    # Just test the solvated system
    top_proposal, old_positions, _ = generate_solvated_hybrid_test_topology(
        current_mol_name=current_mol, proposed_mol_name=proposed_mol)

    # remove the dispersion correction (force index 3 of each system)
    for endpoint_system in (top_proposal._old_system, top_proposal._new_system):
        endpoint_system.getForce(3).setUseDispersionCorrection(False)

    # run geometry engine to generate old and new positions
    engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=100,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=False)
    new_positions, _lp = engine.propose(top_proposal, old_positions, beta)
    _lp_rev = engine.logp_reverse(top_proposal, new_positions, old_positions, beta)

    # make the hybrid system
    HTF = HybridTopologyFactory(top_proposal, old_positions, new_positions)
    hybrid_system = HTF.hybrid_system

    nonalch_zero, nonalch_one, alch_zero, alch_one = generate_endpoint_thermodynamic_states(
        hybrid_system, top_proposal)

    # (state, positions, box vectors) in evaluation order
    endpoints = [
        (nonalch_zero, old_positions,
         top_proposal._old_system.getDefaultPeriodicBoxVectors()),
        (alch_zero, HTF._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        (alch_one, HTF._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        (nonalch_one, new_positions,
         top_proposal._new_system.getDefaultPeriodicBoxVectors()),
    ]

    # compute reduced energies of each endpoint
    reduced_potentials = []
    for thermo_state, endpoint_positions, endpoint_box in endpoints:
        context, integrator = cache.global_context_cache.get_context(thermo_state)
        sampler_state = SamplerState(positions=endpoint_positions, box_vectors=endpoint_box)
        sampler_state.apply_to_context(context)
        reduced_potentials.append(thermo_state.reduced_potential(context))

    nonalch_zero_rp, alch_zero_rp, alch_one_rp, nonalch_one_rp = reduced_potentials[:4]

    # valence energy definitions
    forward_added_valence_energy = (
        engine.forward_final_context_reduced_potential
        - engine.forward_atoms_with_positions_reduced_potential)
    reverse_subtracted_valence_energy = (
        engine.reverse_final_context_reduced_potential
        - engine.reverse_atoms_with_positions_reduced_potential)

    if not (abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy) < ENERGY_THRESHOLD):
        raise AssertionError(
            f"The zero state alchemical and nonalchemical energy absolute difference {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}.")
    if not (abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy) < ENERGY_THRESHOLD):
        raise AssertionError(
            f"The one state alchemical and nonalchemical energy absolute difference {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)} is greater than the threshold of {ENERGY_THRESHOLD}.")

    print(
        f"Abs difference in zero alchemical vs nonalchemical systems: {abs(nonalch_zero_rp - alch_zero_rp + forward_added_valence_energy)}"
    )
    print(
        f"Abs difference in one alchemical vs nonalchemical systems: {abs(nonalch_one_rp - alch_one_rp + reverse_subtracted_valence_energy)}"
    )
def __init__(self,
             protein_pdb_filename,
             ligand_file,
             old_ligand_index,
             new_ligand_index,
             forcefield_files,
             pressure=1.0 * unit.atmosphere,
             temperature=300.0 * unit.kelvin,
             solvent_padding=9.0 * unit.angstroms):
    """
    Initialize a NonequilibriumFEPSetup object

    Parameters
    ----------
    protein_pdb_filename : str
        The name of the protein pdb file
    ligand_file : str
        the name of the ligand file (any openeye supported format)
    old_ligand_index : int
        Index, within ``ligand_file``, of the ligand used as the old state
    new_ligand_index : int
        Index, within ``ligand_file``, of the ligand used as the new state
    forcefield_files : list of str
        The list of ffxml files that contain the forcefields that will be used
    pressure : Quantity, units of pressure
        Pressure to use in the barostat; if None, no barostat is created
    temperature : Quantity, units of temperature
        Temperature used for the barostat and for the inverse temperature
        passed to the geometry engine
    solvent_padding : Quantity, units of length
        The amount of padding to use when adding solvent

    Notes
    -----
    ``kB``, ``StringIO``, ``md``, ``np``, ``oechem``, ``app`` and the perses
    helpers used here are not imported in this method; they are presumably
    module-level names -- TODO confirm.
    """
    self._protein_pdb_filename = protein_pdb_filename
    self._pressure = pressure
    self._temperature = temperature
    self._barostat_period = 50
    self._padding = solvent_padding

    self._ligand_file = ligand_file
    self._old_ligand_index = old_ligand_index
    self._new_ligand_index = new_ligand_index

    self._old_ligand_oemol = self.load_sdf(self._ligand_file,
                                           index=self._old_ligand_index)
    self._new_ligand_oemol = self.load_sdf(self._ligand_file,
                                           index=self._new_ligand_index)

    self._old_ligand_positions = extractPositionsFromOEMOL(
        self._old_ligand_oemol)

    # Force field XML for both ligands is generated before the titles are
    # changed, as in the original statement order.
    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [self._old_ligand_oemol, self._new_ligand_oemol])

    self._old_ligand_oemol.SetTitle("MOL")
    self._new_ligand_oemol.SetTitle("MOL")

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    self._new_ligand_smiles = oechem.OECreateSmiString(
        self._new_ligand_oemol, smiles_flags)
    self._old_ligand_smiles = oechem.OECreateSmiString(
        self._old_ligand_oemol, smiles_flags)

    print(self._new_ligand_smiles)
    print(self._old_ligand_smiles)

    self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
        self._old_ligand_oemol)
    self._old_ligand_md_topology = md.Topology.from_openmm(
        self._old_ligand_topology)
    self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
        self._new_ligand_oemol)
    self._new_ligand_md_topology = md.Topology.from_openmm(
        self._new_ligand_topology)
    # Misspelled historical attribute name, kept as an alias so any existing
    # reader of it keeps working.
    self._new_liands_md_topology = self._new_ligand_md_topology

    # `with` guarantees the handle is closed even if PDB parsing raises.
    with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
        pdb_file = app.PDBFile(protein_pdbfile)

    self._protein_topology_old = pdb_file.topology
    self._protein_md_topology_old = md.Topology.from_openmm(
        self._protein_topology_old)
    self._protein_positions_old = pdb_file.positions

    self._forcefield = app.ForceField(*forcefield_files)
    self._forcefield.loadFile(StringIO(ffxml))

    print("Generated forcefield")

    # Complex = protein atoms first, old-ligand atoms appended.
    self._complex_md_topology_old = self._protein_md_topology_old.join(
        self._old_ligand_md_topology)
    self._complex_topology_old = self._complex_md_topology_old.to_openmm()

    n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
    n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

    self._complex_positions_old = unit.Quantity(
        np.zeros([n_atoms_complex_old, 3]), unit=unit.nanometers)
    self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
    self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

    if pressure is not None:
        barostat = openmm.MonteCarloBarostat(self._pressure,
                                             self._temperature,
                                             self._barostat_period)
        self._system_generator = SystemGenerator(
            forcefield_files,
            barostat=barostat,
            forcefield_kwargs={'nonbondedMethod': app.PME})
    else:
        self._system_generator = SystemGenerator(forcefield_files)

    self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
        self._old_ligand_oemol,
        self._new_ligand_oemol,
        self._system_generator,
        residue_name="MOL")
    self._geometry_engine = FFAllAngleGeometryEngine()

    (self._complex_topology_old_solvated,
     self._complex_positions_old_solvated,
     self._complex_system_old_solvated) = self._solvate_system(
        self._complex_topology_old, self._complex_positions_old)
    self._complex_md_topology_old_solvated = md.Topology.from_openmm(
        self._complex_topology_old_solvated)
    print(self._complex_proposal_engine._smiles_list)

    beta = 1.0 / (kB * temperature)

    # Complex phase: topology proposal, then positions for the new ligand.
    self._complex_topology_proposal = self._complex_proposal_engine.propose(
        self._complex_system_old_solvated,
        self._complex_topology_old_solvated)
    self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
        self._complex_topology_proposal,
        self._complex_positions_old_solvated, beta)

    # now generate the equivalent objects for the solvent phase. First, generate the ligand-only topologies and atom map
    self._solvent_topology_proposal, self._old_solvent_positions = self._generate_ligand_only_topologies(
        self._complex_positions_old_solvated,
        self._complex_positions_new_solvated)
    self._new_solvent_positions, _ = self._geometry_engine.propose(
        self._solvent_topology_proposal, self._old_solvent_positions, beta)
class NonequilibriumFEPSetup(object):
    """
    This class is a helper class for nonequilibrium FEP. It generates the input objects that are necessary for the two
    legs of a relative FEP calculation. For each leg, that is a TopologyProposal, old_positions, and new_positions.
    Importantly, it ensures that the atom maps in the solvent and complex phases match correctly.
    """

    def __init__(self,
                 protein_pdb_filename,
                 ligand_file,
                 old_ligand_index,
                 new_ligand_index,
                 forcefield_files,
                 pressure=1.0 * unit.atmosphere,
                 temperature=300.0 * unit.kelvin,
                 solvent_padding=9.0 * unit.angstroms):
        """
        Initialize a NonequilibriumFEPSetup object

        Parameters
        ----------
        protein_pdb_filename : str
            The name of the protein pdb file
        ligand_file : str
            the name of the ligand file (any openeye supported format)
        old_ligand_index : int
            Index, within ``ligand_file``, of the molecule used as the old ligand
        new_ligand_index : int
            Index, within ``ligand_file``, of the molecule used as the new ligand
        forcefield_files : list of str
            The list of ffxml files that contain the forcefields that will be used
        pressure : Quantity, units of pressure
            Pressure to use in the barostat. If None, no barostat is created and
            the system generator is built with its default options.
        temperature : Quantity, units of temperature
            Temperature passed to the barostat and used to compute beta for the
            geometry proposals
        solvent_padding : Quantity, units of length
            The amount of padding to use when adding solvent
        """
        self._protein_pdb_filename = protein_pdb_filename
        self._pressure = pressure
        self._temperature = temperature
        self._barostat_period = 50
        self._padding = solvent_padding

        self._ligand_file = ligand_file
        self._old_ligand_index = old_ligand_index
        self._new_ligand_index = new_ligand_index

        self._old_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._old_ligand_index)
        self._new_ligand_oemol = self.load_sdf(self._ligand_file,
                                               index=self._new_ligand_index)

        self._old_ligand_positions = extractPositionsFromOEMOL(
            self._old_ligand_oemol)

        # The ffxml is generated before the titles are overwritten below.
        ffxml = forcefield_generators.generateForceFieldFromMolecules(
            [self._old_ligand_oemol, self._new_ligand_oemol])

        self._old_ligand_oemol.SetTitle("MOL")
        self._new_ligand_oemol.SetTitle("MOL")

        smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
        self._new_ligand_smiles = oechem.OECreateSmiString(
            self._new_ligand_oemol, smiles_flags)
        self._old_ligand_smiles = oechem.OECreateSmiString(
            self._old_ligand_oemol, smiles_flags)

        print(self._new_ligand_smiles)
        print(self._old_ligand_smiles)

        self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._old_ligand_oemol)
        self._old_ligand_md_topology = md.Topology.from_openmm(
            self._old_ligand_topology)
        self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
            self._new_ligand_oemol)
        self._new_ligand_md_topology = md.Topology.from_openmm(
            self._new_ligand_topology)
        # Misspelled legacy attribute name, kept so existing readers keep working.
        self._new_liands_md_topology = self._new_ligand_md_topology

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
            pdb_file = app.PDBFile(protein_pdbfile)

        self._protein_topology_old = pdb_file.topology
        self._protein_md_topology_old = md.Topology.from_openmm(
            self._protein_topology_old)
        self._protein_positions_old = pdb_file.positions

        # This forcefield (with the ligand templates loaded) is the one used
        # by _solvate_system for adding hydrogens and solvent.
        self._forcefield = app.ForceField(*forcefield_files)
        self._forcefield.loadFile(StringIO(ffxml))

        print("Generated forcefield")

        self._complex_md_topology_old = self._protein_md_topology_old.join(
            self._old_ligand_md_topology)
        self._complex_topology_old = self._complex_md_topology_old.to_openmm()

        n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
        n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

        # Protein atoms come first, followed by the old ligand atoms.
        self._complex_positions_old = unit.Quantity(
            np.zeros([n_atoms_complex_old, 3]), unit=unit.nanometers)
        self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
        self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

        if pressure is not None:
            barostat = openmm.MonteCarloBarostat(self._pressure,
                                                 self._temperature,
                                                 self._barostat_period)
            self._system_generator = SystemGenerator(
                forcefield_files,
                barostat=barostat,
                forcefield_kwargs={'nonbondedMethod': app.PME})
        else:
            self._system_generator = SystemGenerator(forcefield_files)

        self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
            self._old_ligand_oemol,
            self._new_ligand_oemol,
            self._system_generator,
            residue_name="MOL")
        self._geometry_engine = FFAllAngleGeometryEngine()

        (self._complex_topology_old_solvated,
         self._complex_positions_old_solvated,
         self._complex_system_old_solvated) = self._solvate_system(
             self._complex_topology_old, self._complex_positions_old)
        self._complex_md_topology_old_solvated = md.Topology.from_openmm(
            self._complex_topology_old_solvated)
        print(self._complex_proposal_engine._smiles_list)

        beta = 1.0 / (kB * temperature)

        self._complex_topology_proposal = self._complex_proposal_engine.propose(
            self._complex_system_old_solvated,
            self._complex_topology_old_solvated)
        self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
            self._complex_topology_proposal,
            self._complex_positions_old_solvated, beta)

        # Now generate the equivalent objects for the solvent phase. First,
        # generate the ligand-only topologies and atom map.
        self._solvent_topology_proposal, self._old_solvent_positions = self._generate_ligand_only_topologies(
            self._complex_positions_old_solvated,
            self._complex_positions_new_solvated)
        self._new_solvent_positions, _ = self._geometry_engine.propose(
            self._solvent_topology_proposal, self._old_solvent_positions,
            beta)

    def load_sdf(self, sdf_filename, index=0):
        """
        Load an SDF file into an OEMol. Since SDF files can contain multiple molecules, an index can be provided as well.

        Parameters
        ----------
        sdf_filename : str
            The name of the SDF file
        index : int, default 0
            The index of the molecule in the SDF file

        Returns
        -------
        mol : openeye.oechem.OEMol object
            The loaded oemol object

        Raises
        ------
        IOError
            If the file cannot be opened for reading.
        IndexError
            If ``index`` is out of range for the molecules in the file.
        """
        ifs = oechem.oemolistream()
        if not ifs.open(sdf_filename):
            raise IOError(
                "Unable to open molecule file '%s' for reading" % sdf_filename)
        try:
            # Copy each molecule: the stream reuses its internal molecule
            # object while iterating.
            mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]
        finally:
            ifs.close()

        return mol_list[index]

    def _solvate_system(self, topology, positions, model='tip3p'):
        """
        Generate a solvated topology, positions, and system for a given input topology and positions.
        For generating the system, the forcefield files provided in the constructor will be used.

        Parameters
        ----------
        topology : app.Topology
            Topology of the system to solvate
        positions : [n, 3] ndarray of Quantity nm
            the positions of the unsolvated system
        model : str, default 'tip3p'
            Water model name forwarded to ``Modeller.addSolvent``

        Returns
        -------
        solvated_topology : app.Topology
            Topology of the system with added waters
        solvated_positions : [n + 3(n_waters), 3] ndarray of Quantity nm
            Solvated positions
        solvated_system : openmm.System
            The parameterized system, containing a barostat if one was specified.
        """
        modeller = app.Modeller(topology, positions)
        # Strip hydrogens from everything except the ligand residue ("MOL")
        # and let the forcefield add them back.
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol == 'H' and atom.residue.name != "MOL"
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=self._forcefield)
        print("preparing to add solvent")
        modeller.addSolvent(self._forcefield,
                            model=model,
                            padding=self._padding)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        print("solvent added, parameterizing")
        solvated_system = self._system_generator.build_system(
            solvated_topology)
        print("System parameterized")
        return solvated_topology, solvated_positions, solvated_system

    def _generate_ligand_only_topologies(self, old_positions, new_positions):
        """
        This method generates a solvent-phase (ligand + water) TopologyProposal from the
        solvated-complex TopologyProposal stored on this object, reusing the complex atom map
        for the ligand atoms.

        Parameters
        ----------
        old_positions : [n, 3] ndarray of Quantity nm
            Positions of the old solvated complex; the old ligand coordinates are sliced from it
        new_positions : [m, 3] ndarray of Quantity nm
            Positions of the new solvated complex. Not used by the current implementation;
            kept so the call signature is unchanged.

        Returns
        -------
        ligand_topology_proposal : TopologyProposal
            Proposal between the solvated old ligand and the solvated new ligand
        old_solvated_positions : Quantity
            Positions of the solvated old ligand, as returned by ``_solvate_system``
        """
        old_complex = md.Topology.from_openmm(
            self._complex_topology_proposal.old_topology)
        new_complex = md.Topology.from_openmm(
            self._complex_topology_proposal.new_topology)

        complex_atom_map = self._complex_topology_proposal.old_to_new_atom_map

        old_mol_start_index, old_mol_len = self._complex_proposal_engine._find_mol_start_index(
            old_complex.to_openmm())
        new_mol_start_index, new_mol_len = self._complex_proposal_engine._find_mol_start_index(
            new_complex.to_openmm())

        # Copy into a fresh Quantity array so the ligand rows can be sliced.
        old_pos = unit.Quantity(np.zeros([len(old_positions), 3]),
                                unit=unit.nanometers)
        old_pos[:, :] = old_positions
        old_ligand_positions = old_pos[old_mol_start_index:(
            old_mol_start_index + old_mol_len), :]

        # Subset the topologies to the ligand residue only.
        old_ligand_topology = old_complex.subset(
            old_complex.select("resname == 'MOL' "))
        new_ligand_topology = new_complex.subset(
            new_complex.select("resname == 'MOL' "))

        # Solvate the old ligand topology.
        old_solvated_topology, old_solvated_positions, old_solvated_system = self._solvate_system(
            old_ligand_topology.to_openmm(), old_ligand_positions)
        old_solvated_md_topology = md.Topology.from_openmm(
            old_solvated_topology)

        # Now remove the old ligand, leaving only the solvent.
        solvent_only_topology = old_solvated_md_topology.subset(
            old_solvated_md_topology.select("water"))

        # Append the solvent to the new ligand-only topology.
        new_solvated_ligand_md_topology = new_ligand_topology.join(
            solvent_only_topology)
        nsl, b = new_solvated_ligand_md_topology.to_dataframe()
        # Dirty hack because new_solvated_ligand_md_topology.to_openmm() was
        # throwing a bond topology error: round-trip through a dataframe.
        new_solvated_ligand_md_topology = md.Topology.from_dataframe(nsl, b)

        new_solvated_ligand_omm_topology = new_solvated_ligand_md_topology.to_openmm()
        new_solvated_ligand_omm_topology.setPeriodicBoxVectors(
            old_solvated_topology.getPeriodicBoxVectors())

        # Create the new ligand system.
        new_solvated_system = self._system_generator.build_system(
            new_solvated_ligand_omm_topology)

        # Re-index the complex map so both sides count from the ligand start.
        new_to_old_atom_map = {
            complex_atom_map[x] - new_mol_start_index: x - old_mol_start_index
            for x in old_complex.select("resname == 'MOL' ")
            if x in complex_atom_map
        }

        # Adjust the atom map to account for the presence of solvent degrees
        # of freedom. By design, all atoms after the ligands are water, and
        # should be mapped.
        n_water_atoms = solvent_only_topology.to_openmm().getNumAtoms()
        for i in range(n_water_atoms):
            new_to_old_atom_map[new_mol_len + i] = old_mol_len + i

        # Make a TopologyProposal.
        ligand_topology_proposal = TopologyProposal(
            new_topology=new_solvated_ligand_omm_topology,
            new_system=new_solvated_system,
            old_topology=old_solvated_topology,
            old_system=old_solvated_system,
            new_to_old_atom_map=new_to_old_atom_map,
            old_chemical_state_key='A',
            new_chemical_state_key='B')

        return ligand_topology_proposal, old_solvated_positions

    @property
    def complex_topology_proposal(self):
        return self._complex_topology_proposal

    @property
    def complex_old_positions(self):
        return self._complex_positions_old_solvated

    @property
    def complex_new_positions(self):
        return self._complex_positions_new_solvated

    @property
    def solvent_topology_proposal(self):
        return self._solvent_topology_proposal

    @property
    def solvent_old_positions(self):
        return self._old_solvent_positions

    @property
    def solvent_new_positions(self):
        return self._new_solvent_positions
def generate_top_pos_sys(topology, new_res, system, positions,
                         system_generator):
    """
    Generate a point mutation engine and geometry engine, then conduct a topology proposal,
    a geometry proposal, and hybrid factory generation.

    Parameters
    ----------
    topology
        Wildtype topology handed to the PointMutationEngine and used as the current topology
    new_res : str
        Three-letter code of the residue that residue id '2' is allowed to mutate to
    system
        Current system passed to the topology proposal
    positions
        Current positions, used for the geometry proposal and the hybrid factory
    system_generator
        System generator handed to the PointMutationEngine

    Returns
    -------
    tuple
        ``(topology_proposal, forward_new_positions, forward_htf,
        local_map_stereo_sidechain, old_oemol_sidechain, new_oemol_sidechain)``

    Notes
    -----
    ``beta`` is not a parameter; it is read from the enclosing module scope.
    """
    # Create the point mutation engine.
    print("generating point mutation engine")
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        chain_id='1',  # the chain id allowed to mutate (always a string)
        max_point_mutants=1,
        residues_allowed_to_mutate=['2'],  # the residue ids allowed to mutate
        allowed_mutations=[
            ('2', new_res)
        ],  # residue ids allowed to mutate, with the three-letter target code
        aggregate=True)  # always allow aggregation

    # Create a geometry engine.
    print("generating geometry engine")
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=False)

    # Create a topology proposal.
    print("making topology proposal")
    (topology_proposal, local_map_stereo_sidechain, new_oemol_sidechain,
     old_oemol_sidechain) = point_mutation_engine.propose(
         current_system=system, current_topology=topology)

    # Make a forward geometry proposal; the proposal log-probability is not used here.
    print("making geometry proposal")
    forward_new_positions, _ = geometry_engine.propose(
        topology_proposal, positions, beta)

    # Create a hybrid topology factory. The original had this message as a
    # bare string expression, so it was never printed.
    print("making forward hybridtopologyfactory")
    forward_htf = HybridTopologyFactory(topology_proposal=topology_proposal,
                                        current_positions=positions,
                                        new_positions=forward_new_positions,
                                        use_dispersion_correction=False,
                                        functions=None,
                                        softcore_alpha=None,
                                        bond_softening_constant=1.0,
                                        angle_softening_constant=1.0,
                                        soften_only_new=False,
                                        neglected_new_angle_terms=[],
                                        neglected_old_angle_terms=[],
                                        softcore_LJ_v2=True,
                                        softcore_electrostatics=True,
                                        softcore_LJ_v2_alpha=0.85,
                                        softcore_electrostatics_alpha=0.3,
                                        softcore_sigma_Q=1.0,
                                        interpolate_old_and_new_14s=False,
                                        omitted_terms=None)

    return (topology_proposal, forward_new_positions, forward_htf,
            local_map_stereo_sidechain, old_oemol_sidechain,
            new_oemol_sidechain)
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If they are set to None, then it will attempt to read a set of current and new smiles.
    An atom mapping image will be generated if specified.

    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        If neither a pair of names nor a pair of SMILES is given, or if the
        given pair cannot be converted to molecules / SMILES strings.

    Notes
    -----
    ``beta``, ``openmm``, ``np`` and ``StringIO`` are read from the enclosing module scope.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators

    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, generate_conformers, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import TopologyProposal, SystemGenerator, SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol, new_oemol = iupac_to_oemol(
                current_mol_name), iupac_to_oemol(proposed_mol_name)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            # Chain the cause so the underlying failure stays visible.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol, new_oemol = smiles_to_oemol(
                current_mol_smiles), smiles_to_oemol(proposed_mol_smiles)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES(
        old_smiles, title="MOL")

    # Correct the old positions: re-extract them from the molecule, in nm.
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES(
        new_smiles, title="NEW")

    # The ffxml is generated before both titles are reset to 'MOL' below.
    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        # Now to solvate.
        modeller = app.Modeller(old_topology, old_positions)
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Repack the modeller positions into a numpy-backed Quantity in nm.
        solvated_positions = unit.quantity.Quantity(value=np.array([
            list(atom_pos) for atom_pos in
            solvated_positions.value_in_unit_system(unit.md_unit_system)
        ]),
                                                    unit=unit.nanometers)
        solvated_system = system_generator.build_system(solvated_topology)

        # Now to create the proposal.
        top_proposal = proposal_engine.propose(
            current_system=solvated_system,
            current_topology=solvated_topology,
            current_mol=old_oemol,
            proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   solvated_positions, beta)

        if render_atom_mapping:
            # Aliased so the import does not rebind the boolean parameter.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
            print(
                f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}"
            )
            _render_atom_mapping(
                f"{old_smiles}to{new_smiles}.png", old_oemol, new_oemol,
                proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    else:
        vacuum_system = system_generator.build_system(old_topology)
        top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                               current_topology=old_topology,
                                               current_mol=old_oemol,
                                               proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   old_positions, beta)
        if render_atom_mapping:
            # Aliased so the import does not rebind the boolean parameter.
            from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
            print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
            _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                                 new_oemol,
                                 top_proposal._new_to_old_atom_map)
        return top_proposal, old_positions, new_positions
def validate_rjmc_work_variance(top_prop,
                                positions,
                                geometry_method=0,
                                num_iterations=10,
                                md_steps=250,
                                compute_timeseries=False,
                                md_system=None,
                                prespecified_conformers=None):
    """
    Run forward and reverse geometry proposals over a set of conformers and collect the work of each.

    Arguments
    ----------
    top_prop : perses.rjmc.topology_proposal.TopologyProposal object
        topology_proposal
    positions
        starting positions; minimized and used to seed the MD run when no
        prespecified conformers are given
    md_system : openmm.System object, default None
        system from which md is conducted; the default is the top_prop._old_system
    geometry_method : int
        which geometry proposal method to use
            0: neglect_angles = True (this is supposed to be the zero-variance method)
            1: neglect_angles = False (this will accumulate variance)
            2: use_sterics = True (this is experimental)
    num_iterations: int
        number of times to run md_steps integrator
    md_steps: int
        number of md_steps to run in each num_iteration
    compute_timeseries = bool (default False)
        whether to use pymbar detectEquilibration and subsampleCorrelated data from the MD run (the potential energy is the data)
    prespecified_conformers = None or unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        whether to input a unit.Quantity of conformers and bypass the conformer_generation/pymbar stage; None will default conduct this phase

    Returns
    -------
    conformers : unit.Quantity(np.array([num_iterations, system.getNumParticles(), 3]), unit = unit.nanometers)
        positions sampled from the md run (or the prespecified conformers)
    rj_works : list
        work from each conformer proposal

    Raises
    ------
    Exception
        If ``geometry_method`` is not 0, 1 or 2.
    """
    from openmmtools import integrators
    import simtk.unit as unit
    import simtk.openmm as openmm
    from openmmtools.constants import kB
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    import tqdm

    temperature = 300.0 * unit.kelvin  # unit-bearing temperature
    kT = kB * temperature  # unit-bearing thermal energy
    beta = 1.0 / kT  # unit-bearing inverse thermal energy

    # Identity checks: `== None` is elementwise on array-like inputs.
    system = top_prop._old_system if md_system is None else md_system

    if prespecified_conformers is None:
        # Generate conformations from MD.
        integrator = integrators.LangevinIntegrator(
            collision_rate=1.0 / unit.picosecond,
            timestep=4.0 * unit.femtosecond,
            temperature=temperature)
        context = openmm.Context(system, integrator)
        context.setPositions(positions)
        openmm.LocalEnergyMinimizer.minimize(context)
        minimized_positions = context.getState(getPositions=True).getPositions(
            asNumpy=True)
        print("completed initial minimization")
        context.setPositions(minimized_positions)

        zeros = np.zeros([num_iterations, int(system.getNumParticles()), 3])
        conformers = unit.Quantity(zeros, unit=unit.nanometers)
        rps = np.zeros((num_iterations))

        print("conducting md sampling")
        for iteration in tqdm.trange(num_iterations):
            integrator.step(md_steps)
            state = context.getState(getPositions=True, getEnergy=True)
            new_positions = state.getPositions(asNumpy=True)
            conformers[iteration, :, :] = new_positions

            # Reduced potential of this snapshot, used for the timeseries analysis.
            rp = state.getPotentialEnergy() * beta
            rps[iteration] = rp

        del context, integrator

        if compute_timeseries:
            print("computing production and data correlation")
            from pymbar import timeseries
            t0, g, Neff = timeseries.detectEquilibration(rps)
            series = timeseries.subsampleCorrelatedData(np.arange(
                t0, num_iterations),
                                                        g=g)
            print(f"production starts at index {t0} of {num_iterations}")
            print(f"the number of effective samples is {Neff}")
            indices = t0 + series
            print(f"the filtered indices are {indices}")
        else:
            indices = range(num_iterations)
    else:
        conformers = prespecified_conformers
        indices = range(len(conformers))

    # The three methods differ only in these two engine options.
    method_options = {
        0: {'use_sterics': False, 'neglect_angles': True},
        1: {'use_sterics': False, 'neglect_angles': False},
        2: {'use_sterics': True, 'neglect_angles': False},
    }
    if geometry_method not in method_options:
        raise Exception(f"there is no geometry method for {geometry_method}")
    options = method_options[geometry_method]
    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=options['use_sterics'],
        n_bond_divisions=1000,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=options['neglect_angles'])

    rj_works = []
    print("conducting geometry proposals...")
    for indx, index in enumerate(tqdm.tqdm(indices)):
        print(f"index {indx}")
        new_positions, logp_forward = geometry_engine.propose(
            top_prop, conformers[index], beta)
        logp_backward = geometry_engine.logp_reverse(top_prop, new_positions,
                                                     conformers[index], beta)
        print(
            f"\tlogp_forward, logp_backward: {logp_forward}, {logp_backward}")
        added_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential
        subtracted_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential
        print(
            f"\tadded_energy, subtracted_energy: {added_energy}, {subtracted_energy}"
        )
        work = logp_forward - logp_backward + added_energy - subtracted_energy
        rj_works.append(work)
        print(f"\ttotal work: {work}")

    return conformers, rj_works
def generate_dipeptide_top_pos_sys(topology,
                                   new_res,
                                   system,
                                   positions,
                                   system_generator,
                                   conduct_geometry_prop=True,
                                   conduct_htf_prop=False):
    """
    Generate a point mutation engine and geometry engine, then conduct a topology proposal,
    a geometry proposal, and hybrid factory generation.

    Parameters
    ----------
    topology
        Wildtype topology handed to the PointMutationEngine and used as the current topology
    new_res : str
        Three-letter code of the residue that residue id '2' is allowed to mutate to
    system
        Current system passed to the topology proposal
    positions
        Current positions, used for the geometry proposal and the hybrid factory
    system_generator
        System generator handed to the PointMutationEngine
    conduct_geometry_prop : bool, default True
        Whether to run the forward geometry proposal (and its reverse log-probability)
    conduct_htf_prop : bool, default False
        Whether to build a HybridTopologyFactory and validate its endstate energies;
        requires ``conduct_geometry_prop``

    Returns
    -------
    topology_proposal
        If ``conduct_geometry_prop`` is False.
    (topology_proposal, forward_new_positions, logp_proposal, logp_reverse)
        If ``conduct_geometry_prop`` is True and ``conduct_htf_prop`` is False.
    forward_htf
        If both flags are True.

    Notes
    -----
    ``beta``, ``kB``, ``temperature``, ``ENERGY_THRESHOLD`` and ``openmm`` are read from
    the enclosing module scope.
    """
    from perses.tests.utils import validate_endstate_energies
    if conduct_htf_prop:
        assert conduct_geometry_prop, "the htf prop can only be conducted if there is a geometry proposal"

    # Create the point mutation engine.
    from perses.rjmc.topology_proposal import PointMutationEngine
    point_mutation_engine = PointMutationEngine(
        wildtype_topology=topology,
        system_generator=system_generator,
        chain_id='1',  # the chain id allowed to mutate (always a string)
        max_point_mutants=1,
        residues_allowed_to_mutate=['2'],  # the residue ids allowed to mutate
        allowed_mutations=[
            ('2', new_res)
        ],  # residue ids allowed to mutate, with the three-letter target code
        aggregate=True)  # always allow aggregation

    # Create a topology proposal.
    print("making topology proposal")
    topology_proposal = point_mutation_engine.propose(
        current_system=system, current_topology=topology)

    if not conduct_geometry_prop:
        return topology_proposal

    # Create a geometry engine.
    print("generating geometry engine")
    from perses.rjmc.geometry import FFAllAngleGeometryEngine
    geometry_engine = FFAllAngleGeometryEngine(
        metadata=None,
        use_sterics=False,
        n_bond_divisions=100,
        n_angle_divisions=180,
        n_torsion_divisions=360,
        verbose=True,
        storage=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        neglect_angles=False,
        use_14_nonbondeds=True)

    # Make a forward geometry proposal and evaluate the reverse log-probability.
    print(
        f"making geometry proposal from {list(topology.residues())[1].name} to {new_res}"
    )
    forward_new_positions, logp_proposal = geometry_engine.propose(
        topology_proposal, positions, beta)
    logp_reverse = geometry_engine.logp_reverse(topology_proposal,
                                                forward_new_positions,
                                                positions, beta)

    if not conduct_htf_prop:
        return (topology_proposal, forward_new_positions, logp_proposal,
                logp_reverse)

    # Create a hybrid topology factory.
    from perses.annihilation.relative import HybridTopologyFactory
    forward_htf = HybridTopologyFactory(
        topology_proposal=topology_proposal,
        current_positions=positions,
        new_positions=forward_new_positions,
        use_dispersion_correction=False,
        functions=None,
        softcore_alpha=None,
        bond_softening_constant=1.0,
        angle_softening_constant=1.0,
        soften_only_new=False,
        neglected_new_angle_terms=[],
        neglected_old_angle_terms=[],
        softcore_LJ_v2=True,
        softcore_electrostatics=True,
        softcore_LJ_v2_alpha=0.85,
        softcore_electrostatics_alpha=0.3,
        softcore_sigma_Q=1.0,
        interpolate_old_and_new_14s=False,
        omitted_terms=None)

    if not topology_proposal.unique_new_atoms:
        assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
        assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
        # Must bind the same name that validate_endstate_energies receives below.
        added_valence_energy = 0.0
    else:
        added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

    if not topology_proposal.unique_old_atoms:
        assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
        assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
        subtracted_valence_energy = 0.0
    else:
        subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

    zero_state_error, one_state_error = validate_endstate_energies(
        forward_htf._topology_proposal,
        forward_htf,
        added_valence_energy,
        subtracted_valence_energy,
        beta=1.0 / (kB * temperature),
        ENERGY_THRESHOLD=ENERGY_THRESHOLD,
        platform=openmm.Platform.getPlatformByName('Reference'))
    print(f"zero state error : {zero_state_error}")
    print(f"one state error : {one_state_error}")

    return forward_htf
def generate_vacuum_topology_proposal(current_mol_name="benzene", proposed_mol_name="toluene"):
    """
    Build a vacuum test case for a small-molecule transformation.

    A system for the current molecule is created with a GAFF-backed
    ``ForceField``, a ``SmallMoleculeSetProposalEngine`` proposes the change to
    the proposed molecule, and a default ``FFAllAngleGeometryEngine`` generates
    coordinates for the new molecule.

    Parameters
    ----------
    current_mol_name : str, optional
        IUPAC name of the starting molecule
    proposed_mol_name : str, optional
        IUPAC name of the molecule to transform into

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Notes
    -----
    Relies on the module-level names ``oechem``, ``app``, ``SystemGenerator``,
    ``FFAllAngleGeometryEngine``, ``SmallMoleculeSetProposalEngine`` and ``beta``.
    """
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned by the helper is not used; the system handed to the
    # proposal engine is rebuilt from the topology below.
    old_mol, _helper_system, old_positions, old_topology = createSystemFromIUPAC(
        current_mol_name)
    new_mol = createOEMolFromIUPAC(proposed_mol_name)
    smiles_pair = [oechem.OEMolToSmiles(old_mol), oechem.OEMolToSmiles(new_mol)]

    # Parametrize the current molecule.
    ff = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    old_system = ff.createSystem(old_topology, removeCMMotion=False)

    # Generator used by the proposal engine to build the proposed system.
    system_generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': app.NoCutoff
        })

    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # Propose the transformation, then place the new atoms.
    topology_proposal = proposal_engine.propose(
        old_system, old_topology, current_mol=old_mol, proposed_mol=new_mol)
    new_positions, _logp = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene"):
    """
    Build a solvated test case for a small-molecule transformation.

    The current molecule is solvated with ``Modeller.addSolvent`` (tip3p,
    9 angstrom padding), a PME system with a Monte Carlo barostat is created,
    a ``SmallMoleculeSetProposalEngine`` proposes the change to the proposed
    molecule, and a default ``FFAllAngleGeometryEngine`` generates coordinates
    for the new system.

    Parameters
    ----------
    current_mol_name : str, optional
        IUPAC name of the starting molecule
    proposed_mol_name : str, optional
        IUPAC name of the molecule to transform into

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial (solvated) system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Notes
    -----
    Relies on the module-level names ``oechem``, ``openmm``, ``unit``,
    ``temperature``, ``beta``, ``SystemGenerator``,
    ``FFAllAngleGeometryEngine`` and ``SmallMoleculeSetProposalEngine``.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned by the helper is not used; a solvated one is built below.
    old_mol, _helper_system, vacuum_positions, vacuum_topology = createSystemFromIUPAC(
        current_mol_name)
    new_mol = createOEMolFromIUPAC(proposed_mol_name)
    smiles_pair = [oechem.OEMolToSmiles(old_mol), oechem.OEMolToSmiles(new_mol)]

    ff = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    ff.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # Solvate the current molecule.
    solvent_builder = app.Modeller(vacuum_topology, vacuum_positions)
    solvent_builder.addSolvent(ff, model='tip3p', padding=9.0 * unit.angstrom)
    solvated_topology = solvent_builder.getTopology()
    solvated_positions = solvent_builder.getPositions()

    # Periodic system; the same barostat object is also handed to the generator.
    solvated_system = ff.createSystem(
        solvated_topology, nonbondedMethod=app.PME, removeCMMotion=False)
    barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
    solvated_system.addForce(barostat)

    system_generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': app.PME
        })

    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # Propose the transformation, then place the new atoms.
    topology_proposal = proposal_engine.propose(solvated_system, solvated_topology)
    new_positions, _logp = geometry_engine.propose(topology_proposal, solvated_positions, beta)

    return topology_proposal, solvated_positions, new_positions
def __init__(self,
             protein_filename,
             mutation_chain_id,
             mutation_residue_id,
             proposed_residue,
             phase='complex',
             conduct_endstate_validation=True,
             ligand_input=None,
             ligand_index=0,
             water_model='tip3p',
             ionic_strength=0.15 * unit.molar,
             forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
             barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
             forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
             periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
             nonperiodic_forcefield_kwargs=None,
             small_molecule_forcefields='gaff-2.11',
             complex_box_dimensions=None,
             apo_box_dimensions=None,
             flatten_torsions=False,
             flatten_exceptions=False,
             repartitioned_endstate=None,
             **kwargs):
    """
    Build hybrid topology factories for a protein point mutation.

    The apo protein (and, when a ligand is supplied, the protein:ligand
    complex) is solvated, a point mutation is proposed, new-atom geometry is
    generated, and a hybrid topology factory is built for each phase. Results
    are stored on ``self.apo_htf`` and ``self.complex_htf`` (``None`` when no
    ligand is given); the system generator is stored on
    ``self.system_generator``.

    Parameters
    ----------
    protein_filename : str
        path to protein (to mutate); .pdb
    mutation_chain_id : str
        name of the chain to be mutated
    mutation_residue_id : str
        residue id to change
    proposed_residue : str
        three letter code of the residue to mutate to
    phase : str, default complex
        if phase == vacuum, then the complex will not be solvated with water;
        else, it will be solvated with tip3p
    conduct_endstate_validation : bool, default True
        whether to conduct an endstate validation of the HybridTopologyFactory.
        If using the RepartitionedHybridTopologyFactory, endstate validation
        cannot and will not be conducted.
    ligand_input : str or oechem.OEMol, default None
        ligand of interest (i.e. small molecule or protein): either a path to a
        .sdf or .pdb file, or an OEMol object
    ligand_index : int, default 0
        which ligand to use (passed to the .sdf reader)
    water_model : str, default 'tip3p'
        solvent model to use for solvation
    ionic_strength : float * unit.molar, default 0.15 * unit.molar
        the total concentration of ions (both positive and negative) to add
        using Modeller. This does not include ions that are added to neutralize
        the system. Note that only monovalent ions are currently supported.
    forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
        forcefield files for proteins and solvent
    barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
        barostat to use; the default is built once, at definition time, from
        the module-level ``temperature``
    forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
        forcefield kwargs for system parametrization
    periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
        periodic forcefield kwargs for system parametrization
    nonperiodic_forcefield_kwargs : dict, default None
        non-periodic forcefield kwargs for system parametrization
    small_molecule_forcefields : str, default 'gaff-2.11'
        the forcefield string for small molecule parametrization
    complex_box_dimensions : Vec3, default None
        define box dimensions of complex phase; if None, padding is 1nm
    apo_box_dimensions : Vec3, default None
        define box dimensions of apo phase; if None, padding is 1nm
    flatten_torsions : bool, default False
        in the htf, flatten torsions involving unique new atoms at lambda = 0
        and unique old atoms at lambda = 1
    flatten_exceptions : bool, default False
        in the htf, flatten exceptions involving unique new atoms at lambda = 0
        and unique old atoms at lambda = 1
    repartitioned_endstate : int, default None
        the endstate (0 or 1) at which to build the
        RepartitionedHybridTopologyFactory. By default, this is None, meaning a
        vanilla HybridTopologyFactory will be built.

    Raises
    ------
    ValueError
        if ``ligand_input`` is a string that ends in neither '.sdf' nor 'pdb',
        or if ``repartitioned_endstate`` is not None, 0 or 1

    Notes
    -----
    The list/dict/barostat defaults in the signature are shared between calls;
    they are passed through unmodified here.
    If ``ligand_input`` is neither a string nor an ``oechem.OEMol``, a warning
    is logged and construction returns early without building any factory.

    TODO : allow argument for spectator ligands besides the 'ligand_input'
    """
    # First thing to do is load the apo protein to mutate...
    with open(protein_filename, 'r') as protein_pdbfile:
        protein_pdb = app.PDBFile(protein_pdbfile)
    protein_positions = protein_pdb.positions
    protein_md_topology = md.Topology.from_openmm(protein_pdb.topology)
    # Round-trip through mdtraj so the apo and complex topologies are built the same way.
    protein_topology = protein_md_topology.to_openmm()
    protein_n_atoms = protein_md_topology.n_atoms

    # Load the ligand, if present
    molecules = []
    if ligand_input:
        if isinstance(ligand_input, str):
            if ligand_input.endswith('.sdf'):  # small molecule
                ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index)
                molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
                ligand_positions = extractPositionsFromOEMol(ligand_mol)
                ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_mol)
                ligand_md_topology = md.Topology.from_openmm(ligand_topology)
                ligand_n_atoms = ligand_md_topology.n_atoms
            elif ligand_input.endswith('pdb'):  # protein
                with open(ligand_input, 'r') as ligand_pdbfile:
                    ligand_pdb = app.PDBFile(ligand_pdbfile)
                ligand_positions = ligand_pdb.positions
                ligand_topology = ligand_pdb.topology
                ligand_md_topology = md.Topology.from_openmm(ligand_pdb.topology)
                ligand_n_atoms = ligand_md_topology.n_atoms
            else:
                # Previously this fell through and failed later with a NameError
                # on the undefined ligand topology; fail with a clear message.
                raise ValueError(
                    f"ligand file '{ligand_input}' not recognised. "
                    "Please provide a path to a .pdb or .sdf file")
        elif isinstance(ligand_input, oechem.OEMol):  # oemol object
            molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
            ligand_positions = extractPositionsFromOEMol(ligand_input)
            ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_input)
            ligand_md_topology = md.Topology.from_openmm(ligand_topology)
            ligand_n_atoms = ligand_md_topology.n_atoms
        else:
            _logger.warning('ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
            return

        # Now create a complex: protein atoms first, ligand atoms appended.
        complex_md_topology = protein_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
        complex_positions[:protein_n_atoms, :] = protein_positions
        complex_positions[protein_n_atoms:, :] = ligand_positions

    # Now for a system_generator
    self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                            barostat=barostat,
                                            forcefield_kwargs=forcefield_kwargs,
                                            periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                            nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                            small_molecule_forcefield=small_molecule_forcefields,
                                            molecules=molecules,
                                            cache=None)

    # Solvate apo and complex...
    apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
    inputs = [apo_input]
    if ligand_input:
        inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=True)

    # The factory class does not depend on the phase, so choose it once.
    if repartitioned_endstate is None:
        factory = HybridTopologyFactory
    elif repartitioned_endstate in [0, 1]:
        factory = RepartitionedHybridTopologyFactory
    else:
        raise ValueError(f"repartitioned_endstate must be None, 0 or 1; got {repartitioned_endstate!r}")

    # Run pipeline...
    htfs = []
    for (top, pos, system) in inputs:
        point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                    system_generator=self.system_generator,
                                                    chain_id=mutation_chain_id,  # Denote the chain id allowed to mutate (it's always a string variable)
                                                    max_point_mutants=1,
                                                    residues_allowed_to_mutate=[mutation_residue_id],  # The residue ids allowed to mutate
                                                    allowed_mutations=[(mutation_residue_id, proposed_residue)],  # The residue ids allowed to mutate with the three-letter code allowed to change
                                                    aggregate=True)  # Always allow aggregation

        topology_proposal = point_mutation_engine.propose(system, top)

        # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
        old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
        validate_bool = not (old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids)
        new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta,
                                                               validate_energy_bookkeeping=validate_bool)
        logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta,
                                                    validate_energy_bookkeeping=validate_bool)

        forward_htf = factory(topology_proposal=topology_proposal,
                              current_positions=pos,
                              new_positions=new_positions,
                              use_dispersion_correction=False,
                              functions=None,
                              softcore_alpha=None,
                              bond_softening_constant=1.0,
                              angle_softening_constant=1.0,
                              soften_only_new=False,
                              neglected_new_angle_terms=[],
                              neglected_old_angle_terms=[],
                              softcore_LJ_v2=True,
                              softcore_electrostatics=True,
                              softcore_LJ_v2_alpha=0.85,
                              softcore_electrostatics_alpha=0.3,
                              softcore_sigma_Q=1.0,
                              interpolate_old_and_new_14s=flatten_exceptions,
                              omitted_terms=None,
                              endstate=repartitioned_endstate,
                              flatten_torsions=flatten_torsions)

        # Valence energy added by the forward geometry proposal. With no unique
        # new atoms nothing is added, so the term must be explicitly zero.
        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
            added_valence_energy = 0.0
        else:
            added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

        # Valence energy removed by the reverse geometry proposal.
        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

        # Endstate validation is skipped for the repartitioned factory.
        if conduct_endstate_validation and repartitioned_endstate is None:
            zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal,
                                                                           forward_htf,
                                                                           added_valence_energy,
                                                                           subtracted_valence_energy,
                                                                           beta=beta,
                                                                           ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            if zero_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
            if one_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")

        htfs.append(forward_htf)

    self.apo_htf = htfs[0]
    self.complex_htf = htfs[1] if ligand_input else None
class ExpandedEnsembleSampler(object):
    """
    Method of expanded ensembles sampling engine.

    Properties
    ----------
    sampler : MCMCSampler
        The MCMC sampler used for updating positions.
    proposal_engine : ProposalEngine
        The ProposalEngine to use for proposing new sampler states and topologies.
    system_generator : SystemGenerator
        The SystemGenerator to use for creating System objects following proposals.
    state : hashable object
        The current sampler state. Can be any hashable object.
    states : set of hashable object
        All known states.
    iteration : int
        Iterations completed.
    naccepted : int
        Number of accepted thermodynamic/chemical state changes.
    nrejected : int
        Number of rejected thermodynamic/chemical state changes.
    number_of_state_visits : dict of state_key
        Cumulative counts of visited states.
    verbose : bool
        If True, verbose output is printed.

    References
    ----------
    [1] Lyubartsev AP, Martsinovski AA, Shevkunov SV, and Vorontsov-Velyaminov PN. New approach to Monte Carlo calculation of the free energy: Method of expanded ensembles. JCP 96:1776, 1992
    http://dx.doi.org/10.1063/1.462133

    Examples
    --------
    >>> # Create a test system
    >>> test = testsystems.AlanineDipeptideVacuum()
    >>> # Create a SystemGenerator and rebuild the System.
    >>> from perses.rjmc.topology_proposal import SystemGenerator
    >>> system_generator = SystemGenerator(['amber99sbildn.xml'], forcefield_kwargs={ 'nonbondedMethod' : app.NoCutoff, 'implicitSolvent' : None, 'constraints' : None })
    >>> test.system = system_generator.build_system(test.topology)
    >>> # Create a sampler state.
    >>> sampler_state = SamplerState(system=test.system, positions=test.positions)
    >>> # Create a thermodynamic state.
    >>> thermodynamic_state = ThermodynamicState(system=test.system, temperature=298.0*unit.kelvin)
    >>> # Create an MCMC sampler
    >>> mcmc_sampler = MCMCSampler(thermodynamic_state, sampler_state)
    >>> # Turn off verbosity
    >>> mcmc_sampler.verbose = False
    >>> # Create an Expanded Ensemble sampler
    >>> from perses.rjmc.topology_proposal import PointMutationEngine
    >>> allowed_mutations = [[('2','ALA')],[('2','VAL'),('2','LEU')]]
    >>> proposal_engine = PointMutationEngine(system_generator, max_point_mutants=1, chain_id='1', proposal_metadata=None, allowed_mutations=allowed_mutations)
    >>> exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, test.topology, 'ACE-ALA-NME', proposal_engine)
    >>> # Run the sampler
    >>> exen_sampler.run()

    """

    def __init__(self, sampler, topology, state_key, proposal_engine, log_weights=None, scheme='ncmc-geometry-ncmc', options=None, platform=None):
        r"""
        Create an expanded ensemble sampler.

        p(x,k) \propto \exp[-u_k(x) + g_k]

        where g_k is the log weight.

        Parameters
        ----------
        sampler : MCMCSampler
            MCMCSampler initialized with current SamplerState
        topology : simtk.openmm.app.Topology
            Current topology
        state_key : hashable object
            Current chemical state
        proposal_engine : ProposalEngine
            ProposalEngine to use for proposing new chemical states
        log_weights : dict of object : float
            Log weights to use for expanded ensemble biases.
            If None, an empty dict is used.
        scheme : str, optional, default='ncmc-geometry-ncmc'
            Update scheme. Only 'ncmc-geometry-ncmc' is handled by `update_state`;
            any other value makes `update_state` raise.
        options : dict, optional, default=None
            Options for initializing switching scheme, such as 'timestep', 'nsteps', 'functions' for NCMC.
            Missing entries are treated as None. The dict passed in is not modified.
        platform : simtk.openmm.Platform, optional, default=None
            Platform to use for NCMC switching.  If `None`, default (fastest) platform is used.

        """
        # Keep copies of initializing arguments.
        # TODO: Make deep copies?
        self.sampler = sampler
        self.topology = topology
        self.state_key = state_key
        self.proposal_engine = proposal_engine
        self.log_weights = dict() if log_weights is None else log_weights
        self.scheme = scheme

        # Initialize
        self.iteration = 0

        # Work on a private copy so that neither a shared default nor the
        # caller's dict is mutated when the missing entries are filled in.
        options = dict(options) if options is not None else dict()
        for option_name in ('timestep', 'nsteps', 'functions'):
            options.setdefault(option_name, None)

        # Remembered only for the NaN error messages in update_state.
        self._switching_nsteps = options['nsteps']

        from perses.annihilation.ncmc_switching import NCMCEngine
        self.ncmc_engine = NCMCEngine(temperature=self.sampler.thermodynamic_state.temperature,
                                      timestep=options['timestep'],
                                      nsteps=options['nsteps'],
                                      functions=options['functions'],
                                      platform=platform)
        from perses.rjmc.geometry import FFAllAngleGeometryEngine
        self.geometry_engine = FFAllAngleGeometryEngine({'data': 0})
        self.naccepted = 0
        self.nrejected = 0
        self.number_of_state_visits = dict()
        self.verbose = False
        self.pdbfile = None  # if not None, write PDB file
        self.geometry_pdbfile = None  # if not None, write PDB file of geometry proposals
        self.accept_everything = False  # if True, will accept anything that doesn't lead to NaNs

    @property
    def state_keys(self):
        """Keys of all states that currently have a log weight."""
        return self.log_weights.keys()

    def get_log_weight(self, state_key):
        """
        Get the log weight of the specified state.

        Parameters
        ----------
        state_key : hashable object
            The state key (e.g. chemical state key) to look up.

        Returns
        -------
        log_weight : float
            The log weight of the provided state key.

        Note
        ----
        This adds the key to the self.log_weights dict (with weight 0.0) if it is not present.

        """
        if state_key not in self.log_weights:
            self.log_weights[state_key] = 0.0
        return self.log_weights[state_key]

    def update_positions(self):
        """
        Sample new positions.
        """
        self.sampler.update()

    def update_state(self):
        """
        Sample the thermodynamic state.

        Proposes a new chemical state, switches with NCMC deletion, a geometry
        proposal and NCMC insertion, then accepts or rejects the move and
        updates the visit statistics.

        Raises
        ------
        Exception
            If topology and system atom counts disagree, if a topology cannot
            be built into a system, if the atom map is out of bounds, if NCMC
            produces NaN positions, or if `self.scheme` is unsupported.
        """
        # Check that system and topology have same number of atoms.
        old_system = self.sampler.sampler_state.system
        old_topology = self.topology
        old_topology_natoms = sum(1 for _ in old_topology.atoms())  # number of topology atoms
        old_system_natoms = old_system.getNumParticles()
        if old_topology_natoms != old_system_natoms:
            msg = 'ExpandedEnsembleSampler: topology has %d atoms, while system has %d atoms' % (old_topology_natoms, old_system_natoms)
            raise Exception(msg)

        if self.scheme != 'ncmc-geometry-ncmc':
            raise Exception("Expanded ensemble state proposal scheme '%s' unsupported" % self.scheme)

        if self.verbose:
            print("Updating chemical state with ncmc-geometry-ncmc scheme...")

        # DEBUG: Check current topology can be built.
        try:
            self.proposal_engine._system_generator.build_system(self.topology)
        except Exception as e:
            msg = str(e)
            msg += '\n'
            msg += 'ExpandedEnsembleSampler.update_sampler: self.topology before ProposalEngine call cannot be built into a system'
            raise Exception(msg) from e

        # Propose new chemical state.
        if self.verbose:
            print("Proposing new topology...")
        system = self.sampler.thermodynamic_state.system
        topology = self.topology
        positions = self.sampler.sampler_state.positions
        topology_proposal = self.proposal_engine.propose(system, topology)
        if self.verbose:
            print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

        # DEBUG: Check proposed topology can be built.
        if self.verbose:
            print("Generating new system...")
        try:
            self.proposal_engine._system_generator.build_system(topology_proposal.new_topology)
        except Exception as e:
            msg = str(e)
            msg += '\n'
            msg += 'ExpandedEnsembleSampler.update_sampler: topology_proposal.new_topology before ProposalEngine call cannot be built into a system'
            raise Exception(msg) from e

        # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
        natoms_old = topology_proposal.old_system.getNumParticles()
        natoms_new = topology_proposal.new_system.getNumParticles()
        if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
            msg = "Some old atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (0..%d):\n" % (natoms_old - 1)
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)
        if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
            msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of new atoms (0..%d):\n" % (natoms_new - 1)
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)

        # Determine state keys
        old_state_key = self.state_key
        new_state_key = topology_proposal.new_chemical_state_key

        # Determine log weight
        old_log_weight = self.get_log_weight(old_state_key)
        new_log_weight = self.get_log_weight(new_state_key)

        if self.verbose:
            print("Performing NCMC annihilation")
        # Alchemically eliminate atoms being removed.
        [ncmc_old_positions, ncmc_elimination_logp, potential_delete] = self.ncmc_engine.integrate(topology_proposal, positions, direction='delete')
        # Check that positions are not NaN
        if np.any(np.isnan(ncmc_old_positions)):
            # %s rather than %d: the configured step count may be None.
            raise Exception("Positions are NaN after NCMC delete with %s steps" % self._switching_nsteps)

        if self.verbose:
            print("Geometry engine proposal...")
        # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
        geometry_old_positions = ncmc_old_positions
        geometry_new_positions, geometry_logp_propose = self.geometry_engine.propose(topology_proposal, geometry_old_positions, self.sampler.thermodynamic_state.beta)
        if self.geometry_pdbfile is not None:
            print("Writing proposed geometry...")
            #self.geometry_pdbfile.write('MODEL %4d\n' % (self.iteration+1)) # PyMOL doesn't render connectivity correctly this way
            from simtk.openmm.app import PDBFile
            PDBFile.writeFile(topology_proposal.new_topology, geometry_new_positions, file=self.geometry_pdbfile)
            #self.geometry_pdbfile.write('ENDMDL\n')
            self.geometry_pdbfile.flush()
        geometry_logp_reverse = self.geometry_engine.logp_reverse(topology_proposal, geometry_new_positions, geometry_old_positions, self.sampler.thermodynamic_state.beta)
        geometry_logp = geometry_logp_reverse - geometry_logp_propose

        if self.verbose:
            print("Performing NCMC insertion")
        # Alchemically introduce new atoms.
        [ncmc_new_positions, ncmc_introduction_logp, potential_insert] = self.ncmc_engine.integrate(topology_proposal, geometry_new_positions, direction='insert')
        # Check that positions are not NaN
        if np.any(np.isnan(ncmc_new_positions)):
            raise Exception("Positions are NaN after NCMC insert with %s steps" % self._switching_nsteps)

        # Compute change in eliminated potential contribution.
        switch_logp = - (potential_insert - potential_delete)
        if self.verbose:
            print('potential before geometry : %12.3f kT' % potential_delete)
            print('potential after geometry : %12.3f kT' % potential_insert)
            print('---------------------------------------------------------')
            print('switch_logp : %12.3f' % switch_logp)
            print('geometry_logp_propose : %12.3f' % geometry_logp_propose)
            print('geometry_logp_reverse : %12.3f' % geometry_logp_reverse)

        # Compute total log acceptance probability, including all components.
        logp_accept = topology_proposal.logp_proposal + geometry_logp + switch_logp + ncmc_elimination_logp + ncmc_introduction_logp + new_log_weight - old_log_weight
        if self.verbose:
            print("logp_accept = %+10.4e [logp_proposal %+10.4e geometry_logp %+10.4e switch_logp %+10.4e ncmc_elimination_logp %+10.4e ncmc_introduction_logp %+10.4e old_log_weight %+10.4e new_log_weight %+10.4e]"
                  % (logp_accept, topology_proposal.logp_proposal, geometry_logp, switch_logp, ncmc_elimination_logp, ncmc_introduction_logp, old_log_weight, new_log_weight))

        # Accept or reject. A NaN acceptance probability is always rejected,
        # even when accept_everything is set.
        if np.isnan(logp_accept):
            accept = False
            print('logp_accept = NaN')
        else:
            accept = ((logp_accept >= 0.0) or (np.random.uniform() < np.exp(logp_accept)))
            if self.accept_everything:
                print('accept_everything option is turned on; accepting')
                accept = True

        if accept:
            self.sampler.thermodynamic_state.system = topology_proposal.new_system
            self.sampler.sampler_state.system = topology_proposal.new_system
            self.topology = topology_proposal.new_topology
            self.sampler.sampler_state.positions = ncmc_new_positions
            self.state_key = topology_proposal.new_chemical_state_key
            self.naccepted += 1
            if self.verbose:
                print(" accepted")
        else:
            self.nrejected += 1
            if self.verbose:
                print(" rejected")

        # Update statistics.
        self.update_statistics()

    def update(self):
        """
        Update the sampler with one step of sampling.
        """
        if self.verbose:
            print("-" * 80)
            print("Expanded Ensemble sampler iteration %8d" % self.iteration)
        self.update_positions()
        self.update_state()
        self.iteration += 1
        if self.verbose:
            print("-" * 80)

        if self.pdbfile is not None:
            print("Writing frame...")
            from simtk.openmm.app import PDBFile
            PDBFile.writeModel(self.topology, self.sampler.sampler_state.positions, self.pdbfile, self.iteration)
            self.pdbfile.flush()

    def run(self, niterations=1):
        """
        Run the sampler for the specified number of iterations

        Parameters
        ----------
        niterations : int, optional, default=1
            Number of iterations to run the sampler for.
        """
        for _ in range(niterations):
            self.update()

    def update_statistics(self):
        """
        Update sampler statistics.

        Increments the visit count of the current `state_key`.
        """
        if self.state_key not in self.number_of_state_visits:
            self.number_of_state_visits[self.state_key] = 0
        self.number_of_state_visits[self.state_key] += 1