def _attach_default_unit(value, default_unit):
    """Give a bare number its default unit; return anything else untouched.

    ``bool`` is excluded on purpose even though it subclasses ``int``.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value * default_unit
    return value


def run_setup(setup_options, serialize_systems=True, build_samplers=True):
    """
    Run the setup pipeline and return the relevant setup objects based on a yaml input file.

    Parameters
    ----------
    setup_options : dict
        result of loading yaml input file.  Keys read unconditionally include
        'phases', 'ligand_file', 'old_ligand_index', 'new_ligand_index',
        'forcefield_files', 'pressure', 'temperature', 'solvent_padding',
        'ionic_strength', 'trajectory_directory', 'trajectory_prefix',
        'n_steps_per_move_application', 'fe_type', 'softcore_v2' and
        'anneal_1,4s'.  Optional keys (with the fallback used here) are
        'use_given_geometries' (False), 'timestep' (1 fs), 'neq_splitting' /
        'eq_splitting' ("V R O R V"), 'measure_shadow_work' (False),
        'save_setup_pickle_as' (None), 'atom_map' (None),
        'topology_proposal' (None), 'render_atom_map' (not rendered) and
        'atom_selection' ('all').  Bare ints/floats given for 'timestep',
        'pressure', 'temperature', 'solvent_padding' and 'ionic_strength' are
        interpreted as femtoseconds, atmospheres, kelvin, angstroms and molar
        respectively; any other value is passed through unchanged.
    serialize_systems : bool, default True
        if True, write the hybrid/old/new systems of each phase to
        ``<trajectory_directory>/xml/`` (non-'neq' pipeline only).
    build_samplers : bool, default True
        if True, construct the sampler objects for each phase.

    Returns
    -------
    setup_dict: dict
        - fe_type 'neq': {'topology_proposals': top_prop, 'ne_fep': ne_fep}
        - fe_type 'fah' with ``build_samplers=True``:
          {'topology_proposals': top_prop, 'hybrid_topology_factories': htf}
        - otherwise: {'topology_proposals': top_prop,
          'hybrid_topology_factories': htf, 'hybrid_samplers': hss}

    Raises
    ------
    AssertionError
        for an unknown phase or a non-bool 'use_given_geometries'.
    KeyError
        for a missing required key, or when the 'complex' phase is requested
        and neither 'protein_pdb' nor 'receptor_mol2' is specified.
    """
    phases = setup_options['phases']
    known_phases = ['complex', 'solvent', 'vacuum']
    for phase in phases:
        assert (
            phase in known_phases
        ), f"Unknown phase, {phase} provided. run_setup() can be used with {known_phases}"

    use_given_geometries = setup_options.get('use_given_geometries', False)
    assert isinstance(use_given_geometries, bool)

    protein_pdb_filename = None
    receptor_mol2 = None
    if 'complex' in phases:
        _logger.info(f"\tPulling receptor (as pdb or mol2)...")
        # We'll need the protein PDB file (without missing atoms).  A key that
        # is present but None counts as "not specified", so callers that pass
        # protein_pdb=None together with a receptor_mol2 still work.
        protein_pdb_filename = setup_options.get('protein_pdb')
        if protein_pdb_filename is None:
            receptor_mol2 = setup_options.get('receptor_mol2')
            if receptor_mol2 is None:
                print(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )
                raise KeyError(
                    "Either protein_pdb or receptor_mol2 must be specified if running a complex simulation"
                )

    # And a ligand file containing the pair of ligands between which we will transform
    ligand_file = setup_options['ligand_file']
    _logger.info(f"\tdetected ligand file: {ligand_file}")

    # get the indices of ligands out of the file:
    old_ligand_index = setup_options['old_ligand_index']
    new_ligand_index = setup_options['new_ligand_index']
    _logger.info(
        f"\told ligand index: {old_ligand_index}; new ligand index: {new_ligand_index}"
    )

    _logger.info(f"\tsetting up forcefield files...")
    forcefield_files = setup_options['forcefield_files']

    if "timestep" in setup_options:
        timestep = _attach_default_unit(setup_options['timestep'],
                                        unit.femtoseconds)
        _logger.info(f"\ttimestep: {timestep}.")
    else:
        timestep = 1.0 * unit.femtoseconds
        _logger.info(f"\tno timestep detected: setting default as 1.0fs.")

    if "neq_splitting" in setup_options:
        neq_splitting = setup_options['neq_splitting']
        _logger.info(f"\tneq_splitting: {neq_splitting}")
        try:
            eq_splitting = setup_options['eq_splitting']
            _logger.info(f"\teq_splitting: {eq_splitting}")
        except KeyError as e:
            print(
                "If you specify a nonequilibrium splitting string, you must also specify an equilibrium one."
            )
            raise e
    else:
        eq_splitting = "V R O R V"
        neq_splitting = "V R O R V"
        _logger.info(
            f"\tno splitting strings specified: defaulting to neq: {neq_splitting}, eq: {eq_splitting}."
        )

    if "measure_shadow_work" in setup_options:
        measure_shadow_work = setup_options['measure_shadow_work']
        _logger.info(f"\tmeasuring shadow work: {measure_shadow_work}.")
    else:
        measure_shadow_work = False
        _logger.info(
            f"\tno measure_shadow_work specified: defaulting to False.")

    # Bare numbers (int or float) are given their default unit; values that
    # already carry a unit (or are None) pass through untouched.
    pressure = _attach_default_unit(setup_options['pressure'],
                                    unit.atmosphere)
    temperature = _attach_default_unit(setup_options['temperature'],
                                       unit.kelvin)
    solvent_padding_angstroms = _attach_default_unit(
        setup_options['solvent_padding'], unit.angstrom)
    ionic_strength = _attach_default_unit(setup_options['ionic_strength'],
                                          unit.molar)

    _logger.info(f"\tsetting pressure: {pressure}.")
    _logger.info(f"\tsetting temperature: {temperature}.")
    _logger.info(f"\tsetting solvent padding: {solvent_padding_angstroms}A.")
    _logger.info(f"\tsetting ionic strength: {ionic_strength}M.")

    setup_pickle_file = setup_options.get('save_setup_pickle_as')
    _logger.info(f"\tsetup pickle file: {setup_pickle_file}")

    trajectory_directory = setup_options['trajectory_directory']
    _logger.info(f"\ttrajectory directory: {trajectory_directory}")

    # Atom map file: one "<int> <int>" pair per line.  A missing option is the
    # normal case; an unreadable/malformed file still falls back to None (as
    # before) but is now reported as a warning instead of "not specified".
    atom_map_file = setup_options.get('atom_map')
    if atom_map_file is None:
        atom_map = None
        _logger.info(f"\tno atom map specified: default to None.")
    else:
        try:
            with open(atom_map_file, 'r') as f:
                atom_map = {
                    int(fields[0]): int(fields[1])
                    for fields in (line.split() for line in f)
                }
            _logger.info(f"\tsucceeded parsing atom map.")
        except (OSError, TypeError, ValueError, IndexError) as e:
            atom_map = None
            _logger.warning(
                f"\tcould not parse atom map {atom_map_file!r} ({e}): default to None."
            )

    if setup_options.get('topology_proposal') is None:
        _logger.info(
            f"\tno topology_proposal specified; proceeding to RelativeFEPSetup...\n\n\n"
        )

        _logger.info(
            f'Box dimensions: {setup_options["complex_box_dimensions"]} and {setup_options["solvent_box_dimensions"]}'
        )
        fe_setup = RelativeFEPSetup(
            ligand_file,
            old_ligand_index,
            new_ligand_index,
            forcefield_files,
            phases=phases,
            protein_pdb_filename=protein_pdb_filename,
            receptor_mol2_filename=receptor_mol2,
            pressure=pressure,
            temperature=temperature,
            solvent_padding=solvent_padding_angstroms,
            spectator_filenames=setup_options['spectators'],
            map_strength=setup_options['map_strength'],
            atom_expr=setup_options['atom_expr'],
            bond_expr=setup_options['bond_expr'],
            atom_map=atom_map,
            neglect_angles=setup_options['neglect_angles'],
            anneal_14s=setup_options['anneal_1,4s'],
            small_molecule_forcefield=setup_options[
                'small_molecule_forcefield'],
            small_molecule_parameters_cache=setup_options[
                'small_molecule_parameters_cache'],
            trajectory_directory=trajectory_directory,
            trajectory_prefix=setup_options['trajectory_prefix'],
            nonbonded_method=setup_options['nonbonded_method'],
            complex_box_dimensions=setup_options['complex_box_dimensions'],
            solvent_box_dimensions=setup_options['solvent_box_dimensions'],
            ionic_strength=ionic_strength,
            remove_constraints=setup_options['remove_constraints'],
            use_given_geometries=use_given_geometries)

        _logger.info(f"\twriting pickle output...")
        if setup_pickle_file is not None:
            pickle_path = os.path.join(os.getcwd(), trajectory_directory,
                                       setup_pickle_file)
            with open(pickle_path, 'wb') as f:
                # Pickling is best-effort: a failure is reported, not fatal.
                try:
                    pickle.dump(fe_setup, f)
                    _logger.info(f"\tsuccessfully dumped pickle.")
                except Exception as e:
                    print(e)
                    print("\tUnable to save setup object as a pickle")
            _logger.info(
                f"\tsetup is complete. Writing proposals and positions for each phase to top_prop dict..."
            )
        else:
            _logger.info(
                f"\tsetup is complete. Omitted writing proposals and positions for each phase to top_prop dict..."
            )

        # (key suffix, whether the fe_setup attribute carries a leading "_"),
        # in the order the attributes are read.
        per_phase_entries = (
            ('topology_proposal', False),
            ('geometry_engine', True),
            ('old_positions', False),
            ('new_positions', False),
            ('added_valence_energy', True),
            ('subtracted_valence_energy', True),
            ('logp_proposal', True),
            ('logp_reverse', True),
            ('forward_neglected_angles', True),
            ('reverse_neglected_angles', True),
        )
        top_prop = dict()
        for phase in phases:
            for suffix, is_private in per_phase_entries:
                attribute = f'_{phase}_{suffix}' if is_private else f'{phase}_{suffix}'
                top_prop[f'{phase}_{suffix}'] = getattr(fe_setup, attribute)

        top_prop['ligand_oemol_old'] = fe_setup._ligand_oemol_old
        top_prop['ligand_oemol_new'] = fe_setup._ligand_oemol_new
        top_prop[
            'non_offset_new_to_old_atom_map'] = fe_setup.non_offset_new_to_old_atom_map

        _logger.info(f"\twriting atom_mapping.png")
        atom_map_outfile = os.path.join(os.getcwd(), trajectory_directory,
                                        'atom_mapping.png')
        if setup_options.get('render_atom_map'):
            render_atom_mapping(atom_map_outfile, fe_setup._ligand_oemol_old,
                                fe_setup._ligand_oemol_new,
                                fe_setup.non_offset_new_to_old_atom_map)
    else:
        _logger.info(f"\tloading topology proposal from yaml setup options...")
        # The proposal dict is stored as a pickled 0-d object array, which
        # numpy refuses to load unless allow_pickle=True.
        # SECURITY: this unpickles the file; only load trusted paths.
        top_prop = np.load(setup_options['topology_proposal'],
                           allow_pickle=True).item()

    n_steps_per_move_application = setup_options[
        'n_steps_per_move_application']
    _logger.info(
        f"\t steps per move application: {n_steps_per_move_application}")

    trajectory_prefix = setup_options['trajectory_prefix']
    _logger.info(f"\ttrajectory prefix: {trajectory_prefix}")

    if 'atom_selection' in setup_options:
        atom_selection = setup_options['atom_selection']
        _logger.info(f"\tatom selection detected: {atom_selection}")
    else:
        _logger.info(f"\tno atom selection detected: default to all.")
        atom_selection = 'all'

    def build_hybrid_factory(phase):
        # Same construction for both pipelines below.
        return HybridTopologyFactory(
            top_prop[f'{phase}_topology_proposal'],
            top_prop[f'{phase}_old_positions'],
            top_prop[f'{phase}_new_positions'],
            neglected_new_angle_terms=top_prop[
                f"{phase}_forward_neglected_angles"],
            neglected_old_angle_terms=top_prop[
                f"{phase}_reverse_neglected_angles"],
            softcore_LJ_v2=setup_options['softcore_v2'],
            interpolate_old_and_new_14s=setup_options['anneal_1,4s'])

    fe_type = setup_options['fe_type']

    if fe_type == 'neq':
        _logger.info(f"\tInstantiating nonequilibrium switching FEP")
        # These two values are not used below; the lookups are kept so a
        # missing key still raises KeyError exactly as before.
        n_equilibrium_steps_per_iteration = setup_options[
            'n_equilibrium_steps_per_iteration']
        ncmc_save_interval = setup_options['ncmc_save_interval']
        write_ncmc_configuration = setup_options['write_ncmc_configuration']

        if setup_options['LSF']:
            _internal_parallelism = {
                'library': ('dask', 'LSF'),
                'num_processes': setup_options['processes']
            }
        else:
            _internal_parallelism = None

        ne_fep = dict()
        for phase in phases:
            _logger.info(f"\t\tphase: {phase}")
            hybrid_factory = build_hybrid_factory(phase)

            if build_samplers:
                ne_fep[phase] = SequentialMonteCarlo(
                    factory=hybrid_factory,
                    lambda_protocol=setup_options['lambda_protocol'],
                    temperature=temperature,
                    trajectory_directory=trajectory_directory,
                    trajectory_prefix=f"{trajectory_prefix}_{phase}",
                    atom_selection=atom_selection,
                    timestep=timestep,
                    eq_splitting_string=eq_splitting,
                    neq_splitting_string=neq_splitting,
                    collision_rate=setup_options['ncmc_collision_rate_ps'],
                    ncmc_save_interval=ncmc_save_interval,
                    internal_parallelism=_internal_parallelism)

        print("Nonequilibrium switching driver class constructed")

        return {'topology_proposals': top_prop, 'ne_fep': ne_fep}

    _logger.info(f"\tno nonequilibrium detected.")
    htf = dict()
    hss = dict()
    _logger.info(f"\tcataloging HybridTopologyFactories...")

    for phase in phases:
        _logger.info(f"\t\tphase: {phase}:")
        #TODO write a SAMSFEP class that mirrors NonequilibriumSwitchingFEP
        _logger.info(
            f"\t\twriting HybridTopologyFactory for phase {phase}...")
        htf[phase] = build_hybrid_factory(phase)

    for phase in phases:
        # Define necessary vars to check energy bookkeeping
        _top_prop = top_prop[f'{phase}_topology_proposal']
        _htf = htf[phase]
        _forward_added_valence_energy = top_prop[
            f'{phase}_added_valence_energy']
        _reverse_subtracted_valence_energy = top_prop[
            f'{phase}_subtracted_valence_energy']

        if not use_given_geometries:
            zero_state_error, one_state_error = validate_endstate_energies(
                _top_prop,
                _htf,
                _forward_added_valence_energy,
                _reverse_subtracted_valence_energy,
                beta=1.0 / (kB * temperature),
                ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            _logger.info(f"\t\terror in zero state: {zero_state_error}")
            _logger.info(f"\t\terror in one state: {one_state_error}")
        else:
            _logger.info(
                f"'use_given_geometries' was passed to setup; skipping endstate validation"
            )

        #TODO expose more of these options in input
        if build_samplers:
            n_states = setup_options['n_states']
            _logger.info(f"\tn_states: {n_states}")
            n_replicas = setup_options.get('n_replicas', n_states) \
                if 'n_replicas' in setup_options else n_states

            checkpoint_interval = setup_options['checkpoint_interval']

            # generating lambda protocol
            lambda_protocol = LambdaProtocol(
                functions=setup_options['protocol-type'])
            _logger.info(
                f'Using lambda protocol : {setup_options["protocol-type"]}'
            )

            if atom_selection:
                selection_indices = htf[phase].hybrid_topology.select(
                    atom_selection)
            else:
                selection_indices = None

            storage_name = f'{trajectory_directory}/{trajectory_prefix}-{phase}.nc'
            _logger.info(f'\tstorage_name: {storage_name}')
            _logger.info(f'\tselection_indices {selection_indices}')
            _logger.info(f'\tcheckpoint interval {checkpoint_interval}')
            reporter = MultiStateReporter(
                storage_name,
                analysis_particle_indices=selection_indices,
                checkpoint_interval=checkpoint_interval)

            endstates = phase != 'vacuum'

            if fe_type == 'fah':
                # NOTE(review): this returns while processing the first
                # phase, so later phases are neither validated nor
                # serialized on this path -- confirm that is intended.
                _logger.info('SETUP FOR FAH DONE')
                return {
                    'topology_proposals': top_prop,
                    'hybrid_topology_factories': htf
                }

            if fe_type in ('sams', 'repex'):
                move = mcmc.LangevinSplittingDynamicsMove(
                    timestep=timestep,
                    collision_rate=1.0 / unit.picosecond,
                    n_steps=n_steps_per_move_application,
                    reassign_velocities=False,
                    n_restart_attempts=20,
                    constraint_tolerance=1e-06)
                if fe_type == 'sams':
                    hss[phase] = HybridSAMSSampler(
                        mcmc_moves=move,
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'],
                        online_analysis_minimum_iterations=10,
                        flatness_criteria=setup_options['flatness-criteria'],
                        gamma0=setup_options['gamma0'])
                    hss[phase].setup(n_states=n_states,
                                     n_replicas=n_replicas,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
                else:
                    hss[phase] = HybridRepexSampler(
                        mcmc_moves=move,
                        hybrid_factory=htf[phase],
                        online_analysis_interval=setup_options['offline-freq'])
                    hss[phase].setup(n_states=n_states,
                                     temperature=temperature,
                                     storage_file=reporter,
                                     lambda_protocol=lambda_protocol,
                                     endstates=endstates)
        else:
            _logger.info(f"omitting sampler construction")

        if serialize_systems:
            # save the systems and the states
            _logger.info('WRITING OUT XML FILES')
            xml_directory = f'{setup_options["trajectory_directory"]}/xml/'
            # exist_ok avoids the check-then-create race
            os.makedirs(xml_directory, exist_ok=True)

            from perses.utils import data
            _logger.info('WRITING OUT XML FILES')
            _logger.info(f'Saving the hybrid, old and new system to disk')
            data.serialize(
                htf[phase].hybrid_system,
                f'{setup_options["trajectory_directory"]}/xml/{phase}-hybrid-system.gz'
            )
            data.serialize(
                htf[phase]._old_system,
                f'{setup_options["trajectory_directory"]}/xml/{phase}-old-system.gz'
            )
            data.serialize(
                htf[phase]._new_system,
                f'{setup_options["trajectory_directory"]}/xml/{phase}-new-system.gz'
            )

    return {
        'topology_proposals': top_prop,
        'hybrid_topology_factories': htf,
        'hybrid_samplers': hss
    }
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0 * unit.femtosecond,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.0.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      **kwargs):
    """
    main execution function that will:
        - create a directory for each phase according to the `trajectory_directory` argument
        - create topology proposals for all phases
        - create/serialize hybrid factories for all phases
        - create/serialize an integrator (via `make_neq_integrator`) for all phases
        - relax generated structures (via `relax_structure`) for all phases
        - create/serialize a state associated with the relaxed structures
        - create/serialize a `core.xml` object for all phases

    Every argument (plus everything in ``**kwargs``) is collected into a single
    options dict that is handed to `run_setup` and `make_neq_integrator`.

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase
        timestep : simtk.unit.Quantity, default=4.*unit.femtosecond
            step size of nonequilibrium integration; a bare number is taken as femtoseconds
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure in atms for simulation; a bare number is taken as atmospheres
        temperature: float, default=300.,
            temperature in K for simulation; a bare number is taken as kelvin
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            forwarded to `run_setup` as 'solvent_padding'
        phases: list, default = ['complex','solvent','vacuum']
            phases to run, where allowed phases are 'complex','solvent','vacuum'
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.0.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            note that the 'anneal_1,4s' option passed on is always set to False here
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2 : bool, default=False
            whether to use v2 softcore
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration iterations to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration for SETUP
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate handed to `relax_structure`
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        **kwargs
            merged into the options dict.  'solvent_projid' and
            'complex_projid' are read from it when the 'solvent' / 'complex'
            phases are set up, so they must be supplied for those phases.
    """
    from perses.app.setup_relative_calculation import run_setup
    from perses.utils import data

    # Bare numbers get their default units up front (the default temperature
    # is the int 300), so run_setup, the integrator and relax_structure all
    # see unit-bearing quantities.  Only existing names are rebound here so
    # the locals() snapshot below keeps exactly the same keys as before.
    if isinstance(temperature, (int, float)):
        temperature = temperature * unit.kelvin
    if isinstance(timestep, (int, float)):
        timestep = timestep * unit.femtosecond
    if isinstance(pressure, (int, float)):
        pressure = pressure * unit.atmosphere

    #turn all of the args into a dict for passing to run_setup
    # (copied so later edits never touch the frame's own locals mapping)
    setup_options = dict(locals())
    setup_options.update(kwargs)

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    setup_dict = run_setup(setup_options,
                           serialize_systems=False,
                           build_samplers=False)
    topology_proposals = setup_dict['topology_proposals']
    htfs = setup_dict['hybrid_topology_factories']

    #create solvent and complex directories
    for phase in htfs.keys():
        _logger.info(f'PHASE RUNNING: {phase}')
        _logger.info(f'Setting up phase {phase}')
        if phase == 'solvent':
            phase_dir = f"{setup_options['solvent_projid']}/RUNS"
        if phase == 'complex':
            phase_dir = f"{setup_options['complex_projid']}/RUNS"
        if phase == 'vacuum':
            phase_dir = 'VACUUM/RUNS'

        run_dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        # makedirs also creates the missing <projid>/RUNS parents
        os.makedirs(run_dir, exist_ok=True)

        np.savez_compressed(f'{run_dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{run_dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{run_dir}/integrator.xml")

        #create and serialize a state
        # Relaxation is best-effort: a failure is printed and recorded in the
        # references file rather than aborting the remaining phases.
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup)
            data.serialize(state, f"{run_dir}/state.xml.bz2")
        except Exception as e:
            print(e)
            passed = False
        else:
            passed = True

        if passed:
            # Only a freshly relaxed state is snapshotted; after a failure
            # `state` would be unbound or left over from a previous phase.
            pos = np.asarray(state.getPositions(asNumpy=True))

            import mdtraj as md
            top = htfs[phase].hybrid_topology
            np.save(f'{run_dir}/hybrid_topology', top)
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{run_dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        core_parameters = {
            'numSteps': ncycles * nsteps_per_cycle,
            'xtcFreq': 1000 * nsteps_per_ps,  # once per ns
            'xtcAtoms': 'solute',
            'precision': 'mixed',
            'globalVarFilename': 'globals.csv',
            'globalVarFreq': 10 * nsteps_per_ps,
        }

        # Serialize core.xml
        import dicttoxml
        with open(f'{run_dir}/core.xml', 'wt') as outfile:
            from xml.dom.minidom import parseString
            xml_bytes = dicttoxml.dicttoxml(core_parameters,
                                            custom_root='config',
                                            attr_type=False)
            outfile.write(parseString(xml_bytes).toprettyxml())

        #create a logger for reference
        # ('passed_strucutre_relax' is a stored key; its spelling is kept so
        # existing readers of the references file keep working)
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }
        np.save(f'{run_dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        render_atom_mapping(f'{run_dir}/atom_map.png', tp['ligand_oemol_old'],
                            tp['ligand_oemol_new'],
                            tp['non_offset_new_to_old_atom_map'])
def run_neq_fah_setup(ligand_file,
                      old_ligand_index,
                      new_ligand_index,
                      forcefield_files,
                      trajectory_directory,
                      complex_box_dimensions=(9.8, 9.8, 9.8),
                      solvent_box_dimensions=(3.5, 3.5, 3.5),
                      timestep=4.0,
                      eq_splitting='V R O R V',
                      neq_splitting='V R H O R V',
                      measure_shadow_work=False,
                      pressure=1.0,
                      temperature=300. * unit.kelvin,
                      solvent_padding=9 * unit.angstroms,
                      phases=['complex', 'solvent', 'vacuum'],
                      phase_project_ids=None,
                      protein_pdb=None,
                      receptor_mol2=None,
                      small_molecule_forcefield='openff-1.2.0',
                      small_molecule_parameters_cache=None,
                      atom_expression=['IntType'],
                      bond_expression=['DefaultBonds'],
                      spectators=None,
                      neglect_angles=False,
                      anneal_14s=False,
                      nonbonded_method='PME',
                      map_strength=None,
                      softcore_v2=False,
                      save_setup_pickle_as=None,
                      render_atom_map=False,
                      alchemical_functions=DEFAULT_ALCHEMICAL_FUNCTIONS,
                      num_equilibration_iterations=1000,
                      num_equilibration_steps_per_iteration=250,
                      nsteps_eq=250000,
                      nsteps_neq=250000,
                      fe_type='fah',
                      collision_rate=1. / unit.picoseconds,
                      collision_rate_setup=90. / unit.picoseconds,
                      constraint_tolerance=1e-6,
                      n_steps_per_move_application=250,
                      globalVarFreq=250,
                      setup='small_molecule',
                      protein_kwargs=None,
                      ionic_strength=0.15 * unit.molar,
                      remove_constraints='not water',
                      **kwargs):
    """
    main execution function that will:
        - create a directory for each phase, ``<phase_project_ids[phase]>/RUNS/<trajectory_directory>``
        - create topology proposals for all phases
        - create/serialize hybrid factories for all phases
        - create/serialize an integrator (via `make_neq_integrator`) for all phases
        - relax generated structures (via `relax_structure`) for all phases
        - create/serialize a state associated with the relaxed structures
        - create/serialize a `core.xml` object for all phases

    Every argument (plus everything in ``**kwargs`` and ``protein_kwargs``) is
    collected into a single options dict that is handed to `run_setup` (or
    `PointMutationExecutor` when ``setup='protein'``) and `make_neq_integrator`.

    Example
    -------
    run_neq_fah_setup('ligand.sdf', 0, 1,['amber/ff14SB.xml','amber/tip3p_standard.xml','amber/tip3p_HFE_multivalent.xml'],'RUN0',protein_pdb='protein.pdb', phases=['complex','solvent','vacuum'],phase_project_ids={'complex':14320,'solvent':14321,'vacuum':'vacuum'})

    arguments
        ligand_file : str
            .sdf (or any openeye-readable) file containing ligand labeled indices and structures
        old_ligand_index : int
            index of the old ligand
        new_ligand_index : int
            index of the new ligand
        forcefield_files : list of str
            list of forcefields to use for complex/solvent parameterization
        trajectory_directory : str
            RUNXXX for FAH deployment
        complex_box_dimensions : Vec3, default=(9.8, 9.8, 9.8)
            define box dimensions of complex phase (in nm); also used as
            'apo_box_dimensions' when that option is not supplied
        solvent_box_dimensions : Vec3, default=(3.5, 3.5, 3.5)
            define box dimensions of solvent phase (in nm)
        timestep : float, default=4.
            step size of nonequilibrium integration; a bare number is taken as femtoseconds
        eq_splitting : str, default = 'V R O R V'
            splitting string of relaxation dynamics
        neq_splitting : str, default = 'V R H O R V'
            splitting string of nonequilibrium dynamics
        measure_shadow_work : bool, default=False
            True/False to measure shadow work
        pressure: float, default=1.
            pressure for simulation; a bare number is taken as atmospheres
        temperature: simtk.unit.Quantity, default=300.*unit.kelvin,
            temperature for simulation; a bare number is taken as kelvin
        solvent_padding : simtk.unit.Quantity, default=9*unit.angstroms
            included in the options dict as 'solvent_padding'
        phases: list, default = ['complex','solvent','vacuum']
            phases to run; each one must have an entry in `phase_project_ids`
        phase_project_ids : dict, default=None
            maps each phase name to the project id used as the top-level
            output directory.  Required whenever `phases` is non-empty.
        protein_pdb : str, default=None
            name of protein file
        receptor_mol2 : str, default=None
            name of receptor file if protein_pdb not provided
        small_molecule_forcefield : str, default='openff-1.2.0'
            small molecule forcefield filename
        small_molecule_parameters_cache : str, default=None
            cache file containing small molecule forcefield files
        atom_expression : list default=['IntType']
            list of string for atom mapping criteria. see oechem.OEExprOpts for options
        bond_expression : list default=['DefaultBonds']
            list of string for bond mapping criteria. see oechem.OEExprOpts for options
        map_strength : 'str', default=None
            atom and bond expressions will be ignored, and either a 'weak', 'default' or 'strong' map_strength will be used.
        spectators : str, default=None
            path to any non-alchemical atoms in simulation
        neglect_angles : bool, default=False
            whether to use angle terms in building of unique-new groups. False is strongly recommended
        anneal_14s : bool, default False
            Whether to anneal 1,4 interactions over the protocol;
            note that the 'anneal_1,4s' option passed on is always set to False here
        nonbonded_method : str, default='PME'
            nonbonded method to use
        softcore_v2 : bool, default=False
            whether to use v2 softcore
        alchemical_functions : dict, default=DEFAULT_ALCHEMICAL_FUNCTIONS
            alchemical functions for transformation
        num_equilibration_iterations: int, default=1000
            number of equilibration iterations to do during set up
        num_equilibration_steps_per_iteration: int, default=250,
            number of steps per iteration for SETUP
        nsteps_eq : int, default=250000
            number of normal MD steps to take for FAH integrator for PRODUCTION
        nsteps_neq : int, default=250000
            number of nonequilibrium steps to take for FAH integrator for PRODUCTION
        fe_type : str, default='fah'
            tells setup_relative_calculation() to use the fah pipeline
        collision_rate : simtk.unit.Quantity, default=1./unit.picosecond
            collision_rate for PRODUCTION
        collision_rate_setup : simtk.unit.Quantity, default=90./unit.picosecond
            collision rate handed to `relax_structure`
        constraint_tolerance : float, default=1e-6
            tolerance to use for constraints
        n_steps_per_move_application : int default=250
            number of equilibrium steps to take per move
        setup : str, default='small_molecule'
            'small_molecule' builds the systems with `run_setup`;
            'protein' builds them with `PointMutationExecutor`
        protein_kwargs : dict, default=None
            extra options merged into the options dict (after ``**kwargs``)
        ionic_strength : simtk.unit.Quantity, default=0.15*unit.molar
            included in the options dict as 'ionic_strength'
        remove_constraints : str, default='not water'
            included in the options dict as 'remove_constraints'
        **kwargs
            merged into the options dict and also forwarded to `relax_structure`

    raises
        TypeError
            if phases are requested but `phase_project_ids` is None
        AssertionError
            if `setup` is not allowed or a phase has no project id
    """
    from perses.utils import data

    # Bare numbers get their default units.  Only existing names are rebound
    # before the locals() snapshot so its keys stay exactly as before.
    if isinstance(temperature, (float, int)):
        temperature = temperature * unit.kelvin
    if isinstance(timestep, (float, int)):
        timestep = timestep * unit.femtosecond
    if isinstance(pressure, (float, int)):
        pressure = pressure * unit.atmosphere

    #turn all of the args into a dict for passing to run_setup
    # HBM - this doesn't feel particularly safe
    # (copied so later edits never touch the frame's own locals mapping)
    setup_options = dict(locals())
    #update the setup options w.r.t. kwargs
    setup_options.update(kwargs)
    if protein_kwargs is not None:
        #update the setup options w.r.t. the protein kwargs
        setup_options.update(protein_kwargs)
        if 'apo_box_dimensions' not in setup_options:
            setup_options['apo_box_dimensions'] = setup_options[
                'complex_box_dimensions']

    #setups_allowed
    setups_allowed = ['small_molecule', 'protein']
    assert setup in setups_allowed, f"setup {setup} not in setups_allowed: {setups_allowed}"

    # check there is a project_id for each phase
    if phases and phase_project_ids is None:
        # Same exception type the bare `in None` test used to raise, but with
        # a message that says what is missing.
        raise TypeError(
            f"phase_project_ids must map each requested phase {list(phases)} to a project id; got None"
        )
    for phase in phases:
        assert (
            phase in phase_project_ids
        ), f"Phase {phase} requested, but not in phase_project_ids {phase_project_ids.keys()}"

    #some modification for fah-specific functionality:
    setup_options['trajectory_prefix'] = None
    setup_options['anneal_1,4s'] = False
    from perses.utils.openeye import generate_expression
    setup_options['atom_expr'] = generate_expression(
        setup_options['atom_expression'])
    setup_options['bond_expr'] = generate_expression(
        setup_options['bond_expression'])

    #run the run_setup to generate topology proposals and htfs
    _logger.info(f"spectators: {setup_options['spectators']}")
    if setup == 'small_molecule':
        from perses.app.setup_relative_calculation import run_setup
        setup_dict = run_setup(setup_options,
                               serialize_systems=False,
                               build_samplers=False)
        topology_proposals = setup_dict['topology_proposals']
        htfs = setup_dict['hybrid_topology_factories']
    elif setup == 'protein':
        from perses.app.relative_point_mutation_setup import PointMutationExecutor
        setup_engine = PointMutationExecutor(**setup_options)
        topology_proposals = {
            'complex': setup_engine.get_complex_htf()._topology_proposal,
            'apo': setup_engine.get_apo_htf()._topology_proposal
        }
        htfs = {
            'complex': setup_engine.get_complex_htf(),
            'apo': setup_engine.get_apo_htf()
        }

    #create solvent and complex directories
    for phase in htfs.keys():
        _logger.info(f'Setting up phase {phase}')
        phase_dir = f"{phase_project_ids[phase]}/RUNS"
        run_dir = os.path.join(os.getcwd(), phase_dir, trajectory_directory)
        # exist_ok avoids the check-then-create race
        os.makedirs(run_dir, exist_ok=True)

        # TODO - replace this with actually saving the important part of the HTF
        np.savez_compressed(f'{run_dir}/htf', htfs[phase])

        #serialize the hybrid_system
        data.serialize(htfs[phase].hybrid_system, f"{run_dir}/system.xml.bz2")

        #make and serialize an integrator
        integrator = make_neq_integrator(**setup_options)
        data.serialize(integrator, f"{run_dir}/integrator.xml")

        #create and serialize a state
        # Relaxation is best-effort: a failure is logged and recorded in the
        # references file rather than aborting the remaining phases.
        try:
            state = relax_structure(
                temperature=temperature,
                system=htfs[phase].hybrid_system,
                positions=htfs[phase].hybrid_positions,
                nequil=num_equilibration_iterations,
                n_steps_per_iteration=num_equilibration_steps_per_iteration,
                collision_rate=collision_rate_setup,
                **kwargs)
            data.serialize(state, f"{run_dir}/state.xml.bz2")
        except Exception as e:
            _logger.warning(e)
            passed = False
        else:
            passed = True

        if passed:
            # Only a freshly relaxed state is snapshotted; after a failure
            # `state` would be unbound or left over from a previous phase.
            pos = np.asarray(state.getPositions(asNumpy=True))

            import mdtraj as md
            top = htfs[phase].hybrid_topology
            np.save(f'{run_dir}/hybrid_topology', top)
            traj = md.Trajectory(pos, top)
            traj.remove_solvent(exclude=['CL', 'NA'], inplace=True)
            traj.save(f'{run_dir}/hybrid_{phase}.pdb')

        #lastly, make a core.xml
        nsteps_per_cycle = 2 * nsteps_eq + 2 * nsteps_neq
        ncycles = 1
        nsteps_per_ps = 250
        nsteps = ncycles * nsteps_per_cycle
        make_core_file(numSteps=nsteps,
                       xtcFreq=1000 * nsteps_per_ps,
                       globalVarFreq=10 * nsteps_per_ps,
                       directory=run_dir)

        #create a logger for reference
        # TODO - add more details to this
        # ('passed_strucutre_relax' is a stored key; its spelling is kept so
        # existing readers of the references file keep working)
        references = {
            'start_ligand': old_ligand_index,
            'end_ligand': new_ligand_index,
            'protein_pdb': protein_pdb,
            'passed_strucutre_relax': passed
        }
        np.save(f'{run_dir}/references', references)

        tp = topology_proposals
        from perses.utils.smallmolecules import render_atom_mapping
        atom_map_filename = f'{run_dir}/atom_map.png'
        if setup == 'protein':
            from perses.utils.smallmolecules import render_protein_residue_atom_mapping
            render_protein_residue_atom_mapping(tp['apo'], atom_map_filename)
        else:
            old_ligand_oemol = tp['ligand_oemol_old']
            new_ligand_oemol = tp['ligand_oemol_new']
            _map = tp['non_offset_new_to_old_atom_map']
            render_atom_mapping(atom_map_filename, old_ligand_oemol,
                                new_ligand_oemol, _map)
def validate_endstate_energies(topology_proposal,
                               htf,
                               added_energy,
                               subtracted_energy,
                               beta=1.0 / kT,
                               ENERGY_THRESHOLD=1e-6,
                               platform=DEFAULT_PLATFORM,
                               trajectory_directory=None):
    """
    Function to validate that the difference between the nonalchemical versus alchemical state at lambda = 0,1 is
    equal to the difference in valence energy (forward and reverse).

    The reduced potential is evaluated for four states, in this order:
    'real-old', 'hybrid-old', 'hybrid-new' and 'real-new'. Dispersion
    correction is switched off on deep copies of the old, new and hybrid
    systems before the energies are computed; the objects passed in are not
    modified by those calls.

    Parameters
    ----------
    topology_proposal : perses.topology_proposal.TopologyProposal object
        top_proposal for relevant transformation
    htf : perses.new_relative.HybridTopologyFactory object
        hybrid top factory for setting alchemical hybrid states
    added_energy : float
        reduced added valence energy
    subtracted_energy: float
        reduced subtracted valence energy
    beta : optional
        Kept for interface compatibility; it is not read by this function.
    ENERGY_THRESHOLD : float, default 1e-6
        Upper bound (exclusive) on the ratio
        ``abs((real - hybrid + valence) / (real + hybrid + valence))``
        that is checked at each endstate.
    platform : object with a ``getName()`` method
        Its name is handed to ``configure_platform`` together with a
        'Reference' fallback and 'double' precision.
    trajectory_directory : str, optional
        When not None, the context state of every evaluated endstate is
        serialized to ``f'{trajectory_directory}-{state_name}-state.gz'``
        (the value is used as a filename prefix).

    Returns
    -------
    zero_state_energy_difference : float
        absolute reduced potential difference of the nonalchemical and alchemical lambda = 0 state (corrected for valence energy).
    one_state_energy_difference : float
        absolute reduced potential difference of the nonalchemical and alchemical lambda = 1 state (corrected for valence energy).

    Raises
    ------
    AssertionError
        If the ratio at either endstate is not below ``ENERGY_THRESHOLD``.
    KeyError
        If the old or the new system contains no force whose class is named
        'NonbondedForce'.
    """
    import copy
    from perses.dispersed.utils import configure_platform
    from perses.utils import data

    platform = configure_platform(platform.getName(),
                                  fallback_platform_name='Reference',
                                  precision='double')

    # create copies of old/new systems and set the dispersion correction
    top_proposal = copy.deepcopy(topology_proposal)
    for system in (top_proposal._old_system, top_proposal._new_system):
        forces = {
            system.getForce(index).__class__.__name__: system.getForce(index)
            for index in range(system.getNumForces())
        }
        forces['NonbondedForce'].setUseDispersionCorrection(False)

    # create copy of hybrid system, define old and new positions, and turn
    # off dispersion correction
    hybrid_system = copy.deepcopy(htf.hybrid_system)
    for force_index in range(hybrid_system.getNumForces()):
        force = hybrid_system.getForce(force_index)
        if force.__class__.__name__ == 'NonbondedForce':
            force.setUseDispersionCorrection(False)

    old_positions, new_positions = htf._old_positions, htf._new_positions

    # generate endpoint thermostates
    nonalch_zero, nonalch_one, alch_zero, alch_one = generate_endpoint_thermodynamic_states(
        hybrid_system, top_proposal)

    # (state name, thermodynamic state, positions, box vectors); the order
    # here fixes the order of the entries in rp_list below.
    attrib_list = [
        ('real-old', nonalch_zero, old_positions,
         top_proposal._old_system.getDefaultPeriodicBoxVectors()),
        ('hybrid-old', alch_zero, htf._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        ('hybrid-new', alch_one, htf._hybrid_positions,
         hybrid_system.getDefaultPeriodicBoxVectors()),
        ('real-new', nonalch_one, new_positions,
         top_proposal._new_system.getDefaultPeriodicBoxVectors()),
    ]

    # compute reduced energies
    rp_list = []
    for state_name, thermo_state, pos, box_vectors in attrib_list:
        integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)
        context = thermo_state.create_context(integrator, platform)
        samplerstate = states.SamplerState(positions=pos,
                                           box_vectors=box_vectors)
        samplerstate.apply_to_context(context)
        rp = thermo_state.reduced_potential(context)
        rp_list.append(rp)

        energy_comps = compute_potential_components(context)
        for name, energy in energy_comps:
            _logger.debug("\t\t\t{}: {}".format(name, energy))
        _logger.debug(
            f'added forces:{sum(energy for name, energy in energy_comps)}')
        _logger.debug(f'rp: {rp}')

        if trajectory_directory is not None:
            # Build the path once so the log message reports the file that
            # is actually written.
            state_path = f'{trajectory_directory}-{state_name}-state.gz'
            _logger.info(f'Saving {state_name} state xml to {state_path}')
            context_state = context.getState(getPositions=True,
                                             getVelocities=True,
                                             getForces=True,
                                             getEnergy=True,
                                             getParameters=True)
            data.serialize(context_state, state_path)
        del context, integrator

    nonalch_zero_rp, alch_zero_rp, alch_one_rp, nonalch_one_rp = rp_list

    zero_difference = nonalch_zero_rp - alch_zero_rp + added_energy
    one_difference = nonalch_one_rp - alch_one_rp + subtracted_energy

    # The checks raise AssertionError explicitly (rather than via `assert`)
    # so they still run under `python -O`. `not (ratio < threshold)` keeps
    # the behaviour of failing when the ratio is NaN.
    ratio = abs(zero_difference /
                (nonalch_zero_rp + alch_zero_rp + added_energy))
    if not (ratio < ENERGY_THRESHOLD):
        raise AssertionError(
            f"The ratio in energy difference for the ZERO state is {ratio}.\n This is greater than the threshold of {ENERGY_THRESHOLD}.\n real-zero: {nonalch_zero_rp} \n alc-zero: {alch_zero_rp} \nadded-valence: {added_energy}"
        )

    ratio = abs(one_difference /
                (nonalch_one_rp + alch_one_rp + subtracted_energy))
    if not (ratio < ENERGY_THRESHOLD):
        raise AssertionError(
            f"The ratio in energy difference for the ONE state is {ratio}.\n This is greater than the threshold of {ENERGY_THRESHOLD}.\n real-one: {nonalch_one_rp} \n alc-one: {alch_one_rp} \nsubtracted-valence: {subtracted_energy}"
        )

    return abs(zero_difference), abs(one_difference)