def _paprika_build_release_protocols(
    cls,
    orientation_replicator: ProtocolReplicator,
    restraint_schemas: Dict[str, ProtocolPath],
    solvation_template: coordinates.SolvateExistingStructure,
    minimization_template: openmm.OpenMMEnergyMinimisation,
    thermalization_template: openmm.OpenMMSimulation,
    equilibration_template: openmm.OpenMMSimulation,
    production_template: openmm.OpenMMSimulation,
):
    """Build the protocols which make up the release part of a host-guest
    APR calculation.

    Parameters
    ----------
    orientation_replicator
        The replicator whose placeholder id is embedded in the ids of the
        restraint generation, restraint application and analysis protocols.
    restraint_schemas
        The restraint schemas to hand to the protocol which generates the
        release restraints.
    solvation_template
        The solvation protocol to deep copy; the copy is given the id
        ``release_solvate_coordinates`` and pointed at the aligned host.
    minimization_template
        Forwarded to ``cls._paprika_build_simulation_protocols``.
    thermalization_template
        Forwarded to ``cls._paprika_build_simulation_protocols``.
    equilibration_template
        Forwarded to ``cls._paprika_build_simulation_protocols``.
    production_template
        Forwarded to ``cls._paprika_build_simulation_protocols``.

    Returns
    -------
        A tuple of the list of release protocols, the replicator defined over
        the release windows, and the path to the ``result`` output of the
        release phase analysis protocol.
    """

    def _global_input(attribute_name):
        # A new path object is built on every call so that no two protocol
        # inputs end up sharing the same instance.
        return ProtocolPath(attribute_name, "global")

    # The replicator used to set up each of the release windows.
    release_replicator = ProtocolReplicator("release_replicator")
    release_replicator.template_values = _global_input("release_windows_indices")

    orientation_placeholder = orientation_replicator.placeholder_id
    release_replicator_id = (
        f"{release_replicator.placeholder_id}_" f"{orientation_placeholder}"
    )

    # Keep only the solvent components, as needed by the solvation step.
    solvent_filter = miscellaneous.FilterSubstanceByRole("host-filter_solvent")
    solvent_filter.input_substance = _global_input("host_substance")
    solvent_filter.component_roles = [Component.Role.Solvent]

    # Host coordinates which are correctly aligned to the z-axis.
    aligned_host = PrepareReleaseCoordinates("release_align_coordinates")
    aligned_host.substance = _global_input("host_substance")
    aligned_host.complex_file_path = _global_input("host_coordinate_path")

    # Work on a copy so that the caller's template is left untouched.
    solvated_host = copy.deepcopy(solvation_template)
    solvated_host.id = "release_solvate_coordinates"
    solvated_host.substance = ProtocolPath("filtered_substance", solvent_filter.id)
    solvated_host.solute_coordinate_file = ProtocolPath(
        "output_coordinate_path", aligned_host.id
    )

    # The force field parameters only need to be applied for one of the
    # windows.
    parameterized_host = forcefield.BaseBuildSystem("release_apply_parameters")
    parameterized_host.force_field_path = _global_input("force_field_path")
    parameterized_host.substance = _global_input("host_substance")
    parameterized_host.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_host.id
    )

    # Insert the dummy atoms into the solvated, parameterized host.
    dummy_atoms = AddDummyAtoms("release_add_dummy_atoms")
    dummy_atoms.substance = _global_input("host_substance")
    dummy_atoms.input_coordinate_path = ProtocolPath(
        "coordinate_file_path", solvated_host.id
    )
    dummy_atoms.input_system = ProtocolPath(
        "parameterized_system", parameterized_host.id
    )
    dummy_atoms.offset = _global_input("dummy_atom_offset")

    # Generate the restraints, then apply them to the system of each window.
    restraint_generator = GenerateReleaseRestraints(
        f"release_generate_restraints_{orientation_placeholder}"
    )
    restraint_generator.host_coordinate_path = ProtocolPath(
        "output_coordinate_path", dummy_atoms.id
    )
    restraint_generator.release_lambdas = _global_input("release_lambdas")
    restraint_generator.restraint_schemas = restraint_schemas

    restraint_applier = ApplyRestraints(
        f"release_apply_restraints_{release_replicator_id}"
    )
    restraint_applier.restraints_path = ProtocolPath(
        "restraints_path", restraint_generator.id
    )
    restraint_applier.phase = "release"
    restraint_applier.window_index = ReplicatorValue(release_replicator.id)
    restraint_applier.input_system = ProtocolPath("output_system", dummy_atoms.id)

    # The minimization -> thermalization -> equilibration -> production chain.
    (
        minimization,
        thermalization,
        equilibration,
        production,
    ) = cls._paprika_build_simulation_protocols(
        ProtocolPath("output_coordinate_path", dummy_atoms.id),
        ProtocolPath("output_system", restraint_applier.id),
        "release",
        release_replicator_id,
        minimization_template,
        thermalization_template,
        equilibration_template,
        production_template,
    )

    # Analyze the trajectories produced for the release phase.
    phase_analysis = AnalyzeAPRPhase(
        f"analyze_release_phase_{orientation_placeholder}"
    )
    phase_analysis.topology_path = ProtocolPath(
        "output_coordinate_path", dummy_atoms.id
    )
    phase_analysis.trajectory_paths = ProtocolPath(
        "trajectory_file_path", production.id
    )
    phase_analysis.phase = "release"
    phase_analysis.restraints_path = ProtocolPath(
        "restraints_path", restraint_generator.id
    )

    # Everything which makes up the release part of the calculation, in the
    # order in which it was constructed.
    protocols = [
        solvent_filter,
        aligned_host,
        solvated_host,
        parameterized_host,
        dummy_atoms,
        restraint_generator,
        restraint_applier,
        minimization,
        thermalization,
        equilibration,
        production,
        phase_analysis,
    ]

    return protocols, release_replicator, ProtocolPath("result", phase_analysis.id)
def default_yank_schema(existing_schema=None):
    """Build the default YANK based schema for estimating a host-guest
    binding affinity.

    The workflow docks the ligand into the receptor, solvates and
    parameterizes both the complex and the lone ligand, and then runs a
    ligand-receptor YANK protocol whose ``free_energy_difference`` output is
    used as the final value.

    Parameters
    ----------
    existing_schema: SimulationSchema, optional
        An existing schema whose settings should be re-used. A deep copy of it
        is returned with its `workflow_schema` replaced by the one built here.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """

    calculation_schema = SimulationSchema()

    if existing_schema is not None:

        assert isinstance(existing_schema, SimulationSchema)
        calculation_schema = copy.deepcopy(existing_schema)

    property_name = HostGuestBindingAffinity.__name__

    workflow = WorkflowSchema(property_type=property_name)
    workflow.id = "{}{}".format(property_name, "Schema")

    def _register(protocol):
        # Store the schema of a fully configured protocol under its id.
        workflow.protocols[protocol.id] = protocol.schema

    # Pull the guest out of the full substance; exactly one ligand is
    # supported.
    ligand_filter = miscellaneous.FilterSubstanceByRole("filter_ligand")
    ligand_filter.input_substance = ProtocolPath("substance", "global")
    ligand_filter.component_roles = [Component.Role.Ligand]
    ligand_filter.expected_components = 1
    _register(ligand_filter)

    # Pull the host out of the full substance; exactly one receptor is
    # supported.
    receptor_filter = miscellaneous.FilterSubstanceByRole("filter_receptor")
    receptor_filter.input_substance = ProtocolPath("substance", "global")
    receptor_filter.component_roles = [Component.Role.Receptor]
    receptor_filter.expected_components = 1
    _register(receptor_filter)

    # Dock the guest so that it is positioned within the host.
    docking = coordinates.BuildDockedCoordinates("perform_docking")
    docking.ligand_substance = ProtocolPath("filtered_substance", ligand_filter.id)
    docking.receptor_coordinate_file = ProtocolPath("receptor_mol2", "global")
    _register(docking)

    # The solvent components are shared by both of the solvation steps.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = ProtocolPath("substance", "global")
    solvent_filter.component_roles = [Component.Role.Solvent]
    _register(solvent_filter)

    # Solvate the docked complex.
    solvated_complex = coordinates.SolvateExistingStructure("solvate_complex")
    solvated_complex.max_molecules = 1000
    solvated_complex.substance = ProtocolPath(
        "filtered_substance", solvent_filter.id
    )
    solvated_complex.solute_coordinate_file = ProtocolPath(
        "docked_complex_coordinate_path", docking.id
    )
    _register(solvated_complex)

    # Parameterize the solvated complex.
    complex_system = forcefield.BaseBuildSystem("build_solvated_complex_system")
    complex_system.force_field_path = ProtocolPath("force_field_path", "global")
    complex_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_complex.id
    )
    complex_system.substance = ProtocolPath("substance", "global")
    complex_system.charged_molecule_paths = [ProtocolPath("receptor_mol2", "global")]
    _register(complex_system)

    # Solvate the ligand on its own.
    solvated_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
    solvated_ligand.max_molecules = 1000
    solvated_ligand.substance = ProtocolPath("filtered_substance", solvent_filter.id)
    solvated_ligand.solute_coordinate_file = ProtocolPath(
        "docked_ligand_coordinate_path", docking.id
    )
    _register(solvated_ligand)

    # Parameterize the solvated ligand.
    ligand_system = forcefield.BaseBuildSystem("build_solvated_ligand_system")
    ligand_system.force_field_path = ProtocolPath("force_field_path", "global")
    ligand_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_ligand.id
    )
    ligand_system.substance = ProtocolPath("substance", "global")
    _register(ligand_system)

    # Estimate the binding free energy with YANK.
    binding_yank = yank.LigandReceptorYankProtocol("yank_protocol")
    binding_yank.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    binding_yank.number_of_iterations = 2000
    binding_yank.steps_per_iteration = 500
    binding_yank.checkpoint_interval = 10
    binding_yank.verbose = True
    binding_yank.force_field_path = ProtocolPath("force_field_path", "global")
    binding_yank.ligand_residue_name = ProtocolPath(
        "ligand_residue_name", docking.id
    )
    binding_yank.receptor_residue_name = ProtocolPath(
        "receptor_residue_name", docking.id
    )
    binding_yank.solvated_ligand_coordinates = ProtocolPath(
        "coordinate_file_path", solvated_ligand.id
    )
    binding_yank.solvated_ligand_system = ProtocolPath(
        "parameterized_system", ligand_system.id
    )
    binding_yank.solvated_complex_coordinates = ProtocolPath(
        "coordinate_file_path", solvated_complex.id
    )
    binding_yank.solvated_complex_system = ProtocolPath(
        "parameterized_system", complex_system.id
    )
    _register(binding_yank)

    # The value of the property is read from the YANK protocol.
    workflow.final_value_source = ProtocolPath(
        "free_energy_difference", binding_yank.id
    )

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=2000
):
    """Build the default schema for estimating this class of property from
    direct simulations.

    A fully solvated system is built, minimised and equilibrated, a single
    solute molecule is built in vacuum, and YANK is then run between the two
    inside of a conditional group.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within. At most one
        of `absolute_tolerance` and `relative_tolerance` may be set.
    relative_tolerance: float, optional
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The maximum number of molecules to use in the solvated system.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    # A convergence condition is only added when some tolerance was requested.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # -- The fully solvated system.
    solvated_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_solvated_coordinates"
    )
    solvated_coordinates.substance = ProtocolPath("substance", "global")
    solvated_coordinates.max_molecules = n_molecules

    solvated_parameters = forcefield.BaseBuildSystem("assign_solvated_parameters")
    solvated_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    solvated_parameters.substance = ProtocolPath("substance", "global")
    solvated_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    # A quick minimisation of the full system gives YANK a better starting
    # point for its own minimisation.
    minimisation = openmm.OpenMMEnergyMinimisation("energy_minimisation")
    minimisation.system_path = ProtocolPath("system_path", solvated_parameters.id)
    minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", solvated_coordinates.id
    )

    equilibration = openmm.OpenMMSimulation("equilibration_simulation")
    equilibration.ensemble = Ensemble.NPT
    equilibration.steps_per_iteration = 100000
    equilibration.output_frequency = 10000
    equilibration.timestep = 2.0 * unit.femtosecond
    equilibration.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    equilibration.system_path = ProtocolPath("system_path", solvated_parameters.id)
    equilibration.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", minimisation.id
    )

    # -- Split the substance into its solvent and solute parts. The solute
    #    only substance is what gets simulated in the vacuum phase.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = ProtocolPath("substance", "global")
    solvent_filter.component_roles = [Component.Role.Solvent]

    solute_filter = miscellaneous.FilterSubstanceByRole("filter_solute")
    solute_filter.input_substance = ProtocolPath("substance", "global")
    solute_filter.component_roles = [Component.Role.Solute]

    # -- The solute in vacuum system.
    vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
        "build_vacuum_coordinates"
    )
    vacuum_coordinates.substance = ProtocolPath(
        "filtered_substance", solute_filter.id
    )
    vacuum_coordinates.max_molecules = 1

    vacuum_parameters = forcefield.BaseBuildSystem("assign_parameters")
    vacuum_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    vacuum_parameters.substance = ProtocolPath("filtered_substance", solute_filter.id)
    vacuum_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )

    # -- The YANK protocol. Solvent 1 is the solvated phase, while solvent 2
    #    is an empty substance paired with the vacuum coordinates.
    solvation_yank = yank.SolvationYankProtocol("run_solvation_yank")
    solvation_yank.solute = ProtocolPath("filtered_substance", solute_filter.id)
    solvation_yank.solvent_1 = ProtocolPath("filtered_substance", solvent_filter.id)
    solvation_yank.solvent_2 = Substance()
    solvation_yank.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    solvation_yank.steps_per_iteration = 500
    solvation_yank.checkpoint_interval = 50
    solvation_yank.solvent_1_coordinates = ProtocolPath(
        "output_coordinate_file", equilibration.id
    )
    solvation_yank.solvent_1_system = ProtocolPath(
        "system_path", solvated_parameters.id
    )
    solvation_yank.solvent_2_coordinates = ProtocolPath(
        "coordinate_file_path", vacuum_coordinates.id
    )
    solvation_yank.solvent_2_system = ProtocolPath(
        "system_path", vacuum_parameters.id
    )

    # -- The group which re-runs YANK until the free energy has been
    #    determined to within the target uncertainty.
    convergence_group = groups.ConditionalGroup("conditional_group")
    convergence_group.max_iterations = 20

    if use_target_uncertainty:

        converged = groups.ConditionalGroup.Condition()
        converged.type = groups.ConditionalGroup.Condition.Type.LessThan
        converged.right_hand_value = ProtocolPath("target_uncertainty", "global")
        converged.left_hand_value = ProtocolPath(
            "estimated_free_energy.error", convergence_group.id, solvation_yank.id
        )

        convergence_group.add_condition(converged)

    # The total number of iterations YANK should have run for is 2000 times
    # the current iteration of the group.
    iteration_total = miscellaneous.MultiplyValue("total_iterations")
    iteration_total.value = 2000
    iteration_total.multiplier = ProtocolPath(
        "current_iteration", convergence_group.id
    )

    # This is what extends the simulation after each iteration.
    solvation_yank.number_of_iterations = ProtocolPath("result", iteration_total.id)

    convergence_group.add_protocols(iteration_total, solvation_yank)

    # -- The full workflow schema. The YANK protocols are only included via
    #    the group which contains them.
    top_level_protocols = (
        solvated_coordinates,
        solvated_parameters,
        minimisation,
        equilibration,
        solvent_filter,
        solute_filter,
        vacuum_coordinates,
        vacuum_parameters,
        convergence_group,
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [protocol.schema for protocol in top_level_protocols]
    workflow.final_value_source = ProtocolPath(
        "estimated_free_energy", convergence_group.id, solvation_yank.id
    )

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def generate_base_reweighting_protocols(
    statistical_inefficiency: S,
    reweight_observable: T,
    replicator_id: str = "data_replicator",
    id_suffix: str = "",
) -> Tuple[ReweightingProtocols[S, T], ProtocolReplicator]:
    """Build the protocols which, once combined into a workflow schema,
    reweight a set of cached simulation data to estimate the average value of
    an observable.

    Parameters
    ----------
    statistical_inefficiency
        The protocol which provides the time series statistics of the
        observable of interest. Its id must contain the placeholder of the
        data replicator. The statistics are used to decorrelate the cached
        data prior to reweighting.
    reweight_observable
        The MBAR reweighting protocol used to reweight the observable to the
        target state. Its id must not contain the placeholder of the data
        replicator. The reduced potentials, observable and frame counts
        inputs of this object are set by this method.
    replicator_id: str
        The id to use for the cached data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
        The protocols to add to the workflow, and the replicator which will
        clone the replicated protocols for each piece of cached simulation
        data.
    """

    # The replicator applies the replicated protocols once per piece of cached
    # simulation data.
    data_replicator = ProtocolReplicator(replicator_id=replicator_id)
    data_replicator.template_values = ProtocolPath("full_system_data", "global")

    # Sanity check the inputs.
    assert isinstance(statistical_inefficiency, analysis.BaseAverageObservable)

    assert data_replicator.placeholder_id in statistical_inefficiency.id
    assert data_replicator.placeholder_id not in reweight_observable.id

    replicator_suffix = f"_{data_replicator.placeholder_id}{id_suffix}"

    # Unpack each piece of stored data.
    unpacked_data = storage.UnpackStoredSimulationData(
        "unpack_data{}".format(replicator_suffix)
    )
    unpacked_data.simulation_data_path = ReplicatorValue(replicator_id)

    # Stitch the individual trajectories and observables together.
    joined_trajectories = reweighting.ConcatenateTrajectories(
        f"join_trajectories{id_suffix}"
    )
    joined_trajectories.input_coordinate_paths = ProtocolPath(
        "coordinate_file_path", unpacked_data.id
    )
    joined_trajectories.input_trajectory_paths = ProtocolPath(
        "trajectory_file_path", unpacked_data.id
    )

    joined_observables = reweighting.ConcatenateObservables(
        f"join_observables{id_suffix}"
    )
    joined_observables.input_observables = ProtocolPath(
        "observables", unpacked_data.id
    )

    # Reduced potentials of each reference state, evaluated over the joined
    # trajectory.
    reference_system = forcefield.BaseBuildSystem(f"build_system{replicator_suffix}")
    reference_system.force_field_path = ProtocolPath(
        "force_field_path", unpacked_data.id
    )
    reference_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpacked_data.id
    )
    reference_system.substance = ProtocolPath("substance", unpacked_data.id)

    reference_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential{replicator_suffix}"
    )
    reference_potential.parameterized_system = ProtocolPath(
        "parameterized_system", reference_system.id
    )
    reference_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", unpacked_data.id
    )
    reference_potential.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpacked_data.id
    )
    reference_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", joined_trajectories.id
    )

    # Reduced potential of the target state.
    target_system = forcefield.BaseBuildSystem(f"build_system_target{id_suffix}")
    target_system.force_field_path = ProtocolPath("force_field_path", "global")
    target_system.substance = ProtocolPath("substance", "global")
    target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", joined_trajectories.id
    )

    target_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential_target{id_suffix}"
    )
    target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    target_potential.parameterized_system = ProtocolPath(
        "parameterized_system", target_system.id
    )
    target_potential.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", joined_trajectories.id
    )
    target_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", joined_trajectories.id
    )
    target_potential.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # The observable gradients. Note that the observables input of this
    # protocol is not set within this method.
    zeroed_gradients = gradients.ZeroGradients(f"zero_gradients{id_suffix}")
    zeroed_gradients.force_field_path = ProtocolPath("force_field_path", "global")
    zeroed_gradients.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Decorrelate the target potentials and the observables.
    if not isinstance(statistical_inefficiency, analysis.BaseAverageObservable):
        raise NotImplementedError()

    uncorrelated_target_potential = analysis.DecorrelateObservables(
        f"decorrelate_target_potential{id_suffix}"
    )
    uncorrelated_target_potential.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    uncorrelated_target_potential.input_observables = ProtocolPath(
        "output_observables", target_potential.id
    )

    uncorrelated_observable = analysis.DecorrelateObservables(
        f"decorrelate_observable{id_suffix}"
    )
    uncorrelated_observable.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    uncorrelated_observable.input_observables = ProtocolPath(
        "output_observables", zeroed_gradients.id
    )

    # Decorrelate the reference potentials. The statistics have to take a
    # detour through an un-replicated dummy protocol: because
    # `statistical_inefficiency` and the reference decorrelation protocol are
    # replicated by the same replicator, a direct reference would resolve
    # replica X to the statistics of replica X only, rather than to the list
    # of the statistics of every replica.
    statistics_relay = miscellaneous.DummyProtocol(
        f"replicated_statistics{id_suffix}"
    )
    statistics_relay.input_value = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )

    uncorrelated_reference_potential = analysis.DecorrelateObservables(
        f"decorrelate_reference_potential{replicator_suffix}"
    )
    uncorrelated_reference_potential.time_series_statistics = ProtocolPath(
        "output_value", statistics_relay.id
    )
    uncorrelated_reference_potential.input_observables = ProtocolPath(
        "output_observables", reference_potential.id
    )

    # Wire the MBAR protocol up to the decorrelated data.
    reweight_observable.reference_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", uncorrelated_reference_potential.id
    )
    reweight_observable.target_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", uncorrelated_target_potential.id
    )
    reweight_observable.observable = ProtocolPath(
        "output_observables", uncorrelated_observable.id
    )
    reweight_observable.frame_counts = ProtocolPath(
        "time_series_statistics.n_uncorrelated_points", statistical_inefficiency.id
    )

    # The positional order below must match the fields of
    # `ReweightingProtocols`.
    protocols = ReweightingProtocols(
        unpacked_data,
        joined_trajectories,
        joined_observables,
        reference_system,
        reference_potential,
        target_system,
        target_potential,
        statistical_inefficiency,
        statistics_relay,
        uncorrelated_reference_potential,
        uncorrelated_target_potential,
        uncorrelated_observable,
        zeroed_gradients,
        reweight_observable,
    )

    return protocols, data_replicator
def generate_simulation_protocols(
    analysis_protocol: S,
    use_target_uncertainty: bool,
    id_suffix: str = "",
    conditional_group: Optional[ConditionalGroup] = None,
    n_molecules: int = 1000,
) -> Tuple[SimulationProtocols[S], ProtocolPath, StoredSimulationData]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to run a single simulation to estimate the average value of an observable.

    The protocols returned will:

        1) Build a set of liquid coordinates for the
           property substance using packmol.

        2) Assign a set of smirnoff force field parameters
           to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps
           using a timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times):

            5a) Run a longer NPT production simulation for 1000000 steps using a
                timestep of 2fs

            5b) Extract the average value of an observable and its uncertainty.

            5c) If a convergence mode is set by the options, check if the target
                uncertainty has been met. If not, repeat steps 5a), 5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol
        The protocol which will extract the observable of
        interest from the generated simulation data. Its thermodynamic state and
        potential energies inputs are set by this method.
    use_target_uncertainty
        Whether to run the simulation until the observable is
        estimated to within the target uncertainty. This is only used when the
        default conditional group is constructed.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ConditionalGroup, optional
        A custom group to wrap the main simulation / extraction
        protocols within. It is up to the caller of this method to
        manually add the convergence conditions to this group.
        If `None`, a default group with uncertainty convergence
        conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
        The protocols to add to the workflow, a reference to the average value of the
        estimated observable (an ``Observable`` object), and an object which describes
        the default data from a simulation to store, such as the uncorrelated statistics
        and configurations.

    Raises
    ------
    ValueError
        If `analysis_protocol` is not a ``BaseAverageObservable``. This is checked
        before any of the passed in objects are modified.
    """

    # Validate the analysis protocol up front, so that an unsupported protocol is
    # rejected before a caller supplied conditional group has had protocols added
    # to it.
    if not isinstance(analysis_protocol, analysis.BaseAverageObservable):

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryObservable or BaseAverageObservable "
            "protocols."
        )

    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    build_coordinates.substance = ProtocolPath("substance", "global")
    build_coordinates.max_molecules = n_molecules

    assign_parameters = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    # Use the substance reported by the coordinate builder rather than the global
    # one.
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    energy_minimisation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )
    equilibration_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    # Production
    production_simulation = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    production_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )
    production_simulation.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Set up a conditional group to ensure convergence of uncertainty. A group
    # provided by the caller is used as is: neither its iteration limit nor its
    # conditions are touched.
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.left_hand_value = ProtocolPath(
                "value.error", conditional_group.id, analysis_protocol.id
            )

            conditional_group.add_condition(condition)

    # Make sure the simulation gets extended after each iteration.
    production_simulation.total_number_of_iterations = ProtocolPath(
        "current_iteration", conditional_group.id
    )

    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analyse protocol to the correct data sources
    analysis_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    analysis_protocol.potential_energies = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        production_simulation.id,
    )

    # Finally, extract uncorrelated data. These paths reach into the conditional
    # group, as that is where the production and analysis protocols now live.
    time_series_statistics = ProtocolPath(
        "time_series_statistics", conditional_group.id, analysis_protocol.id
    )
    coordinate_file = ProtocolPath(
        "output_coordinate_file", conditional_group.id, production_simulation.id
    )
    trajectory_path = ProtocolPath(
        "trajectory_file_path", conditional_group.id, production_simulation.id
    )
    observables = ProtocolPath(
        "observables", conditional_group.id, production_simulation.id
    )

    decorrelate_trajectory = analysis.DecorrelateTrajectory(
        f"decorrelate_trajectory{id_suffix}"
    )
    decorrelate_trajectory.time_series_statistics = time_series_statistics
    decorrelate_trajectory.input_coordinate_file = coordinate_file
    decorrelate_trajectory.input_trajectory_path = trajectory_path

    decorrelate_observables = analysis.DecorrelateObservables(
        f"decorrelate_observables{id_suffix}"
    )
    decorrelate_observables.time_series_statistics = time_series_statistics
    decorrelate_observables.input_observables = observables

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id
    )
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = ProtocolPath(
        "time_series_statistics.statistical_inefficiency",
        conditional_group.id,
        analysis_protocol.id,
    )
    output_to_store.observables = ProtocolPath(
        "output_observables", decorrelate_observables.id
    )
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    output_to_store.coordinate_file_name = coordinate_file

    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath(
        "value", conditional_group.id, analysis_protocol.id
    )

    # The positional order below must match the fields of `SimulationProtocols`.
    base_protocols = SimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_observables,
    )

    return base_protocols, final_value_source, output_to_store
def generate_base_reweighting_protocols(
    analysis_protocol, mbar_protocol, replicator_id="data_repl", id_suffix="",
):
    """Build the protocols used to reweight previously stored simulation data.

    The returned protocols unpack each piece of stored data, decorrelate its
    statistics and trajectory, concatenate the decorrelated data, evaluate
    reduced potentials for the reference and the target states, and finally
    feed everything into the supplied `mbar_protocol`.

    Both `analysis_protocol` and `mbar_protocol` are modified in place: their
    inputs are pointed at the outputs of the protocols created here.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which consumes the stored data and yields the
        statistical inefficiency and equilibration index used to decorrelate
        it. Its id must contain the ``$(replicator_id)`` placeholder.
    mbar_protocol: BaseReweightingProtocol
        A template reweighting protocol whose reference observables are
        already set. Its reduced potential inputs are set by this function.
        Its id must not contain the ``$(replicator_id)`` placeholder.
    replicator_id: str
        The id to use for the data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocols which should form the bulk of a
        property estimation workflow.
    ProtocolReplicator:
        A replicator which will clone the workflow for each piece of stored
        data.
    """
    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    # Only the per-data-set protocols may carry the replicator placeholder.
    replicator_placeholder = f"$({replicator_id})"

    assert replicator_placeholder in analysis_protocol.id
    assert replicator_placeholder not in mbar_protocol.id

    replicated_suffix = f"_{replicator_placeholder}{id_suffix}"

    # Unpack each piece of stored data.
    unpack_stored_data = storage.UnpackStoredSimulationData(
        f"unpack_data{replicated_suffix}"
    )
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    def _stored(attribute):
        """Return a fresh path to an output of the unpack protocol."""
        return ProtocolPath(attribute, unpack_stored_data.id)

    def _analysed(attribute):
        """Return a fresh path to an output of the analysis protocol."""
        return ProtocolPath(attribute, analysis_protocol.id)

    # Point the analysis protocol at the stored data. Which inputs get set
    # depends on whether it works from statistics or from a trajectory.
    if isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = _stored("statistics_file_path")

    elif isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = _stored("coordinate_file_path")
        analysis_protocol.trajectory_path = _stored("trajectory_file_path")

    # Decorrelate the stored statistics using the analysis protocol's outputs.
    decorrelate_statistics = analysis.ExtractUncorrelatedStatisticsData(
        f"decorrelate_stats{replicated_suffix}"
    )
    decorrelate_statistics.statistical_inefficiency = _analysed(
        "statistical_inefficiency"
    )
    decorrelate_statistics.equilibration_index = _analysed("equilibration_index")
    decorrelate_statistics.input_statistics_path = _stored("statistics_file_path")

    # Decorrelate the stored trajectory in the same way.
    decorrelate_trajectory = analysis.ExtractUncorrelatedTrajectoryData(
        f"decorrelate_traj{replicated_suffix}"
    )
    decorrelate_trajectory.statistical_inefficiency = _analysed(
        "statistical_inefficiency"
    )
    decorrelate_trajectory.equilibration_index = _analysed("equilibration_index")
    decorrelate_trajectory.input_coordinate_file = _stored("coordinate_file_path")
    decorrelate_trajectory.input_trajectory_path = _stored("trajectory_file_path")

    # Concatenate the decorrelated trajectories and statistics. The ids of
    # these protocols carry only `id_suffix`, not the replicator placeholder.
    join_trajectories = reweighting.ConcatenateTrajectories("concat_traj" + id_suffix)
    join_trajectories.input_coordinate_paths = _stored("coordinate_file_path")
    join_trajectories.input_trajectory_paths = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )

    join_statistics = reweighting.ConcatenateStatistics("concat_stats" + id_suffix)
    join_statistics.input_statistics_paths = ProtocolPath(
        "output_statistics_path", decorrelate_statistics.id
    )

    # Reduced potentials of the concatenated data at each reference state,
    # using the force field, substance and state stored alongside the data.
    build_reference_system = forcefield.BaseBuildSystem(
        f"build_system{replicated_suffix}"
    )
    build_reference_system.force_field_path = _stored("force_field_path")
    build_reference_system.substance = _stored("substance")
    build_reference_system.coordinate_file_path = _stored("coordinate_file_path")

    reduced_reference_potential = openmm.OpenMMReducedPotentials(
        f"reduced_potential{replicated_suffix}"
    )
    reduced_reference_potential.system_path = ProtocolPath(
        "system_path", build_reference_system.id
    )
    reduced_reference_potential.thermodynamic_state = _stored("thermodynamic_state")
    reduced_reference_potential.coordinate_file_path = _stored("coordinate_file_path")
    reduced_reference_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )
    reduced_reference_potential.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", join_statistics.id
    )

    # Reduced potentials of the concatenated data at the target state, which
    # is described entirely by the workflow's global inputs.
    build_target_system = forcefield.BaseBuildSystem("build_system_target" + id_suffix)
    build_target_system.force_field_path = ProtocolPath("force_field_path", "global")
    build_target_system.substance = ProtocolPath("substance", "global")
    build_target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )

    reduced_target_potential = openmm.OpenMMReducedPotentials(
        "reduced_potential_target" + id_suffix
    )
    reduced_target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    reduced_target_potential.system_path = ProtocolPath(
        "system_path", build_target_system.id
    )
    reduced_target_potential.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )
    reduced_target_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )
    reduced_target_potential.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", join_statistics.id
    )

    # Hook the reduced potentials up to the MBAR protocol.
    mbar_protocol.reference_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_reference_potential.id
    )
    mbar_protocol.target_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_target_potential.id
    )

    if isinstance(mbar_protocol, reweighting.ReweightStatistics):

        observable_type = mbar_protocol.statistics_type

        # Energy-like observables are taken from the statistics produced by
        # the target reduced potential protocol; every other observable is
        # taken from the decorrelated stored statistics.
        uses_stored_statistics = (
            observable_type != ObservableType.PotentialEnergy
            and observable_type != ObservableType.TotalEnergy
            and observable_type != ObservableType.Enthalpy
            and observable_type != ObservableType.ReducedPotential
        )

        if uses_stored_statistics:

            mbar_protocol.statistics_paths = ProtocolPath(
                "output_statistics_path", decorrelate_statistics.id
            )

        else:

            mbar_protocol.statistics_paths = [
                ProtocolPath("statistics_file_path", reduced_target_potential.id)
            ]
            mbar_protocol.frame_counts = ProtocolPath(
                "number_of_uncorrelated_samples", decorrelate_statistics.id
            )

    base_protocols = BaseReweightingProtocols(
        unpack_stored_data,
        analysis_protocol,
        decorrelate_statistics,
        decorrelate_trajectory,
        join_trajectories,
        join_statistics,
        build_reference_system,
        reduced_reference_potential,
        build_target_system,
        reduced_target_potential,
        mbar_protocol,
    )

    # The replicator clones the replicated protocols once per entry of the
    # global `full_system_data` input.
    component_replicator = ProtocolReplicator(replicator_id=replicator_id)
    component_replicator.template_values = ProtocolPath("full_system_data", "global")

    return base_protocols, component_replicator