Пример #1
0
    def _paprika_default_simulation_protocols(
        cls,
        n_thermalization_steps: int,
        n_equilibration_steps: int,
        n_production_steps: int,
        dt_thermalization: unit.Quantity,
        dt_equilibration: unit.Quantity,
        dt_production: unit.Quantity,
    ) -> Tuple[
        openmm.OpenMMEnergyMinimisation,
        openmm.OpenMMSimulation,
        openmm.OpenMMSimulation,
        openmm.OpenMMSimulation,
    ]:
        """Create the default simulation protocols applied to every window of
        an APR calculation.

        All protocols are created with an empty id (``""``). Thermalization and
        equilibration use an output frequency of 10000, production uses 5000.

        Parameters
        ----------
        n_thermalization_steps
            The number of steps per iteration of the thermalization simulation.
            Samples generated during this stage will be discarded.
        n_equilibration_steps
            The number of steps per iteration of the equilibration simulation.
            Samples generated during this stage will be discarded.
        n_production_steps
            The number of steps per iteration of the production simulation.
            Samples generated during this stage will be used in the final
            free energy calculation.
        dt_thermalization
            The integration timestep to use during thermalization.
        dt_equilibration
            The integration timestep to use during equilibration.
        dt_production
            The integration timestep to use during production.

        Returns
        -------
            An energy minimisation protocol followed by the thermalization,
            equilibration and production simulation protocols, in that order.
        """
        minimisation = openmm.OpenMMEnergyMinimisation("")

        # (steps per iteration, output frequency, timestep) for the
        # thermalization, equilibration and production stages respectively.
        stage_settings = (
            (n_thermalization_steps, 10000, dt_thermalization),
            (n_equilibration_steps, 10000, dt_equilibration),
            (n_production_steps, 5000, dt_production),
        )

        stages = []

        for n_steps, frequency, timestep in stage_settings:

            simulation = openmm.OpenMMSimulation("")
            simulation.steps_per_iteration = n_steps
            simulation.output_frequency = frequency
            simulation.timestep = timestep

            stages.append(simulation)

        thermalization, equilibration, production = stages

        return minimisation, thermalization, equilibration, production
Пример #2
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=2000
    ):
        """Build the default calculation schema used to estimate this class
        of property from direct simulations.

        The workflow builds and equilibrates a fully solvated system, builds a
        single-solute vacuum system, and then runs YANK inside a conditional
        group (at most 20 iterations), extending the total number of YANK
        iterations by 2000 on each pass of the group.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
            May not be set together with `relative_tolerance`.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within. May not be
            set together with `absolute_tolerance`.
        n_molecules: int
            The maximum number of molecules to place in the solvated system.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        # At most one of the two tolerances may be provided.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        # A convergence condition is only attached when a tolerance was given.
        converge_to_target = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # --- Fully solvated system -------------------------------------------
        solvated_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_solvated_coordinates"
        )
        solvated_coordinates.substance = ProtocolPath("substance", "global")
        solvated_coordinates.max_molecules = n_molecules

        solvated_parameters = forcefield.BaseBuildSystem("assign_solvated_parameters")
        solvated_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global"
        )
        solvated_parameters.substance = ProtocolPath("substance", "global")
        solvated_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvated_coordinates.id
        )

        # Quickly minimise the solvated system so that YANK starts its own
        # minimisation from a better configuration.
        minimisation = openmm.OpenMMEnergyMinimisation("energy_minimisation")
        minimisation.system_path = ProtocolPath("system_path", solvated_parameters.id)
        minimisation.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", solvated_coordinates.id
        )

        # Short NPT equilibration starting from the minimised coordinates.
        equilibration = openmm.OpenMMSimulation("equilibration_simulation")
        equilibration.ensemble = Ensemble.NPT
        equilibration.steps_per_iteration = 100000
        equilibration.output_frequency = 10000
        equilibration.timestep = 2.0 * unit.femtosecond
        equilibration.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )
        equilibration.system_path = ProtocolPath("system_path", solvated_parameters.id)
        equilibration.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", minimisation.id
        )

        # --- Split the substance by component role ---------------------------
        # The solute-only substance is needed for the vacuum phase simulations.
        solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
        solvent_filter.input_substance = ProtocolPath("substance", "global")
        solvent_filter.component_roles = [Component.Role.Solvent]

        solute_filter = miscellaneous.FilterSubstanceByRole("filter_solute")
        solute_filter.input_substance = ProtocolPath("substance", "global")
        solute_filter.component_roles = [Component.Role.Solute]

        # --- Solute in vacuum -------------------------------------------------
        vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_vacuum_coordinates"
        )
        vacuum_coordinates.substance = ProtocolPath(
            "filtered_substance", solute_filter.id
        )
        vacuum_coordinates.max_molecules = 1

        vacuum_parameters = forcefield.BaseBuildSystem("assign_parameters")
        vacuum_parameters.force_field_path = ProtocolPath("force_field_path", "global")
        vacuum_parameters.substance = ProtocolPath(
            "filtered_substance", solute_filter.id
        )
        vacuum_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", vacuum_coordinates.id
        )

        # --- YANK -------------------------------------------------------------
        # Phase 1 is the equilibrated solvated system, phase 2 is the vacuum
        # system (represented by an empty substance).
        yank_protocol = yank.SolvationYankProtocol("run_solvation_yank")
        yank_protocol.solute = ProtocolPath("filtered_substance", solute_filter.id)
        yank_protocol.solvent_1 = ProtocolPath("filtered_substance", solvent_filter.id)
        yank_protocol.solvent_2 = Substance()
        yank_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 50
        yank_protocol.solvent_1_coordinates = ProtocolPath(
            "output_coordinate_file", equilibration.id
        )
        yank_protocol.solvent_1_system = ProtocolPath(
            "system_path", solvated_parameters.id
        )
        yank_protocol.solvent_2_coordinates = ProtocolPath(
            "coordinate_file_path", vacuum_coordinates.id
        )
        yank_protocol.solvent_2_system = ProtocolPath(
            "system_path", vacuum_parameters.id
        )

        # --- Convergence loop -------------------------------------------------
        # Re-run YANK until the free energy uncertainty drops below the
        # target, or the maximum number of iterations has been reached.
        convergence_group = groups.ConditionalGroup("conditional_group")
        convergence_group.max_iterations = 20

        if converge_to_target:

            uncertainty_condition = groups.ConditionalGroup.Condition()
            uncertainty_condition.type = (
                groups.ConditionalGroup.Condition.Type.LessThan
            )
            uncertainty_condition.right_hand_value = ProtocolPath(
                "target_uncertainty", "global"
            )
            uncertainty_condition.left_hand_value = ProtocolPath(
                "estimated_free_energy.error", convergence_group.id, yank_protocol.id
            )

            convergence_group.add_condition(uncertainty_condition)

        # The total number of YANK iterations is 2000 multiplied by the
        # current iteration of the group...
        iteration_count = miscellaneous.MultiplyValue("total_iterations")
        iteration_count.value = 2000
        iteration_count.multiplier = ProtocolPath(
            "current_iteration", convergence_group.id
        )

        # ...so that the simulation gets extended after each pass.
        yank_protocol.number_of_iterations = ProtocolPath("result", iteration_count.id)

        convergence_group.add_protocols(iteration_count, yank_protocol)

        # --- Assemble the workflow -------------------------------------------
        workflow_schema = WorkflowSchema()

        workflow_schema.protocol_schemas = [
            solvated_coordinates.schema,
            solvated_parameters.schema,
            minimisation.schema,
            equilibration.schema,
            solvent_filter.schema,
            solute_filter.schema,
            vacuum_coordinates.schema,
            vacuum_parameters.schema,
            convergence_group.schema,
        ]

        # The property value is the free energy estimated by YANK.
        workflow_schema.final_value_source = ProtocolPath(
            "estimated_free_energy", convergence_group.id, yank_protocol.id
        )

        simulation_schema.workflow_schema = workflow_schema
        return simulation_schema
Пример #3
0
def generate_simulation_protocols(
    analysis_protocol: S,
    use_target_uncertainty: bool,
    id_suffix: str = "",
    conditional_group: Optional[ConditionalGroup] = None,
    n_molecules: int = 1000,
) -> Tuple[SimulationProtocols[S], ProtocolPath, StoredSimulationData]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to run a single simulation to estimate the average value of an observable.

    The protocols returned will:

        1) Build a set of liquid coordinates for the
           property substance using packmol.

        2) Assign a set of force field parameters to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps
           using a timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times when the
           default group is used):

            5a) Run a longer NPT production simulation for 1000000 steps using a
                timestep of 2fs

            5b) Extract the average value of an observable and its uncertainty.

            5c) If a convergence mode is set by the options, check if the target
                uncertainty has been met. If not, repeat steps 5a), 5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol
        The protocol which will extract the observable of
        interest from the generated simulation data. It must be an
        instance of ``BaseAverageObservable``.
    use_target_uncertainty
        Whether to run the simulation until the observable is
        estimated to within the target uncertainty. Only used when
        the default conditional group is constructed.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ConditionalGroup, optional
        A custom group to wrap the main simulation / extraction
        protocols within. It is up to the caller of this method to
        manually add the convergence conditions to this group, and to
        link the number of iterations of the production simulation to
        the group if the simulation should be extended on each pass.
        If `None`, a default group with uncertainty convergence
        conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
        The protocols to add to the workflow, a reference to the average value of the
        estimated observable (an ``Observable`` object), and an object which describes
        the default data from a simulation to store, such as the uncorrelated statistics
        and configurations.

    Raises
    ------
    ValueError
        If ``analysis_protocol`` is not a ``BaseAverageObservable``.
    """

    # Validate the analysis protocol before anything else so that an invalid
    # protocol is rejected with a clear error *before* its attributes are read
    # and before a caller supplied conditional group is modified.
    if not isinstance(analysis_protocol, analysis.BaseAverageObservable):

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryObservable or BaseAverageObservable "
            "protocols."
        )

    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    build_coordinates.substance = ProtocolPath("substance", "global")
    build_coordinates.max_molecules = n_molecules

    assign_parameters = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    energy_minimisation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )
    equilibration_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    # Production
    production_simulation = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    production_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )
    production_simulation.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Set up a conditional group to ensure convergence of uncertainty
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            # Keep iterating while the uncertainty in the observable is not
            # yet less than the target uncertainty.
            condition = groups.ConditionalGroup.Condition()
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.left_hand_value = ProtocolPath(
                "value.error", conditional_group.id, analysis_protocol.id
            )

            conditional_group.add_condition(condition)

            # Make sure the simulation gets extended after each iteration.
            production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id
            )

    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analysis protocol to the correct data sources. Both protocols
    # live in the same group, so the path is given relative to that group.
    analysis_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    analysis_protocol.potential_energies = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        production_simulation.id,
    )

    # Finally, extract uncorrelated data
    time_series_statistics = ProtocolPath(
        "time_series_statistics", conditional_group.id, analysis_protocol.id
    )
    coordinate_file = ProtocolPath(
        "output_coordinate_file", conditional_group.id, production_simulation.id
    )
    trajectory_path = ProtocolPath(
        "trajectory_file_path", conditional_group.id, production_simulation.id
    )
    observables = ProtocolPath(
        "observables", conditional_group.id, production_simulation.id
    )

    decorrelate_trajectory = analysis.DecorrelateTrajectory(
        f"decorrelate_trajectory{id_suffix}"
    )
    decorrelate_trajectory.time_series_statistics = time_series_statistics
    decorrelate_trajectory.input_coordinate_file = coordinate_file
    decorrelate_trajectory.input_trajectory_path = trajectory_path

    decorrelate_observables = analysis.DecorrelateObservables(
        f"decorrelate_observables{id_suffix}"
    )
    decorrelate_observables.time_series_statistics = time_series_statistics
    decorrelate_observables.input_observables = observables

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    # Placeholder, expected to be filled in outside of this function.
    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id
    )
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = ProtocolPath(
        "time_series_statistics.statistical_inefficiency",
        conditional_group.id,
        analysis_protocol.id,
    )
    output_to_store.observables = ProtocolPath(
        "output_observables", decorrelate_observables.id
    )
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    output_to_store.coordinate_file_name = coordinate_file

    # Placeholder, expected to be filled in outside of this function.
    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath(
        "value", conditional_group.id, analysis_protocol.id
    )

    base_protocols = SimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_observables,
    )

    return base_protocols, final_value_source, output_to_store