Пример #1
0
    def _paprika_build_release_protocols(
        cls,
        orientation_replicator: ProtocolReplicator,
        restraint_schemas: Dict[str, ProtocolPath],
        solvation_template: coordinates.SolvateExistingStructure,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ):

        # Define a replicator to set up each release window
        release_replicator = ProtocolReplicator("release_replicator")
        release_replicator.template_values = ProtocolPath(
            "release_windows_indices", "global")

        orientation_placeholder = orientation_replicator.placeholder_id

        release_replicator_id = (f"{release_replicator.placeholder_id}_"
                                 f"{orientation_placeholder}")

        # Filter out only the solvent substance to help with the solvation step.
        filter_solvent = miscellaneous.FilterSubstanceByRole(
            "host-filter_solvent")
        filter_solvent.input_substance = ProtocolPath("host_substance",
                                                      "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        # Construct a set of coordinates for a host molecule correctly
        # aligned to the z-axis.
        align_coordinates = PrepareReleaseCoordinates(
            "release_align_coordinates")
        align_coordinates.substance = ProtocolPath("host_substance", "global")
        align_coordinates.complex_file_path = ProtocolPath(
            "host_coordinate_path", "global")

        solvate_coordinates = copy.deepcopy(solvation_template)
        solvate_coordinates.id = "release_solvate_coordinates"
        solvate_coordinates.substance = ProtocolPath("filtered_substance",
                                                     filter_solvent.id)
        solvate_coordinates.solute_coordinate_file = ProtocolPath(
            "output_coordinate_path", align_coordinates.id)

        # Apply the force field parameters. This only needs to be done for one
        # of the windows.
        apply_parameters = forcefield.BaseBuildSystem(
            "release_apply_parameters")
        apply_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        apply_parameters.substance = ProtocolPath("host_substance", "global")
        apply_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_coordinates.id)

        # Add the dummy atoms.
        add_dummy_atoms = AddDummyAtoms("release_add_dummy_atoms")
        add_dummy_atoms.substance = ProtocolPath("host_substance", "global")
        add_dummy_atoms.input_coordinate_path = ProtocolPath(
            "coordinate_file_path",
            solvate_coordinates.id,
        )
        add_dummy_atoms.input_system = ProtocolPath("parameterized_system",
                                                    apply_parameters.id)
        add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")

        # Apply the restraints files
        generate_restraints = GenerateReleaseRestraints(
            f"release_generate_restraints_{orientation_placeholder}")
        generate_restraints.host_coordinate_path = ProtocolPath(
            "output_coordinate_path", add_dummy_atoms.id)
        generate_restraints.release_lambdas = ProtocolPath(
            "release_lambdas", "global")
        generate_restraints.restraint_schemas = restraint_schemas

        apply_restraints = ApplyRestraints(
            f"release_apply_restraints_{release_replicator_id}")
        apply_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_restraints.id)
        apply_restraints.phase = "release"
        apply_restraints.window_index = ReplicatorValue(release_replicator.id)
        apply_restraints.input_system = ProtocolPath("output_system",
                                                     add_dummy_atoms.id)

        # Setup the simulations for the release phase.
        (
            release_minimization,
            release_thermalization,
            release_equilibration,
            release_production,
        ) = cls._paprika_build_simulation_protocols(
            ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
            ProtocolPath("output_system", apply_restraints.id),
            "release",
            release_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        # Analyze the release phase.
        analyze_release_phase = AnalyzeAPRPhase(
            f"analyze_release_phase_{orientation_placeholder}")
        analyze_release_phase.topology_path = ProtocolPath(
            "output_coordinate_path", add_dummy_atoms.id)
        analyze_release_phase.trajectory_paths = ProtocolPath(
            "trajectory_file_path", release_production.id)
        analyze_release_phase.phase = "release"
        analyze_release_phase.restraints_path = ProtocolPath(
            "restraints_path", generate_restraints.id)

        # Return the full list of protocols which make up the release parts
        # of a host-guest APR calculation.
        protocols = [
            filter_solvent,
            align_coordinates,
            solvate_coordinates,
            apply_parameters,
            add_dummy_atoms,
            generate_restraints,
            apply_restraints,
            release_minimization,
            release_thermalization,
            release_equilibration,
            release_production,
            analyze_release_phase,
        ]

        return (
            protocols,
            release_replicator,
            ProtocolPath("result", analyze_release_phase.id),
        )
Пример #2
0
    def default_yank_schema(existing_schema=None):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set,
            the schema's `workflow_schema` will be overwritten
            by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:
            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        schema = WorkflowSchema(
            property_type=HostGuestBindingAffinity.__name__)
        schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")

        # Initial coordinate and topology setup.
        filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand")
        filter_ligand.input_substance = ProtocolPath("substance", "global")

        filter_ligand.component_roles = [Component.Role.Ligand]
        # We only support substances with a single guest ligand.
        filter_ligand.expected_components = 1

        schema.protocols[filter_ligand.id] = filter_ligand.schema

        # Construct the protocols which will (for now) take as input a set of host coordinates,
        # and generate a set of charges for them.
        filter_receptor = miscellaneous.FilterSubstanceByRole(
            "filter_receptor")
        filter_receptor.input_substance = ProtocolPath("substance", "global")

        filter_receptor.component_roles = [Component.Role.Receptor]
        # We only support substances with a single host receptor.
        filter_receptor.expected_components = 1

        schema.protocols[filter_receptor.id] = filter_receptor.schema

        # Perform docking to position the guest within the host.
        perform_docking = coordinates.BuildDockedCoordinates("perform_docking")

        perform_docking.ligand_substance = ProtocolPath(
            "filtered_substance", filter_ligand.id)
        perform_docking.receptor_coordinate_file = ProtocolPath(
            "receptor_mol2", "global")

        schema.protocols[perform_docking.id] = perform_docking.schema

        # Solvate the docked structure using packmol
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        schema.protocols[filter_solvent.id] = filter_solvent.schema

        solvate_complex = coordinates.SolvateExistingStructure(
            "solvate_complex")
        solvate_complex.max_molecules = 1000

        solvate_complex.substance = ProtocolPath("filtered_substance",
                                                 filter_solvent.id)
        solvate_complex.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", perform_docking.id)

        schema.protocols[solvate_complex.id] = solvate_complex.schema

        # Assign force field parameters to the solvated complex system.
        build_solvated_complex_system = forcefield.BaseBuildSystem(
            "build_solvated_complex_system")

        build_solvated_complex_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        build_solvated_complex_system.substance = ProtocolPath(
            "substance", "global")

        build_solvated_complex_system.charged_molecule_paths = [
            ProtocolPath("receptor_mol2", "global")
        ]

        schema.protocols[build_solvated_complex_system.
                         id] = build_solvated_complex_system.schema

        # Solvate the ligand using packmol
        solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
        solvate_ligand.max_molecules = 1000

        solvate_ligand.substance = ProtocolPath("filtered_substance",
                                                filter_solvent.id)
        solvate_ligand.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", perform_docking.id)

        schema.protocols[solvate_ligand.id] = solvate_ligand.schema

        # Assign force field parameters to the solvated ligand system.
        build_solvated_ligand_system = forcefield.BaseBuildSystem(
            "build_solvated_ligand_system")

        build_solvated_ligand_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        build_solvated_ligand_system.substance = ProtocolPath(
            "substance", "global")

        schema.protocols[build_solvated_ligand_system.
                         id] = build_solvated_ligand_system.schema

        # Employ YANK to estimate the binding free energy.
        yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")

        yank_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10

        yank_protocol.verbose = True

        yank_protocol.force_field_path = ProtocolPath("force_field_path",
                                                      "global")

        yank_protocol.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", perform_docking.id)
        yank_protocol.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", perform_docking.id)

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        yank_protocol.solvated_ligand_system = ProtocolPath(
            "parameterized_system", build_solvated_ligand_system.id)

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        yank_protocol.solvated_complex_system = ProtocolPath(
            "parameterized_system", build_solvated_complex_system.id)

        schema.protocols[yank_protocol.id] = yank_protocol.schema

        # Define where the final values come from.
        schema.final_value_source = ProtocolPath("free_energy_difference",
                                                 yank_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Пример #3
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=2000):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Setup the fully solvated systems.
        build_full_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_solvated_coordinates")
        build_full_coordinates.substance = ProtocolPath("substance", "global")
        build_full_coordinates.max_molecules = n_molecules

        assign_full_parameters = forcefield.BaseBuildSystem(
            "assign_solvated_parameters")
        assign_full_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_full_parameters.substance = ProtocolPath("substance", "global")
        assign_full_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        # Perform a quick minimisation of the full system to give
        # YANK a better starting point for its minimisation.
        energy_minimisation = openmm.OpenMMEnergyMinimisation(
            "energy_minimisation")
        energy_minimisation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        energy_minimisation.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        equilibration_simulation = openmm.OpenMMSimulation(
            "equilibration_simulation")
        equilibration_simulation.ensemble = Ensemble.NPT
        equilibration_simulation.steps_per_iteration = 100000
        equilibration_simulation.output_frequency = 10000
        equilibration_simulation.timestep = 2.0 * unit.femtosecond
        equilibration_simulation.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        equilibration_simulation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        equilibration_simulation.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", energy_minimisation.id)

        # Create a substance which only contains the solute (e.g. for the
        # vacuum phase simulations).
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute")
        filter_solute.input_substance = ProtocolPath("substance", "global")
        filter_solute.component_roles = [Component.Role.Solute]

        # Setup the solute in vacuum system.
        build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_vacuum_coordinates")
        build_vacuum_coordinates.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        build_vacuum_coordinates.max_molecules = 1

        assign_vacuum_parameters = forcefield.BaseBuildSystem(
            "assign_parameters")
        assign_vacuum_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_vacuum_parameters.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)

        # Set up the protocol to run yank.
        run_yank = yank.SolvationYankProtocol("run_solvation_yank")
        run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id)
        run_yank.solvent_1 = ProtocolPath("filtered_substance",
                                          filter_solvent.id)
        run_yank.solvent_2 = Substance()
        run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state",
                                                    "global")
        run_yank.steps_per_iteration = 500
        run_yank.checkpoint_interval = 50
        run_yank.solvent_1_coordinates = ProtocolPath(
            "output_coordinate_file", equilibration_simulation.id)
        run_yank.solvent_1_system = ProtocolPath("system_path",
                                                 assign_full_parameters.id)
        run_yank.solvent_2_coordinates = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)
        run_yank.solvent_2_system = ProtocolPath("system_path",
                                                 assign_vacuum_parameters.id)

        # Set up the group which will run yank until the free energy has been determined to within
        # a given uncertainty
        conditional_group = groups.ConditionalGroup("conditional_group")
        conditional_group.max_iterations = 20

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.right_hand_value = ProtocolPath("target_uncertainty",
                                                      "global")
            condition.left_hand_value = ProtocolPath(
                "estimated_free_energy.error", conditional_group.id,
                run_yank.id)

            conditional_group.add_condition(condition)

        # Define the total number of iterations that yank should run for.
        total_iterations = miscellaneous.MultiplyValue("total_iterations")
        total_iterations.value = 2000
        total_iterations.multiplier = ProtocolPath("current_iteration",
                                                   conditional_group.id)

        # Make sure the simulations gets extended after each iteration.
        run_yank.number_of_iterations = ProtocolPath("result",
                                                     total_iterations.id)

        conditional_group.add_protocols(total_iterations, run_yank)

        # Define the full workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            build_full_coordinates.schema,
            assign_full_parameters.schema,
            energy_minimisation.schema,
            equilibration_simulation.schema,
            filter_solvent.schema,
            filter_solute.schema,
            build_vacuum_coordinates.schema,
            assign_vacuum_parameters.schema,
            conditional_group.schema,
        ]

        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 conditional_group.id,
                                                 run_yank.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Пример #4
0
def generate_base_reweighting_protocols(
    statistical_inefficiency: S,
    reweight_observable: T,
    replicator_id: str = "data_replicator",
    id_suffix: str = "",
) -> Tuple[ReweightingProtocols[S, T], ProtocolReplicator]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to reweight a set of cached simulation data to estimate the average
    value of an observable.

    Parameters
    ----------
    statistical_inefficiency
        The protocol which will be used to compute the statistical inefficiency and
        equilibration time of the observable of interest. This information will be
        used to decorrelate the cached data prior to reweighting.
    reweight_observable
        The MBAR reweighting protocol to use to reweight the observable to the target
        state. This method will automatically set the reduced potentials on the
        object.
    replicator_id: str
        The id to use for the cached data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
        The protocols to add to the workflow, a reference to the average value of the
        estimated observable (an ``Observable`` object), and the replicator which will
        clone the workflow for each piece of cached simulation data.
    """

    # Create the replicator which will apply these protocol once for each piece of
    # cached simulation data.
    data_replicator = ProtocolReplicator(replicator_id=replicator_id)
    data_replicator.template_values = ProtocolPath("full_system_data", "global")

    # Validate the inputs.
    assert isinstance(statistical_inefficiency, analysis.BaseAverageObservable)

    assert data_replicator.placeholder_id in statistical_inefficiency.id
    assert data_replicator.placeholder_id not in reweight_observable.id

    replicator_suffix = f"_{data_replicator.placeholder_id}{id_suffix}"

    # Unpack all the of the stored data.
    unpack_stored_data = storage.UnpackStoredSimulationData(
        "unpack_data{}".format(replicator_suffix)
    )
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    # Join the individual trajectories together.
    join_trajectories = reweighting.ConcatenateTrajectories(
        f"join_trajectories{id_suffix}"
    )
    join_trajectories.input_coordinate_paths = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    join_trajectories.input_trajectory_paths = ProtocolPath(
        "trajectory_file_path", unpack_stored_data.id
    )
    join_observables = reweighting.ConcatenateObservables(
        f"join_observables{id_suffix}"
    )
    join_observables.input_observables = ProtocolPath(
        "observables", unpack_stored_data.id
    )

    # Calculate the reduced potentials for each of the reference states.
    build_reference_system = forcefield.BaseBuildSystem(
        f"build_system{replicator_suffix}"
    )
    build_reference_system.force_field_path = ProtocolPath(
        "force_field_path", unpack_stored_data.id
    )
    build_reference_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    build_reference_system.substance = ProtocolPath("substance", unpack_stored_data.id)

    reduced_reference_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential{replicator_suffix}"
    )
    reduced_reference_potential.parameterized_system = ProtocolPath(
        "parameterized_system", build_reference_system.id
    )
    reduced_reference_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", unpack_stored_data.id
    )
    reduced_reference_potential.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    reduced_reference_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )

    # Calculate the reduced potential of the target state.
    build_target_system = forcefield.BaseBuildSystem(f"build_system_target{id_suffix}")
    build_target_system.force_field_path = ProtocolPath("force_field_path", "global")
    build_target_system.substance = ProtocolPath("substance", "global")
    build_target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )

    reduced_target_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential_target{id_suffix}"
    )
    reduced_target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    reduced_target_potential.parameterized_system = ProtocolPath(
        "parameterized_system", build_target_system.id
    )
    reduced_target_potential.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )
    reduced_target_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )
    reduced_target_potential.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Compute the observable gradients.
    zero_gradients = gradients.ZeroGradients(f"zero_gradients{id_suffix}")
    zero_gradients.force_field_path = ProtocolPath("force_field_path", "global")
    zero_gradients.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Decorrelate the target potentials and observables.
    if not isinstance(statistical_inefficiency, analysis.BaseAverageObservable):
        raise NotImplementedError()

    decorrelate_target_potential = analysis.DecorrelateObservables(
        f"decorrelate_target_potential{id_suffix}"
    )
    decorrelate_target_potential.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    decorrelate_target_potential.input_observables = ProtocolPath(
        "output_observables", reduced_target_potential.id
    )

    decorrelate_observable = analysis.DecorrelateObservables(
        f"decorrelate_observable{id_suffix}"
    )
    decorrelate_observable.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    decorrelate_observable.input_observables = ProtocolPath(
        "output_observables", zero_gradients.id
    )

    # Decorrelate the reference potentials. Due to a quirk of how workflow replicators
    # work the time series statistics need to be passed via a dummy protocol first.
    #
    # Because the `statistical_inefficiency` and `decorrelate_reference_potential`
    # protocols are replicated by the same replicator the `time_series_statistics`
    # input of `decorrelate_reference_potential_X` will take its value from
    # the `time_series_statistics` output of `statistical_inefficiency_X` rather than
    # as a list of of [statistical_inefficiency_0.time_series_statistics...
    # statistical_inefficiency_N.time_series_statistics]. Passing the statistics via
    # an un-replicated intermediate resolves this.
    replicate_statistics = miscellaneous.DummyProtocol(
        f"replicated_statistics{id_suffix}"
    )
    replicate_statistics.input_value = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )

    decorrelate_reference_potential = analysis.DecorrelateObservables(
        f"decorrelate_reference_potential{replicator_suffix}"
    )
    decorrelate_reference_potential.time_series_statistics = ProtocolPath(
        "output_value", replicate_statistics.id
    )
    decorrelate_reference_potential.input_observables = ProtocolPath(
        "output_observables", reduced_reference_potential.id
    )

    # Finally, apply MBAR to get the reweighted value.
    reweight_observable.reference_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", decorrelate_reference_potential.id
    )
    reweight_observable.target_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", decorrelate_target_potential.id
    )
    reweight_observable.observable = ProtocolPath(
        "output_observables", decorrelate_observable.id
    )
    reweight_observable.frame_counts = ProtocolPath(
        "time_series_statistics.n_uncorrelated_points", statistical_inefficiency.id
    )

    protocols = ReweightingProtocols(
        unpack_stored_data,
        #
        join_trajectories,
        join_observables,
        #
        build_reference_system,
        reduced_reference_potential,
        #
        build_target_system,
        reduced_target_potential,
        #
        statistical_inefficiency,
        replicate_statistics,
        #
        decorrelate_reference_potential,
        decorrelate_target_potential,
        #
        decorrelate_observable,
        zero_gradients,
        #
        reweight_observable,
    )

    return protocols, data_replicator
Пример #5
0
def generate_simulation_protocols(
    analysis_protocol: S,
    use_target_uncertainty: bool,
    id_suffix: str = "",
    conditional_group: Optional[ConditionalGroup] = None,
    n_molecules: int = 1000,
) -> Tuple[SimulationProtocols[S], ProtocolPath, StoredSimulationData]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to run a single simulation to estimate the average value of an observable.

    The protocols returned will:

        1) Build a set of liquid coordinates for the
           property substance using packmol.

        2) Assign a set of smirnoff force field parameters
           to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps
           using a timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times):

            5a) Run a longer NPT production simulation for 1000000 steps using a
                timestep of 2fs

            5b) Extract the average value of an observable and it's uncertainty.

            5c) If a convergence mode is set by the options, check if the target
                uncertainty has been met. If not, repeat steps 5a), 5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol
        The protocol which will extract the observable of
        interest from the generated simulation data.
    use_target_uncertainty
        Whether to run the simulation until the observable is
        estimated to within the target uncertainty.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the main simulation / extraction
        protocols within. It is up to the caller of this method to
        manually add the convergence conditions to this group.
        If `None`, a default group with uncertainty convergence
        conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
        The protocols to add to the workflow, a reference to the average value of the
        estimated observable (an ``Observable`` object), and an object which describes
        the default data from a simulation to store, such as the uncorrelated statistics
        and configurations.
    """

    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    build_coordinates.substance = ProtocolPath("substance", "global")
    build_coordinates.max_molecules = n_molecules

    assign_parameters = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    energy_minimisation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )
    equilibration_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    # Production
    production_simulation = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    production_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )
    production_simulation.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Set up a conditional group to ensure convergence of uncertainty
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.left_hand_value = ProtocolPath(
                "value.error", conditional_group.id, analysis_protocol.id
            )

            conditional_group.add_condition(condition)

            # Make sure the simulation gets extended after each iteration.
            production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id
            )

    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analyse protocol to the correct data sources
    if not isinstance(analysis_protocol, analysis.BaseAverageObservable):

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryObservable or BaseAverageObservable "
            "protocols."
        )

    analysis_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    analysis_protocol.potential_energies = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        production_simulation.id,
    )

    # Finally, extract uncorrelated data
    time_series_statistics = ProtocolPath(
        "time_series_statistics", conditional_group.id, analysis_protocol.id
    )
    coordinate_file = ProtocolPath(
        "output_coordinate_file", conditional_group.id, production_simulation.id
    )
    trajectory_path = ProtocolPath(
        "trajectory_file_path", conditional_group.id, production_simulation.id
    )
    observables = ProtocolPath(
        "observables", conditional_group.id, production_simulation.id
    )

    decorrelate_trajectory = analysis.DecorrelateTrajectory(
        f"decorrelate_trajectory{id_suffix}"
    )
    decorrelate_trajectory.time_series_statistics = time_series_statistics
    decorrelate_trajectory.input_coordinate_file = coordinate_file
    decorrelate_trajectory.input_trajectory_path = trajectory_path

    decorrelate_observables = analysis.DecorrelateObservables(
        f"decorrelate_observables{id_suffix}"
    )
    decorrelate_observables.time_series_statistics = time_series_statistics
    decorrelate_observables.input_observables = observables

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id
    )
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = ProtocolPath(
        "time_series_statistics.statistical_inefficiency",
        conditional_group.id,
        analysis_protocol.id,
    )
    output_to_store.observables = ProtocolPath(
        "output_observables", decorrelate_observables.id
    )
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    output_to_store.coordinate_file_name = coordinate_file

    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath(
        "value", conditional_group.id, analysis_protocol.id
    )

    base_protocols = SimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_observables,
    )

    return base_protocols, final_value_source, output_to_store
Пример #6
0
def generate_base_reweighting_protocols(
    analysis_protocol,
    mbar_protocol,
    replicator_id="data_repl",
    id_suffix="",
):
    """Constructs a set of protocols which, when combined in a workflow schema,
    may be executed to reweight a set of existing data to estimate a particular
    property. The reweighted observable of interest will be calculated by
    following the passed in `analysis_protocol`.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will take input from the stored data,
        and generate a set of observables to reweight.
    mbar_protocol: BaseReweightingProtocol
        A template mbar reweighting protocol, which has it's reference
        observables already set. This method will automatically set the
        reduced potentials on this object.
    replicator_id: str
        The id to use for the data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocol which should form the bulk of
        a property estimation workflow.
    ProtocolReplicator:
        A replicator which will clone the workflow for each piece of
        stored data.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    assert f"$({replicator_id})" in analysis_protocol.id
    assert f"$({replicator_id})" not in mbar_protocol.id

    replicator_suffix = "_$({}){}".format(replicator_id, id_suffix)

    # Unpack all the of the stored data.
    unpack_stored_data = storage.UnpackStoredSimulationData(
        "unpack_data{}".format(replicator_suffix))
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    # The autocorrelation time of each of the stored files will be calculated for this property
    # using the passed in analysis protocol.
    if isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = ProtocolPath(
            "statistics_file_path", unpack_stored_data.id)

    elif isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", unpack_stored_data.id)
        analysis_protocol.trajectory_path = ProtocolPath(
            "trajectory_file_path", unpack_stored_data.id)

    # Decorrelate the frames of the stored trajectory and statistics arrays.
    decorrelate_statistics = analysis.ExtractUncorrelatedStatisticsData(
        "decorrelate_stats{}".format(replicator_suffix))
    decorrelate_statistics.statistical_inefficiency = ProtocolPath(
        "statistical_inefficiency", analysis_protocol.id)
    decorrelate_statistics.equilibration_index = ProtocolPath(
        "equilibration_index", analysis_protocol.id)
    decorrelate_statistics.input_statistics_path = ProtocolPath(
        "statistics_file_path", unpack_stored_data.id)

    decorrelate_trajectory = analysis.ExtractUncorrelatedTrajectoryData(
        "decorrelate_traj{}".format(replicator_suffix))
    decorrelate_trajectory.statistical_inefficiency = ProtocolPath(
        "statistical_inefficiency", analysis_protocol.id)
    decorrelate_trajectory.equilibration_index = ProtocolPath(
        "equilibration_index", analysis_protocol.id)
    decorrelate_trajectory.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id)
    decorrelate_trajectory.input_trajectory_path = ProtocolPath(
        "trajectory_file_path", unpack_stored_data.id)

    # Stitch together all of the trajectories
    join_trajectories = reweighting.ConcatenateTrajectories("concat_traj" +
                                                            id_suffix)
    join_trajectories.input_coordinate_paths = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id)
    join_trajectories.input_trajectory_paths = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id)

    join_statistics = reweighting.ConcatenateStatistics("concat_stats" +
                                                        id_suffix)
    join_statistics.input_statistics_paths = ProtocolPath(
        "output_statistics_path", decorrelate_statistics.id)

    # Calculate the reduced potentials for each of the reference states.
    build_reference_system = forcefield.BaseBuildSystem(
        "build_system{}".format(replicator_suffix))
    build_reference_system.force_field_path = ProtocolPath(
        "force_field_path", unpack_stored_data.id)
    build_reference_system.substance = ProtocolPath("substance",
                                                    unpack_stored_data.id)
    build_reference_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id)

    reduced_reference_potential = openmm.OpenMMReducedPotentials(
        "reduced_potential{}".format(replicator_suffix))
    reduced_reference_potential.system_path = ProtocolPath(
        "system_path", build_reference_system.id)
    reduced_reference_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", unpack_stored_data.id)
    reduced_reference_potential.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id)
    reduced_reference_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id)
    reduced_reference_potential.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", join_statistics.id)

    # Calculate the reduced potential of the target state.
    build_target_system = forcefield.BaseBuildSystem("build_system_target" +
                                                     id_suffix)
    build_target_system.force_field_path = ProtocolPath(
        "force_field_path", "global")
    build_target_system.substance = ProtocolPath("substance", "global")
    build_target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id)

    reduced_target_potential = openmm.OpenMMReducedPotentials(
        "reduced_potential_target" + id_suffix)
    reduced_target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global")
    reduced_target_potential.system_path = ProtocolPath(
        "system_path", build_target_system.id)
    reduced_target_potential.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id)
    reduced_target_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id)
    reduced_target_potential.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", join_statistics.id)

    # Finally, apply MBAR to get the reweighted value.
    mbar_protocol.reference_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_reference_potential.id)
    mbar_protocol.target_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_target_potential.id)

    if (isinstance(mbar_protocol, reweighting.ReweightStatistics)
            and mbar_protocol.statistics_type != ObservableType.PotentialEnergy
            and mbar_protocol.statistics_type != ObservableType.TotalEnergy
            and mbar_protocol.statistics_type != ObservableType.Enthalpy and
            mbar_protocol.statistics_type != ObservableType.ReducedPotential):

        mbar_protocol.statistics_paths = ProtocolPath(
            "output_statistics_path", decorrelate_statistics.id)

    elif isinstance(mbar_protocol, reweighting.ReweightStatistics):

        mbar_protocol.statistics_paths = [
            ProtocolPath("statistics_file_path", reduced_target_potential.id)
        ]
        mbar_protocol.frame_counts = ProtocolPath(
            "number_of_uncorrelated_samples", decorrelate_statistics.id)

    base_protocols = BaseReweightingProtocols(
        unpack_stored_data,
        analysis_protocol,
        decorrelate_statistics,
        decorrelate_trajectory,
        join_trajectories,
        join_statistics,
        build_reference_system,
        reduced_reference_potential,
        build_target_system,
        reduced_target_potential,
        mbar_protocol,
    )

    # Create the replicator object.
    component_replicator = ProtocolReplicator(replicator_id=replicator_id)
    component_replicator.template_values = ProtocolPath(
        "full_system_data", "global")

    return base_protocols, component_replicator