示例#1
0
def build_merge(prefix):
    """Create six dummy protocols in which two chains merge into a third.

    The id of each protocol is ``prefix`` followed by ``protocol_<letter>``.

    Parameters
    ----------
    prefix: str
        The string to prepend to the id of every protocol.

    Returns
    -------
    list of DummyInputOutputProtocol
        The protocols in the order a, b, c, d, e, f.
    """

    # a - b \
    #       | - e - f
    # c - d /

    def make_protocol(letter, input_value):
        protocol = DummyInputOutputProtocol(prefix + "protocol_" + letter)
        protocol.input_value = input_value
        return protocol

    def output_of(protocol):
        # A reference to the `output_value` of an already created protocol.
        return ProtocolPath("output_value", protocol.id)

    protocol_a = make_protocol("a", 1)
    protocol_b = make_protocol("b", output_of(protocol_a))
    protocol_c = make_protocol("c", 2)
    protocol_d = make_protocol("d", output_of(protocol_c))

    # The merge point takes the outputs of both chains as a list.
    protocol_e = make_protocol(
        "e", [output_of(protocol_b), output_of(protocol_d)]
    )
    protocol_f = make_protocol("f", output_of(protocol_e))

    return [
        protocol_a,
        protocol_b,
        protocol_c,
        protocol_d,
        protocol_e,
        protocol_f,
    ]
示例#2
0
def test_conditional_protocol_group_fail():
    """Tests that executing a conditional group raises a ``RuntimeError``
    when its ``LessThan`` condition compares the result of adding a value
    to itself against that same value, with ``max_iterations`` set to 10."""

    value_protocol_a = DummyProtocol("protocol_a")
    value_protocol_a.input_value = 2 * unit.kelvin

    def source_output():
        # Build a fresh path object for each use of the source value.
        return ProtocolPath("output_value", value_protocol_a.id)

    add_values = AddValues("add_values")
    add_values.values = [source_output(), source_output()]

    condition = ConditionalGroup.Condition()
    condition.left_hand_value = ProtocolPath("result", add_values.id)
    condition.right_hand_value = source_output()
    condition.type = ConditionalGroup.Condition.Type.LessThan

    protocol_group = ConditionalGroup("protocol_group")
    protocol_group.conditions.append(condition)
    protocol_group.max_iterations = 10
    protocol_group.add_protocols(value_protocol_a, add_values)

    with tempfile.TemporaryDirectory() as directory:

        with pytest.raises(RuntimeError):
            protocol_group.execute(directory, ComputeResources())
示例#3
0
    def default_reweighting_schema(
        cls,
        absolute_tolerance: unit.Quantity = UNDEFINED,
        relative_tolerance: float = UNDEFINED,
        n_effective_samples: int = 50,
    ) -> ReweightingSchema:
        """Returns the default reweighting schema for this property.

        The schema is the parent class's ``_default_reweighting_schema`` for
        the reduced potential, extended with one extra protocol which
        multiplies the final value by ``thermodynamic_state.inverse_beta``.

        Parameters
        ----------
        absolute_tolerance
            Passed through to the parent schema generator.
        relative_tolerance
            Passed through to the parent schema generator.
        n_effective_samples
            Passed through to the parent schema generator.

        Returns
        -------
        ReweightingSchema
            The schema whose final value is the result of the multiplication.
        """

        schema = super(EnthalpyOfMixing, cls)._default_reweighting_schema(
            ObservableType.ReducedPotential,
            absolute_tolerance,
            relative_tolerance,
            n_effective_samples,
        )
        workflow_schema = schema.workflow_schema

        # Divide the excess reduced potential by beta (i.e. multiply it by
        # the inverse of beta) to get an approximation of the excess enthalpy.
        to_enthalpy = miscellaneous.MultiplyValue("excess_enthalpy_of_mixing")
        to_enthalpy.value = workflow_schema.final_value_source
        to_enthalpy.multiplier = ProtocolPath(
            "thermodynamic_state.inverse_beta", "global"
        )

        # Register the new protocol and make its result the final value.
        workflow_schema.protocol_schemas.append(to_enthalpy.schema)
        workflow_schema.final_value_source = ProtocolPath(
            "result", to_enthalpy.id
        )

        return schema
示例#4
0
def test_conditional_group_self_reference():
    """Tests that protocols within a conditional group
    can access the outputs of its parent, such as the
    current iteration of the group."""

    iteration_limit = 10
    threshold = random.randint(1, iteration_limit - 1)

    group = ConditionalGroup("conditional_group")
    group.max_iterations = iteration_limit

    protocol = DummyProtocol("protocol_a")
    protocol.input_value = ProtocolPath("current_iteration", group.id)

    # Both conditions require a value to exceed the random threshold: first
    # the output of the child protocol, then the iteration counter itself.
    watched_paths = (
        ProtocolPath("output_value", group.id, protocol.id),
        ProtocolPath("current_iteration", group.id),
    )
    conditions = []

    for watched_path in watched_paths:

        condition = ConditionalGroup.Condition()
        condition.left_hand_value = watched_path
        condition.right_hand_value = threshold
        condition.type = ConditionalGroup.Condition.Type.GreaterThan

        conditions.append(condition)

    group.add_protocols(protocol)

    for condition in conditions:
        group.add_condition(condition)

    with tempfile.TemporaryDirectory() as directory:

        group.execute(directory, ComputeResources())
        assert protocol.output_value == threshold + 1
示例#5
0
def test_conditional_protocol_group():
    """Tests that a conditional group with a ``GreaterThan`` condition
    executes and that the value added to itself (2 K + 2 K) is retrievable
    from the group as 4 K."""

    source_protocol = DummyProtocol("protocol_a")
    source_protocol.input_value = 2 * unit.kelvin

    summation = AddValues("add_values")
    summation.values = [
        ProtocolPath("output_value", source_protocol.id) for _ in range(2)
    ]

    sum_path = ProtocolPath("result", summation.id)

    condition = ConditionalGroup.Condition()
    condition.left_hand_value = sum_path
    condition.right_hand_value = ProtocolPath(
        "output_value", source_protocol.id
    )
    condition.type = ConditionalGroup.Condition.Type.GreaterThan

    protocol_group = ConditionalGroup("protocol_group")
    protocol_group.conditions.append(condition)
    protocol_group.add_protocols(source_protocol, summation)

    with tempfile.TemporaryDirectory() as directory:

        protocol_group.execute(directory, ComputeResources())

        # Query the group with a fresh path rather than the one stored on
        # the condition.
        summed_value = protocol_group.get_value(
            ProtocolPath("result", summation.id)
        )
        assert summed_value == 4 * unit.kelvin
示例#6
0
def test_simple_workflow_graph(calculation_backend, compute_resources,
                               exception):
    """Tests executing a two protocol workflow graph either directly or on
    a ``DaskLocalCluster``.

    Parameters
    ----------
    calculation_backend
        ``None`` to execute without a backend; any other value causes a fresh
        ``DaskLocalCluster`` to be used instead.
    compute_resources
        Forwarded to ``WorkflowGraph.execute``.
    exception: bool
        Whether executing the graph is expected to raise an
        ``AssertionError``.
    """

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema, protocol_b.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_b.id)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        if calculation_backend is None:

            # NOTE(review): this first call runs before the `exception` flag
            # is inspected, so it must succeed even when `exception` is set.
            result = workflow_graph.execute(
                directory, calculation_backend, compute_resources
            )[0]

            if exception:

                with pytest.raises(AssertionError):
                    workflow_graph.execute(
                        directory, calculation_backend, compute_resources
                    )

                return

        else:

            # The parametrized backend is only used as a flag; a fresh local
            # cluster is always created for the actual execution.
            with DaskLocalCluster() as calculation_backend:

                if exception:

                    with pytest.raises(AssertionError):
                        workflow_graph.execute(
                            directory, calculation_backend, compute_resources
                        )

                    return

                results_futures = workflow_graph.execute(
                    directory, calculation_backend, compute_resources
                )

                assert len(results_futures) == 1
                result = results_futures[0].result()

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
示例#7
0
def generate_reweighting_protocols(
    observable_type: ObservableType,
    replicator_id: str = "data_replicator",
    id_suffix: str = "",
) -> Tuple[
    ReweightingProtocols[analysis.AverageObservable, reweighting.ReweightObservable],
    ProtocolReplicator,
]:
    """Constructs the protocols used to reweight an observable, built on top
    of ``generate_base_reweighting_protocols``.

    Parameters
    ----------
    observable_type
        The type of observable to reweight. Kinetic energies, total energies
        and enthalpies are rejected by an assertion.
    replicator_id
        The id used in the replicated protocol id and forwarded to the base
        protocol generator.
    id_suffix
        A string appended to the ids of the created protocols.

    Returns
    -------
        The protocols and the replicator returned by
        ``generate_base_reweighting_protocols``, after the observable specific
        inputs have been set.
    """

    assert observable_type not in (
        ObservableType.KineticEnergy,
        ObservableType.TotalEnergy,
        ObservableType.Enthalpy,
    )

    inefficiency_protocol = analysis.AverageObservable(
        f"observable_inefficiency_$({replicator_id}){id_suffix}"
    )
    inefficiency_protocol.bootstrap_iterations = 1

    reweight_protocol = reweighting.ReweightObservable(
        f"reweight_observable{id_suffix}"
    )

    protocols, data_replicator = generate_base_reweighting_protocols(
        inefficiency_protocol, reweight_protocol, replicator_id, id_suffix
    )
    protocols.statistical_inefficiency.observable = ProtocolPath(
        f"observables[{observable_type.value}]", protocols.unpack_stored_data.id
    )

    # These observable types take their values directly from the protocol
    # which decorrelates the target potential, and drop the gradient zeroing
    # step entirely.
    uses_target_potential = observable_type in (
        ObservableType.PotentialEnergy,
        ObservableType.TotalEnergy,
        ObservableType.Enthalpy,
        ObservableType.ReducedPotential,
    )

    if uses_target_potential:

        protocols.zero_gradients = None
        protocols.decorrelate_observable = protocols.decorrelate_target_potential
        protocols.reweight_observable.observable = ProtocolPath(
            f"output_observables[{observable_type.value}]",
            protocols.decorrelate_observable.id,
        )

    else:

        protocols.zero_gradients.input_observables = ProtocolPath(
            f"output_observables[{observable_type.value}]",
            protocols.join_observables.id,
        )

    return protocols, data_replicator
示例#8
0
def test_index_replicated_protocol():
    """Tests that a schema validates, and can be assigned to a workflow,
    when protocols reference replicated protocols by their explicit index
    (e.g. ``protocol_0``)."""

    replicator = ProtocolReplicator("replicator")
    replicator.template_values = ["a", "b", "c", "d"]

    template_protocol = DummyInputOutputProtocol(
        f"protocol_{replicator.placeholder_id}"
    )
    template_protocol.input_value = ReplicatorValue(replicator.id)

    schema = WorkflowSchema()
    schema.protocol_replicators = [replicator]
    schema.protocol_schemas = [template_protocol.schema]

    # Add one protocol per template value which points at the corresponding
    # replica by its index.
    for replica_index, _ in enumerate(replicator.template_values):

        indexing_protocol = DummyInputOutputProtocol(
            f"indexing_protocol_{replica_index}"
        )
        indexing_protocol.input_value = ProtocolPath(
            "output_value", f"protocol_{replica_index}"
        )

        schema.protocol_schemas.append(indexing_protocol.schema)

    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema
示例#9
0
def test_nested_input():
    """Tests that a protocol input may reference a nested attribute of an
    indexed output (``output_value[a].temperature``) of another protocol,
    and that the resulting workflow executes on a ``DaskLocalCluster``."""

    state_by_key = {"a": ThermodynamicState(1.0 * unit.kelvin)}

    dict_protocol = DummyInputOutputProtocol("dict_protocol")
    dict_protocol.input_value = state_by_key

    quantity_protocol = DummyInputOutputProtocol("quantity_protocol")
    quantity_protocol.input_value = ProtocolPath(
        "output_value[a].temperature", dict_protocol.id
    )

    schema = WorkflowSchema()
    schema.protocol_schemas = [dict_protocol.schema, quantity_protocol.schema]
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as temporary_directory:

        with DaskLocalCluster() as calculation_backend:

            futures = graph.execute(temporary_directory, calculation_backend)
            assert len(futures) == 1

            result = futures[0].result()

    assert isinstance(result, WorkflowResult)
示例#10
0
def test_protocol_graph_execution(calculation_backend, compute_resources):
    """Tests that a two protocol graph executes and that the output of the
    final protocol matches the input of the first.

    Parameters
    ----------
    calculation_backend
        The backend to execute the graph on, or ``None`` to execute without
        one. When provided it is started before, and always stopped after,
        the body of the test.
    compute_resources
        Forwarded to ``ProtocolGraph.execute``. When not ``None`` the
        in-memory output of the final protocol is also checked.
    """

    if calculation_backend is not None:
        calculation_backend.start()

    # Make sure that the backend is shut down even if the execution or one
    # of the assertions below fails, so it cannot leak into other tests.
    try:

        protocol_a = DummyInputOutputProtocol("protocol_a")
        protocol_a.input_value = 1
        protocol_b = DummyInputOutputProtocol("protocol_b")
        protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

        protocol_graph = ProtocolGraph()
        protocol_graph.add_protocols(protocol_a, protocol_b)

        with tempfile.TemporaryDirectory() as directory:

            results = protocol_graph.execute(directory, calculation_backend,
                                             compute_resources)

            final_result = results[protocol_b.id]

            # With a backend the graph returns futures rather than values.
            if calculation_backend is not None:
                final_result = final_result.result()

            # The second element of the result is the path to the JSON file
            # containing the serialized outputs of the protocol.
            with open(final_result[1]) as file:
                results_b = json.load(file, cls=TypedJSONDecoder)

        assert results_b[".output_value"] == protocol_a.input_value

        if compute_resources is not None:
            assert protocol_b.output_value == protocol_a.input_value

    finally:

        if calculation_backend is not None:
            calculation_backend.stop()
示例#11
0
    def _get_unnested_protocol_path(protocol_path):
        """Strips any nested attribute names and array indices from the
        property name of a protocol path, leaving only the top level name.

        For example both `some_protocol_id.value.error` and
        `some_protocol_id.value[1]` are reduced to `some_protocol_id.value`.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to truncate.

        Returns
        -------
        ProtocolPath
            A new path with the same protocol ids but only the top level
            property name.
        """
        # Keep only the text which precedes the first "." (nested property)
        # and the first "[" (array index); `partition` returns the whole
        # string when the separator is absent.
        nested_name = protocol_path.property_name
        top_level_name = nested_name.partition(".")[0].partition("[")[0]

        return ProtocolPath(top_level_name, *protocol_path.protocol_ids)
示例#12
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=1000) -> SimulationSchema:
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        At most one of the two tolerances may be defined.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        # A target uncertainty is only used when a tolerance was provided.
        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Define the protocols which will run the simulation itself.
        protocols, value_source, output_to_store = generate_simulation_protocols(
            analysis.AverageObservable("average_density"),
            use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Specify that the average density should be estimated.
        density_key = ObservableType.Density.value
        protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{density_key}]",
            protocols.production_simulation.id,
        )

        # Build the workflow schema.
        workflow_schema = WorkflowSchema()
        workflow_schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            protocols.converge_uncertainty.schema,
            protocols.decorrelate_trajectory.schema,
            protocols.decorrelate_observables.schema,
        ]
        workflow_schema.outputs_to_store = {"full_system": output_to_store}
        workflow_schema.final_value_source = value_source

        # Wrap the workflow in the calculation schema along with the
        # requested tolerances.
        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance
        simulation_schema.workflow_schema = workflow_schema

        return simulation_schema
示例#13
0
def test_protocol_group_resume():
    """A test that protocol groups can recover after being killed
    (e.g. by a worker being killed due to hitting a wallclock limit)
    """

    working_directory = "graph_a"
    compute_resources = ComputeResources()

    def build_chain(n_protocols):
        # Build `protocol_a`, `protocol_b`, ... where the first protocol takes
        # the value 1 and every other protocol takes the previous output.
        chain = []

        for letter in "abcd"[:n_protocols]:

            protocol = DummyInputOutputProtocol("protocol_" + letter)

            if chain:
                protocol.input_value = ProtocolPath("output_value", chain[-1].id)
            else:
                protocol.input_value = 1

            chain.append(protocol)

        return chain

    def execute_group(protocols):
        # Wrap the protocols in a group named `group_a` and execute it as the
        # only member of a new graph.
        group = ProtocolGroup("group_a")
        group.add_protocols(*protocols)

        graph = ProtocolGraph()
        graph.add_protocols(group)
        graph.execute(working_directory, compute_resources=compute_resources)

        return group

    # Fake a protocol group which executes the first
    # two protocols and then 'gets killed'.
    protocol_group_a = execute_group(build_chain(2))

    # Remove the output file so it appears that the protocol group had not
    # completed.
    group_output_path = os.path.join(
        working_directory,
        protocol_group_a.id,
        f"{protocol_group_a.id}_output.json",
    )
    os.unlink(group_output_path)

    # Build the 'full' group, including the last two protocols which
    # 'had not been executed' when the group was 'killed'.
    protocol_group_a = execute_group(build_chain(4))

    assert all(x != UNDEFINED for x in protocol_group_a.outputs.values())
示例#14
0
    def build_graph(prefix):
        """Builds the merge protocols followed by the fork protocols, with
        the first fork protocol taking its input from ``protocol_f``.

        Parameters
        ----------
        prefix: str
            The string prepended to the id of every protocol.

        Returns
        -------
        list
            The merge protocols followed by the fork protocols.
        """
        merge_protocols = build_merge(prefix)
        fork_protocols = build_fork(prefix)

        # Connect the two sub-graphs: the head of the fork reads the output
        # of the protocol whose id is `prefix` + "protocol_f".
        fork_protocols[0].input_value = ProtocolPath(
            "output_value", prefix + "protocol_f"
        )

        all_protocols = list(merge_protocols)
        all_protocols.extend(fork_protocols)

        return all_protocols
示例#15
0
def test_protocol_group_execution():
    """Tests that a group of two chained protocols executes and that the
    output of the second protocol, retrieved through the group, equals the
    input of the first."""

    first = DummyInputOutputProtocol("protocol_a")
    first.input_value = 1

    second = DummyInputOutputProtocol("protocol_b")
    second.input_value = ProtocolPath("output_value", first.id)

    group = ProtocolGroup("protocol_group")
    group.add_protocols(first, second)

    with tempfile.TemporaryDirectory() as directory:
        group.execute(directory, ComputeResources())

    # The value is addressed through the group, i.e. group id then child id.
    output_path = ProtocolPath("output_value", group.id, second.id)

    assert group.get_value(output_path) == first.input_value
示例#16
0
def test_workflow_with_groups():
    """Tests that a workflow which consists of a single conditional group
    executes on a ``DaskLocalCluster`` and yields the value which was fed
    into the first protocol of the group."""

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    conditional_group = ConditionalGroup("conditional_group")
    conditional_group.add_protocols(protocol_a, protocol_b)

    # Ids which address `protocol_b` through its parent group.
    nested_ids = (conditional_group.id, protocol_b.id)

    condition = ConditionalGroup.Condition()
    condition.right_hand_value = 2 * unit.kelvin
    condition.type = ConditionalGroup.Condition.Type.LessThan
    condition.left_hand_value = ProtocolPath("output_value.value", *nested_ids)
    conditional_group.add_condition(condition)

    schema = WorkflowSchema()
    schema.protocol_schemas = [conditional_group.schema]
    schema.final_value_source = ProtocolPath("output_value", *nested_ids)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        with DaskLocalCluster() as calculation_backend:

            futures = graph.execute(directory, calculation_backend)
            assert len(futures) == 1

            result = futures[0].result()

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
示例#17
0
def build_fork(prefix):
    """Create six dummy protocols in which one chain forks into two.

    The id of each protocol is ``prefix`` followed by ``protocol_<letter>``.

    Parameters
    ----------
    prefix: str
        The string to prepend to the id of every protocol.

    Returns
    -------
    list of DummyInputOutputProtocol
        The protocols in the order g, h, i, j, k, l.
    """
    #          / i - j
    # g - h - |
    #          \ k - l

    # Each entry is (letter, letter of the protocol it reads from); the
    # head of the graph has no parent and takes a constant value instead.
    layout = (
        ("g", None),
        ("h", "g"),
        ("i", "h"),
        ("j", "i"),
        ("k", "h"),
        ("l", "k"),
    )

    protocols_by_letter = {}

    for letter, parent_letter in layout:

        protocol = DummyInputOutputProtocol(prefix + "protocol_" + letter)

        if parent_letter is None:
            protocol.input_value = 3
        else:
            protocol.input_value = ProtocolPath(
                "output_value", protocols_by_letter[parent_letter].id
            )

        protocols_by_letter[letter] = protocol

    return [protocols_by_letter[letter] for letter, _ in layout]
示例#18
0
    def _validate_outputs_to_store(self, schemas_by_id):
        """Validates that the references to the outputs to store
        are valid.

        Parameters
        ----------
        schemas_by_id: dict
            A mapping which is looked up using the ``start_protocol`` of each
            referenced path, and whose values provide a ``to_protocol``
            method.

        Raises
        ------
        ValueError
            If an attribute of an output to store takes its value from a
            replicator which does not exist, or from a protocol which is
            not present in ``schemas_by_id``.
        """
        if self.outputs_to_store == UNDEFINED:
            return

        assert all(
            isinstance(x, BaseStoredData)
            for x in self.outputs_to_store.values())

        # Guard against the replicators being UNDEFINED: treat that as there
        # being no replicators, so that a dangling replicator reference
        # raises the descriptive ValueError below rather than failing while
        # trying to iterate over the UNDEFINED placeholder.
        if self.protocol_replicators == UNDEFINED:
            replicators = []
        else:
            replicators = self.protocol_replicators

        for output_to_store in self.outputs_to_store.values():

            output_to_store.validate()

            for attribute_name in output_to_store.get_attributes(
                    StorageAttribute):

                attribute_value = getattr(output_to_store, attribute_name)

                if isinstance(attribute_value, ReplicatorValue) and not any(
                        attribute_value.replicator_id == x.id
                        for x in replicators):

                    raise ValueError(
                        f"An output to store is trying to take its value from a "
                        f"replicator {attribute_value.replicator_id} which does "
                        f"not exist.")

                # Only non-global protocol paths reference the output of a
                # protocol and so need any further validation.
                if (not isinstance(attribute_value, ProtocolPath)
                        or attribute_value.is_global):
                    continue

                if attribute_value.start_protocol not in schemas_by_id:
                    raise ValueError(
                        f"The {attribute_value} source does not exist.")

                protocol_schema = schemas_by_id[attribute_value.start_protocol]

                # Currently we do not support validating nested or indexed attributes.
                attribute_value = ProtocolPath(
                    attribute_value.property_name.split(".")[0].split("[")[0],
                    *attribute_value.protocol_ids,
                )

                # Retrieving the value acts as the check that the (top level)
                # attribute exists on the referenced protocol.
                protocol_object = protocol_schema.to_protocol()
                protocol_object.get_value(attribute_value)
示例#19
0
    def build_protocols(prefix):
        """Builds a protocol which feeds a group of forked protocols, along
        with two protocols which read from the two ends of the fork.

        Parameters
        ----------
        prefix: str
            The string prepended to the ids of the created protocols and
            passed on to ``build_fork``.

        Returns
        -------
        list
            ``protocol_a``, the protocol group, ``protocol_b`` and
            ``protocol_c``, in that order.
        """

        #     .-------------------.
        #     |          / i - j -|- b
        # a - | g - h - |         |
        #     |          \ k - l -|- c
        #     .-------------------.
        protocol_a = DummyInputOutputProtocol(prefix + "protocol_a")
        protocol_a.input_value = 1
        fork_protocols = build_fork(prefix)
        # The head of the fork takes the output of `protocol_a`.
        fork_protocols[0].input_value = ProtocolPath("output_value",
                                                     protocol_a.id)
        protocol_group = ProtocolGroup(prefix + "protocol_group")
        protocol_group.add_protocols(*fork_protocols)
        # NOTE(review): the "protocol_j" / "protocol_l" ids below are not
        # prefixed, unlike every other id created in this function - confirm
        # that this is intended when `prefix` is not empty.
        protocol_b = DummyInputOutputProtocol(prefix + "protocol_b")
        protocol_b.input_value = ProtocolPath("output_value",
                                              protocol_group.id, "protocol_j")
        protocol_c = DummyInputOutputProtocol(prefix + "protocol_c")
        protocol_c.input_value = ProtocolPath("output_value",
                                              protocol_group.id, "protocol_l")

        return [protocol_a, protocol_group, protocol_b, protocol_c]
示例#20
0
    def get_value_references(self, input_path):
        """Returns the protocol paths which the given input depends upon.

        For the ``conditions`` input, the left and right hand values of
        each condition which are protocol paths are collected. Every other
        input is delegated to the parent class.

        Parameters
        ----------
        input_path: ProtocolPath
            The path to the input of interest.

        Returns
        -------
        dict of ProtocolPath and ProtocolPath
            The referenced paths, keyed by a path to the condition attribute
            (e.g. ``conditions[0].left_hand_value``) which references them.
        """

        if input_path.property_name != "conditions":
            return super(ConditionalGroup,
                         self).get_value_references(input_path)

        references = {}

        for position, condition in enumerate(self.conditions):

            # The left hand side is inspected first so that the insertion
            # order of the dictionary is left, then right, per condition.
            for side in ("left_hand_value", "right_hand_value"):

                side_value = getattr(condition, side)

                if not isinstance(side_value, ProtocolPath):
                    continue

                source_path = ProtocolPath(f"conditions[{position}].{side}")
                references[source_path] = side_value

        return references
示例#21
0
def test_protocol_path_id_replacement():
    """Tests that the protocol id function on the protocol path
    behaves as expected."""

    untouched_path = "protocol_id_1/protocol_id_11."

    protocol_path = ProtocolPath("", "protocol_id_1", "protocol_id_11")
    assert protocol_path.full_path == untouched_path

    # Make sure only full matches lead to id replacement: neither a prefix
    # nor a suffix of an id may trigger it.
    for partial_id in ("protocol_id_", "rotocol_id_1"):

        protocol_path.replace_protocol(partial_id, "new_id_1")
        assert protocol_path.full_path == untouched_path

    # An exact match only replaces the id which matched in full.
    protocol_path.replace_protocol("protocol_id_1", "new_id_1")
    assert protocol_path.full_path == "new_id_1/protocol_id_11."
示例#22
0
    def _n_molecules_divisor(
        cls,
        n_molecules: ProtocolPath,
        suffix: Optional[str] = None
    ) -> Tuple[ProtocolPath, Optional[miscellaneous.DivideValue]]:
        """Returns the number of molecules to scale the value of the observable by.

        For temperatures, volumes and densities this is the number of
        molecules divided by the Avogadro constant, computed by an extra
        protocol. For every other observable type the number of molecules is
        returned unchanged and no protocol is created.

        Parameters
        ----------
        n_molecules
            A reference to the number of molecules in the simulation box.
        suffix
            An optional string to append to the id of the protocol which will
            normalize the number of molecules by the Avogadro constant. This
            argument is only used when such a protocol is created.

        Returns
        -------
            A reference to the divisor as well as optionally the protocol from
            which it is computed.
        """

        molar_observable_types = (
            ObservableType.Temperature,
            ObservableType.Volume,
            ObservableType.Density,
        )

        # Nothing extra is needed for the remaining observable types.
        if cls._observable_type() not in molar_observable_types:
            return n_molecules, None

        if suffix is None:
            suffix = ""

        avogadro_constant = (1.0 * unit.avogadro_constant).to("mole**-1")

        n_molar_molecules = miscellaneous.DivideValue(
            f"n_molar_molecules{suffix}")
        n_molar_molecules.value = n_molecules
        n_molar_molecules.divisor = avogadro_constant

        return ProtocolPath("result", n_molar_molecules.id), n_molar_molecules
示例#23
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ) -> ReweightingSchema:
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        At most one of the two tolerances may be defined.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        # Build the protocols which reweight the density observable.
        protocols, data_replicator = generate_reweighting_protocols(
            ObservableType.Density)
        protocols.reweight_observable.required_effective_samples = n_effective_samples

        # Assemble the workflow from the schema of every generated protocol.
        workflow_schema = WorkflowSchema()
        workflow_schema.protocol_schemas = [
            protocol.schema for protocol in protocols
        ]
        workflow_schema.protocol_replicators = [data_replicator]
        workflow_schema.final_value_source = ProtocolPath(
            "value", protocols.reweight_observable.id)

        # Wrap the workflow in the calculation schema along with the
        # requested tolerances.
        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance
        reweighting_schema.workflow_schema = workflow_schema

        return reweighting_schema
示例#24
0
    def _get_unreplicated_path(self, protocol_path):
        """Checks to see if the protocol pointed to by this path will only
        exist after a replicator has been applied, and if so, returns a
        path to the unreplicated protocol.

        Any part of the full path which matches the id of a protocol to be
        replicated, with digits in place of the replicator placeholder, is
        replaced by that (placeholder containing) id.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to convert to an unreplicated path.

        Returns
        -------
        ProtocolPath
            The path which should point to only unreplicated protocols
        """

        # Without any replicators there is nothing to undo; return a copy of
        # the path rather than the path itself.
        if self.protocol_replicators == UNDEFINED:
            return protocol_path.copy()

        full_unreplicated_path = str(protocol_path.full_path)

        for replicator in self.protocol_replicators:

            # Skip replicators whose placeholder already appears in the path.
            if replicator.placeholder_id in full_unreplicated_path:
                continue

            protocols_to_replicate = self._find_protocols_to_be_replicated(
                replicator)

            for protocol_id in protocols_to_replicate:

                # Build a regular expression from the protocol id in which
                # the placeholder matches any run of digits. The whole id is
                # escaped so that it is matched literally, which also escapes
                # the inserted `\d+`...
                match_pattern = re.escape(
                    protocol_id.replace(replicator.placeholder_id, r"\d+"))
                # ...so the escaped form of `\d+` is turned back into the
                # actual digit matching expression.
                match_pattern = match_pattern.replace(re.escape(r"\d+"),
                                                      r"\d+")

                # Swap any replicated (indexed) id for the templated id.
                full_unreplicated_path = re.sub(match_pattern, protocol_id,
                                                full_unreplicated_path)

        return ProtocolPath.from_string(full_unreplicated_path)
示例#25
0
def test_advanced_nested_replicators():
    """Checks that a replicator whose id and template values both depend
    on the placeholder of another replicator is applied as expected when a
    workflow is built from the schema."""

    # The outer replicator takes its values from a plain list...
    outer_replicator = ProtocolReplicator(replicator_id="replicator_a")
    outer_replicator.template_values = ["a", "b"]

    outer_placeholder = outer_replicator.placeholder_id

    # ...while the inner replicator is itself templated on the outer one.
    inner_replicator = ProtocolReplicator(
        replicator_id=f"replicator_b_{outer_placeholder}"
    )
    inner_replicator.template_values = ProtocolPath(
        f"dummy_list[{outer_placeholder}]", "global"
    )

    protocol = DummyReplicableProtocol(
        f"dummy_{outer_placeholder}_{inner_replicator.placeholder_id}"
    )
    protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol.schema]
    schema.protocol_replicators = [outer_replicator, inner_replicator]
    schema.validate()

    metadata = Workflow.generate_default_metadata(
        create_dummy_property(Density), "smirnoff99Frosst-1.1.0.offxml", []
    )
    metadata["dummy_list"] = [[1], [2]]

    workflow = Workflow(metadata, "")
    workflow.schema = schema

    assert len(workflow.protocols) == 2

    # Maps each expected protocol id onto its (value_a, value_b) pair.
    expected_values = {
        "dummy_0_0": ("a", 1),
        "dummy_1_0": ("b", 2),
    }

    for protocol_id, (expected_a, expected_b) in expected_values.items():
        assert workflow.protocols[protocol_id].replicated_value_a == expected_a
        assert workflow.protocols[protocol_id].replicated_value_b == expected_b
示例#26
0
    def _paprika_build_simulation_protocols(
        cls,
        coordinate_path: ProtocolPath,
        parameterized_system: ProtocolPath,
        id_prefix: str,
        id_suffix: str,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ) -> Tuple[openmm.OpenMMEnergyMinimisation, openmm.OpenMMSimulation,
               openmm.OpenMMSimulation, openmm.OpenMMSimulation, ]:
        """Creates a chained minimization -> thermalization ->
        equilibration -> production set of protocols from deep copies of the
        provided templates.

        Parameters
        ----------
        coordinate_path
            The path to the coordinates fed into the minimization.
        parameterized_system
            The path to the parameterized system used by every stage.
        id_prefix
            The text placed before the stage name in each protocol id.
        id_suffix
            The text placed after the stage name in each protocol id.
        minimization_template
            The protocol to copy for the minimization stage.
        thermalization_template
            The protocol to copy for the thermalization stage.
        equilibration_template
            The protocol to copy for the equilibration stage.
        production_template
            The protocol to copy for the production stage.

        Returns
        -------
            The minimization, thermalization, equilibration and production
            protocols, in that order.
        """

        def from_template(template, stage_name, input_coordinates):
            # Copy the template so that the caller's object is never modified.
            protocol = copy.deepcopy(template)
            protocol.id = f"{id_prefix}_{stage_name}_{id_suffix}"
            protocol.input_coordinate_file = input_coordinates
            protocol.parameterized_system = parameterized_system
            return protocol

        minimization = from_template(
            minimization_template, "energy_minimization", coordinate_path
        )

        simulation_stages = (
            ("thermalization", thermalization_template),
            ("equilibration", equilibration_template),
            ("production", production_template),
        )

        simulations = []
        previous_stage = minimization

        for stage_name, stage_template in simulation_stages:

            # Each stage starts from the coordinates output by the previous one.
            simulation = from_template(
                stage_template,
                stage_name,
                ProtocolPath("output_coordinate_file", previous_stage.id),
            )
            simulation.thermodynamic_state = ProtocolPath(
                "thermodynamic_state", "global"
            )

            simulations.append(simulation)
            previous_stage = simulation

        thermalization, equilibration, production = simulations

        return minimization, thermalization, equilibration, production
示例#27
0
    def default_paprika_schema(
        cls,
        existing_schema: SimulationSchema = None,
        n_solvent_molecules: int = 2500,
        n_thermalization_steps: int = 50000,
        n_equilibration_steps: int = 200000,
        n_production_steps: int = 2500000,
        dt_thermalization: unit.Quantity = 1.0 * unit.femtosecond,
        dt_equilibration: unit.Quantity = 2.0 * unit.femtosecond,
        dt_production: unit.Quantity = 2.0 * unit.femtosecond,
        debug: bool = False,
    ):
        """Builds the default calculation schema for estimating a host-guest
        binding affinity from an attach-pull-release (APR) calculation
        performed with the ``paprika`` package.

        Notes
        -----
        * This schema requires additional metadata to be able to estimate
          each property. This metadata is automatically generated for
          properties loaded from the ``taproom`` package using the
          ``TaproomDataSet`` object.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set, a deep copy
            is taken and the copy's `workflow_schema` is overwritten
            by this method.
        n_solvent_molecules
            The number of solvent molecules to add to the box.
        n_thermalization_steps
            The number of thermalization simulation steps to perform.
            Samples generated during this step will be discarded.
        n_equilibration_steps
            The number of equilibration simulation steps to perform.
            Samples generated during this step will be discarded.
        n_production_steps
            The number of production simulation steps to perform.
            Samples generated during this step will be used in the final
            free energy calculation.
        dt_thermalization
            The integration timestep during thermalization.
        dt_equilibration
            The integration timestep during equilibration.
        dt_production
            The integration timestep during production.
        debug
            Whether to return a debug schema. When True, the template
            protocols are adjusted after being built: the solvation
            template is limited to 10 molecules at a mass density of
            0.01 g / mL, and each simulation template is switched to the
            NVT ensemble with 500 steps per iteration and an output
            frequency of 50.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        if existing_schema is None:
            calculation_schema = SimulationSchema()
        else:
            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        # Create the template protocols which the protocols used in the
        # actual workflow are built from.
        solvation_template = cls._paprika_default_solvation_protocol(
            n_solvent_molecules=n_solvent_molecules
        )

        default_simulation_protocols = cls._paprika_default_simulation_protocols(
            n_thermalization_steps=n_thermalization_steps,
            n_equilibration_steps=n_equilibration_steps,
            n_production_steps=n_production_steps,
            dt_thermalization=dt_thermalization,
            dt_equilibration=dt_equilibration,
            dt_production=dt_production,
        )
        minimization_template, *simulation_templates = default_simulation_protocols

        if debug:

            solvation_template.max_molecules = 10
            solvation_template.mass_density = 0.01 * unit.grams / unit.milliliters

            for template in simulation_templates:

                template.ensemble = Ensemble.NVT
                template.steps_per_iteration = 500
                template.output_frequency = 50

        # The attach-pull calculation is repeated for each of the guest
        # orientations by way of this replicator.
        orientation_replicator = ProtocolReplicator("orientation_replicator")
        orientation_replicator.template_values = ProtocolPath(
            "guest_orientations", "global"
        )

        orientation_placeholder = orientation_replicator.placeholder_id

        restraint_schemas = {
            "static": ProtocolPath(
                f"guest_orientations[{orientation_placeholder}].static_restraints",
                "global",
            ),
            "conformational": ProtocolPath(
                f"guest_orientations[{orientation_placeholder}]."
                f"conformational_restraints",
                "global",
            ),
            "guest": ProtocolPath("guest_restraints", "global"),
            "wall": ProtocolPath("wall_restraints", "global"),
            "symmetry": ProtocolPath("symmetry_restraints", "global"),
        }

        # Both of the builders below take the same trailing arguments.
        shared_templates = (
            solvation_template,
            minimization_template,
            *simulation_templates,
        )

        # The protocols which yield the attach and pull free energies.
        attach_pull_outputs = cls._paprika_build_attach_pull_protocols(
            orientation_replicator, restraint_schemas, *shared_templates
        )
        (
            attach_pull_protocols,
            attach_pull_replicators,
            attach_free_energy,
            pull_free_energy,
            reference_work,
        ) = attach_pull_outputs

        # The protocols which yield the release free energy.
        release_outputs = cls._paprika_build_release_protocols(
            orientation_replicator, restraint_schemas, *shared_templates
        )
        release_protocols, release_replicator, release_free_energy = release_outputs

        # The symmetry correction.
        symmetry_correction = ComputeSymmetryCorrection("symmetry_correction")
        symmetry_correction.n_microstates = ProtocolPath(
            "n_guest_microstates", "global"
        )
        symmetry_correction.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )

        # Add up the contributions for each individual orientation...
        orientation_free_energy = miscellaneous.AddValues(
            f"orientation_free_energy_{orientation_placeholder}"
        )
        orientation_free_energy.values = [
            attach_free_energy,
            pull_free_energy,
            reference_work,
            release_free_energy,
            ProtocolPath("result", symmetry_correction.id),
        ]

        # ...and then combine the per-orientation values together.
        total_free_energy = analysis.AverageFreeEnergies("total_free_energy")
        total_free_energy.values = ProtocolPath("result", orientation_free_energy.id)
        total_free_energy.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )

        calculation_schema.workflow_schema = WorkflowSchema()
        workflow_schema = calculation_schema.workflow_schema

        workflow_protocols = [
            *attach_pull_protocols,
            *release_protocols,
            symmetry_correction,
            orientation_free_energy,
            total_free_energy,
        ]

        workflow_schema.protocol_schemas = [
            protocol.schema for protocol in workflow_protocols
        ]
        workflow_schema.protocol_replicators = [
            orientation_replicator,
            *attach_pull_replicators,
            release_replicator,
        ]

        # The final value is the combined free energy.
        workflow_schema.final_value_source = ProtocolPath(
            "result", total_free_energy.id
        )

        return calculation_schema
示例#28
0
    def _paprika_build_release_protocols(
        cls,
        orientation_replicator: ProtocolReplicator,
        restraint_schemas: Dict[str, ProtocolPath],
        solvation_template: coordinates.SolvateExistingStructure,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ):
        """Builds the protocols which make up the release phase of a
        host-guest APR calculation.

        Parameters
        ----------
        orientation_replicator
            The replicator whose placeholder is embedded in the ids of the
            restraint generation, restraint application, simulation and
            analysis protocols.
        restraint_schemas
            The restraint schemas passed to the restraint generation protocol.
        solvation_template
            The protocol to copy when solvating the host.
        minimization_template
            The protocol to copy for the energy minimization.
        thermalization_template
            The protocol to copy for the thermalization simulation.
        equilibration_template
            The protocol to copy for the equilibration simulation.
        production_template
            The protocol to copy for the production simulation.

        Returns
        -------
            A tuple of the list of release protocols, the replicator over
            the release windows, and a path to the ``result`` of the
            protocol which analyzes the release phase.
        """

        def global_path(property_name):
            # A fresh path is created on every call so that no path object
            # is shared between protocol inputs.
            return ProtocolPath(property_name, "global")

        # The replicator which sets up each release window.
        release_replicator = ProtocolReplicator("release_replicator")
        release_replicator.template_values = global_path("release_windows_indices")

        orientation_placeholder = orientation_replicator.placeholder_id
        window_suffix = f"{release_replicator.placeholder_id}_{orientation_placeholder}"

        # Keep only the solvent components for use in the solvation step.
        host_solvent_filter = miscellaneous.FilterSubstanceByRole(
            "host-filter_solvent"
        )
        host_solvent_filter.input_substance = global_path("host_substance")
        host_solvent_filter.component_roles = [Component.Role.Solvent]

        # Build coordinates for a host molecule aligned to the z-axis.
        aligned_host = PrepareReleaseCoordinates("release_align_coordinates")
        aligned_host.substance = global_path("host_substance")
        aligned_host.complex_file_path = global_path("host_coordinate_path")

        solvated_host = copy.deepcopy(solvation_template)
        solvated_host.id = "release_solvate_coordinates"
        solvated_host.substance = ProtocolPath(
            "filtered_substance", host_solvent_filter.id
        )
        solvated_host.solute_coordinate_file = ProtocolPath(
            "output_coordinate_path", aligned_host.id
        )

        # Assign the force field parameters. This protocol is shared by all
        # of the windows rather than being replicated.
        parameterized_host = forcefield.BaseBuildSystem("release_apply_parameters")
        parameterized_host.force_field_path = global_path("force_field_path")
        parameterized_host.substance = global_path("host_substance")
        parameterized_host.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvated_host.id
        )

        # Insert the dummy atoms.
        host_with_dummies = AddDummyAtoms("release_add_dummy_atoms")
        host_with_dummies.substance = global_path("host_substance")
        host_with_dummies.input_coordinate_path = ProtocolPath(
            "coordinate_file_path", solvated_host.id
        )
        host_with_dummies.input_system = ProtocolPath(
            "parameterized_system", parameterized_host.id
        )
        host_with_dummies.offset = global_path("dummy_atom_offset")

        # Generate the restraints and apply them to each window.
        restraint_generator = GenerateReleaseRestraints(
            f"release_generate_restraints_{orientation_placeholder}"
        )
        restraint_generator.host_coordinate_path = ProtocolPath(
            "output_coordinate_path", host_with_dummies.id
        )
        restraint_generator.release_lambdas = global_path("release_lambdas")
        restraint_generator.restraint_schemas = restraint_schemas

        window_restraints = ApplyRestraints(f"release_apply_restraints_{window_suffix}")
        window_restraints.restraints_path = ProtocolPath(
            "restraints_path", restraint_generator.id
        )
        window_restraints.phase = "release"
        window_restraints.window_index = ReplicatorValue(release_replicator.id)
        window_restraints.input_system = ProtocolPath(
            "output_system", host_with_dummies.id
        )

        # The simulations to run for each release window.
        simulation_protocols = cls._paprika_build_simulation_protocols(
            ProtocolPath("output_coordinate_path", host_with_dummies.id),
            ProtocolPath("output_system", window_restraints.id),
            "release",
            window_suffix,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )
        minimization, thermalization, equilibration, production = simulation_protocols

        # The analysis of the release phase.
        release_analysis = AnalyzeAPRPhase(
            f"analyze_release_phase_{orientation_placeholder}"
        )
        release_analysis.topology_path = ProtocolPath(
            "output_coordinate_path", host_with_dummies.id
        )
        release_analysis.trajectory_paths = ProtocolPath(
            "trajectory_file_path", production.id
        )
        release_analysis.phase = "release"
        release_analysis.restraints_path = ProtocolPath(
            "restraints_path", restraint_generator.id
        )

        # Every protocol which makes up the release part of the calculation.
        release_protocols = [
            host_solvent_filter,
            aligned_host,
            solvated_host,
            parameterized_host,
            host_with_dummies,
            restraint_generator,
            window_restraints,
            minimization,
            thermalization,
            equilibration,
            production,
            release_analysis,
        ]
        release_free_energy = ProtocolPath("result", release_analysis.id)

        return release_protocols, release_replicator, release_free_energy
示例#29
0
    def default_yank_schema(existing_schema=None):
        """Builds the default calculation schema for estimating this class
        of property directly from simulation, using a YANK protocol to
        compute the binding free energy.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set, a deep copy
            is taken and the copy's `workflow_schema` is overwritten
            by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        if existing_schema is None:
            calculation_schema = SimulationSchema()
        else:
            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        property_name = HostGuestBindingAffinity.__name__

        schema = WorkflowSchema(property_type=property_name)
        schema.id = f"{property_name}Schema"

        def global_path(property_path):
            # A fresh path is created on every call so that no path object
            # is shared between protocol inputs.
            return ProtocolPath(property_path, "global")

        def register(protocol):
            # Stores the protocol's schema on the workflow schema by id.
            schema.protocols[protocol.id] = protocol.schema

        # Set up the initial coordinates and topology. Only substances with
        # a single guest ligand are supported.
        ligand_filter = miscellaneous.FilterSubstanceByRole("filter_ligand")
        ligand_filter.input_substance = global_path("substance")
        ligand_filter.component_roles = [Component.Role.Ligand]
        ligand_filter.expected_components = 1
        register(ligand_filter)

        # Protocols which will (for now) take a set of host coordinates as
        # input and generate a set of charges for them. Only substances
        # with a single host receptor are supported.
        receptor_filter = miscellaneous.FilterSubstanceByRole("filter_receptor")
        receptor_filter.input_substance = global_path("substance")
        receptor_filter.component_roles = [Component.Role.Receptor]
        receptor_filter.expected_components = 1
        register(receptor_filter)

        # Dock the guest into the host.
        docking = coordinates.BuildDockedCoordinates("perform_docking")
        docking.ligand_substance = ProtocolPath("filtered_substance", ligand_filter.id)
        docking.receptor_coordinate_file = global_path("receptor_mol2")
        register(docking)

        # Solvate the docked structure using packmol.
        solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
        solvent_filter.input_substance = global_path("substance")
        solvent_filter.component_roles = [Component.Role.Solvent]
        register(solvent_filter)

        solvated_complex = coordinates.SolvateExistingStructure("solvate_complex")
        solvated_complex.max_molecules = 1000
        solvated_complex.substance = ProtocolPath(
            "filtered_substance", solvent_filter.id
        )
        solvated_complex.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", docking.id
        )
        register(solvated_complex)

        # Parameterize the solvated complex.
        complex_system = forcefield.BaseBuildSystem("build_solvated_complex_system")
        complex_system.force_field_path = global_path("force_field_path")
        complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvated_complex.id
        )
        complex_system.substance = global_path("substance")
        complex_system.charged_molecule_paths = [global_path("receptor_mol2")]
        register(complex_system)

        # Solvate the ligand using packmol.
        solvated_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
        solvated_ligand.max_molecules = 1000
        solvated_ligand.substance = ProtocolPath(
            "filtered_substance", solvent_filter.id
        )
        solvated_ligand.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", docking.id
        )
        register(solvated_ligand)

        # Parameterize the solvated ligand.
        ligand_system = forcefield.BaseBuildSystem("build_solvated_ligand_system")
        ligand_system.force_field_path = global_path("force_field_path")
        ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvated_ligand.id
        )
        ligand_system.substance = global_path("substance")
        register(ligand_system)

        # Estimate the binding free energy with YANK.
        yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")
        yank_protocol.thermodynamic_state = global_path("thermodynamic_state")

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10
        yank_protocol.verbose = True

        yank_protocol.force_field_path = global_path("force_field_path")

        yank_protocol.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", docking.id
        )
        yank_protocol.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", docking.id
        )

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", solvated_ligand.id
        )
        yank_protocol.solvated_ligand_system = ProtocolPath(
            "parameterized_system", ligand_system.id
        )

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", solvated_complex.id
        )
        yank_protocol.solvated_complex_system = ProtocolPath(
            "parameterized_system", complex_system.id
        )
        register(yank_protocol)

        # The final value is the free energy difference computed by YANK.
        schema.final_value_source = ProtocolPath(
            "free_energy_difference", yank_protocol.id
        )

        calculation_schema.workflow_schema = schema
        return calculation_schema
示例#30
0
    def _paprika_build_attach_pull_protocols(
        cls,
        orientation_replicator: ProtocolReplicator,
        restraint_schemas: Dict[str, ProtocolPath],
        solvation_template: coordinates.SolvateExistingStructure,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ):

        # Define a replicator to set and solvate up the coordinates for each pull window
        orientation_placeholder = orientation_replicator.placeholder_id

        pull_replicator = ProtocolReplicator(
            f"pull_replicator_{orientation_placeholder}")
        pull_replicator.template_values = ProtocolPath("pull_windows_indices",
                                                       "global")
        pull_replicator_id = (f"{pull_replicator.placeholder_id}_"
                              f"{orientation_placeholder}")

        attach_replicator = ProtocolReplicator(
            f"attach_replicator_{orientation_placeholder}")
        attach_replicator.template_values = ProtocolPath(
            "attach_windows_indices", "global")
        attach_replicator_id = (f"{attach_replicator.placeholder_id}_"
                                f"{orientation_placeholder}")

        # Filter out only the solvent substance to help with the solvation step.
        filter_solvent = miscellaneous.FilterSubstanceByRole(
            "host-guest-filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        # Define the protocols which will set and solvate up the coordinates for each
        # pull window
        align_coordinates = PreparePullCoordinates(
            f"pull_align_coordinates_{pull_replicator_id}")
        align_coordinates.substance = ProtocolPath("substance", "global")
        align_coordinates.complex_file_path = ProtocolPath(
            f"guest_orientations[{orientation_placeholder}].coordinate_path",
            "global")
        align_coordinates.guest_orientation_mask = ProtocolPath(
            "guest_orientation_mask", "global")
        align_coordinates.pull_window_index = ReplicatorValue(
            pull_replicator.id)
        align_coordinates.pull_distance = ProtocolPath("pull_distance",
                                                       "global")
        align_coordinates.n_pull_windows = ProtocolPath(
            "n_pull_windows", "global")

        solvate_coordinates = copy.deepcopy(solvation_template)
        solvate_coordinates.id = f"pull_solvate_coordinates_{pull_replicator_id}"
        solvate_coordinates.substance = ProtocolPath("filtered_substance",
                                                     filter_solvent.id)
        solvate_coordinates.solute_coordinate_file = ProtocolPath(
            "output_coordinate_path", align_coordinates.id)

        # Apply the force field parameters. This only needs to be done once.
        apply_parameters = forcefield.BuildSmirnoffSystem(
            f"pull_apply_parameters_{orientation_placeholder}")
        apply_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        apply_parameters.substance = ProtocolPath("substance", "global")
        apply_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path",
            f"pull_solvate_coordinates_0_{orientation_placeholder}",
        )

        # Add the dummy atoms.
        add_dummy_atoms = AddDummyAtoms(
            f"pull_add_dummy_atoms_{pull_replicator_id}")
        add_dummy_atoms.substance = ProtocolPath("substance", "global")
        add_dummy_atoms.input_coordinate_path = ProtocolPath(
            "coordinate_file_path", solvate_coordinates.id)
        add_dummy_atoms.input_system = ProtocolPath("parameterized_system",
                                                    apply_parameters.id)
        add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")

        attach_coordinate_path = ProtocolPath(
            "output_coordinate_path",
            f"pull_add_dummy_atoms_0_{orientation_placeholder}",
        )
        attach_system = ProtocolPath(
            "output_system",
            f"pull_add_dummy_atoms_0_{orientation_placeholder}")

        # Apply the attach restraints
        generate_attach_restraints = GenerateAttachRestraints(
            f"attach_generate_restraints_{orientation_placeholder}")
        generate_attach_restraints.complex_coordinate_path = attach_coordinate_path
        generate_attach_restraints.attach_lambdas = ProtocolPath(
            "attach_lambdas", "global")
        generate_attach_restraints.restraint_schemas = restraint_schemas

        apply_attach_restraints = ApplyRestraints(
            f"attach_apply_restraints_{attach_replicator_id}")
        apply_attach_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_attach_restraints.id)
        apply_attach_restraints.phase = "attach"
        apply_attach_restraints.window_index = ReplicatorValue(
            attach_replicator.id)
        apply_attach_restraints.input_system = attach_system

        # Apply the pull restraints
        generate_pull_restraints = GeneratePullRestraints(
            f"pull_generate_restraints_{orientation_placeholder}")
        generate_pull_restraints.complex_coordinate_path = attach_coordinate_path
        generate_pull_restraints.attach_lambdas = ProtocolPath(
            "attach_lambdas", "global")
        generate_pull_restraints.n_pull_windows = ProtocolPath(
            "n_pull_windows", "global")
        generate_pull_restraints.restraint_schemas = restraint_schemas

        apply_pull_restraints = ApplyRestraints(
            f"pull_apply_restraints_{pull_replicator_id}")
        apply_pull_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)
        apply_pull_restraints.phase = "pull"
        apply_pull_restraints.window_index = ReplicatorValue(
            pull_replicator.id)
        apply_pull_restraints.input_system = ProtocolPath(
            "output_system", add_dummy_atoms.id)

        # Setup the simulations for the attach and pull phases.
        (
            attach_minimization,
            attach_thermalization,
            attach_equilibration,
            attach_production,
        ) = cls._paprika_build_simulation_protocols(
            attach_coordinate_path,
            ProtocolPath("output_system", apply_attach_restraints.id),
            "attach",
            attach_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        (
            pull_minimization,
            pull_thermalization,
            pull_equilibration,
            pull_production,
        ) = cls._paprika_build_simulation_protocols(
            ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
            ProtocolPath("output_system", apply_pull_restraints.id),
            "pull",
            pull_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        # Analyze the attach phase.
        attach_free_energy = AnalyzeAPRPhase(
            f"analyze_attach_phase_{orientation_placeholder}")
        attach_free_energy.topology_path = attach_coordinate_path
        attach_free_energy.trajectory_paths = ProtocolPath(
            "trajectory_file_path", attach_production.id)
        attach_free_energy.phase = "attach"
        attach_free_energy.restraints_path = ProtocolPath(
            "restraints_path", generate_attach_restraints.id)

        # Analyze the pull phase.
        pull_free_energy = AnalyzeAPRPhase(
            f"analyze_pull_phase_{orientation_placeholder}")
        pull_free_energy.topology_path = attach_coordinate_path
        pull_free_energy.trajectory_paths = ProtocolPath(
            "trajectory_file_path", pull_production.id)
        pull_free_energy.phase = "pull"
        pull_free_energy.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)

        reference_state_work = ComputeReferenceWork(
            f"pull_reference_work_{orientation_placeholder}")
        reference_state_work.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        reference_state_work.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)

        # Return the full list of protocols which make up the attach and pull parts
        # of a host-guest APR calculation.
        protocols = [
            filter_solvent,
            align_coordinates,
            solvate_coordinates,
            apply_parameters,
            add_dummy_atoms,
            generate_attach_restraints,
            apply_attach_restraints,
            generate_pull_restraints,
            apply_pull_restraints,
            attach_minimization,
            attach_thermalization,
            attach_equilibration,
            attach_production,
            pull_minimization,
            pull_thermalization,
            pull_equilibration,
            pull_production,
            attach_free_energy,
            pull_free_energy,
            reference_state_work,
        ]
        protocol_replicators = [pull_replicator, attach_replicator]

        return (
            protocols,
            protocol_replicators,
            ProtocolPath("result", attach_free_energy.id),
            ProtocolPath("result", pull_free_energy.id),
            ProtocolPath("result", reference_state_work.id),
        )