示例#1
0
def test_index_replicated_protocol():
    """Check that a schema may refer to individual replicas by their index.

    One template protocol is replicated over four template values, and for
    each value an extra protocol is added whose input points at the output
    of ``protocol_<index>``. The test contains no explicit assertions: it
    passes when the schema validates and can be assigned to a workflow
    without an error being raised.
    """
    value_replicator = ProtocolReplicator("replicator")
    value_replicator.template_values = ["a", "b", "c", "d"]

    # The template protocol carries the replicator's placeholder in its id.
    template_protocol = DummyInputOutputProtocol(
        f"protocol_{value_replicator.placeholder_id}")
    template_protocol.input_value = ReplicatorValue(value_replicator.id)

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_replicators = [value_replicator]
    workflow_schema.protocol_schemas = [template_protocol.schema]

    # Add one consumer per template value, each addressing a replica by index.
    for replica_index, _ in enumerate(value_replicator.template_values):
        consumer = DummyInputOutputProtocol(
            f"indexing_protocol_{replica_index}")
        consumer.input_value = ProtocolPath("output_value",
                                            f"protocol_{replica_index}")
        workflow_schema.protocol_schemas.append(consumer.schema)

    workflow_schema.validate()

    # Assigning the validated schema to a workflow must not raise.
    indexed_workflow = Workflow({})
    indexed_workflow.schema = workflow_schema
示例#2
0
def test_nested_replicators():
    """Check that two replicators on one protocol yield every combination.

    A single protocol whose id contains the placeholders of both ``rep_a``
    (values ``"a"``, ``"b"``) and ``rep_b`` (values ``1``, ``2``) is expected
    to be expanded into four protocols named ``dummy_<i>_<j>``, each holding
    the matching pair of template values.
    """
    workflow_schema = WorkflowSchema()

    template_protocol = DummyReplicableProtocol("dummy_$(rep_a)_$(rep_b)")
    template_protocol.replicated_value_a = ReplicatorValue("rep_a")
    template_protocol.replicated_value_b = ReplicatorValue("rep_b")

    workflow_schema.protocol_schemas = [template_protocol.schema]

    letter_replicator = ProtocolReplicator(replicator_id="rep_a")
    letter_replicator.template_values = ["a", "b"]

    number_replicator = ProtocolReplicator(replicator_id="rep_b")
    number_replicator.template_values = [1, 2]

    workflow_schema.protocol_replicators = [
        letter_replicator, number_replicator
    ]
    workflow_schema.validate()

    metadata = Workflow.generate_default_metadata(
        create_dummy_property(Density), "smirnoff99Frosst-1.1.0.offxml", [])

    workflow = Workflow(metadata, "")
    workflow.schema = workflow_schema

    # Expected (replicated_value_a, replicated_value_b) for each expanded id.
    expected_values = {
        "dummy_0_0": ("a", 1),
        "dummy_0_1": ("a", 2),
        "dummy_1_0": ("b", 1),
        "dummy_1_1": ("b", 2),
    }

    assert len(workflow.protocols) == 4

    for protocol_id, (expected_a, expected_b) in expected_values.items():
        expanded_protocol = workflow.protocols[protocol_id]

        assert expanded_protocol.replicated_value_a == expected_a
        assert expanded_protocol.replicated_value_b == expected_b

    # Also exercises the ``schema`` getter of the populated workflow.
    print(workflow.schema)
示例#3
0
def test_advanced_nested_replicators():
    """Check a replicator whose template values depend on another replicator.

    ``replicator_b`` carries the placeholder of ``replicator_a`` in its own
    id and draws its template values from ``dummy_list[<index of a>]`` in the
    workflow metadata. With ``dummy_list = [[1], [2]]`` the single template
    protocol is expected to expand into ``dummy_0_0`` and ``dummy_1_0``.
    """
    workflow_schema = WorkflowSchema()

    outer_replicator = ProtocolReplicator(replicator_id="replicator_a")
    outer_replicator.template_values = ["a", "b"]

    outer_placeholder = outer_replicator.placeholder_id

    # The inner replicator is itself replicated by the outer one.
    inner_replicator = ProtocolReplicator(
        replicator_id=f"replicator_b_{outer_placeholder}")
    inner_replicator.template_values = ProtocolPath(
        f"dummy_list[{outer_placeholder}]", "global")

    template_protocol = DummyReplicableProtocol(
        f"dummy_{outer_placeholder}_{inner_replicator.placeholder_id}")
    template_protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    template_protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    workflow_schema.protocol_schemas = [template_protocol.schema]
    workflow_schema.protocol_replicators = [outer_replicator, inner_replicator]

    workflow_schema.validate()

    metadata = Workflow.generate_default_metadata(
        create_dummy_property(Density), "smirnoff99Frosst-1.1.0.offxml", [])
    metadata["dummy_list"] = [[1], [2]]

    workflow = Workflow(metadata, "")
    workflow.schema = workflow_schema

    # Expected (replicated_value_a, replicated_value_b) for each expanded id.
    expected_values = {
        "dummy_0_0": ("a", 1),
        "dummy_1_0": ("b", 2),
    }

    assert len(workflow.protocols) == 2

    for protocol_id, (expected_a, expected_b) in expected_values.items():
        expanded_protocol = workflow.protocols[protocol_id]

        assert expanded_protocol.replicated_value_a == expected_a
        assert expanded_protocol.replicated_value_b == expected_b
示例#4
0
    def default_paprika_schema(
        cls,
        existing_schema: SimulationSchema = None,
        n_solvent_molecules: int = 2500,
        n_thermalization_steps: int = 50000,
        n_equilibration_steps: int = 200000,
        n_production_steps: int = 2500000,
        dt_thermalization: unit.Quantity = 1.0 * unit.femtosecond,
        dt_equilibration: unit.Quantity = 2.0 * unit.femtosecond,
        dt_production: unit.Quantity = 2.0 * unit.femtosecond,
        debug: bool = False,
    ):
        """Returns the default calculation schema to use when estimating
        a host-guest binding affinity measurement with an APR calculation
        using the ``paprika`` package.

        The returned workflow sums, for every guest orientation, the attach,
        pull, reference-work, release and symmetry-correction contributions,
        and then combines the per-orientation sums with an
        ``AverageFreeEnergies`` protocol whose ``result`` is the final value.

        Notes
        -----
        * This schema requires additional metadata to be able to estimate
          each property. This metadata is automatically generated for properties
          loaded from the ``taproom`` package using the ``TaproomDataSet`` object.
        * This method itself reads the following ``"global"`` metadata entries:
          ``guest_orientations`` (including the ``static_restraints`` and
          ``conformational_restraints`` of each entry), ``guest_restraints``,
          ``wall_restraints``, ``symmetry_restraints``, ``n_guest_microstates``
          and ``thermodynamic_state``. The helper methods it calls read
          further entries.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set, a deep copy
            of it is returned and the copy's `workflow_schema` will be
            overwritten by this method.
        n_solvent_molecules
            The number of solvent molecules to add to the box.
        n_thermalization_steps
            The number of thermalization simulation steps to perform.
            Samples generated during this step will be discarded.
        n_equilibration_steps
            The number of equilibration simulation steps to perform.
            Samples generated during this step will be discarded.
        n_production_steps
            The number of production simulation steps to perform.
            Samples generated during this step will be used in the final
            free energy calculation.
        dt_thermalization
            The integration timestep during thermalization
        dt_equilibration
            The integration timestep during equilibration
        dt_production
            The integration timestep during production
        debug
            Whether to return a debug schema. This is nearly identical
            to the default schema, albeit with significantly fewer
            solvent molecules (10, at a mass density of 0.01 g / mL), all
            simulations run in NVT and much shorter simulation runs
            (500 steps per iteration, output every 50 steps). The templates
            are still built from the other input arguments first; only the
            attributes listed here are then overwritten.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:
            assert isinstance(existing_schema, SimulationSchema)
            # Work on a copy so that the caller's schema is left untouched.
            calculation_schema = copy.deepcopy(existing_schema)

        # Initialize the protocols which will serve as templates for those
        # used in the actual workflows.
        solvation_template = cls._paprika_default_solvation_protocol(
            n_solvent_molecules=n_solvent_molecules)

        # The first returned template is used for minimization; the remaining
        # ones are forwarded positionally to the build methods below, which
        # expect thermalization, equilibration and production templates.
        (
            minimization_template,
            *simulation_templates,
        ) = cls._paprika_default_simulation_protocols(
            n_thermalization_steps=n_thermalization_steps,
            n_equilibration_steps=n_equilibration_steps,
            n_production_steps=n_production_steps,
            dt_thermalization=dt_thermalization,
            dt_equilibration=dt_equilibration,
            dt_production=dt_production,
        )

        if debug:

            # Shrink the system and the simulation lengths so that the
            # workflow runs quickly. The minimization template is not changed.
            solvation_template.max_molecules = 10
            solvation_template.mass_density = 0.01 * unit.grams / unit.milliliters

            for simulation_template in simulation_templates:

                simulation_template.ensemble = Ensemble.NVT
                simulation_template.steps_per_iteration = 500
                simulation_template.output_frequency = 50

        # Set up a replicator which will perform the attach-pull calculation for
        # each of the guest orientations
        orientation_replicator = ProtocolReplicator("orientation_replicator")
        orientation_replicator.template_values = ProtocolPath(
            "guest_orientations", "global")

        # Map each restraint type onto the metadata entry which defines it. The
        # static and conformational restraints are looked up per orientation,
        # the remaining ones are shared by all orientations.
        restraint_schemas = {
            "static":
            ProtocolPath(
                f"guest_orientations[{orientation_replicator.placeholder_id}]."
                f"static_restraints",
                "global",
            ),
            "conformational":
            ProtocolPath(
                f"guest_orientations[{orientation_replicator.placeholder_id}]."
                f"conformational_restraints",
                "global",
            ),
            "guest":
            ProtocolPath("guest_restraints", "global"),
            "wall":
            ProtocolPath("wall_restraints", "global"),
            "symmetry":
            ProtocolPath("symmetry_restraints", "global"),
        }

        # Build the protocols to compute the attach and pull free energies.
        (
            attach_pull_protocols,
            attach_pull_replicators,
            attach_free_energy,
            pull_free_energy,
            reference_work,
        ) = cls._paprika_build_attach_pull_protocols(
            orientation_replicator,
            restraint_schemas,
            solvation_template,
            minimization_template,
            *simulation_templates,
        )

        # Build the protocols to compute the release free energies.
        (
            release_protocols,
            release_replicator,
            release_free_energy,
        ) = cls._paprika_build_release_protocols(
            orientation_replicator,
            restraint_schemas,
            solvation_template,
            minimization_template,
            *simulation_templates,
        )

        # Compute the symmetry correction.
        symmetry_correction = ComputeSymmetryCorrection("symmetry_correction")
        symmetry_correction.n_microstates = ProtocolPath(
            "n_guest_microstates", "global")
        symmetry_correction.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        # Sum together the free energies of the individual orientations. The
        # id contains the orientation placeholder, so this protocol is
        # replicated along with the orientations.
        orientation_free_energy = miscellaneous.AddValues(
            f"orientation_free_energy_{orientation_replicator.placeholder_id}")
        orientation_free_energy.values = [
            attach_free_energy,
            pull_free_energy,
            reference_work,
            release_free_energy,
            ProtocolPath("result", symmetry_correction.id),
        ]

        # Finally, combine all of the values together
        # NOTE(review): ``values`` points at the placeholder id of the
        # replicated sum; presumably this is expanded into one path per
        # orientation when the replicators are applied - confirm.
        total_free_energy = analysis.AverageFreeEnergies("total_free_energy")
        total_free_energy.values = ProtocolPath("result",
                                                orientation_free_energy.id)
        total_free_energy.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        # Any workflow schema carried over from ``existing_schema`` is
        # discarded here.
        calculation_schema.workflow_schema = WorkflowSchema()

        calculation_schema.workflow_schema.protocol_schemas = [
            *(protocol.schema for protocol in attach_pull_protocols),
            *(protocol.schema for protocol in release_protocols),
            symmetry_correction.schema,
            orientation_free_energy.schema,
            total_free_energy.schema,
        ]
        calculation_schema.workflow_schema.protocol_replicators = [
            orientation_replicator,
            *attach_pull_replicators,
            release_replicator,
        ]

        # Define where the final value comes from.
        calculation_schema.workflow_schema.final_value_source = ProtocolPath(
            "result", total_free_energy.id)

        return calculation_schema
示例#5
0
    def _paprika_build_release_protocols(
        cls,
        orientation_replicator: ProtocolReplicator,
        restraint_schemas: Dict[str, ProtocolPath],
        solvation_template: coordinates.SolvateExistingStructure,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ):
        """Builds the protocols which make up the release phase of a
        host-guest APR calculation.

        The protocols prepare, solvate and parameterize the host on its own,
        add dummy atoms, generate and apply the release restraints, run the
        minimization / thermalization / equilibration / production
        simulations and finally analyze the release phase.

        Notes
        -----
        * The following ``"global"`` metadata entries are read directly by
          this method: ``release_windows_indices``, ``host_substance``,
          ``host_coordinate_path``, ``force_field_path``,
          ``dummy_atom_offset`` and ``release_lambdas``.

        Parameters
        ----------
        orientation_replicator
            The replicator over guest orientations. Only its
            ``placeholder_id`` is used here, to build protocol ids.
        restraint_schemas
            The restraint definitions to pass to the protocol which
            generates the release restraints.
        solvation_template
            The protocol to deep copy when creating the solvation protocol.
        minimization_template
            The template forwarded to ``_paprika_build_simulation_protocols``
            for the minimization.
        thermalization_template
            The template forwarded for the thermalization simulation.
        equilibration_template
            The template forwarded for the equilibration simulation.
        production_template
            The template forwarded for the production simulation.

        Returns
        -------
            A tuple of the list of created protocols, the replicator over
            the release windows, and a path to the ``result`` output of the
            protocol which analyzes the release phase.
        """

        # Define a replicator to set up each release window
        release_replicator = ProtocolReplicator("release_replicator")
        release_replicator.template_values = ProtocolPath(
            "release_windows_indices", "global")

        orientation_placeholder = orientation_replicator.placeholder_id

        # Despite its name this is not the id of a replicator, but rather the
        # id suffix (window placeholder followed by orientation placeholder)
        # shared by those protocols which exist per window and per orientation.
        release_replicator_id = (f"{release_replicator.placeholder_id}_"
                                 f"{orientation_placeholder}")

        # Filter out only the solvent substance to help with the solvation step.
        filter_solvent = miscellaneous.FilterSubstanceByRole(
            "host-filter_solvent")
        filter_solvent.input_substance = ProtocolPath("host_substance",
                                                      "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        # Construct a set of coordinates for a host molecule correctly
        # aligned to the z-axis.
        align_coordinates = PrepareReleaseCoordinates(
            "release_align_coordinates")
        align_coordinates.substance = ProtocolPath("host_substance", "global")
        align_coordinates.complex_file_path = ProtocolPath(
            "host_coordinate_path", "global")

        # Copy the template so that the caller's protocol is not modified.
        solvate_coordinates = copy.deepcopy(solvation_template)
        solvate_coordinates.id = "release_solvate_coordinates"
        solvate_coordinates.substance = ProtocolPath("filtered_substance",
                                                     filter_solvent.id)
        solvate_coordinates.solute_coordinate_file = ProtocolPath(
            "output_coordinate_path", align_coordinates.id)

        # Apply the force field parameters. This only needs to be done for one
        # of the windows.
        # NOTE(review): this uses ``forcefield.BaseBuildSystem`` whereas
        # ``_paprika_build_attach_pull_protocols`` uses
        # ``forcefield.BuildSmirnoffSystem`` - confirm the difference is
        # intentional.
        apply_parameters = forcefield.BaseBuildSystem(
            "release_apply_parameters")
        apply_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        apply_parameters.substance = ProtocolPath("host_substance", "global")
        apply_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_coordinates.id)

        # Add the dummy atoms.
        add_dummy_atoms = AddDummyAtoms("release_add_dummy_atoms")
        add_dummy_atoms.substance = ProtocolPath("host_substance", "global")
        add_dummy_atoms.input_coordinate_path = ProtocolPath(
            "coordinate_file_path",
            solvate_coordinates.id,
        )
        add_dummy_atoms.input_system = ProtocolPath("parameterized_system",
                                                    apply_parameters.id)
        add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")

        # Apply the restraints files. The restraints are generated once per
        # orientation and then applied once per release window.
        generate_restraints = GenerateReleaseRestraints(
            f"release_generate_restraints_{orientation_placeholder}")
        generate_restraints.host_coordinate_path = ProtocolPath(
            "output_coordinate_path", add_dummy_atoms.id)
        generate_restraints.release_lambdas = ProtocolPath(
            "release_lambdas", "global")
        generate_restraints.restraint_schemas = restraint_schemas

        apply_restraints = ApplyRestraints(
            f"release_apply_restraints_{release_replicator_id}")
        apply_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_restraints.id)
        apply_restraints.phase = "release"
        apply_restraints.window_index = ReplicatorValue(release_replicator.id)
        apply_restraints.input_system = ProtocolPath("output_system",
                                                     add_dummy_atoms.id)

        # Setup the simulations for the release phase. Every window starts
        # from the same coordinates but uses its own restrained system.
        (
            release_minimization,
            release_thermalization,
            release_equilibration,
            release_production,
        ) = cls._paprika_build_simulation_protocols(
            ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
            ProtocolPath("output_system", apply_restraints.id),
            "release",
            release_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        # Analyze the release phase.
        analyze_release_phase = AnalyzeAPRPhase(
            f"analyze_release_phase_{orientation_placeholder}")
        analyze_release_phase.topology_path = ProtocolPath(
            "output_coordinate_path", add_dummy_atoms.id)
        analyze_release_phase.trajectory_paths = ProtocolPath(
            "trajectory_file_path", release_production.id)
        analyze_release_phase.phase = "release"
        analyze_release_phase.restraints_path = ProtocolPath(
            "restraints_path", generate_restraints.id)

        # Return the full list of protocols which make up the release parts
        # of a host-guest APR calculation.
        protocols = [
            filter_solvent,
            align_coordinates,
            solvate_coordinates,
            apply_parameters,
            add_dummy_atoms,
            generate_restraints,
            apply_restraints,
            release_minimization,
            release_thermalization,
            release_equilibration,
            release_production,
            analyze_release_phase,
        ]

        return (
            protocols,
            release_replicator,
            ProtocolPath("result", analyze_release_phase.id),
        )
示例#6
0
    def _paprika_build_attach_pull_protocols(
        cls,
        orientation_replicator: ProtocolReplicator,
        restraint_schemas: Dict[str, ProtocolPath],
        solvation_template: coordinates.SolvateExistingStructure,
        minimization_template: openmm.OpenMMEnergyMinimisation,
        thermalization_template: openmm.OpenMMSimulation,
        equilibration_template: openmm.OpenMMSimulation,
        production_template: openmm.OpenMMSimulation,
    ):
        """Builds the protocols which make up the attach and pull phases of
        a host-guest APR calculation.

        The protocols prepare and solvate coordinates for each pull window,
        parameterize the system, add dummy atoms, generate and apply the
        attach and pull restraints, run the simulations of both phases,
        analyze each phase and compute the reference state work.

        Notes
        -----
        * The following ``"global"`` metadata entries are read directly by
          this method: ``pull_windows_indices``, ``attach_windows_indices``,
          ``substance``, the ``coordinate_path`` of each entry of
          ``guest_orientations``, ``guest_orientation_mask``,
          ``pull_distance``, ``n_pull_windows``, ``force_field_path``,
          ``dummy_atom_offset``, ``attach_lambdas`` and
          ``thermodynamic_state``.
        * Several protocols refer to the outputs of the pull window with
          index ``0`` through hard-coded ids (``pull_solvate_coordinates_0_...``
          and ``pull_add_dummy_atoms_0_...``). These must be kept in sync with
          the ids given to the replicated protocols.

        Parameters
        ----------
        orientation_replicator
            The replicator over guest orientations. Only its
            ``placeholder_id`` is used here, to build protocol and
            replicator ids.
        restraint_schemas
            The restraint definitions to pass to the protocols which
            generate the attach and the pull restraints.
        solvation_template
            The protocol to deep copy when creating the solvation protocol.
        minimization_template
            The template forwarded to ``_paprika_build_simulation_protocols``
            for the minimization of both phases.
        thermalization_template
            The template forwarded for the thermalization simulations.
        equilibration_template
            The template forwarded for the equilibration simulations.
        production_template
            The template forwarded for the production simulations.

        Returns
        -------
            A tuple of the list of created protocols, the list of the pull
            and attach replicators, and paths to the ``result`` outputs of
            the attach phase analysis, the pull phase analysis and the
            reference work protocol respectively.
        """

        # Define the replicators which expand the per-window protocols of the
        # pull and attach phases. Their ids contain the orientation placeholder.
        orientation_placeholder = orientation_replicator.placeholder_id

        pull_replicator = ProtocolReplicator(
            f"pull_replicator_{orientation_placeholder}")
        pull_replicator.template_values = ProtocolPath("pull_windows_indices",
                                                       "global")
        # Despite its name this is not the id of a replicator, but rather the
        # id suffix (window placeholder followed by orientation placeholder)
        # shared by those protocols which exist per pull window.
        pull_replicator_id = (f"{pull_replicator.placeholder_id}_"
                              f"{orientation_placeholder}")

        attach_replicator = ProtocolReplicator(
            f"attach_replicator_{orientation_placeholder}")
        attach_replicator.template_values = ProtocolPath(
            "attach_windows_indices", "global")
        # The equivalent id suffix for protocols which exist per attach window.
        attach_replicator_id = (f"{attach_replicator.placeholder_id}_"
                                f"{orientation_placeholder}")

        # Filter out only the solvent substance to help with the solvation step.
        filter_solvent = miscellaneous.FilterSubstanceByRole(
            "host-guest-filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        # Define the protocols which will set up and solvate the coordinates for
        # each pull window
        align_coordinates = PreparePullCoordinates(
            f"pull_align_coordinates_{pull_replicator_id}")
        align_coordinates.substance = ProtocolPath("substance", "global")
        align_coordinates.complex_file_path = ProtocolPath(
            f"guest_orientations[{orientation_placeholder}].coordinate_path",
            "global")
        align_coordinates.guest_orientation_mask = ProtocolPath(
            "guest_orientation_mask", "global")
        align_coordinates.pull_window_index = ReplicatorValue(
            pull_replicator.id)
        align_coordinates.pull_distance = ProtocolPath("pull_distance",
                                                       "global")
        align_coordinates.n_pull_windows = ProtocolPath(
            "n_pull_windows", "global")

        # Copy the template so that the caller's protocol is not modified.
        solvate_coordinates = copy.deepcopy(solvation_template)
        solvate_coordinates.id = f"pull_solvate_coordinates_{pull_replicator_id}"
        solvate_coordinates.substance = ProtocolPath("filtered_substance",
                                                     filter_solvent.id)
        solvate_coordinates.solute_coordinate_file = ProtocolPath(
            "output_coordinate_path", align_coordinates.id)

        # Apply the force field parameters. This only needs to be done once,
        # so the solvated coordinates of the pull window with index 0 are
        # referenced through a hard-coded id.
        # NOTE(review): this uses ``forcefield.BuildSmirnoffSystem`` whereas
        # ``_paprika_build_release_protocols`` uses
        # ``forcefield.BaseBuildSystem`` - confirm the difference is
        # intentional.
        apply_parameters = forcefield.BuildSmirnoffSystem(
            f"pull_apply_parameters_{orientation_placeholder}")
        apply_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        apply_parameters.substance = ProtocolPath("substance", "global")
        apply_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path",
            f"pull_solvate_coordinates_0_{orientation_placeholder}",
        )

        # Add the dummy atoms.
        add_dummy_atoms = AddDummyAtoms(
            f"pull_add_dummy_atoms_{pull_replicator_id}")
        add_dummy_atoms.substance = ProtocolPath("substance", "global")
        add_dummy_atoms.input_coordinate_path = ProtocolPath(
            "coordinate_file_path", solvate_coordinates.id)
        add_dummy_atoms.input_system = ProtocolPath("parameterized_system",
                                                    apply_parameters.id)
        add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")

        # The attach phase re-uses the coordinates and system produced for the
        # pull window with index 0, again through a hard-coded id.
        attach_coordinate_path = ProtocolPath(
            "output_coordinate_path",
            f"pull_add_dummy_atoms_0_{orientation_placeholder}",
        )
        attach_system = ProtocolPath(
            "output_system",
            f"pull_add_dummy_atoms_0_{orientation_placeholder}")

        # Apply the attach restraints
        generate_attach_restraints = GenerateAttachRestraints(
            f"attach_generate_restraints_{orientation_placeholder}")
        generate_attach_restraints.complex_coordinate_path = attach_coordinate_path
        generate_attach_restraints.attach_lambdas = ProtocolPath(
            "attach_lambdas", "global")
        generate_attach_restraints.restraint_schemas = restraint_schemas

        apply_attach_restraints = ApplyRestraints(
            f"attach_apply_restraints_{attach_replicator_id}")
        apply_attach_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_attach_restraints.id)
        apply_attach_restraints.phase = "attach"
        apply_attach_restraints.window_index = ReplicatorValue(
            attach_replicator.id)
        apply_attach_restraints.input_system = attach_system

        # Apply the pull restraints. These are also generated from the
        # coordinates of the pull window with index 0.
        generate_pull_restraints = GeneratePullRestraints(
            f"pull_generate_restraints_{orientation_placeholder}")
        generate_pull_restraints.complex_coordinate_path = attach_coordinate_path
        generate_pull_restraints.attach_lambdas = ProtocolPath(
            "attach_lambdas", "global")
        generate_pull_restraints.n_pull_windows = ProtocolPath(
            "n_pull_windows", "global")
        generate_pull_restraints.restraint_schemas = restraint_schemas

        apply_pull_restraints = ApplyRestraints(
            f"pull_apply_restraints_{pull_replicator_id}")
        apply_pull_restraints.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)
        apply_pull_restraints.phase = "pull"
        apply_pull_restraints.window_index = ReplicatorValue(
            pull_replicator.id)
        apply_pull_restraints.input_system = ProtocolPath(
            "output_system", add_dummy_atoms.id)

        # Setup the simulations for the attach and pull phases. All attach
        # windows start from the same coordinates, while each pull window
        # starts from its own coordinates.
        (
            attach_minimization,
            attach_thermalization,
            attach_equilibration,
            attach_production,
        ) = cls._paprika_build_simulation_protocols(
            attach_coordinate_path,
            ProtocolPath("output_system", apply_attach_restraints.id),
            "attach",
            attach_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        (
            pull_minimization,
            pull_thermalization,
            pull_equilibration,
            pull_production,
        ) = cls._paprika_build_simulation_protocols(
            ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
            ProtocolPath("output_system", apply_pull_restraints.id),
            "pull",
            pull_replicator_id,
            minimization_template,
            thermalization_template,
            equilibration_template,
            production_template,
        )

        # Analyze the attach phase.
        attach_free_energy = AnalyzeAPRPhase(
            f"analyze_attach_phase_{orientation_placeholder}")
        attach_free_energy.topology_path = attach_coordinate_path
        attach_free_energy.trajectory_paths = ProtocolPath(
            "trajectory_file_path", attach_production.id)
        attach_free_energy.phase = "attach"
        attach_free_energy.restraints_path = ProtocolPath(
            "restraints_path", generate_attach_restraints.id)

        # Analyze the pull phase. The topology is taken from the pull window
        # with index 0, the same path used by the attach phase.
        pull_free_energy = AnalyzeAPRPhase(
            f"analyze_pull_phase_{orientation_placeholder}")
        pull_free_energy.topology_path = attach_coordinate_path
        pull_free_energy.trajectory_paths = ProtocolPath(
            "trajectory_file_path", pull_production.id)
        pull_free_energy.phase = "pull"
        pull_free_energy.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)

        # Compute the reference state work from the pull restraints.
        reference_state_work = ComputeReferenceWork(
            f"pull_reference_work_{orientation_placeholder}")
        reference_state_work.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        reference_state_work.restraints_path = ProtocolPath(
            "restraints_path", generate_pull_restraints.id)

        # Return the full list of protocols which make up the attach and pull parts
        # of a host-guest APR calculation.
        protocols = [
            filter_solvent,
            align_coordinates,
            solvate_coordinates,
            apply_parameters,
            add_dummy_atoms,
            generate_attach_restraints,
            apply_attach_restraints,
            generate_pull_restraints,
            apply_pull_restraints,
            attach_minimization,
            attach_thermalization,
            attach_equilibration,
            attach_production,
            pull_minimization,
            pull_thermalization,
            pull_equilibration,
            pull_production,
            attach_free_energy,
            pull_free_energy,
            reference_state_work,
        ]
        protocol_replicators = [pull_replicator, attach_replicator]

        return (
            protocols,
            protocol_replicators,
            ProtocolPath("result", attach_free_energy.id),
            ProtocolPath("result", pull_free_energy.id),
            ProtocolPath("result", reference_state_work.id),
        )
示例#7
0
def test_group_replicators():
    """Check that a replicator reaches protocols nested inside a protocol group.

    A replicated protocol is wrapped in a ``ProtocolGroup`` and its output is
    referenced by two consumers: one whose own id carries the replicator
    placeholder, and one ``AddValues`` protocol whose id does not. After the
    schema is applied to a workflow the test expects four top-level protocols
    (the group, two replicated consumers and the list consumer), with the first
    kind of consumer pointing at its matching replica and the second receiving
    one path per replica.
    """
    replicator_id = "replicator"
    placeholder = f"$({replicator_id})"

    # The protocol which lives inside the group and gets replicated.
    grouped_protocol = DummyInputOutputProtocol(f"dummy_{placeholder}")
    grouped_protocol.input_value = ReplicatorValue(replicator_id)

    group = ProtocolGroup("dummy_group")
    group.add_protocols(grouped_protocol)

    def grouped_output(protocol_id):
        # Path to the output of a protocol nested inside the group.
        return ProtocolPath("output_value", group.id, protocol_id)

    # A consumer which is itself replicated...
    single_value_protocol = DummyInputOutputProtocol(
        f"dummy_single_{placeholder}")
    single_value_protocol.input_value = grouped_output(grouped_protocol.id)

    # ...and one which is not.
    list_value_protocol = AddValues("dummy_list")
    list_value_protocol.values = grouped_output(grouped_protocol.id)

    replicator = ProtocolReplicator(replicator_id)
    replicator.template_values = [
        (1.0 * unit.kelvin).plus_minus(1.0 * unit.kelvin),
        (2.0 * unit.kelvin).plus_minus(2.0 * unit.kelvin),
    ]

    schema = WorkflowSchema()
    schema.protocol_schemas = [
        group.schema,
        single_value_protocol.schema,
        list_value_protocol.schema,
    ]
    schema.protocol_replicators = [replicator]
    schema.validate()

    metadata = Workflow.generate_default_metadata(
        create_dummy_property(Density), "smirnoff99Frosst-1.1.0.offxml", [])

    workflow = Workflow(metadata, "")
    workflow.schema = schema

    assert len(workflow.protocols) == 4

    # The grouped protocol should have been replicated once per template value.
    for index in range(2):
        replica = workflow.protocols[group.id].protocols[f"dummy_{index}"]
        assert (replica.input_value.value ==
                replicator.template_values[index].value)

    # Each replicated consumer should reference only its own replica.
    for index in range(2):
        consumer = workflow.protocols[f"dummy_single_{index}"]
        assert consumer.input_value == grouped_output(f"dummy_{index}")

    # The un-replicated consumer should receive one path per replica.
    assert len(workflow.protocols["dummy_list"].values) == 2

    for index in range(2):
        assert (workflow.protocols["dummy_list"].values[index] ==
                grouped_output(f"dummy_{index}"))
示例#8
0
    def default_simulation_schema(
        cls,
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ) -> SimulationSchema:
        """Build the default schema for estimating this class of property
        from direct simulations.

        The workflow simulates the full mixture and, via a replicator over the
        global ``components`` list, each individual component. Component values
        are weighted by mole fraction and summed, and the sum is then combined
        with the mixture value by a ``SubtractValues`` protocol whose result is
        the final value of the workflow.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation; passed through
            to ``generate_simulation_protocols`` for both the mixture and the
            component systems.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        # At most one of the two tolerances may be provided.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        target_uncertainty = (absolute_tolerance != UNDEFINED
                              or relative_tolerance != UNDEFINED)

        # ------------------------------------------------------------------
        # Protocols for the fully mixed system.
        # ------------------------------------------------------------------
        mixture, mixture_value, mixture_stored_data = generate_simulation_protocols(
            analysis.AverageObservable("extract_observable_mixture"),
            target_uncertainty,
            id_suffix="_mixture",
            n_molecules=n_molecules,
        )
        # Point the analysis at the observable this property is built from.
        mixture.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            mixture.production_simulation.id,
        )
        mixture_divisor, mixture_n_molar = cls._n_molecules_divisor(
            ProtocolPath("output_number_of_molecules",
                         mixture.build_coordinates.id),
            "_mixture",
        )
        mixture.analysis_protocol.divisor = mixture_divisor

        # ------------------------------------------------------------------
        # Protocols for each component. These are templates which the
        # replicator copies once per component of the mixture substance.
        # ------------------------------------------------------------------
        replicator = ProtocolReplicator("component_replicator")
        replicator.template_values = ProtocolPath("components", "global")
        substance = ReplicatorValue(replicator.id)

        component_suffix = f"_component_{replicator.placeholder_id}"

        component, _, component_stored_data = generate_simulation_protocols(
            analysis.AverageObservable(f"extract_observable{component_suffix}"),
            target_uncertainty,
            id_suffix=component_suffix,
            n_molecules=n_molecules,
        )
        # Each replica builds coordinates for its own component only.
        component.build_coordinates.substance = substance
        component.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            component.production_simulation.id,
        )
        component_divisor, component_n_molar = cls._n_molecules_divisor(
            ProtocolPath("output_number_of_molecules",
                         component.build_coordinates.id),
            component_suffix,
        )
        component.analysis_protocol.divisor = component_divisor

        # Scale each component value by its mole fraction in the full substance.
        mole_fraction_weight = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{replicator.placeholder_id}")
        mole_fraction_weight.value = ProtocolPath(
            "value", component.analysis_protocol.id)
        mole_fraction_weight.full_substance = ProtocolPath(
            "substance", "global")
        mole_fraction_weight.component = substance

        # The weighting is nested inside the component convergence group.
        component.converge_uncertainty.add_protocols(mole_fraction_weight)

        # When a target uncertainty is requested, the component simulations
        # converge against the per component uncertainty target instead.
        if target_uncertainty:
            convergence_condition = component.converge_uncertainty.conditions[0]
            convergence_condition.right_hand_value = ProtocolPath(
                "per_component_uncertainty", "global")

        # ------------------------------------------------------------------
        # Sum the weighted component values and combine the sum with the
        # mixture value.
        # ------------------------------------------------------------------
        sum_components = miscellaneous.AddValues("add_component_observables")
        sum_components.values = ProtocolPath(
            "weighted_value",
            component.converge_uncertainty.id,
            mole_fraction_weight.id,
        )

        excess = miscellaneous.SubtractValues("calculate_excess_observable")
        excess.value_b = mixture_value
        excess.value_a = ProtocolPath("result", sum_components.id)

        # ------------------------------------------------------------------
        # Assemble the workflow schema.
        # ------------------------------------------------------------------
        simulation_stages = (
            "build_coordinates",
            "assign_parameters",
            "energy_minimisation",
            "equilibration_simulation",
            "converge_uncertainty",
            "decorrelate_trajectory",
            "decorrelate_observables",
        )

        workflow_schema = WorkflowSchema()

        # All component stages come first, followed by all mixture stages.
        workflow_schema.protocol_schemas = [
            getattr(system_protocols, stage).schema
            for system_protocols in (component, mixture)
            for stage in simulation_stages
        ] + [sum_components.schema, excess.schema]

        # `_n_molecules_divisor` may hand back an extra protocol (or None)
        # which must then also be part of the workflow.
        for extra_protocol in (component_n_molar, mixture_n_molar):
            if extra_protocol is not None:
                workflow_schema.protocol_schemas.append(extra_protocol.schema)

        workflow_schema.protocol_replicators = [replicator]

        workflow_schema.final_value_source = ProtocolPath("result", excess.id)

        workflow_schema.outputs_to_store = {
            "full_system": mixture_stored_data,
            f"component_{replicator.placeholder_id}": component_stored_data,
        }

        simulation_schema.workflow_schema = workflow_schema
        return simulation_schema
示例#9
0
def generate_base_reweighting_protocols(
    statistical_inefficiency: S,
    reweight_observable: T,
    replicator_id: str = "data_replicator",
    id_suffix: str = "",
) -> Tuple[ReweightingProtocols[S, T], ProtocolReplicator]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to reweight a set of cached simulation data to estimate the average
    value of an observable.

    Parameters
    ----------
    statistical_inefficiency
        The protocol which will be used to compute the statistical inefficiency and
        equilibration time of the observable of interest. This information will be
        used to decorrelate the cached data prior to reweighting. Its id must
        contain the placeholder of the data replicator.
    reweight_observable
        The MBAR reweighting protocol to use to reweight the observable to the target
        state. This method will automatically set the reduced potentials, the
        observable and the frame counts on the object. Its id must *not* contain
        the placeholder of the data replicator.
    replicator_id: str
        The id to use for the cached data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    ReweightingProtocols
        The protocols to add to the workflow.
    ProtocolReplicator
        The replicator which will clone the replicated protocols once for each
        piece of cached simulation data.
    """

    # Create the replicator which will apply these protocols once for each piece of
    # cached simulation data.
    data_replicator = ProtocolReplicator(replicator_id=replicator_id)
    data_replicator.template_values = ProtocolPath("full_system_data", "global")

    # Validate the inputs.
    assert isinstance(statistical_inefficiency, analysis.BaseAverageObservable)

    assert data_replicator.placeholder_id in statistical_inefficiency.id
    assert data_replicator.placeholder_id not in reweight_observable.id

    replicator_suffix = f"_{data_replicator.placeholder_id}{id_suffix}"

    # Unpack all of the stored data.
    unpack_stored_data = storage.UnpackStoredSimulationData(
        f"unpack_data{replicator_suffix}"
    )
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    # Join the individual trajectories together.
    join_trajectories = reweighting.ConcatenateTrajectories(
        f"join_trajectories{id_suffix}"
    )
    join_trajectories.input_coordinate_paths = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    join_trajectories.input_trajectory_paths = ProtocolPath(
        "trajectory_file_path", unpack_stored_data.id
    )
    # Join the individual sets of observables together.
    join_observables = reweighting.ConcatenateObservables(
        f"join_observables{id_suffix}"
    )
    join_observables.input_observables = ProtocolPath(
        "observables", unpack_stored_data.id
    )

    # Calculate the reduced potentials for each of the reference states.
    build_reference_system = forcefield.BaseBuildSystem(
        f"build_system{replicator_suffix}"
    )
    build_reference_system.force_field_path = ProtocolPath(
        "force_field_path", unpack_stored_data.id
    )
    build_reference_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    build_reference_system.substance = ProtocolPath("substance", unpack_stored_data.id)

    reduced_reference_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential{replicator_suffix}"
    )
    reduced_reference_potential.parameterized_system = ProtocolPath(
        "parameterized_system", build_reference_system.id
    )
    reduced_reference_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", unpack_stored_data.id
    )
    reduced_reference_potential.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", unpack_stored_data.id
    )
    reduced_reference_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )

    # Calculate the reduced potential of the target state.
    build_target_system = forcefield.BaseBuildSystem(f"build_system_target{id_suffix}")
    build_target_system.force_field_path = ProtocolPath("force_field_path", "global")
    build_target_system.substance = ProtocolPath("substance", "global")
    build_target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )

    reduced_target_potential = openmm.OpenMMEvaluateEnergies(
        f"reduced_potential_target{id_suffix}"
    )
    reduced_target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    reduced_target_potential.parameterized_system = ProtocolPath(
        "parameterized_system", build_target_system.id
    )
    reduced_target_potential.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", join_trajectories.id
    )
    reduced_target_potential.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", join_trajectories.id
    )
    reduced_target_potential.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Compute the observable gradients. The joined observables are the input here;
    # without this link the `join_observables` output would never be consumed and
    # `zero_gradients` would have nothing to operate on.
    zero_gradients = gradients.ZeroGradients(f"zero_gradients{id_suffix}")
    zero_gradients.input_observables = ProtocolPath(
        "output_observables", join_observables.id
    )
    zero_gradients.force_field_path = ProtocolPath("force_field_path", "global")
    zero_gradients.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Decorrelate the target potentials and observables.
    #
    # This guard repeats the assertion at the top of the function on purpose:
    # assertions are stripped when Python runs with `-O`, this check is not.
    if not isinstance(statistical_inefficiency, analysis.BaseAverageObservable):
        raise NotImplementedError()

    decorrelate_target_potential = analysis.DecorrelateObservables(
        f"decorrelate_target_potential{id_suffix}"
    )
    decorrelate_target_potential.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    decorrelate_target_potential.input_observables = ProtocolPath(
        "output_observables", reduced_target_potential.id
    )

    decorrelate_observable = analysis.DecorrelateObservables(
        f"decorrelate_observable{id_suffix}"
    )
    decorrelate_observable.time_series_statistics = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )
    decorrelate_observable.input_observables = ProtocolPath(
        "output_observables", zero_gradients.id
    )

    # Decorrelate the reference potentials. Due to a quirk of how workflow replicators
    # work the time series statistics need to be passed via a dummy protocol first.
    #
    # Because the `statistical_inefficiency` and `decorrelate_reference_potential`
    # protocols are replicated by the same replicator the `time_series_statistics`
    # input of `decorrelate_reference_potential_X` will take its value from
    # the `time_series_statistics` output of `statistical_inefficiency_X` rather than
    # as a list of [statistical_inefficiency_0.time_series_statistics...
    # statistical_inefficiency_N.time_series_statistics]. Passing the statistics via
    # an un-replicated intermediate resolves this.
    replicate_statistics = miscellaneous.DummyProtocol(
        f"replicated_statistics{id_suffix}"
    )
    replicate_statistics.input_value = ProtocolPath(
        "time_series_statistics", statistical_inefficiency.id
    )

    decorrelate_reference_potential = analysis.DecorrelateObservables(
        f"decorrelate_reference_potential{replicator_suffix}"
    )
    decorrelate_reference_potential.time_series_statistics = ProtocolPath(
        "output_value", replicate_statistics.id
    )
    decorrelate_reference_potential.input_observables = ProtocolPath(
        "output_observables", reduced_reference_potential.id
    )

    # Finally, apply MBAR to get the reweighted value.
    reweight_observable.reference_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", decorrelate_reference_potential.id
    )
    reweight_observable.target_reduced_potentials = ProtocolPath(
        "output_observables[ReducedPotential]", decorrelate_target_potential.id
    )
    reweight_observable.observable = ProtocolPath(
        "output_observables", decorrelate_observable.id
    )
    reweight_observable.frame_counts = ProtocolPath(
        "time_series_statistics.n_uncorrelated_points", statistical_inefficiency.id
    )

    protocols = ReweightingProtocols(
        unpack_stored_data,
        #
        join_trajectories,
        join_observables,
        #
        build_reference_system,
        reduced_reference_potential,
        #
        build_target_system,
        reduced_target_potential,
        #
        statistical_inefficiency,
        replicate_statistics,
        #
        decorrelate_reference_potential,
        decorrelate_target_potential,
        #
        decorrelate_observable,
        zero_gradients,
        #
        reweight_observable,
    )

    return protocols, data_replicator
示例#10
0
    def _default_reweighting_schema(
        cls,
        observable_type: ObservableType,
        absolute_tolerance: unit.Quantity = UNDEFINED,
        relative_tolerance: float = UNDEFINED,
        n_effective_samples: int = 50,
    ) -> ReweightingSchema:
        """Build the default schema for estimating this class of property by
        re-weighting cached simulation data.

        This internal implementation allows re-weighting a different observable than
        may be specified by the `_observable_type` class property.

        Parameters
        ----------
        observable_type
            The type of observable to re-weight; passed on to
            ``generate_reweighting_protocols`` for both the mixture and the
            component systems.
        absolute_tolerance
            The absolute tolerance to estimate the property to within.
        relative_tolerance
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
            The default re-weighting calculation schema.
        """
        # At most one of the two tolerances may be provided.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries.
        reweighting_schema.storage_queries = (
            cls._default_reweighting_storage_query())

        # ------------------------------------------------------------------
        # Re-weight the observable computed for the fully mixed system.
        # ------------------------------------------------------------------
        mixture, mixture_data_replicator = generate_reweighting_protocols(
            observable_type,
            "mixture_data_replicator",
            "_mixture",
        )
        mixture.reweight_observable.required_effective_samples = (
            n_effective_samples)

        normalise_mixture = miscellaneous.DivideValue(
            "divide_by_mixture_molecules")
        normalise_mixture.value = ProtocolPath(
            "value", mixture.reweight_observable.id)

        # The molecule count is read from the first (index "0") replica of the
        # protocol which unpacks the cached mixture data.
        first_mixture_data_id = mixture.unpack_stored_data.id.replace(
            mixture_data_replicator.placeholder_id, "0")
        mixture_divisor, mixture_n_molar = cls._n_molecules_divisor(
            ProtocolPath("total_number_of_molecules", first_mixture_data_id),
            "_mixture",
        )
        normalise_mixture.divisor = mixture_divisor

        # ------------------------------------------------------------------
        # Re-weight the observable for each component. The protocols below
        # are templates copied once per component of the full substance.
        # ------------------------------------------------------------------
        component_replicator = ProtocolReplicator("component_replicator")
        component_replicator.template_values = ProtocolPath(
            "components", "global")

        component_placeholder = component_replicator.placeholder_id
        component_suffix = f"_component_{component_placeholder}"

        component, component_data_replicator = generate_reweighting_protocols(
            observable_type,
            f"component_{component_placeholder}_data_replicator",
            component_suffix,
        )
        component.reweight_observable.required_effective_samples = (
            n_effective_samples)
        # Each component draws its cached data from its own entry of the
        # global `component_data` collection.
        component_data_replicator.template_values = ProtocolPath(
            f"component_data[$({component_replicator.id})]", "global")

        normalise_component = miscellaneous.DivideValue(
            f"divide_by_component_{component_placeholder}_molecules")
        normalise_component.value = ProtocolPath(
            "value", component.reweight_observable.id)

        first_component_data_id = component.unpack_stored_data.id.replace(
            component_data_replicator.placeholder_id, "0")
        component_divisor, component_n_molar = cls._n_molecules_divisor(
            ProtocolPath("total_number_of_molecules", first_component_data_id),
            component_suffix,
        )
        normalise_component.divisor = component_divisor

        # Make sure the protocols point to the correct substance.
        substance = ReplicatorValue(component_replicator.id)

        component.build_reference_system.substance = substance
        component.build_target_system.substance = substance

        # Scale each component value by its mole fraction in the full substance.
        mole_fraction_weight = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{component_placeholder}")
        mole_fraction_weight.value = ProtocolPath(
            "result", normalise_component.id)
        mole_fraction_weight.full_substance = ProtocolPath(
            "substance", "global")
        mole_fraction_weight.component = substance

        # ------------------------------------------------------------------
        # Sum the weighted component values and combine the sum with the
        # full system value.
        # ------------------------------------------------------------------
        sum_components = miscellaneous.AddValues("add_component_observables")
        sum_components.values = ProtocolPath(
            "weighted_value",
            mole_fraction_weight.id,
        )

        excess = miscellaneous.SubtractValues("calculate_excess_observable")
        excess.value_b = ProtocolPath("result", normalise_mixture.id)
        excess.value_a = ProtocolPath("result", sum_components.id)

        # ------------------------------------------------------------------
        # Assemble the workflow schema.
        # ------------------------------------------------------------------
        protocol_schemas = [
            protocol.schema for protocol in mixture if protocol is not None
        ]
        protocol_schemas.append(normalise_mixture.schema)
        protocol_schemas.extend(
            protocol.schema for protocol in component if protocol is not None)
        protocol_schemas.extend([
            normalise_component.schema,
            mole_fraction_weight.schema,
            sum_components.schema,
            excess.schema,
        ])

        workflow_schema = WorkflowSchema()
        workflow_schema.protocol_schemas = protocol_schemas

        # `_n_molecules_divisor` may hand back an extra protocol (or None)
        # which must then also be part of the workflow.
        for extra_protocol in (component_n_molar, mixture_n_molar):
            if extra_protocol is not None:
                workflow_schema.protocol_schemas.append(extra_protocol.schema)

        workflow_schema.protocol_replicators = [
            mixture_data_replicator,
            component_replicator,
            component_data_replicator,
        ]

        workflow_schema.final_value_source = ProtocolPath("result", excess.id)

        reweighting_schema.workflow_schema = workflow_schema
        return reweighting_schema
示例#11
0
    def _apply_replicator_to_replicators(replicator, schema, template_values):
        """Expand every replicator in a schema whose id depends on the
        replicator being applied (e.g. ids of the form
        `other_id_$(replicator.id)`).

        A dependent replicator is replaced by one copy per template value, with
        the placeholder in its id - and in any `ProtocolPath` among its own
        template values - swapped for the index of that template value.
        Replicators which do not reference the placeholder are kept as they
        are. The list stored on `schema.protocol_replicators` is replaced by
        the new list.

        Parameters
        ----------
        replicator: ProtocolReplicator
            The replicator being applied.
        schema: WorkflowSchema
            The workflow schema to which the replicator belongs.
        template_values: List of Any
            The values which the replicator is applying.
        """
        placeholder = replicator.placeholder_id
        index_labels = list(map(str, range(len(template_values))))

        def _with_index(value, index_label):
            # Only protocol paths can carry the placeholder; anything else
            # is passed through untouched.
            if not isinstance(value, ProtocolPath):
                return value

            return ProtocolPath.from_string(
                value.full_path.replace(placeholder, index_label))

        expanded_replicators = []

        for existing in schema.protocol_replicators:

            if placeholder not in existing.id:
                # This replicator is independent of the one being applied.
                expanded_replicators.append(existing)
                continue

            for index_label in index_labels:

                copied = ProtocolReplicator(
                    existing.id.replace(placeholder, index_label))

                values = existing.template_values

                if isinstance(values, ProtocolPath):
                    values = _with_index(values, index_label)
                elif isinstance(values, list):
                    values = [
                        _with_index(value, index_label) for value in values
                    ]

                copied.template_values = values
                expanded_replicators.append(copied)

        schema.protocol_replicators = expanded_replicators
示例#12
0
def generate_base_reweighting_protocols(
    analysis_protocol,
    mbar_protocol,
    replicator_id="data_repl",
    id_suffix="",
):
    """Builds the protocols needed to reweight cached simulation data so that
    a property can be estimated at a new state. The observable to reweight is
    extracted from each piece of stored data by `analysis_protocol`.

    Both `analysis_protocol` and `mbar_protocol` are modified in place: their
    inputs are pointed at the protocols created by this function.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which takes its input from the stored data and
        generates the set of observables to reweight. Its id must contain
        the `$(replicator_id)` placeholder.
    mbar_protocol: BaseReweightingProtocol
        A template mbar reweighting protocol which already has its reference
        observables set. The reduced potentials are set on this object by
        this function. Its id must not contain the `$(replicator_id)`
        placeholder.
    replicator_id: str
        The id to use for the data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocols which should form the bulk of
        a property estimation workflow.
    ProtocolReplicator:
        A replicator which will clone the workflow for each piece of
        stored data.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    placeholder = f"$({replicator_id})"

    # The analysis protocol is replicated per piece of stored data, whereas
    # the mbar protocol must exist only once.
    assert placeholder in analysis_protocol.id
    assert placeholder not in mbar_protocol.id

    replicated_suffix = f"_{placeholder}{id_suffix}"

    # Unpack each piece of the stored data.
    unpack_data = storage.UnpackStoredSimulationData(
        f"unpack_data{replicated_suffix}"
    )
    unpack_data.simulation_data_path = ReplicatorValue(replicator_id)

    def from_unpacked(attribute):
        """Path to an output of the data unpacking protocol."""
        return ProtocolPath(attribute, unpack_data.id)

    def from_analysis(attribute):
        """Path to an output of the passed in analysis protocol."""
        return ProtocolPath(attribute, analysis_protocol.id)

    # The analysis protocol works on the unpacked files; which inputs need
    # setting depends on the kind of analysis protocol passed in.
    if isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = from_unpacked("statistics_file_path")

    elif isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = from_unpacked(
            "coordinate_file_path"
        )
        analysis_protocol.trajectory_path = from_unpacked("trajectory_file_path")

    # Decorrelate the stored statistics and trajectory using the values
    # produced by the analysis protocol.
    uncorrelated_stats = analysis.ExtractUncorrelatedStatisticsData(
        f"decorrelate_stats{replicated_suffix}"
    )
    uncorrelated_stats.statistical_inefficiency = from_analysis(
        "statistical_inefficiency"
    )
    uncorrelated_stats.equilibration_index = from_analysis("equilibration_index")
    uncorrelated_stats.input_statistics_path = from_unpacked("statistics_file_path")

    uncorrelated_traj = analysis.ExtractUncorrelatedTrajectoryData(
        f"decorrelate_traj{replicated_suffix}"
    )
    uncorrelated_traj.statistical_inefficiency = from_analysis(
        "statistical_inefficiency"
    )
    uncorrelated_traj.equilibration_index = from_analysis("equilibration_index")
    uncorrelated_traj.input_coordinate_file = from_unpacked("coordinate_file_path")
    uncorrelated_traj.input_trajectory_path = from_unpacked("trajectory_file_path")

    # Join the decorrelated trajectories and statistics. These protocols are
    # not replicated, so their ids only carry the plain suffix.
    concat_traj = reweighting.ConcatenateTrajectories("concat_traj" + id_suffix)
    concat_traj.input_coordinate_paths = from_unpacked("coordinate_file_path")
    concat_traj.input_trajectory_paths = ProtocolPath(
        "output_trajectory_path", uncorrelated_traj.id
    )

    concat_stats = reweighting.ConcatenateStatistics("concat_stats" + id_suffix)
    concat_stats.input_statistics_paths = ProtocolPath(
        "output_statistics_path", uncorrelated_stats.id
    )

    # Reduced potentials of each reference state, evaluated on the joined data.
    reference_system = forcefield.BaseBuildSystem(
        f"build_system{replicated_suffix}"
    )
    reference_system.force_field_path = from_unpacked("force_field_path")
    reference_system.substance = from_unpacked("substance")
    reference_system.coordinate_file_path = from_unpacked("coordinate_file_path")

    reference_potentials = openmm.OpenMMReducedPotentials(
        f"reduced_potential{replicated_suffix}"
    )
    reference_potentials.system_path = ProtocolPath(
        "system_path", reference_system.id
    )
    reference_potentials.thermodynamic_state = from_unpacked("thermodynamic_state")
    reference_potentials.coordinate_file_path = from_unpacked(
        "coordinate_file_path"
    )
    reference_potentials.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", concat_traj.id
    )
    reference_potentials.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", concat_stats.id
    )

    # Reduced potentials of the target state, which takes its force field,
    # substance and thermodynamic state from the global metadata.
    target_system = forcefield.BaseBuildSystem("build_system_target" + id_suffix)
    target_system.force_field_path = ProtocolPath("force_field_path", "global")
    target_system.substance = ProtocolPath("substance", "global")
    target_system.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", concat_traj.id
    )

    target_potentials = openmm.OpenMMReducedPotentials(
        "reduced_potential_target" + id_suffix
    )
    target_potentials.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    target_potentials.system_path = ProtocolPath("system_path", target_system.id)
    target_potentials.coordinate_file_path = ProtocolPath(
        "output_coordinate_path", concat_traj.id
    )
    target_potentials.trajectory_file_path = ProtocolPath(
        "output_trajectory_path", concat_traj.id
    )
    target_potentials.kinetic_energies_path = ProtocolPath(
        "output_statistics_path", concat_stats.id
    )

    # Finally, hook the reduced potentials up to the MBAR protocol.
    mbar_protocol.reference_reduced_potentials = ProtocolPath(
        "statistics_file_path", reference_potentials.id
    )
    mbar_protocol.target_reduced_potentials = ProtocolPath(
        "statistics_file_path", target_potentials.id
    )

    if isinstance(mbar_protocol, reweighting.ReweightStatistics):

        statistics_type = mbar_protocol.statistics_type

        reweights_non_energy = (
            statistics_type != ObservableType.PotentialEnergy
            and statistics_type != ObservableType.TotalEnergy
            and statistics_type != ObservableType.Enthalpy
            and statistics_type != ObservableType.ReducedPotential
        )

        if reweights_non_energy:

            # The observable is read straight from the decorrelated
            # statistics of the stored data.
            mbar_protocol.statistics_paths = ProtocolPath(
                "output_statistics_path", uncorrelated_stats.id
            )

        else:

            # Energy observables are instead read from the statistics
            # evaluated at the target state.
            mbar_protocol.statistics_paths = [
                ProtocolPath("statistics_file_path", target_potentials.id)
            ]
            mbar_protocol.frame_counts = ProtocolPath(
                "number_of_uncorrelated_samples", uncorrelated_stats.id
            )

    # Create the replicator which clones the replicated protocols for each
    # piece of stored data.
    data_replicator = ProtocolReplicator(replicator_id=replicator_id)
    data_replicator.template_values = ProtocolPath("full_system_data", "global")

    protocols = BaseReweightingProtocols(
        unpack_data,
        analysis_protocol,
        uncorrelated_stats,
        uncorrelated_traj,
        concat_traj,
        concat_stats,
        reference_system,
        reference_potentials,
        target_system,
        target_potentials,
        mbar_protocol,
    )

    return protocols, data_replicator
示例#13
0
def generate_gradient_protocol_group(
    template_reweighting_protocol,
    force_field_path,
    coordinate_file_path,
    trajectory_file_path,
    statistics_file_path,
    replicator_id="repl",
    substance_source=None,
    id_suffix="",
    enable_pbc=True,
    effective_sample_indices=None,
):
    """Constructs a group of protocols which, when added to a workflow schema,
    estimates the gradient of an observable with respect to a force field
    parameter using the central difference method. The observable is
    reweighted to a forward and a reverse state, and the group is replicated
    for each parameter of interest.

    Note that `bootstrap_iterations` and `required_effective_samples` are
    overwritten on the passed in `template_reweighting_protocol`.

    Parameters
    ----------
    template_reweighting_protocol: BaseMBARProtocol
        A template protocol which will be used to reweight the observable of
        interest to small perturbations to the parameter of interest. These
        will then be used to calculate the finite difference gradient.

        The template *must* have its `reference_reduced_potentials` input set.
        The `target_reduced_potentials` input will be set automatically by this
        function.

        In the case that the template is of type `ReweightStatistics` and the
        observable is an energy, the statistics path will automatically be
        pointed to the energies evaluated using the perturbed parameter as
        opposed to the energy measured during the reference simulation.
    force_field_path: ProtocolPath
        The path to the force field parameters which the observables are being
        estimated at.
    coordinate_file_path: ProtocolPath
        A path to the initial coordinates of the simulation trajectory which
        was used to estimate the observable of interest.
    trajectory_file_path: ProtocolPath
        A path to the simulation trajectory which was used
        to estimate the observable of interest.
    statistics_file_path: ProtocolPath
        A path to the statistics which were generated alongside
        the trajectory passed to the `trajectory_file_path`. These
        should have been generated using the passed `force_field_path`.
    replicator_id: str
        A unique id which will be used for the protocol replicator which will
        replicate this group for every parameter of interest.
    substance_source: PlaceholderValue, optional
        An optional protocol path to the substance whose gradient
        is being estimated. If None, the global property substance
        is used.
    id_suffix: str
        An optional string to append to the end of each of the
        protocol ids.
    enable_pbc: bool
        If true, periodic boundary conditions are employed when recalculating
        the reduced potentials.
    effective_sample_indices: ProtocolPath, optional
        A placeholder variable which in future will ensure that only samples
        with a non-zero weight are included in the gradient calculation.
        If None, an empty list is used.

    Returns
    -------
    ProtocolGroup
        The protocol group which will estimate the gradient of
        an observable with respect to one parameter.
    ProtocolReplicator
        The replicator which will copy the gradient group for
        every parameter of interest.
    ProtocolPath
        A protocol path which points to the final gradient value.
    """

    assert isinstance(template_reweighting_protocol, reweighting.BaseMBARProtocol)
    assert template_reweighting_protocol.reference_reduced_potentials is not None
    assert template_reweighting_protocol.reference_reduced_potentials != UNDEFINED

    # Every protocol id carries the replicator placeholder so that the whole
    # group is cloned for each parameter.
    replicated_suffix = f"_$({replicator_id}){id_suffix}"

    # Fall back to the defaults of the optional arguments.
    if substance_source is None:
        substance_source = ProtocolPath("substance", "global")

    if effective_sample_indices is None:
        effective_sample_indices = []

    # The protocol which evaluates the reduced potentials of the reference,
    # forward and reverse states using only a subset of the full force field.
    gradient_potentials = openmm.OpenMMGradientPotentials(
        f"gradient_reduced_potentials{replicated_suffix}"
    )
    gradient_potentials.substance = substance_source
    gradient_potentials.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    gradient_potentials.force_field_path = force_field_path
    gradient_potentials.statistics_path = statistics_file_path
    gradient_potentials.trajectory_file_path = trajectory_file_path
    gradient_potentials.coordinate_file_path = coordinate_file_path
    gradient_potentials.parameter_key = ReplicatorValue(replicator_id)
    gradient_potentials.enable_pbc = enable_pbc
    gradient_potentials.effective_sample_indices = effective_sample_indices

    # These settings are applied to the template before its schema is copied,
    # so both the forward and the reverse protocols inherit them.
    template_reweighting_protocol.bootstrap_iterations = 1
    template_reweighting_protocol.required_effective_samples = 0

    # The observable must be taken from the target state whenever it is an
    # energy, as it then depends on the parameter being perturbed.
    use_target_state_energies = False

    if isinstance(template_reweighting_protocol, reweighting.ReweightStatistics):

        statistics_type = template_reweighting_protocol.statistics_type

        use_target_state_energies = (
            statistics_type == ObservableType.PotentialEnergy
            or statistics_type == ObservableType.ReducedPotential
            or statistics_type == ObservableType.TotalEnergy
            or statistics_type == ObservableType.Enthalpy
        )

    template_schema = template_reweighting_protocol.schema

    def clone_template(protocol_id, potentials_attribute):
        """Creates a reweighting protocol from a copy of the template schema
        whose target potentials are read from the gradient potentials."""
        cloned_schema = copy.deepcopy(template_schema)
        cloned_schema.id = protocol_id

        cloned_protocol = registered_workflow_protocols[cloned_schema.type](
            cloned_schema.id
        )
        cloned_protocol.schema = cloned_schema
        cloned_protocol.target_reduced_potentials = ProtocolPath(
            potentials_attribute, gradient_potentials.id
        )

        return cloned_protocol

    reverse_mbar = clone_template(
        f"reverse_reweight{replicated_suffix}", "reverse_potentials_path"
    )
    forward_mbar = clone_template(
        f"forward_reweight{replicated_suffix}", "forward_potentials_path"
    )

    if use_target_state_energies:

        reverse_mbar.statistics_paths = [
            ProtocolPath("reverse_potentials_path", gradient_potentials.id)
        ]
        forward_mbar.statistics_paths = [
            ProtocolPath("forward_potentials_path", gradient_potentials.id)
        ]

    # The protocol which combines the reweighted forward and reverse values
    # into the gradient using the central difference method.
    finite_difference = gradients.CentralDifferenceGradient(
        f"central_difference{replicated_suffix}"
    )
    finite_difference.parameter_key = ReplicatorValue(replicator_id)
    finite_difference.reverse_observable_value = ProtocolPath(
        "value", reverse_mbar.id
    )
    finite_difference.forward_observable_value = ProtocolPath(
        "value", forward_mbar.id
    )
    finite_difference.reverse_parameter_value = ProtocolPath(
        "reverse_parameter_value", gradient_potentials.id
    )
    finite_difference.forward_parameter_value = ProtocolPath(
        "forward_parameter_value", gradient_potentials.id
    )

    # Collect all of the protocols into a single group.
    gradient_group = groups.ProtocolGroup(f"gradient_group{replicated_suffix}")
    gradient_group.add_protocols(
        gradient_potentials, reverse_mbar, forward_mbar, finite_difference
    )

    # The replicator which copies the group for each parameter gradient key.
    parameter_replicator = ProtocolReplicator(replicator_id=replicator_id)
    parameter_replicator.template_values = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    gradient_source = ProtocolPath(
        "gradient", gradient_group.id, finite_difference.id
    )

    return gradient_group, parameter_replicator, gradient_source
示例#14
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Builds the default calculation schema used to estimate this
        property by reweighting existing data.

        At most one of `absolute_tolerance` and `relative_tolerance` may be
        set; this is enforced with an assertion.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Define which stored data the workflow will be fed.
        reweighting_schema.storage_queries = (
            ExcessMolarVolume._default_reweighting_storage_query()
        )

        # One replicator re-runs the component workflow for each component of
        # the system, the other replicates over the gradient keys.
        per_component = ProtocolReplicator(replicator_id="component_replicator")
        per_component.template_values = ProtocolPath("components", "global")

        per_gradient_key = ProtocolReplicator("gradient")
        per_gradient_key.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # Protocols which reweight the data of the full system.
        (
            mixture_protocols,
            mixture_volume,
            mixture_data_replicator,
            mixture_gradient_group,
            mixture_gradient,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_full",
            per_gradient_key.id,
            "full_data_replicator",
            n_effective_samples=n_effective_samples,
        )

        # Protocols which reweight the data of each component. The id of the
        # data replicator embeds the component placeholder so that it is
        # itself replicated per component.
        component_data_replicator_id = (
            f"component_{per_component.placeholder_id}_data_replicator"
        )

        (
            pure_protocols,
            pure_volumes,
            pure_data_replicator,
            pure_gradient_group,
            pure_gradient,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_component",
            per_gradient_key.id,
            component_data_replicator_id,
            replicator_id=per_component.id,
            weight_by_mole_fraction=True,
            substance_reference=ReplicatorValue(per_component.id),
            n_effective_samples=n_effective_samples,
        )

        # Restrict the data replicator to the data of the current component.
        pure_data_replicator.template_values = ProtocolPath(
            f"component_data[$({per_component.id})]", "global"
        )

        # The excess volume is the sum of the component volumes minus the
        # volume of the full system.
        sum_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_volumes.values = pure_volumes

        # NOTE(review): the id reads "potential" although this protocol
        # yields the excess volume; it is a runtime value and is kept as is.
        excess_volume = miscellaneous.SubtractValues("calculate_excess_potential")
        excess_volume.value_b = mixture_volume
        excess_volume.value_a = ProtocolPath("result", sum_volumes.id)

        # The gradients are combined in the same way, once per gradient key.
        sum_gradients = miscellaneous.AddValues(
            f"add_component_gradients_{per_gradient_key.placeholder_id}"
        )
        sum_gradients.values = pure_gradient

        excess_gradient = miscellaneous.SubtractValues(
            f"combine_gradients_{per_gradient_key.placeholder_id}"
        )
        excess_gradient.value_b = mixture_gradient
        excess_gradient.value_a = ProtocolPath("result", sum_gradients.id)

        # Assemble the final workflow schema.
        workflow_schema = WorkflowSchema()

        workflow_schema.protocol_schemas = (
            [protocol.schema for protocol in mixture_protocols]
            + [protocol.schema for protocol in pure_protocols]
            + [
                sum_volumes.schema,
                excess_volume.schema,
                mixture_gradient_group.schema,
                pure_gradient_group.schema,
                sum_gradients.schema,
                excess_gradient.schema,
            ]
        )

        workflow_schema.protocol_replicators = [
            mixture_data_replicator,
            per_component,
            pure_data_replicator,
            per_gradient_key,
        ]

        workflow_schema.gradients_sources = [
            ProtocolPath("result", excess_gradient.id)
        ]
        workflow_schema.final_value_source = ProtocolPath("result", excess_volume.id)

        reweighting_schema.workflow_schema = workflow_schema
        return reweighting_schema
示例#15
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Builds the default calculation schema used to estimate this
        class of property from direct simulations.

        At most one of `absolute_tolerance` and `relative_tolerance` may be
        set; this is enforced with an assertion.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        # The simulations only need to converge when a tolerance was given.
        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # The id of the replicator which clones the gradient protocols for
        # each gradient key to be estimated.
        gradient_replicator_id = "gradient_replicator"

        # The workflow which yields the molar volume of the full, mixed
        # system. The returned gradient replicator is discarded as a single
        # shared gradient replicator is created further down.
        (
            mixture_protocols,
            mixture_molar_molecules,
            mixture_volume,
            mixture_output,
            mixture_gradient_group,
            _,
            mixture_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_full",
            gradient_replicator_id,
            use_target_uncertainty=use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # The general workflow which yields the molar volume of one of the
        # components of the system.
        component_replicator_id = "component_replicator"
        component_substance = ReplicatorValue(component_replicator_id)

        # Weight by the mole fractions of the actual full system, as these
        # may be slightly different to the mole fractions of the measured
        # property due to rounding.
        mixture_substance = ProtocolPath(
            "output_substance", mixture_protocols.build_coordinates.id
        )

        (
            pure_protocols,
            pure_molar_molecules,
            pure_volumes,
            pure_output,
            pure_gradient_group,
            _,
            pure_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_component",
            gradient_replicator_id,
            replicator_id=component_replicator_id,
            weight_by_mole_fraction=True,
            component_substance_reference=component_substance,
            full_substance_reference=mixture_substance,
            use_target_uncertainty=use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # The excess volume is the sum of the component molar volumes minus
        # the molar volume of the mixed system.
        sum_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_volumes.values = pure_volumes

        excess_volume = miscellaneous.SubtractValues("calculate_excess_volume")
        excess_volume.value_b = mixture_volume
        excess_volume.value_a = ProtocolPath("result", sum_volumes.id)

        # The replicator which defines how the pure component protocols are
        # replicated for each component.
        component_replicator = ProtocolReplicator(replicator_id=component_replicator_id)
        component_replicator.template_values = ProtocolPath("components", "global")

        # The gradients are combined in the same way, once per gradient key.
        sum_gradients = miscellaneous.AddValues(
            f"add_component_gradients_$({gradient_replicator_id})"
        )
        sum_gradients.values = pure_gradient

        excess_gradient = miscellaneous.SubtractValues(
            f"combine_gradients_$({gradient_replicator_id})"
        )
        excess_gradient.value_b = mixture_gradient
        excess_gradient.value_a = ProtocolPath("result", sum_gradients.id)

        # A single replicator covers the gradient protocols of both the
        # component and the full system workflows.
        gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id)
        gradient_replicator.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        def simulation_schemas(protocols, molar_molecules):
            """Schemas of the protocols which build and simulate a system."""
            return [
                protocols.build_coordinates.schema,
                protocols.assign_parameters.schema,
                protocols.energy_minimisation.schema,
                protocols.equilibration_simulation.schema,
                protocols.converge_uncertainty.schema,
                molar_molecules.schema,
            ]

        def decorrelation_schemas(protocols):
            """Schemas of the protocols which extract uncorrelated data."""
            return [
                protocols.extract_uncorrelated_trajectory.schema,
                protocols.extract_uncorrelated_statistics.schema,
            ]

        # Assemble the final workflow schema.
        workflow_schema = WorkflowSchema()

        workflow_schema.protocol_schemas = (
            simulation_schemas(pure_protocols, pure_molar_molecules)
            + simulation_schemas(mixture_protocols, mixture_molar_molecules)
            + decorrelation_schemas(pure_protocols)
            + decorrelation_schemas(mixture_protocols)
            + [
                sum_volumes.schema,
                excess_volume.schema,
                pure_gradient_group.schema,
                mixture_gradient_group.schema,
                sum_gradients.schema,
                excess_gradient.schema,
            ]
        )

        workflow_schema.protocol_replicators = [
            gradient_replicator,
            component_replicator,
        ]

        # Tell the schema where to look for its final values.
        workflow_schema.gradients_sources = [
            ProtocolPath("result", excess_gradient.id)
        ]
        workflow_schema.final_value_source = ProtocolPath("result", excess_volume.id)

        workflow_schema.outputs_to_store = {
            "full_system": mixture_output,
            f"component_$({component_replicator_id})": pure_output,
        }

        simulation_schema.workflow_schema = workflow_schema
        return simulation_schema