Exemplo n.º 1
0
    def _execute(self, directory, available_resources):
        """Load a stored simulation data object from disk and mirror its
        contents onto this protocol's output attributes.

        Expects ``self.simulation_data_path`` to be a 3-tuple of
        (path to the serialized data object, path to the data directory,
        path to the force field used to generate the data).
        """

        if len(self.simulation_data_path) != 3:
            raise ValueError(
                "The simulation data path should be a tuple of a path to the data "
                "object, directory, and a path to the force field used to generate it."
            )

        data_object_path, data_directory, force_field_path = self.simulation_data_path

        # Validate the on-disk inputs before attempting to deserialize anything.
        if not path.isdir(data_directory):
            raise ValueError(
                f"The path to the data directory is invalid: {data_directory}")

        if not path.isfile(force_field_path):
            raise ValueError(
                f"The path to the force field is invalid: {force_field_path}")

        data_object = StoredSimulationData.from_json(data_object_path)

        if not isinstance(data_object, StoredSimulationData):
            raise ValueError(
                f"The data path must point to a `StoredSimulationData` "
                f"object, and not a {data_object.__class__.__name__}", )

        # Copy the stored values onto this protocol's outputs.
        self.substance = data_object.substance
        self.total_number_of_molecules = data_object.number_of_molecules
        self.thermodynamic_state = data_object.thermodynamic_state
        self.observables = data_object.observables

        self.coordinate_file_path = path.join(
            data_directory, data_object.coordinate_file_name
        )
        self.trajectory_file_path = path.join(
            data_directory, data_object.trajectory_file_name
        )

        self.force_field_path = force_field_path
Exemplo n.º 2
0
    def return_bad_result(physical_property, layer_directory, **_):
        """Return a result which leads to an unhandled exception."""

        # Create a data directory paired with a stored object that was never
        # populated — consuming this data downstream is expected to fail.
        dummy_data_directory = path.join(layer_directory, "bad_dummy_data")
        makedirs(dummy_data_directory, exist_ok=True)

        dummy_stored_object = StoredSimulationData()

        dummy_stored_object_path = path.join(layer_directory, "bad_dummy_data.json")

        with open(dummy_stored_object_path, "w") as file:
            json.dump(dummy_stored_object, file, cls=TypedJSONEncoder)

        result = CalculationLayerResult()
        result.physical_property = physical_property
        result.data_to_store = [(dummy_stored_object_path, dummy_data_directory)]

        return result
Exemplo n.º 3
0
def create_dummy_simulation_data(
    directory_path,
    substance,
    force_field_id="dummy_ff_id",
    coordinate_file_name="output.pdb",
    trajectory_file_name="trajectory.dcd",
    statistics_file_name="statistics.csv",
    statistical_inefficiency=1.0,
    phase=PropertyPhase.Liquid,
    number_of_molecules=1,
    calculation_id=None,
):
    """Creates a dummy `StoredSimulationData` object and
    the corresponding data directory.

    Parameters
    ----------
    directory_path: str
        The path to the dummy data directory to create.
    substance: Substance
        The substance to attach to the stored data object.
    force_field_id: str
        The id of the force field the data was nominally generated with.
    coordinate_file_name: str
        The name of the (empty) coordinate file to create.
    trajectory_file_name: str
        The name of the (empty) trajectory file to create.
    statistics_file_name: str
        The name of the (empty) statistics file to create.
    statistical_inefficiency: float
        The statistical inefficiency to store on the object.
    phase: PropertyPhase
        The phase to attach to the stored data object.
    number_of_molecules: int
        The number of molecules to record on the stored data object.
    calculation_id: str, optional
        The id of the source calculation. A random UUID is generated
        when this is `None`.

    Returns
    -------
    StoredSimulationData
        The dummy stored data object.
    """

    os.makedirs(directory_path, exist_ok=True)

    # Touch each of the expected data files so that they exist on disk.
    for file_name in (
        coordinate_file_name,
        trajectory_file_name,
        statistics_file_name,
    ):
        with open(os.path.join(directory_path, file_name), "w") as file:
            file.write("")

    data = StoredSimulationData()

    data.substance = substance
    data.force_field_id = force_field_id
    data.thermodynamic_state = ThermodynamicState(1.0 * unit.kelvin)
    data.property_phase = phase

    data.coordinate_file_name = coordinate_file_name
    data.trajectory_file_name = trajectory_file_name
    data.statistics_file_name = statistics_file_name

    data.statistical_inefficiency = statistical_inefficiency
    data.number_of_molecules = number_of_molecules

    data.source_calculation_id = (
        str(uuid.uuid4()) if calculation_id is None else calculation_id
    )

    return data
Exemplo n.º 4
0
def generate_simulation_protocols(
    analysis_protocol: S,
    use_target_uncertainty: bool,
    id_suffix: str = "",
    conditional_group: Optional[ConditionalGroup] = None,
    n_molecules: int = 1000,
) -> Tuple[SimulationProtocols[S], ProtocolPath, StoredSimulationData]:
    """Constructs a set of protocols which, when combined in a workflow schema, may be
    executed to run a single simulation to estimate the average value of an observable.

    The protocols returned will:

        1) Build a set of liquid coordinates for the
           property substance using packmol.

        2) Assign a set of smirnoff force field parameters
           to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps
           using a timestep of 2fs.

        5) Within a conditional group (up to a maximum of 100 times):

            5a) Run a longer NPT production simulation for 1000000 steps using a
                timestep of 2fs

            5b) Extract the average value of an observable and it's uncertainty.

            5c) If a convergence mode is set by the options, check if the target
                uncertainty has been met. If not, repeat steps 5a), 5b) and 5c).

        6) Extract uncorrelated configurations from a generated production
           simulation.

        7) Extract uncorrelated statistics from a generated production
           simulation.

    Parameters
    ----------
    analysis_protocol
        The protocol which will extract the observable of
        interest from the generated simulation data.
    use_target_uncertainty
        Whether to run the simulation until the observable is
        estimated to within the target uncertainty.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the main simulation / extraction
        protocols within. It is up to the caller of this method to
        manually add the convergence conditions to this group.
        If `None`, a default group with uncertainty convergence
        conditions is automatically constructed.
    n_molecules: int
        The number of molecules to use in the workflow.

    Returns
    -------
        The protocols to add to the workflow, a reference to the average value of the
        estimated observable (an ``Observable`` object), and an object which describes
        the default data from a simulation to store, such as the uncorrelated statistics
        and configurations.

    Raises
    ------
    ValueError
        If ``analysis_protocol`` does not inherit from ``BaseAverageObservable``.
    """

    # Validate the analysis protocol up-front so that an unsupported type fails
    # fast, before it is referenced by the convergence condition or added to the
    # (possibly caller-supplied) conditional group.
    if not isinstance(analysis_protocol, analysis.BaseAverageObservable):

        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryObservable or BaseAverageObservable "
            "protocols."
        )

    build_coordinates = coordinates.BuildCoordinatesPackmol(
        f"build_coordinates{id_suffix}"
    )
    build_coordinates.substance = ProtocolPath("substance", "global")
    build_coordinates.max_molecules = n_molecules

    assign_parameters = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    assign_parameters.force_field_path = ProtocolPath("force_field_path", "global")
    assign_parameters.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    assign_parameters.substance = ProtocolPath("output_substance", build_coordinates.id)

    # Equilibration
    energy_minimisation = openmm.OpenMMEnergyMinimisation(
        f"energy_minimisation{id_suffix}"
    )
    energy_minimisation.input_coordinate_file = ProtocolPath(
        "coordinate_file_path", build_coordinates.id
    )
    energy_minimisation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    equilibration_simulation = openmm.OpenMMSimulation(
        f"equilibration_simulation{id_suffix}"
    )
    equilibration_simulation.ensemble = Ensemble.NPT
    equilibration_simulation.steps_per_iteration = 100000
    equilibration_simulation.output_frequency = 5000
    equilibration_simulation.timestep = 2.0 * unit.femtosecond
    equilibration_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    equilibration_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", energy_minimisation.id
    )
    equilibration_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )

    # Production
    production_simulation = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production_simulation.ensemble = Ensemble.NPT
    production_simulation.steps_per_iteration = 1000000
    production_simulation.output_frequency = 2000
    production_simulation.timestep = 2.0 * unit.femtosecond
    production_simulation.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    production_simulation.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibration_simulation.id
    )
    production_simulation.parameterized_system = ProtocolPath(
        "parameterized_system", assign_parameters.id
    )
    production_simulation.gradient_parameters = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    # Set up a conditional group to ensure convergence of uncertainty
    if conditional_group is None:

        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.left_hand_value = ProtocolPath(
                "value.error", conditional_group.id, analysis_protocol.id
            )

            conditional_group.add_condition(condition)

            # Make sure the simulation gets extended after each iteration.
            production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", conditional_group.id
            )

    conditional_group.add_protocols(production_simulation, analysis_protocol)

    # Point the analyse protocol to the correct data sources
    analysis_protocol.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    analysis_protocol.potential_energies = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        production_simulation.id,
    )

    # Finally, extract uncorrelated data
    time_series_statistics = ProtocolPath(
        "time_series_statistics", conditional_group.id, analysis_protocol.id
    )
    coordinate_file = ProtocolPath(
        "output_coordinate_file", conditional_group.id, production_simulation.id
    )
    trajectory_path = ProtocolPath(
        "trajectory_file_path", conditional_group.id, production_simulation.id
    )
    observables = ProtocolPath(
        "observables", conditional_group.id, production_simulation.id
    )

    decorrelate_trajectory = analysis.DecorrelateTrajectory(
        f"decorrelate_trajectory{id_suffix}"
    )
    decorrelate_trajectory.time_series_statistics = time_series_statistics
    decorrelate_trajectory.input_coordinate_file = coordinate_file
    decorrelate_trajectory.input_trajectory_path = trajectory_path

    decorrelate_observables = analysis.DecorrelateObservables(
        f"decorrelate_observables{id_suffix}"
    )
    decorrelate_observables.time_series_statistics = time_series_statistics
    decorrelate_observables.input_observables = observables

    # Build the object which defines which pieces of simulation data to store.
    output_to_store = StoredSimulationData()

    output_to_store.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    output_to_store.property_phase = PropertyPhase.Liquid

    # The force field id and calculation id are filled in by the caller /
    # storage layer — placeholders mark them as deliberately unset here.
    output_to_store.force_field_id = PlaceholderValue()

    output_to_store.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", build_coordinates.id
    )
    output_to_store.substance = ProtocolPath("output_substance", build_coordinates.id)
    output_to_store.statistical_inefficiency = ProtocolPath(
        "time_series_statistics.statistical_inefficiency",
        conditional_group.id,
        analysis_protocol.id,
    )
    output_to_store.observables = ProtocolPath(
        "output_observables", decorrelate_observables.id
    )
    output_to_store.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    output_to_store.coordinate_file_name = coordinate_file

    output_to_store.source_calculation_id = PlaceholderValue()

    # Define where the final values come from.
    final_value_source = ProtocolPath(
        "value", conditional_group.id, analysis_protocol.id
    )

    base_protocols = SimulationProtocols(
        build_coordinates,
        assign_parameters,
        energy_minimisation,
        equilibration_simulation,
        production_simulation,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_observables,
    )

    return base_protocols, final_value_source, output_to_store
Exemplo n.º 5
0
    def process_successful_property(physical_property, layer_directory, **_):
        """Return a result as if the property had been successfully estimated."""

        dummy_data_directory = path.join(layer_directory, "good_dummy_data")
        makedirs(dummy_data_directory, exist_ok=True)

        # Build a minimally-populated stored data object matching the property.
        dummy_stored_object = StoredSimulationData()

        dummy_stored_object.substance = physical_property.substance
        dummy_stored_object.thermodynamic_state = physical_property.thermodynamic_state
        dummy_stored_object.property_phase = physical_property.phase
        dummy_stored_object.statistical_inefficiency = 1.0
        dummy_stored_object.number_of_molecules = 10

        # File names and ids are intentionally left blank for this dummy data.
        dummy_stored_object.force_field_id = ""
        dummy_stored_object.coordinate_file_name = ""
        dummy_stored_object.trajectory_file_name = ""
        dummy_stored_object.statistics_file_name = ""
        dummy_stored_object.source_calculation_id = ""

        dummy_stored_object_path = path.join(layer_directory, "good_dummy_data.json")

        with open(dummy_stored_object_path, "w") as file:
            json.dump(dummy_stored_object, file, cls=TypedJSONEncoder)

        result = CalculationLayerResult()
        result.physical_property = physical_property
        result.data_to_store = [(dummy_stored_object_path, dummy_data_directory)]

        return result