示例#1
0
def main(input_data_set_path, server_port):
    """Write the training data set and the ForceBalance target options to disk.

    The data set stored at ``input_data_set_path`` is loaded, the uncertainty
    of every property in it is overwritten with zero (expressed in that
    property's default unit) and the result is saved to ``training_set.json``.
    An ``Evaluator_SMIRNOFF.OptionsFile`` which points at that file is then
    serialised to ``options.json``.

    Parameters
    ----------
    input_data_set_path
        The path to the JSON file which the training set is loaded from.
    server_port
        The port stored in the ``ConnectionOptions`` of the options file. The
        server address is always ``"localhost"``.
    """

    # Load the training set and reset the uncertainty of each property.
    # NOTE(review): the original comment describes this as zeroing out
    # *undefined* uncertainties to work around a ForceBalance bug, however
    # every uncertainty is overwritten here - confirm this is intended.
    properties = PhysicalPropertyDataSet.from_json(input_data_set_path)

    for entry in properties:
        entry.uncertainty = 0.0 * entry.default_unit()

    saved_data_set_path = "training_set.json"
    properties.json(saved_data_set_path, format=True)

    # Only the simulation layer is made available to the estimator.
    request_options = RequestOptions()
    request_options.calculation_layers = ["SimulationLayer"]

    # Assemble the options which the ForceBalance target will read.
    force_balance_options = Evaluator_SMIRNOFF.OptionsFile()
    force_balance_options.connection_options = ConnectionOptions(
        server_address="localhost",
        server_port=server_port,
    )
    force_balance_options.estimation_options = request_options
    force_balance_options.data_set_path = saved_data_set_path

    # Every property type is given a unit weight; the denominators are
    # derived from the training set itself.
    force_balance_options.weights = dict.fromkeys(properties.property_types,
                                                  1.0)
    force_balance_options.denominators = calculate_denominators(properties)

    # Persist the options as JSON.
    serialised_options = force_balance_options.to_json()

    with open("options.json", "w") as stream:
        stream.write(serialised_options)
示例#2
0
def main():
    """Estimate the pure and binary data sets with each calculation layer.

    A local GPU server with a single worker is started on port 8001. For the
    ``"SimulationLayer"`` and then the ``"ReweightingLayer"`` an estimate of
    the merged data set is requested, including gradients with respect to the
    ``epsilon`` and ``rmin_half`` vdW parameters of ``[#6X4:1]``. The results
    of each layer are written to ``pure_binary_<layer_name>.json``, where
    ``<layer_name>`` is the snake case version of the layer name.
    """

    setup_timestamp_logging()

    # The force field which the estimates are made with.
    force_field_source = SmirnoffForceFieldSource.from_path(
        "smirnoff99Frosst-1.1.0.offxml")

    # Combine the pure and the binary properties into a single data set.
    data_set = PhysicalPropertyDataSet.from_json("pure_data_set.json")
    binary_data_set = PhysicalPropertyDataSet.from_json("binary_data_set.json")
    data_set.merge(binary_data_set)

    # The server and the client must agree on the port.
    port = 8001

    server = setup_server(
        backend_type=BackendType.LocalGPU,
        max_number_of_workers=1,
        port=port,
    )

    with server:

        client = EvaluatorClient(ConnectionOptions(server_port=port))

        for layer in ("SimulationLayer", "ReweightingLayer"):

            # Restrict the request to the current layer only.
            request_options = RequestOptions()
            request_options.calculation_layers = [layer]

            gradient_keys = [
                ParameterGradientKey(tag="vdW",
                                     smirks="[#6X4:1]",
                                     attribute=attribute)
                for attribute in ("epsilon", "rmin_half")
            ]

            request, _ = client.request_estimate(
                property_set=data_set,
                force_field_source=force_field_source,
                options=request_options,
                parameter_gradient_keys=gradient_keys,
            )

            # Retrieve the results of the request.
            results, _ = request.results(True, 5)

            # Convert the CamelCase layer name to snake_case by inserting an
            # underscore in front of every capital letter except the first.
            snake_case_layer = re.sub(r"(?<!^)(?=[A-Z])", "_", layer).lower()
            results.json(f"pure_binary_{snake_case_layer}.json", True)
def _estimate_required_simulations(properties_of_interest, data_set):
    """Estimate how many simulations the evaluator framework would run in
    order to estimate a data set of properties.

    The default simulation schema of each property type of interest is
    registered for the ``"SimulationLayer"``, the workflow graph of the
    layer is built for the data set, and every ``ConditionalGroup`` protocol
    found in that graph is counted as one simulation.

    Parameters
    ----------
    properties_of_interest: list of tuple of type and SubstanceType
        The property types which are of interest to optimise against. Only
        the first element of each tuple is used.
    data_set: PhysicalPropertyDataSet
        The data set containing the properties of interest.

    Returns
    -------
    int
        The estimated number of simulations required.
    """

    # Work on a copy of the data set obtained through a JSON round-trip.
    data_set = PhysicalPropertyDataSet.parse_json(data_set.json())

    layer_name = "SimulationLayer"
    request_options = RequestOptions()

    for property_class, _ in properties_of_interest:

        schema = property_class.default_simulation_schema()
        request_options.add_schema(layer_name, property_class.__name__, schema)

    graph, _ = SimulationLayer._build_workflow_graph(
        "",
        LocalFileStorage(),
        data_set.properties,
        "",
        [],
        request_options,
    )

    # Each conditional group in the graph is counted as a single simulation.
    return sum(
        1
        for protocol in graph.protocols.values()
        if isinstance(protocol, ConditionalGroup)
    )
示例#4
0
def main():
    """Estimate the hydration data set using the simulation layer.

    A local GPU server with a single worker is started on port 8002, an
    estimate of the properties stored in ``hydration_data_set.json`` is
    requested using the schema returned by ``_get_fixed_lambda_schema`` for
    ``"SolvationFreeEnergy"`` properties, and the results are saved to
    ``results.json``.
    """

    setup_timestamp_logging()

    # The force field which the estimates are made with.
    force_field_source = SmirnoffForceFieldSource.from_path(
        "smirnoff99Frosst-1.1.0.offxml")

    # Load the hydration data set and write it straight back to the same
    # file with ``format=True``.
    data_set_path = "hydration_data_set.json"

    data_set = PhysicalPropertyDataSet.from_json(data_set_path)
    data_set.json(data_set_path, format=True)

    # The server and the client must agree on the port.
    port = 8002

    server = setup_server(
        backend_type=BackendType.LocalGPU,
        max_number_of_workers=1,
        port=port,
    )

    with server:

        client = EvaluatorClient(ConnectionOptions(server_port=port))

        # Only allow the simulation layer, and replace the schema used for
        # solvation free energies.
        layer_name = "SimulationLayer"

        request_options = RequestOptions()
        request_options.calculation_layers = [layer_name]
        request_options.add_schema(layer_name, "SolvationFreeEnergy",
                                   _get_fixed_lambda_schema())

        request, _ = client.request_estimate(
            property_set=data_set,
            force_field_source=force_field_source,
            options=request_options,
        )

        # Retrieve the results of the request and store them on disk.
        results, _ = request.results(True, 60)
        results.json("results.json", True)
示例#5
0
    def _batch_by_same_component(self, submission, force_field_id):
        """Split the properties of a submission into one batch per substance.

        A batch is created for every entry of ``submission.dataset.substances``
        and is populated with the properties which the data set reports for
        that substance. Each batch receives its own copy of the submission's
        options and parameter gradient keys.

        Parameters
        ----------
        submission: EvaluatorClient._Submission
            The full request submission.
        force_field_id: str
            The unique id of the force field to use.

        Returns
        -------
        list of Batch
            The property batches.
        """

        # Ids which belong to existing batches and so must not be reused.
        taken_ids = set(self._queued_batches.keys())
        taken_ids.update(self._finished_batches.keys())

        data_set = submission.dataset
        batches = []

        for substance in data_set.substances:

            new_batch = Batch()
            new_batch.force_field_id = force_field_id

            # Regenerate the id for as long as it clashes with a known one
            # (which is very unlikely to ever happen).
            while new_batch.id in taken_ids:
                new_batch.id = str(uuid.uuid4()).replace("-", "")

            new_batch.queued_properties = list(
                data_set.properties_by_substance(substance))

            # The options are copied through a JSON round-trip, and the
            # gradient keys through a deep copy, so batches share no state.
            new_batch.options = RequestOptions.parse_json(
                submission.options.json())
            new_batch.parameter_gradient_keys = copy.deepcopy(
                submission.parameter_gradient_keys)

            taken_ids.add(new_batch.id)
            batches.append(new_batch)

        return batches
def test_base_layer():
    """Test scheduling a batch of properties on a ``DummyCalculationLayer``.

    Two dummy ``Density`` properties are queued on a batch whose options
    contain a ``CalculationLayerSchema`` for the dummy layer. The callback
    passed to the layer asserts that exactly one property was estimated and
    that two exceptions were recorded.
    """

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    dummy_options = RequestOptions()

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = dummy_options
    batch.force_field_id = ""
    batch.options.calculation_schemas = {
        "Density": {
            "DummyCalculationLayer": CalculationLayerSchema()
        }
    }

    with tempfile.TemporaryDirectory() as temporary_directory:

        with temporarily_change_directory(temporary_directory):

            # Create a simple calculation backend to test with. The backend
            # is used as a context manager (as in the other tests of this
            # kind) rather than being started by hand, so that it is shut
            # down again even when the test fails.
            with DaskLocalCluster() as test_backend:

                # Create a simple storage backend to test with.
                test_storage = LocalFileStorage()

                layer_directory = "dummy_layer"
                makedirs(layer_directory)

                def dummy_callback(returned_request):

                    assert len(returned_request.estimated_properties) == 1
                    assert len(returned_request.exceptions) == 2

                dummy_layer = DummyCalculationLayer()

                dummy_layer.schedule_calculation(
                    test_backend,
                    test_storage,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
def test_same_component_batching():
    """Test that ``EvaluatorServer._batch_by_same_component`` groups the
    properties of a data set by substance.

    A density and an enthalpy of vaporization are defined for each of two
    substances (built from the components ``"O", "C"`` and ``"O", "CO"``),
    and two batches of two properties each are expected.
    """

    state = ThermodynamicState(temperature=1.0 * unit.kelvin,
                               pressure=1.0 * unit.atmosphere)

    # Define a density followed by an enthalpy of vaporization for each of
    # the two substances. Every property is given its own substance object.
    properties = []

    for components in (("O", "C"), ("O", "CO")):

        properties.extend((
            Density(
                thermodynamic_state=state,
                substance=Substance.from_components(*components),
                value=0.0 * unit.kilogram / unit.meter**3,
            ),
            EnthalpyOfVaporization(
                thermodynamic_state=state,
                substance=Substance.from_components(*components),
                value=0.0 * unit.kilojoule / unit.mole,
            ),
        ))

    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*properties)

    submission = EvaluatorClient._Submission()
    submission.dataset = data_set
    submission.options = RequestOptions()

    with DaskLocalCluster() as calculation_backend:

        evaluator_server = EvaluatorServer(calculation_backend)
        batches = evaluator_server._batch_by_same_component(submission, "")

    # One batch per substance, each holding both of its properties.
    assert len(batches) == 2
    assert [len(batch.queued_properties) for batch in batches] == [2, 2]
def test_launch_batch():
    """Test launching a batch directly on an ``EvaluatorServer``.

    A batch of two dummy ``Density`` properties which may only be estimated
    by the ``"QuickCalculationLayer"`` is registered with the server and
    launched. Once the queue of the batch has emptied, one property is
    expected to have been estimated and one to have been unsuccessful.
    """

    # Build a data set of two dummy densities.
    data_set = PhysicalPropertyDataSet()
    data_set.add_properties(*(create_dummy_property(Density)
                              for _ in range(2)))

    layer_name = "QuickCalculationLayer"

    batch = Batch()
    batch.force_field_id = ""
    batch.options = RequestOptions()
    batch.options.calculation_layers = [layer_name]
    batch.options.calculation_schemas = {
        "Density": {
            layer_name: CalculationLayerSchema()
        }
    }
    batch.parameter_gradient_keys = []
    batch.queued_properties = list(data_set)
    batch.validate()

    with tempfile.TemporaryDirectory() as directory, \
            temporarily_change_directory(directory):

        with DaskLocalCluster() as calculation_backend:

            evaluator_server = EvaluatorServer(
                calculation_backend=calculation_backend,
                working_directory=directory,
            )

            evaluator_server._queued_batches[batch.id] = batch
            evaluator_server._launch_batch(batch)

            # Poll until no properties are left in the queue of the batch.
            while batch.queued_properties:
                sleep(0.01)

            assert len(batch.estimated_properties) == 1
            assert len(batch.unsuccessful_properties) == 1
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer`
    is the simplest implementation of the abstract layer, we settle for
    testing this.

    Two dummy ``Density`` properties are scheduled using a workflow which
    consists of a single ``DummyInputOutputProtocol``, and the callback
    asserts that both were estimated without any exceptions being raised.
    """

    # A trivial workflow whose only protocol exposes a placeholder value as
    # the final value of the workflow.
    placeholder_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol = DummyInputOutputProtocol("protocol_a")
    protocol.input_value = placeholder_value

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [protocol.schema]
    workflow_schema.final_value_source = ProtocolPath("output_value",
                                                      protocol.id)

    simulation_schema = SimulationSchema()
    simulation_schema.workflow_schema = workflow_schema

    request_options = RequestOptions()
    request_options.add_schema("SimulationLayer", "Density",
                               simulation_schema)

    batch = server.Batch()
    batch.queued_properties = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]
    batch.options = request_options

    def dummy_callback(returned_request):

        assert len(returned_request.estimated_properties) == 2
        assert len(returned_request.exceptions) == 0

    with tempfile.TemporaryDirectory() as directory, \
            temporarily_change_directory(directory):

        # The directory which the layer will work in.
        layer_directory = "simulation_layer"
        os.makedirs(layer_directory)

        # Store the force field in a simple storage backend, and record the
        # id it was stored under on the batch.
        force_field = SmirnoffForceFieldSource.from_path(
            "smirnoff99Frosst-1.1.0.offxml")

        storage_backend = LocalFileStorage()
        batch.force_field_id = storage_backend.store_force_field(force_field)

        # Schedule the batch on a simple calculation backend.
        with DaskLocalCluster() as calculation_backend:

            SimulationLayer().schedule_calculation(
                calculation_backend,
                storage_backend,
                layer_directory,
                batch,
                dummy_callback,
                True,
            )
示例#10
0
    def _batch_by_shared_component(self, submission, force_field_id):
        """Split the properties of a submission into batches of properties
        whose substances are linked through shared components.

        A graph is built whose nodes are the smiles patterns of every
        component in the data set, with edges between consecutive components
        of each substance. Each connected component ("island") of this graph
        yields one batch, and every property is placed into the batch of the
        first island which contains any of the smiles of its substance.

        Parameters
        ----------
        submission: EvaluatorClient._Submission
            The full request submission.
        force_field_id: str
            The unique id of the force field to use.

        Returns
        -------
        list of Batch
            The property batches.
        """

        # Ids which belong to existing batches and so must not be reused.
        taken_ids = set(self._queued_batches.keys())
        taken_ids.update(self._finished_batches.keys())

        data_set = submission.dataset

        # Every distinct component smiles pattern becomes a node.
        component_graph = networkx.Graph()
        component_graph.add_nodes_from({
            component.smiles
            for substance in data_set.substances
            for component in substance
        })

        # Chain the components of each multi-component substance together
        # so that they end up on the same island.
        for substance in data_set.substances:

            if len(substance) < 2:
                continue

            patterns = [component.smiles for component in substance]

            for first, second in zip(patterns, patterns[1:]):
                component_graph.add_edge(first, second)

        # Find the islands of smiles which exist in overlapping substances.
        islands = [
            component_graph.subgraph(nodes)
            for nodes in networkx.connected_components(component_graph)
        ]

        def _new_batch():
            """Create an empty batch carrying copies of the submission's
            options and gradient keys, and reserve its id."""

            new_batch = Batch()
            new_batch.force_field_id = force_field_id

            # Regenerate the id for as long as it clashes with a known one
            # (which is very unlikely to ever happen).
            while new_batch.id in taken_ids:
                new_batch.id = str(uuid.uuid4()).replace("-", "")

            new_batch.options = RequestOptions.parse_json(
                submission.options.json())
            new_batch.parameter_gradient_keys = copy.deepcopy(
                submission.parameter_gradient_keys)

            taken_ids.add(new_batch.id)
            return new_batch

        # One batch per island, stored in the same order as the islands.
        batches = [_new_batch() for _ in islands]

        for physical_property in data_set:

            patterns = [
                component.smiles for component in physical_property.substance
            ]

            # Stop at the first island which holds any of the smiles. If no
            # island matches, the index of the last island is retained.
            island_id = 0

            for island_id, island in enumerate(islands):

                if any(pattern in island for pattern in patterns):
                    break

            batches[island_id].queued_properties.append(physical_property)

        return batches