Exemplo n.º 1
0
def test_pandas_round_trip(evaluator_data_set):
    """Round-trip a data set through pandas twice and compare the entries.

    The evaluator data set is converted ``to_pandas`` -> ``from_pandas`` ->
    ``to_pandas`` -> ``from_pandas``, and every entry of the final ``DataSet``
    is handed to ``compare_properties`` together with the original evaluator
    property that carries the same id.
    """

    def rebuild(frame):
        # Each rebuild uses the same fixed identifier, description and a
        # freshly constructed placeholder author.
        return DataSet.from_pandas(
            frame,
            "id",
            description="Lorem Ipsum",
            authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        )

    first_pass = rebuild(evaluator_data_set.to_pandas())
    second_pass = rebuild(first_pass.to_pandas())

    # Index the original properties by id so entries can be matched up.
    originals_by_id = {}

    for original in evaluator_data_set:
        originals_by_id[original.id] = original

    for entry in second_pass.entries:
        # Entry ids are stringified before the lookup into the original ids.
        compare_properties(originals_by_id[str(entry.id)], entry)
Exemplo n.º 2
0
def test_author_validation():
    """Check that ``Author`` accepts a valid record and rejects a bad e-mail."""

    # Constructing an author from valid fields must not raise.
    valid_fields = dict(name="SB", email="*****@*****.**", institute="Inst")
    Author(**valid_fields)

    # Swapping in an e-mail without an ``@`` must trigger a validation error.
    invalid_fields = {**valid_fields, "email": "fakeemail.com"}

    with pytest.raises(ValidationError):
        Author(**invalid_fields)
Exemplo n.º 3
0
def test_project_validation(valid_optimization_kwargs, valid_benchmark_kwargs):
    """Check that ``Project`` validates the studies it is given.

    A project holding a single consistent study must be accepted, while a
    project with duplicated studies, or with a study / optimization /
    benchmark whose ``project_id`` was changed to ``"a"``, must raise a
    ``ValidationError``.
    """

    project_id, study_id = "project-1", "study-1"

    project_kwargs = {
        "id": project_id,
        "name": " ",
        "description": " ",
        "authors": [Author(name=" ", email="*****@*****.**", institute=" ")],
    }

    # The children are re-parented onto the ids used by this test.
    parent_ids = {"project_id": project_id, "study_id": study_id}

    valid_study = Study(
        id=study_id,
        project_id=project_id,
        name=" ",
        description=" ",
        optimizations=[
            Optimization(**{**valid_optimization_kwargs, **parent_ids})
        ],
        benchmarks=[Benchmark(**{**valid_benchmark_kwargs, **parent_ids})],
    )

    def assert_rejected(studies):
        # Building a project from ``studies`` is expected to fail validation.
        with pytest.raises(ValidationError):
            Project(**project_kwargs, studies=studies)

    # A valid project can be produced.
    Project(**project_kwargs, studies=[valid_study])

    # Non-unique study ids.
    assert_rejected([valid_study, valid_study])

    # A study pointing at the wrong project.
    mismatched_study = Study(**{**valid_study.dict(), "project_id": "a"})
    assert_rejected([mismatched_study])

    # An optimization pointing at the wrong project.
    mismatched_optimization = Study(**valid_study.dict())
    mismatched_optimization.optimizations[0].project_id = "a"
    assert_rejected([mismatched_optimization])

    # A benchmark pointing at the wrong project.
    mismatched_benchmark = Study(**valid_study.dict())
    mismatched_benchmark.benchmarks[0].project_id = "a"
    assert_rejected([mismatched_benchmark])
Exemplo n.º 4
0
def estimated_reference_sets():
    """Build a pair of estimated and reference data sets.

    Returns
    -------
    tuple
        ``(estimated_data_set, reference_data_set)``. The first item is a
        ``PhysicalPropertyDataSet`` holding a ``Density`` with id ``"1"`` and
        an ``EnthalpyOfMixing`` with id ``"2"``; the second is a ``DataSet``
        with id ``"ref"`` holding entries with ids ``1`` and ``2`` of the
        same two property types.
    """

    def make_state():
        return ThermodynamicState(
            298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
        )

    def make_substance():
        return Substance.from_components("O", "CC=O")

    def make_reference_entry(entry_id, property_type, value, std_error):
        return DataSetEntry(
            id=entry_id,
            property_type=property_type,
            temperature=298.15,
            pressure=101.325,
            value=value,
            std_error=std_error,
            doi=" ",
            components=[
                Component(smiles="O", mole_fraction=0.5),
                Component(smiles="CC=O", mole_fraction=0.5),
            ],
        )

    # Estimated properties, as evaluator objects carrying explicit units.
    density_unit = unit.kilogram / unit.meter**3

    estimated_density = Density(
        thermodynamic_state=make_state(),
        phase=PropertyPhase.Liquid,
        substance=make_substance(),
        value=1.0 * density_unit,
        uncertainty=0.1 * density_unit,
    )
    estimated_density.id = "1"

    # The value is given in kcal / mol while the uncertainty is in kJ / mol.
    estimated_enthalpy = EnthalpyOfMixing(
        thermodynamic_state=make_state(),
        phase=PropertyPhase.Liquid,
        substance=make_substance(),
        value=1.0 * unit.kilocalorie / unit.mole,
        uncertainty=0.1 * unit.kilojoule / unit.mole,
    )
    estimated_enthalpy.id = "2"

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(estimated_density, estimated_enthalpy)

    # Reference entries, as plain numbers.
    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[
            make_reference_entry(1, "Density", 0.001, 0.0001),
            make_reference_entry(2, "EnthalpyOfMixing", 4.184, 0.1),
        ],
    )

    return estimated_data_set, reference_data_set
Exemplo n.º 5
0
def test_reindex_data_set_no_mole_fraction():
    """Check ``reindex_data_set`` when a component is given as an exact amount.

    A single solvation free energy whose solute is specified through an
    ``ExactAmount`` is re-indexed against a ``DataSet`` holding one entry with
    id ``1``; afterwards the evaluator property must carry the id ``"1"``.
    """

    setup_timestamp_logging(logging.INFO)

    # One component by mole fraction plus one solute by exact amount.
    substance = substances.Substance()
    substance.add_component(
        substances.Component(smiles="O"),
        amount=substances.MoleFraction(1.0),
    )
    substance.add_component(
        substances.Component(
            smiles="CO", role=substances.Component.Role.Solute
        ),
        amount=substances.ExactAmount(1),
    )

    free_energy_unit = SolvationFreeEnergy.default_unit()

    solvation_free_energy = SolvationFreeEnergy(
        thermodynamic_state=ThermodynamicState(
            temperature=298.15 * unit.kelvin,
            pressure=1.0 * unit.atmosphere,
        ),
        phase=PropertyPhase.Liquid,
        substance=substance,
        value=1.0 * free_energy_unit,
        uncertainty=1.0 * free_energy_unit,
    )

    evaluator_data_set = PhysicalPropertyDataSet()
    evaluator_data_set.add_properties(solvation_free_energy)

    reference_entry = DataSetEntry(
        id=1,
        property_type="SolvationFreeEnergy",
        temperature=298.15,
        pressure=101.325,
        value=1.0,
        std_error=1.0,
        doi=" ",
        components=[
            Component(smiles="O", mole_fraction=1.0),
            Component(
                smiles="CO",
                mole_fraction=0.0,
                exact_amount=1,
                role="Solute",
            ),
        ],
    )

    data_set = DataSet(
        id="data-set",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=[reference_entry],
    )

    reindex_data_set(evaluator_data_set, data_set)

    reindexed_property = evaluator_data_set.properties[0]
    assert reindexed_property.id == "1"
Exemplo n.º 6
0
def create_author():
    """Create an author object with

        * name="Fake Name"
        * email="*****@*****.**"
        * institute="None"

    Returns
    -------
    Author
        The created author.
    """
    author_fields = {
        "name": "Fake Name",
        "email": "*****@*****.**",
        "institute": "None",
    }

    return Author(**author_fields)
Exemplo n.º 7
0
def test_evaluator_round_trip(evaluator_data_set):
    """Round-trip a data set through ``from_pandas`` and ``to_evaluator``.

    The recreated evaluator data set must have the same length as the
    original, and each recreated property is handed to
    ``compare_evaluator_properties`` together with the original property
    that carries the same id.
    """

    source_frame = evaluator_data_set.to_pandas()
    placeholder_author = Author(name=" ", email="*****@*****.**", institute=" ")

    data_set = DataSet.from_pandas(
        source_frame,
        "id",
        description="Lorem Ipsum",
        authors=[placeholder_author],
    )

    recreated_data_set = data_set.to_evaluator()
    assert len(recreated_data_set) == len(evaluator_data_set)

    # Index the originals by id so each recreated property finds its partner.
    originals_by_id = {}

    for original in evaluator_data_set:
        originals_by_id[original.id] = original

    for recreated in recreated_data_set:
        compare_evaluator_properties(originals_by_id[recreated.id], recreated)
Exemplo n.º 8
0
def test_collection_to_evaluator(evaluator_data_set):
    """Check ``DataSetCollection.to_evaluator`` on a one-set collection.

    The evaluator data set is converted to a ``DataSet`` via pandas, wrapped
    in a ``DataSetCollection`` and converted back. The result must have the
    same length as the original, and each recreated property is handed to
    ``compare_evaluator_properties`` together with the original property
    that carries the same id.
    """

    source_frame = evaluator_data_set.to_pandas()
    placeholder_author = Author(name=" ", email="*****@*****.**", institute=" ")

    wrapped_data_set = DataSet.from_pandas(
        source_frame,
        "id",
        description="Lorem Ipsum",
        authors=[placeholder_author],
    )
    collection = DataSetCollection(data_sets=[wrapped_data_set])

    recreated_data_set = collection.to_evaluator()
    assert len(recreated_data_set) == len(evaluator_data_set)

    # Index the originals by id so each recreated property finds its partner.
    originals_by_id = {}

    for original in evaluator_data_set:
        originals_by_id[original.id] = original

    for recreated in recreated_data_set:
        compare_evaluator_properties(originals_by_id[recreated.id], recreated)
Exemplo n.º 9
0
def test_from_pandas(evaluator_data_set):
    """Check the metadata and entries of a ``DataSet`` built by ``from_pandas``.

    The identifier, description and author list passed in must be stored on
    the data set, the number of entries must match the evaluator data set,
    and each entry is handed to ``compare_properties`` together with the
    evaluator property that carries the same id.
    """

    source_frame = evaluator_data_set.to_pandas()
    placeholder_author = Author(name=" ", email="*****@*****.**", institute=" ")

    data_set = DataSet.from_pandas(
        source_frame,
        "id",
        description="Lorem Ipsum",
        authors=[placeholder_author],
    )

    # Metadata supplied to ``from_pandas`` is stored on the data set.
    assert data_set.id == "id"
    assert data_set.description == "Lorem Ipsum"
    assert len(data_set.authors) == 1

    # One entry per evaluator property.
    assert len(data_set.entries) == len(evaluator_data_set)

    originals_by_id = {}

    for original in evaluator_data_set:
        originals_by_id[original.id] = original

    for entry in data_set.entries:
        # Entry ids are stringified before the lookup into the original ids.
        compare_properties(originals_by_id[str(entry.id)], entry)
Exemplo n.º 10
0
def test_analysed_result_from_evaluator():
    """Check the RMSE reported by ``DataSetResult.from_evaluator``.

    1000 density estimates are drawn from a normal distribution centred on
    the reference value with a randomly chosen standard deviation. The RMSE
    reported for the uncategorised statistics entry is then expected to match
    that standard deviation to within a relative tolerance of 10 %.
    """
    expected_mean = 0.0
    expected_std = numpy.random.rand() + 1.0

    samples = numpy.random.normal(expected_mean, expected_std, 1000)

    estimated_properties, reference_entries = [], []

    # Property ids start at one and are shared by estimate and reference.
    for property_id, sample in enumerate(samples, start=1):

        estimate = Density(
            thermodynamic_state=ThermodynamicState(
                298.15 * unit.kelvin, pressure=1.0 * unit.atmosphere
            ),
            phase=PropertyPhase.Liquid,
            substance=Substance.from_components("O"),
            value=sample * Density.default_unit(),
            uncertainty=0.0 * Density.default_unit(),
        )
        estimate.id = str(property_id)
        estimated_properties.append(estimate)

        reference_entries.append(
            DataSetEntry(
                id=property_id,
                property_type="Density",
                temperature=298.15,
                pressure=101.325,
                value=expected_mean,
                std_error=None,
                doi=" ",
                components=[Component(smiles="O", mole_fraction=1.0)],
            )
        )

    estimated_data_set = PhysicalPropertyDataSet()
    estimated_data_set.add_properties(*estimated_properties)

    reference_data_set = DataSet(
        id="ref",
        description=" ",
        authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
        entries=reference_entries,
    )

    analysed_results = DataSetResult.from_evaluator(
        reference_data_set=reference_data_set,
        estimated_data_set=estimated_data_set,
        analysis_environments=[ChemicalEnvironment.Aqueous],
        statistic_types=[StatisticType.RMSE],
        bootstrap_iterations=1000,
    )

    assert len(analysed_results.result_entries) == len(estimated_properties)

    # Pick the first statistics entry that is not tied to a category.
    full_statistics = next(
        entry
        for entry in analysed_results.statistic_entries
        if entry.category is None
    )

    assert full_statistics.property_type == "Density"
    assert full_statistics.n_components == 1
    assert full_statistics.statistic_type == StatisticType.RMSE
    assert numpy.isclose(full_statistics.value, expected_std, rtol=0.10)
Exemplo n.º 11
0
    def analyze(
        cls,
        optimization: Optimization,
        target: EvaluatorTarget,
        target_directory: str,
        result_directory: str,
        reindex: bool = False,
    ) -> Optional[EvaluatorTargetResult]:
        """Analyze the evaluator output stored for a single target.

        Parameters
        ----------
        optimization
            The optimization whose ``analysis_environments`` are used when
            computing statistics.
        target
            The target whose ``weight`` scales the objective function.
        target_directory
            The directory containing the ``training-set.json`` file.
        result_directory
            The directory expected to contain the ``results.json`` file.
        reindex
            Whether to pass the loaded results through ``reindex_results``.
            This is forced to ``True`` when any training set property id
            cannot be cast to an integer.

        Returns
        -------
        Optional[EvaluatorTargetResult]
            ``None`` if ``results.json`` is not a file in ``result_directory``,
            otherwise the weighted objective function together with the RMSE
            statistic entries.
        """

        from openff.evaluator.client import RequestResult
        from openff.evaluator.datasets import PhysicalPropertyDataSet

        results_path = os.path.join(result_directory, "results.json")

        # Without a results file there is nothing to analyze.
        if not os.path.isfile(results_path):
            return None

        # Load the training set in its evaluator representation.
        training_set = PhysicalPropertyDataSet.from_json(
            os.path.join(target_directory, "training-set.json")
        )

        # Ids which cannot be cast to integers are replaced by one-based
        # positional ids, after which re-indexing the results is mandatory.
        try:
            for training_property in training_set.properties:
                int(training_property.id)
        except (TypeError, ValueError):

            _logger.warning(
                "The reference data set contains properties with ids that cannot be "
                "cast to integers - attempting to fix. Note this in general is not "
                "recommended and in future it is suggested to use integer ids in "
                "physical property data sets."
            )

            for position, physical_property in enumerate(training_set, start=1):
                physical_property.id = str(position)

            reindex = True

        # Convert to the ``DataSet`` model using placeholder metadata.
        reference_data_set = DataSet.from_pandas(
            training_set.to_pandas(),
            identifier="empty",
            description="empty",
            authors=[Author(name="empty", email="*****@*****.**", institute="empty")],
        )

        results = RequestResult.from_json(results_path)

        if reindex:
            results = reindex_results(results, reference_data_set)

        # Compute RMSE statistics of the estimates against the reference data.
        data_set_result = DataSetResult.from_evaluator(
            reference_data_set=reference_data_set,
            estimated_data_set=results.estimated_properties,
            analysis_environments=optimization.analysis_environments,
            statistic_types=[StatisticType.RMSE],
        )

        unweighted_objective = cls._read_objective_function(result_directory)

        return EvaluatorTargetResult(
            objective_function=target.weight * unweighted_objective,
            statistic_entries=data_set_result.statistic_entries,
        )
Exemplo n.º 12
0
    def create(db: Session, author: authors.Author) -> models.Author:
        """Build a database author from ``author`` and pass it to ``unique``.

        Parameters
        ----------
        db
            The session handed to ``models.Author.unique``.
        author
            The author whose fields populate the new ``models.Author``.

        Returns
        -------
        models.Author
            Whatever ``models.Author.unique`` returns for the new model.
            NOTE(review): presumably an existing matching row is re-used
            instead of inserting a duplicate - confirm against ``unique``.
        """

        candidate = models.Author(**author.dict())
        return models.Author.unique(db, candidate)
Exemplo n.º 13
0
def test_reindex_data_set():
    """Check ``reindex_data_set`` against a ``DataSet`` and a collection.

    Three densities are created, of which only the first two have a matching
    reference entry. After re-indexing, the matched properties must carry
    the ids of their reference entries while the unmatched one keeps the id
    it had beforehand. The check is made once against a single ``DataSet``
    and once against a ``DataSetCollection`` of two data sets.
    """

    setup_timestamp_logging(logging.INFO)

    def make_density(temperature, *smiles):
        return Density(
            thermodynamic_state=ThermodynamicState(
                temperature=temperature * unit.kelvin,
                pressure=1.0 * unit.atmosphere,
            ),
            phase=PropertyPhase.Liquid,
            substance=substances.Substance.from_components(*smiles),
            value=1.0 * Density.default_unit(),
            uncertainty=1.0 * Density.default_unit(),
        )

    def make_entry(entry_id, *smiles_and_fractions):
        return DataSetEntry(
            id=entry_id,
            property_type="Density",
            temperature=298.15,
            pressure=101.325,
            value=1.0,
            std_error=1.0,
            doi=" ",
            components=[
                Component(smiles=smiles, mole_fraction=mole_fraction)
                for smiles, mole_fraction in smiles_and_fractions
            ],
        )

    def make_data_set(identifier, *entries):
        return DataSet(
            id=identifier,
            description=" ",
            authors=[Author(name=" ", email="*****@*****.**", institute=" ")],
            entries=list(entries),
        )

    evaluator_data_set = PhysicalPropertyDataSet()

    evaluator_data_set.add_properties(
        make_density(298.15, "O"),
        make_density(298.15, "C", "O"),
        # No reference entry is defined at this temperature.
        make_density(300.0, "C", "O"),
    )

    data_set = make_data_set(
        "data-set",
        make_entry(1, ("O", 0.5), ("C", 0.5)),
        make_entry(2, ("O", 1.0)),
    )

    # Remember the id of the property which has no reference counterpart.
    un_indexed_id = evaluator_data_set.properties[2].id

    reindex_data_set(evaluator_data_set, data_set)

    reindexed_ids = [x.id for x in evaluator_data_set.properties[:3]]

    assert reindexed_ids[0] == "2"
    assert reindexed_ids[1] == "1"
    assert reindexed_ids[2] == un_indexed_id

    # Repeat with the reference entries split over a two-set collection.
    data_set_collection = DataSetCollection(
        data_sets=[
            make_data_set("0", make_entry(3, ("O", 0.5), ("C", 0.5))),
            make_data_set("1", make_entry(4, ("O", 1.0))),
        ]
    )

    reindex_data_set(evaluator_data_set, data_set_collection)

    reindexed_ids = [x.id for x in evaluator_data_set.properties[:3]]

    assert reindexed_ids[0] == "4"
    assert reindexed_ids[1] == "3"
    assert reindexed_ids[2] == un_indexed_id