Exemplo n.º 1
0
def test_serialization():
    """Check that a data set survives a JSON round trip unchanged."""

    # Build a data set holding a single dummy density property.
    original = PhysicalPropertyDataSet()
    original.add_properties(create_dummy_property(Density))

    original_json = original.json()

    # Deserialize and make sure no entries were lost on the way back.
    round_tripped = PhysicalPropertyDataSet.parse_json(original_json)
    assert len(original) == len(round_tripped)

    # Serializing the parsed copy must reproduce the original JSON exactly.
    assert round_tripped.json() == original_json
def _parse_quantity(raw_quantity):
    """Combine a serialized ``{"value": ..., "unit": ...}`` entry into a quantity.

    Parameters
    ----------
    raw_quantity: dict
        A mapping with a ``"value"`` entry and a ``"unit"`` entry; the unit
        is handed to ``unit.Unit``.

    Returns
    -------
        The value multiplied by the parsed unit.
    """
    return raw_quantity["value"] * unit.Unit(raw_quantity["unit"])


def _parse_substance(raw_substance):
    """Rebuild a ``Substance`` from its serialized dictionary form.

    Parameters
    ----------
    raw_substance: dict
        A mapping with a ``"components"`` list and an ``"amounts"`` mapping
        keyed by component smiles.

    Returns
    -------
    Substance
        A substance with one ``add_component`` call per serialized amount.

    Raises
    ------
    NotImplementedError
        If an amount has an ``"@type"`` other than the mole fraction or
        exact amount types handled here.
    """
    substance = Substance()

    for raw_component in raw_substance["components"]:

        component = Component(
            smiles=raw_component["smiles"],
            role=Component.Role[raw_component["role"]["value"]],
        )

        # Amounts are stored separately from the components, keyed by smiles.
        raw_amounts = raw_substance["amounts"][raw_component["smiles"]]

        for raw_amount in raw_amounts["value"]:

            amount_type = raw_amount["@type"]

            if (amount_type ==
                    "propertyestimator.substances.Substance->MoleFraction"):
                amount = MoleFraction(raw_amount["value"])
            elif (amount_type ==
                  "propertyestimator.substances.Substance->ExactAmount"):
                amount = ExactAmount(raw_amount["value"])
            else:
                raise NotImplementedError(
                    f"Unsupported amount type: {amount_type!r}")

            substance.add_component(component, amount)

    return substance


def _parse_source(raw_source):
    """Rebuild the source object of a property from its serialized form.

    Parameters
    ----------
    raw_source: dict
        A mapping with an ``"@type"`` entry plus either a ``"fidelity"``
        entry (calculation sources) or a ``"reference"`` entry (measurement
        sources).

    Returns
    -------
    CalculationSource or MeasurementSource
        The source matching the serialized ``"@type"``. The reference of a
        measurement source is passed through ``correct_doi`` before being
        stored as the doi.

    Raises
    ------
    NotImplementedError
        If the ``"@type"`` is neither of the two handled source types.
    """
    source_type = raw_source["@type"]

    if (source_type ==
            "propertyestimator.properties.properties.CalculationSource"):
        return CalculationSource(fidelity=raw_source["fidelity"])

    if (source_type ==
            "propertyestimator.properties.properties.MeasurementSource"):
        return MeasurementSource(doi=correct_doi(raw_source["reference"]))

    raise NotImplementedError(f"Unsupported source type: {source_type!r}")


def _convert_property(raw_entry):
    """Rebuild a single physical property from its serialized dictionary form.

    Parameters
    ----------
    raw_entry: dict
        One entry of the ``"properties"`` section of a raw data set.

    Returns
    -------
        An instance of the class in ``properties`` whose name matches the
        last dotted component of the entry's ``"@type"``, carrying the id
        of the raw entry.
    """
    substance = _parse_substance(raw_entry["substance"])
    source = _parse_source(raw_entry["source"])

    # The property class is looked up by the final component of the
    # serialized type path.
    property_class = getattr(properties, raw_entry["@type"].split(".")[-1])

    raw_state = raw_entry["thermodynamic_state"]

    physical_property = property_class(
        thermodynamic_state=ThermodynamicState(
            temperature=_parse_quantity(raw_state["temperature"]),
            pressure=_parse_quantity(raw_state["pressure"]),
        ),
        phase=PropertyPhase(raw_entry["phase"]),
        substance=substance,
        value=_parse_quantity(raw_entry["value"]),
        # The uncertainty entry is only read for non-measurement sources;
        # measured properties are always stored without one.
        uncertainty=(None if isinstance(source, MeasurementSource) else
                     _parse_quantity(raw_entry["uncertainty"])),
        source=source,
    )
    physical_property.id = raw_entry["id"]

    return physical_property


def main():
    """Convert the JSON data sets in ``raw_data`` and store them in ``raw_data_v2``.

    Each named data set is loaded from ``raw_data/<name>.json``, every
    property it contains is rebuilt as an object of the matching class in
    ``properties``, and the resulting ``PhysicalPropertyDataSet`` is written
    to ``raw_data_v2`` both as JSON and as CSV.

    Raises
    ------
    ValueError
        If a file does not declare the expected data set ``"@type"``.
    NotImplementedError
        If an amount or source of an unsupported type is encountered.
    """

    os.makedirs("raw_data_v2", exist_ok=True)

    for data_set_name in [
            "curated_data_set",
            "gaff 1.81",
            "gaff 2.11",
            "parsley 1.0.0",
            "smirnoff99frosst 1.1.0",
    ]:

        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(os.path.join("raw_data", f"{data_set_name}.json"),
                  encoding="utf-8") as file:
            raw_data_set = json.load(file)

        # Raised explicitly (not asserted) so the check survives `python -O`.
        if (raw_data_set["@type"] !=
                "propertyestimator.datasets.datasets.PhysicalPropertyDataSet"):
            raise ValueError(
                f"Unexpected data set type in {data_set_name!r}: "
                f"{raw_data_set['@type']!r}")

        physical_properties = []

        # The serialized properties are grouped into lists; flatten them.
        for raw_data_set_entries in raw_data_set["properties"].values():
            for raw_data_set_entry in raw_data_set_entries:
                physical_properties.append(
                    _convert_property(raw_data_set_entry))

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        data_set.json(os.path.join("raw_data_v2", f"{data_set_name}.json"),
                      format=True)
        data_set.to_pandas().to_csv(
            os.path.join("raw_data_v2", f"{data_set_name}.csv"))