def test_serialization():
    """Check that a data set survives a JSON round trip unchanged.

    A data set holding a single dummy ``Density`` property is serialized,
    parsed back, and serialized a second time; both the number of entries
    and the JSON text must match the original.
    """
    original = PhysicalPropertyDataSet()
    original.add_properties(create_dummy_property(Density))

    original_json = original.json()
    round_tripped = PhysicalPropertyDataSet.parse_json(original_json)

    # Parsing must not drop or duplicate entries.
    assert len(original) == len(round_tripped)

    # Re-serializing the parsed copy must reproduce the original JSON.
    round_tripped_json = round_tripped.json()
    assert round_tripped_json == original_json
def _parse_quantity(raw_quantity):
    """Rebuild a unit-bearing quantity from a ``{"value", "unit"}`` mapping."""
    return raw_quantity["value"] * unit.Unit(raw_quantity["unit"])


def _parse_substance(raw_substance):
    """Build a ``Substance`` from the raw ``"substance"`` section of an entry.

    Every amount listed for a component is added to the substance together
    with that component.

    Raises:
        NotImplementedError: If an amount carries an ``"@type"`` tag other
            than the mole fraction or exact amount tags handled here.
    """
    substance = Substance()

    for raw_component in raw_substance["components"]:
        smiles = raw_component["smiles"]

        component = Component(
            smiles=smiles,
            # The role is looked up by enum member *name*, not by value.
            role=Component.Role[raw_component["role"]["value"]],
        )

        # Amounts are keyed by the SMILES pattern of the component.
        for raw_amount in raw_substance["amounts"][smiles]["value"]:
            amount_type = raw_amount["@type"]

            if amount_type == "propertyestimator.substances.Substance->MoleFraction":
                amount = MoleFraction(raw_amount["value"])
            elif amount_type == "propertyestimator.substances.Substance->ExactAmount":
                amount = ExactAmount(raw_amount["value"])
            else:
                raise NotImplementedError(
                    f"Unsupported amount type: {amount_type!r}"
                )

            substance.add_component(component, amount)

    return substance


def _parse_source(raw_source):
    """Build the source object described by the raw ``"source"`` section.

    Raises:
        NotImplementedError: If the ``"@type"`` tag is neither the
            calculation source nor the measurement source tag.
    """
    source_type = raw_source["@type"]

    if source_type == "propertyestimator.properties.properties.CalculationSource":
        return CalculationSource(fidelity=raw_source["fidelity"])

    if source_type == "propertyestimator.properties.properties.MeasurementSource":
        # The stored reference is passed through ``correct_doi`` before use.
        return MeasurementSource(doi=correct_doi(raw_source["reference"]))

    raise NotImplementedError(f"Unsupported source type: {source_type!r}")


def _parse_property(raw_entry):
    """Convert one raw data set entry into a physical property object.

    The property class is resolved by taking the last dotted component of
    the entry's ``"@type"`` tag and looking it up on the ``properties``
    module. The original ``"id"`` of the entry is copied onto the new object.
    """
    substance = _parse_substance(raw_entry["substance"])
    source = _parse_source(raw_entry["source"])

    property_class = getattr(properties, raw_entry["@type"].split(".")[-1])
    raw_state = raw_entry["thermodynamic_state"]

    physical_property = property_class(
        thermodynamic_state=ThermodynamicState(
            temperature=_parse_quantity(raw_state["temperature"]),
            pressure=_parse_quantity(raw_state["pressure"]),
        ),
        phase=PropertyPhase(raw_entry["phase"]),
        substance=substance,
        value=_parse_quantity(raw_entry["value"]),
        # Entries with a measurement source are stored without an
        # uncertainty; only other sources have theirs read from the entry.
        uncertainty=(
            None
            if isinstance(source, MeasurementSource)
            else _parse_quantity(raw_entry["uncertainty"])
        ),
        source=source,
    )
    physical_property.id = raw_entry["id"]

    return physical_property


def main():
    """Convert the JSON data sets in ``raw_data`` and store them in ``raw_data_v2``.

    Each named data set is loaded from ``raw_data/<name>.json``, every entry
    is rebuilt as a physical property object, and the resulting
    ``PhysicalPropertyDataSet`` is written to ``raw_data_v2`` both as
    ``<name>.json`` and as ``<name>.csv``. The output directory is created
    if it does not already exist.

    Raises:
        AssertionError: If a loaded file is not tagged as a
            ``propertyestimator`` physical property data set.
        NotImplementedError: If an entry contains an amount or source type
            which this script does not know how to convert.
    """
    os.makedirs("raw_data_v2", exist_ok=True)

    data_set_names = (
        "curated_data_set",
        "gaff 1.81",
        "gaff 2.11",
        "parsley 1.0.0",
        "smirnoff99frosst 1.1.0",
    )

    for data_set_name in data_set_names:
        input_path = os.path.join("raw_data", f"{data_set_name}.json")

        # Decode explicitly as UTF-8 so that the result does not depend on
        # the locale encoding of the machine running the script.
        with open(input_path, encoding="utf-8") as file:
            raw_data_set = json.load(file)

        assert (
            raw_data_set["@type"]
            == "propertyestimator.datasets.datasets.PhysicalPropertyDataSet"
        ), f"Unexpected data set type in {input_path}: {raw_data_set['@type']!r}"

        # The raw properties are grouped into lists; flatten them in order.
        physical_properties = [
            _parse_property(raw_entry)
            for raw_entries in raw_data_set["properties"].values()
            for raw_entry in raw_entries
        ]

        data_set = PhysicalPropertyDataSet()
        data_set.add_properties(*physical_properties)

        data_set.json(
            os.path.join("raw_data_v2", f"{data_set_name}.json"), format=True
        )
        data_set.to_pandas().to_csv(
            os.path.join("raw_data_v2", f"{data_set_name}.csv")
        )