def test_from_too_large(self, caplog):
    """Conversion must abort when the projected dense array exceeds max_array_size."""
    columns = {
        "a": np.arange(10),
        "b": np.arange(10),
        "c": np.arange(10),
        "entity": ["CO2"] * 10,
        "unit": ["Gg"] * 10,
        "2001": np.arange(10),
    }
    frame = pd.DataFrame(columns)
    frame.attrs = {
        "attrs": {},
        "dimensions": {"CO2": ["a", "b", "c"]},
        "time_format": "%Y",
    }
    # projected array size should be 1000 > 100
    with pytest.raises(ValueError, match="Resulting array too large"):
        pm2io.from_interchange_format(frame, max_array_size=100)
    assert "ERROR" in caplog.text
    assert (
        "Set with 1 entities and a total of 3 dimensions will have a size of 1,000"
        in caplog.text
    )
def test_from_add_coord_non_unique(self, caplog):
    """A secondary coordinate with several values per primary value must fail."""
    frame = pd.DataFrame(
        {
            "a": np.arange(3),
            "b": np.arange(3),
            "c": np.arange(3),
            "entity": ["CO2"] * 3,
            "entity_name": ["Carbondioxide", "Carbondioxide", "Methane"],
            "unit": ["Gg"] * 3,
            "2001": np.arange(3),
        }
    )
    frame.attrs = {
        "attrs": {},
        "dimensions": {"CO2": ["a", "b", "c"]},
        "time_format": "%Y",
        "additional_coordinates": {"entity_name": "entity"},
    }
    expected_message = (
        "Different secondary coordinate values "
        "for given first coordinate value for "
        "entity_name."
    )
    with pytest.raises(ValueError, match=expected_message):
        pm2io.from_interchange_format(frame)
    assert "ERROR" in caplog.text
    assert (
        "Different secondary coordinate values for given first coordinate "
        "value for entity_name." in caplog.text
    )
def test_inharmonic_units(minimal_ds, tmp_path):
    """Mixed units within one time series must raise when converting back."""
    base = tmp_path / "if"
    pm2io.write_interchange_format(base, minimal_ds.pr.to_interchange_format())
    csv_path = base.with_suffix(".csv")
    frame = pd.read_csv(csv_path)
    # corrupt a single row's unit so the series is no longer homogeneous
    frame.loc[3, "unit"] = "m"
    frame.to_csv(csv_path, index=False, quoting=csv.QUOTE_NONNUMERIC)
    with pytest.raises(ValueError, match="More than one unit"):
        pm2io.from_interchange_format(pm2io.read_interchange_format(base))
def test_from(self):
    """Converting a wide-CSV interchange DataFrame reproduces the stored dataset."""
    input_file = DATA_PATH / "test_read_wide_csv_file_output.csv"
    expected_file = DATA_PATH / "test_from_interchange_format_output.nc"
    expected = primap2.open_dataset(expected_file)
    data = pd.read_csv(input_file, index_col=0)
    dimensions = [
        "area (ISO3)",
        "category (IPCC2006)",
        "scenario (general)",
        "Class (class)",
        "Type (type)",
        "unit",
        "entity",
        "source",
    ]
    attrs = {
        "attrs": {
            "area": "area (ISO3)",
            "cat": "category (IPCC2006)",
            "scen": "scenario (general)",
            "sec_cats": ["Class (class)", "Type (type)"],
        },
        "time_format": "%Y",
        "dimensions": {"*": dimensions},
    }
    result = pm2io.from_interchange_format(data, attrs)
    assert_ds_aligned_equal(result, expected, equal_nan=True)
def test_round_trip(any_ds: xr.Dataset, tmp_path):
    """Writing and reading back the interchange format must preserve the dataset."""
    base = tmp_path / "if"
    pm2io.write_interchange_format(base, any_ds.pr.to_interchange_format())
    # dump the YAML metadata into the test log to ease debugging on failure
    with base.with_suffix(".yaml").open() as fd:
        print(fd.read())
    round_tripped = pm2io.from_interchange_format(pm2io.read_interchange_format(base))
    utils.assert_ds_aligned_equal(any_ds, round_tripped)
def test_read_published_data(self):
    """Published interchange-format data must convert to the published netCDF file."""
    dataset_name = "Guetschow-et-al-2021-PRIMAP-crf96_2021-v1"
    interchange = pm2io.read_interchange_format(DATA_PATH / dataset_name)
    actual = pm2io.from_interchange_format(interchange)
    expected = primap2.open_dataset(DATA_PATH / (dataset_name + ".nc"))
    assert_ds_aligned_equal(actual, expected, equal_nan=True)
def test_missing_file(minimal_ds, tmp_path):
    """Reading works without a data_file entry, but fails when the CSV is gone."""
    base = tmp_path / "if"
    pm2io.write_interchange_format(base, minimal_ds.pr.to_interchange_format())
    yaml_path = base.with_suffix(".yaml")
    # strip the data_file entry from the metadata
    with yaml_path.open() as fd:
        kept_lines = [line for line in fd if "data_file" not in line]
    with yaml_path.open("w") as fd:
        fd.writelines(kept_lines)
    # first test automatic discovery
    actual = pm2io.from_interchange_format(pm2io.read_interchange_format(base))
    utils.assert_ds_aligned_equal(minimal_ds, actual)
    # now test without csv file
    base.with_suffix(".csv").unlink()
    with pytest.raises(FileNotFoundError, match="Data file not found at"):
        pm2io.read_interchange_format(base)