def test_no_netcdf(scm_run):
    """Both serialisation helpers raise ImportError when netCDF4 is unavailable."""
    expected = "netcdf4 is not installed. Run 'pip install netcdf4'"

    with pytest.raises(ImportError, match=expected):
        run_to_nc(scm_run.__class__, "ignored")

    with pytest.raises(ImportError, match=expected):
        nc_to_run(scm_run, "ignored")
def test_nc_read_failure(scm_run, test_data_path, caplog):
    """Reading an incompatible netCDF file raises and logs an ERROR record."""
    test_fname = join(
        test_data_path,
        "netcdf-scm_tas_Amon_bcc-csm1-1_rcp26_r1i1p1_209001-211012.nc",
    )

    with pytest.raises(Exception):
        nc_to_run(scm_run.__class__, test_fname)

    # first captured record should come from the scmdata.netcdf logger
    logger_name, level, message = caplog.record_tuples[0]
    assert logger_name == "scmdata.netcdf"
    assert level == logging.ERROR
    assert message == "Failed reading netcdf file: {}".format(test_fname)
def test_nc_with_metadata(scm_run, mdata):
    """Run metadata round-trips through netCDF global attributes."""

    def _values_equal(left, right):
        # sequences/arrays need an element-wise comparison
        if isinstance(left, (list, np.ndarray)):
            return (left == right).all()
        return left == right

    scm_run.metadata = mdata.copy()

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(scm_run, out_fname, dimensions=("scenario",))

        ds = nc.Dataset(out_fname)
        nc_attrs = {attr: ds.getncattr(attr) for attr in ds.ncattrs()}
        for key, value in mdata.items():
            assert key in nc_attrs
            assert _values_equal(nc_attrs[key], value)
        # writer stamps a creation timestamp alongside user metadata
        assert "created_at" in nc_attrs

        run_read = nc_to_run(scm_run.__class__, out_fname)
        for key, value in mdata.items():
            assert key in run_read.metadata
            assert _values_equal(run_read.metadata[key], value)
        assert "created_at" in run_read.metadata
def test_nc_to_run_4d(scm_run):
    """Round-trip data spanning scenario, climate_model and run_id dimensions."""
    base = scm_run.timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()
    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))

    # build an ensemble: 3 extra climate models x 10 run ids, random values
    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            variant = base.copy()
            variant["run_id"] = run_id
            variant["climate_model"] = climate_model
            variant.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))
            frames.append(variant)

    scm_run = scm_run.__class__(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            scm_run, out_fname, dimensions=("scenario", "climate_model", "run_id")
        )
        assert exists(out_fname)

        run_read = nc_to_run(scm_run.__class__, out_fname)
        assert isinstance(run_read, scm_run.__class__)
        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_run_to_nc_case(scm_run, v):
    """Variable names keep their exact case/spelling through a write/read cycle."""
    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        scm_run = scm_run.filter(variable="Primary Energy")
        scm_run["variable"] = v

        run_to_nc(scm_run, out_fname, dimensions=("scenario",))
        res = nc_to_run(scm_run.__class__, out_fname)

        assert res.get_unique_meta("variable", True) == v
def test_nc_to_run(scm_run, dimensions):
    """Basic write/read round-trip with parameterised dimensions."""
    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        run_to_nc(scm_run, out_fname, dimensions=dimensions)
        assert exists(out_fname)

        run_read = nc_to_run(scm_run.__class__, out_fname)

    # comparisons don't touch the (now removed) temp file
    assert isinstance(run_read, scm_run.__class__)
    assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_nc_to_run_without_dimensions(scm_run):
    """An empty dimensions tuple plus an extra column still round-trips."""
    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        scm_run = scm_run.filter(scenario="a_scenario2")
        scm_run["run_id"] = [2]

        run_to_nc(scm_run, out_fname, dimensions=(), extras=("run_id",))
        assert exists(out_fname)

        run_read = nc_to_run(scm_run.__class__, out_fname)
        assert isinstance(run_read, scm_run.__class__)
        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_nc_to_run_with_extras_id_needed_and_not_needed(scm_run):
    """'model' requires the _id dimension; 'paraset_id' collapses onto run_id."""
    template = scm_run.filter(scenario="a_scenario")

    # 2 models x 2 scenarios x 10 runs, with model/scenario moved out of the index
    frames = []
    for model in ("model_a", "model_b"):
        for scenario in ("scenario_a", "scenario_b"):
            for run_id in range(10):
                tmp = template.timeseries()
                tmp["run_id"] = run_id
                tmp["model"] = model
                tmp["scenario"] = scenario
                tmp.index = tmp.index.droplevel(["model", "scenario"])
                frames.append(tmp)

    scm_run = scm_run.__class__(pd.concat(frames))
    # paraset_id is a pure function of run_id, so it can hang off that dimension
    scm_run["paraset_id"] = scm_run["run_id"].apply(lambda x: x // 3)

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id", "scenario"),
            extras=("paraset_id", "model"),
        )
        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)
        # Should save with dimensions: "time", "climate_model",
        # "run_id", "scenario" and "_id"
        assert len(xr_ds["Primary_Energy"].shape) == 5
        # model must be saved with id
        assert xr_ds["model"].dims == ("_id",)
        # paraset_id is wholly defined by run_id
        assert xr_ds["paraset_id"].dims == ("run_id",)

        run_read = nc_to_run(scm_run.__class__, out_fname)
        assert isinstance(run_read, scm_run.__class__)
        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_nc_to_run_with_extras_id_not_needed_sparsity(scm_run):
    """An extra that is a pure function of a dimension needs no _id dimension."""
    base = scm_run.filter(scenario="a_scenario").timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()
    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))

    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            variant = base.copy()
            variant["run_id"] = run_id
            variant["climate_model"] = climate_model
            variant.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))
            frames.append(variant)

    ensemble = pd.concat(frames).reset_index(drop=True)
    # paraset_id is wholly determined by run_id
    ensemble["paraset_id"] = ensemble["run_id"].apply(lambda x: x // 3)
    scm_run = scm_run.__class__(ensemble)

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("paraset_id",),
        )
        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)
        # Should save with three dimensions: "time", "climate_model", "run_id"
        # There should be no "_id" as paraset_id is uniquely defined by "run_id"
        assert len(xr_ds["Primary_Energy"].shape) == 3

        run_read = nc_to_run(scm_run.__class__, out_fname)
        assert isinstance(run_read, scm_run.__class__)
        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_nc_to_run_with_extras_sparsity(scm_run):
    """An extra not determined by any single dimension forces an _id dimension."""
    base = scm_run.timeseries()
    val_cols = base.columns.tolist()
    base = base.reset_index()
    base["climate_model"] = "base_m"
    base["run_id"] = 1
    base.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))

    frames = [base]
    for climate_model in ["abc_m", "def_m", "ghi_m"]:
        for run_id in range(10):
            variant = base.copy()
            variant["run_id"] = run_id
            variant["climate_model"] = climate_model
            variant.loc[:, val_cols] = np.random.rand(base.shape[0], len(val_cols))
            frames.append(variant)

    scm_run = scm_run.__class__(pd.concat(frames).reset_index(drop=True))

    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")
        run_to_nc(
            scm_run,
            out_fname,
            dimensions=("climate_model", "run_id"),
            extras=("scenario",),
        )
        assert exists(out_fname)

        xr_ds = xr.load_dataset(out_fname)
        # Should save with four dimensions: "time", "climate_model", "run_id", "_id"
        # the "_id" dimension is required as a short-hand mapping between extras and
        # the data.
        # There is no way to avoid this sparsity.
        assert len(xr_ds["Primary_Energy"].shape) == 4

        run_read = nc_to_run(scm_run.__class__, out_fname)
        assert isinstance(run_read, scm_run.__class__)
        assert_scmdf_almost_equal(scm_run, run_read, check_ts_names=False)
def test_run_to_nc_weird_name(scm_run, ch, start_with_weird):
    """Odd characters are illegal at the start of a name but fine elsewhere."""
    with tempfile.TemporaryDirectory() as tempdir:
        out_fname = join(tempdir, "out.nc")

        scm_run = scm_run.filter(variable="Primary Energy")
        variable = scm_run.get_unique_meta("variable", True)
        if start_with_weird:
            variable = ch + " " + variable
        else:
            variable = variable + " " + ch
        scm_run["variable"] = variable

        if start_with_weird:
            # netCDF library rejects names beginning with special characters
            error_msg = re.escape("NetCDF: Name contains illegal characters")
            with pytest.raises(RuntimeError, match=error_msg):
                run_to_nc(scm_run, out_fname, dimensions=("scenario",))
        else:
            run_to_nc(scm_run, out_fname, dimensions=("scenario",))
            res = nc_to_run(scm_run.__class__, out_fname)
            assert res.get_unique_meta("variable", True) == variable