def test_adapt_freq_add_dims(use_dask):
    """Check that ``adapt_freq`` returns the input dims, with ``add_dims`` or a window."""
    dates = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    pr = xr.DataArray(
        np.random.randint(0, 100, size=(dates.size, 3)),
        coords={"time": dates, "lat": [0, 1, 2]},
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )
    if use_dask:
        pr = pr.chunk()

    def _adapt(grouper):
        # Build the sim/ref pair with keep_attrs enabled, then adjust.
        with xr.set_options(keep_attrs=True):
            simulated = xr.where(pr < 20, pr / 20, pr)
            reference = xr.where(pr < 10, pr / 20, pr)
        adjusted, threshold, _ = adapt_freq(
            reference, simulated, thresh="1 mm d-1", group=grouper
        )
        return simulated, adjusted, threshold

    # Monthly grouping, with "lat" listed in add_dims.
    prsim, sim_ad, pth = _adapt(Grouper("time.month", add_dims=["lat"]))
    assert set(sim_ad.dims) == set(prsim.dims)
    assert "lat" not in pth.dims

    # Day-of-year grouping with a 5-step window.
    prsim, sim_ad, _ = _adapt(Grouper("time.dayofyear", window=5))
    assert set(sim_ad.dims) == set(prsim.dims)
def test_add_dims(self, use_dask):
    """Train/adjust EQM with ``location`` as an added dim, then without it."""
    chunks = {"location": -1} if use_dask else None
    hist_period = slice("1981", "2010")

    obs = open_dataset(
        "sdba/ahccd_1950-2013.nc", chunks=chunks, drop_variables=["lat", "lon"]
    )
    ref = convert_units_to(obs.sel(time=hist_period).tasmax, "K")
    # Keep a single station and relabel it "Amos".
    ref = ref.isel(location=1, drop=True).expand_dims(location=["Amos"])

    dsim = open_dataset(
        "sdba/CanESM2_1950-2100.nc", chunks=chunks, drop_variables=["lat", "lon"]
    ).tasmax
    hist = dsim.sel(time=hist_period)
    sim = dsim.sel(time=slice("2041", "2070"))

    # With add_dims: only checks that the computation runs.
    with_dims = Grouper("time.dayofyear", window=5, add_dims=["location"])
    EmpiricalQuantileMapping.train(ref, hist, group=with_dims).adjust(sim).load()

    # Without add_dims: sanity check on the output.
    without_dims = Grouper("time.dayofyear", window=5)
    adjustment = EmpiricalQuantileMapping.train(ref, hist, group=without_dims)
    scen2 = adjustment.adjust(sim).load()
    assert scen2.sel(location=["Kugluktuk", "Vancouver"]).isnull().all()
def test_grouper_group(tas_series, group, window, nvals):
    """Check the largest group size (and the ``window`` dim) produced by ``Grouper.group``."""
    series = tas_series(np.ones(366), start="2000-01-01")
    grouped = Grouper(group, window=window).group(series)

    if window > 1:
        assert "window" in grouped.dims

    largest_group = grouped.count().max()
    assert largest_group == nvals
def test_real_data(self):
    """Smoke test: run EQM followed by ExtremeValues on chunked real datasets."""
    period = slice("1950", "2009")
    dsim = open_dataset("sdba/CanESM2_1950-2100.nc").chunk()
    dref = open_dataset("sdba/ahccd_1950-2013.nc").chunk()

    ref = convert_units_to(dref.sel(time=period).pr, "mm/d")
    hist = convert_units_to(dsim.sel(time=period).pr, "mm/d")

    # Add a small uniform noise to both series, keeping their attributes.
    with xr.set_options(keep_attrs=True):
        ref = ref + uniform_noise_like(ref, low=1e-6, high=1e-3)
        hist = hist + uniform_noise_like(hist, low=1e-6, high=1e-3)

    eqm = EmpiricalQuantileMapping.train(
        ref,
        hist,
        group=Grouper("time.dayofyear", window=31),
        nquantiles=np.linspace(0.01, 0.99, num=50),
    )
    scen = eqm.adjust(hist, interp="linear", extrapolation="constant")

    extremes = ExtremeValues.train(
        ref, hist, cluster_thresh="1 mm/day", q_thresh=0.97
    )
    # Only checks that the full chain can be computed.
    extremes.adjust(scen, hist, frac=1e-9).load()
def test_param_class():
    """Check that ``Parametrizable`` stores its parameters and exposes them in its repr."""
    gr = Grouper(group="time.month")
    in_params = dict(
        anint=4, abool=True, astring="a string", adict={"key": "val"}, group=gr
    )
    obj = Parametrizable(**in_params)

    assert obj.parameters == in_params

    # The result of ``startswith`` used to be discarded, so the repr was never
    # actually checked. The expected prefix names the class being instantiated
    # here (``Parametrizable``).
    assert repr(obj).startswith(
        "Parametrizable(anint=4, abool=True, astring='a string', adict={'key': 'val'}, "
        "group=Grouper("
    )
def test_interp_on_quantiles_monthly():
    """Check that monthly ``interp_on_quantiles`` yields no NaN with constant extrapolation."""
    t = xr.cftime_range("2000-01-01", "2030-12-31", freq="D", calendar="noleap")
    elapsed_days = (t - t[0]).days

    def _synthetic(amplitude, trend):
        # Annual cycle + uniform noise + offset + linear "warming" trend.
        values = (
            amplitude * np.cos(2 * np.pi * t.dayofyear / 365)
            + 2 * np.random.random_sample((t.size,))
            + 273.15
            + trend * elapsed_days / 365
        )
        return xr.DataArray(
            values, dims=("time",), coords={"time": t}, attrs={"units": "K"}
        )

    ref = _synthetic(-20, 0.1)
    sim = _synthetic(-18, 0.11)

    training = slice(None, "2015-01-01")
    ref = ref.sel(time=training)
    hist = sim.sel(time=training)

    group = Grouper("time.month")
    quantiles = u.equally_spaced_nodes(15, eps=1e-6)
    ref_q = group.apply(nbu.quantile, ref, main_only=True, q=quantiles)
    hist_q = group.apply(nbu.quantile, hist, main_only=True, q=quantiles)
    af = u.get_correction(hist_q, ref_q, "+")

    for interp in ("nearest", "linear", "cubic"):
        afi = u.interp_on_quantiles(
            sim,
            hist_q,
            af,
            group="time.month",
            method=interp,
            extrapolation="constant",
        )
        n_missing = afi.isnull().sum("time")
        assert n_missing == 0, interp
def test_simple(self, group, crd_dims, pts_dims):
    """Check that the PC adjustment makes the covariance of scen match that of ref.

    ``crd_dims`` and ``pts_dims`` are forwarded to ``PrincipalComponents``; any of
    "lat"/"lon" that is in neither is treated below as a vectorizing dimension
    and every slice along it is checked.
    """
    n = 15 * 365
    m = 2  # A dummy dimension to test vectorizing.
    ref_y = norm.rvs(loc=10, scale=1, size=(m, n))
    ref_x = norm.rvs(loc=3, scale=2, size=(m, n))
    sim_x = norm.rvs(loc=4, scale=2, size=(m, n))
    sim_y = sim_x + norm.rvs(loc=1, scale=1, size=(m, n))

    ref = xr.DataArray([ref_x, ref_y], dims=("lat", "lon", "time"))
    ref["time"] = xr.cftime_range("1990-01-01", periods=n, calendar="noleap")
    sim = xr.DataArray([sim_x, sim_y], dims=("lat", "lon", "time"))
    sim["time"] = ref["time"]

    PCA = PrincipalComponents(group=group, crd_dims=crd_dims, pts_dims=pts_dims)
    PCA.train(ref, sim)
    scen = PCA.adjust(sim)

    # Rebuild the grouping and the dimension roles used for the checks.
    group = group if isinstance(group, Grouper) else Grouper("time")
    crds = crd_dims or ["lat", "lon"]
    pts = (pts_dims or []) + ["time"]
    vec = list({"lat", "lon"} - set(crds) - set(pts))

    refs = ref.stack(crd=crds)
    sims = sim.stack(crd=crds)
    scens = scen.stack(crd=crds)

    def _assert(ds):
        cov_ref = nancov(ds.ref.transpose("crd", "pt"))
        cov_sim = nancov(ds.sim.transpose("crd", "pt"))
        cov_scen = nancov(ds.scen.transpose("crd", "pt"))

        # PC adjustment makes the covariance of scen match the one of ref.
        np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
        # ...while the unadjusted sim must not already match.
        with pytest.raises(AssertionError):
            np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

    def _group_assert(ds, dim):
        ds = ds.stack(pt=pts)
        if len(vec) == 1:
            # Check every slice along the vectorizing dimension. The loop
            # variable used to be ignored and index 0 was checked repeatedly.
            for v in ds[vec[0]]:
                _assert(ds.sel({vec[0]: v}))
        else:
            _assert(ds)
        return ds.unstack("pt")

    group.apply(_group_assert, {"ref": refs, "sim": sims, "scen": scens})
def test_interp_on_quantiles(shape, group, method):
    """Check ``interp_on_quantiles`` on synthetic data, including points outside the sim range."""
    group = Grouper(group)
    dims = ("time", "lat", "lon")[: len(shape)]
    t = pd.date_range("2000-01-01", periods=shape[0], freq="D")
    raw = np.random.random_sample(shape)  # values in [0, 1]

    def _as_array(values):
        return xr.DataArray(values, dims=dims, coords={"time": t})

    obs = _as_array(raw * 2 + 9)  # range [9, 11]
    sim = _as_array(raw * 2.4 + 9)  # range [9, 11.4]

    # fut lies in [9.02, 11.38], i.e. inside the range of sim, except for a
    # few time steps forced far outside of it.
    fut_raw = raw * 2.36 + 9.02
    outside = np.array([100, 300, 500, 700])
    fut_raw[outside] = 1000
    fut = _as_array(fut_raw)

    q = np.linspace(0, 1, 11)
    xq = group.apply("quantile", sim, q=q).rename(quantile="quantiles")
    yq = group.apply("quantile", obs, q=q).rename(quantile="quantiles")

    fut_corr = u.interp_on_quantiles(fut, xq, yq, group=group, method=method)
    fut_corr = fut_corr.transpose(*dims)

    if method != "nearest":
        # Out-of-range points come back as NaN; all others follow obs closely.
        np.testing.assert_allclose(
            fut_corr.values, obs.where(fut != 1000).values, rtol=2e-3
        )
        xr.testing.assert_equal(fut_corr.isnull(), fut == 1000)
    else:
        np.testing.assert_allclose(fut_corr.values, obs.values, rtol=0.3)
        assert fut_corr.isnull().sum() == 0
def test_add_dim(self, series, mon_series):
    """Check additive ``Scaling`` when "lon" is listed in the grouper's ``add_dims``."""
    n_samples = 10000
    probabilities = np.random.rand(n_samples, 4)
    values = uniform(loc=2, scale=1).ppf(probabilities)

    # hist and sim are the very same series; ref is that series shifted by 2.
    hist = sim = series(values, "tas")
    ref = mon_series(apply_correction(values, 2, "+"), "tas")

    grouper = Grouper("time.month", add_dims=["lon"])
    scaling = Scaling.train(ref, hist, group=grouper, kind="+")
    assert "lon" not in scaling.ds

    adjusted = scaling.adjust(sim)
    assert "lon" in adjusted.dims
    np.testing.assert_array_almost_equal(adjusted.transpose(*ref.dims), ref)
def test_param_class():
    """Check parameter storage, repr and jsonpickle round-trip of ``Parametrizable``."""
    grouper = Grouper(group="time.month")
    in_params = {
        "anint": 4,
        "abool": True,
        "astring": "a string",
        "adict": {"key": "val"},
        "group": grouper,
    }
    obj = Parametrizable(**in_params)
    assert obj.parameters == in_params

    expected_prefix = (
        "Parametrizable(anint=4, abool=True, astring='a string', adict={'key': 'val'}, "
        "group=Grouper("
    )
    assert repr(obj).startswith(expected_prefix)

    # Encoding then decoding must give back the same parameters.
    restored = jsonpickle.decode(jsonpickle.encode(obj))
    assert obj.parameters == restored.parameters
def test_adapt_freq(use_dask):
    """Check the values, thresholds and attributes returned by ``adapt_freq``."""
    dates = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    pr = xr.DataArray(
        np.random.randint(0, 100, size=(dates.size, 3)),
        coords={"time": dates, "lat": [0, 1, 2]},
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )
    if use_dask:
        pr = pr.chunk({"lat": 1})

    grouper = Grouper("time.month")
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=grouper)

    # Adjusted values at the positions where the input is considered zero.
    input_zeros = sim_ad.where(prsim <= 1)

    # Proportion of corrected values; the denominator is the theoretical
    # number of values under 1 in prsim.
    expected_zero_count = dates.size * 3 * 0.2
    dP0_out = (input_zeros > 1).sum() / expected_zero_count
    np.testing.assert_allclose(dP0_out, 0.5, atol=0.1)

    # Corrected values must have been generated in the range ]1, 20 + tol[.
    generated = input_zeros.where(input_zeros > 1)
    corrected = (
        generated.stack(flat=["lat", "time"]).reset_index("flat").dropna("flat")
    )
    assert ((corrected < 20.1) & (corrected > 1)).all()

    # Non-corrected values must be untouched (thresholds include a tolerance
    # because of randomness).
    untouched_out = sim_ad.where(prsim > 20.1)
    untouched_in = prsim.where(prsim > 20.5).transpose("lat", "time")
    xr.testing.assert_equal(untouched_out, untouched_in)

    # Pth and dP0 are approximately the expected values.
    np.testing.assert_allclose(pth, 20, rtol=0.05)
    np.testing.assert_allclose(dP0, 0.5, atol=0.14)

    assert sim_ad.units == "mm d-1"
    assert sim_ad.attrs["references"].startswith("Themeßl")
    assert pth.units == "mm d-1"
def test_dask_julia(self):
    """Compare EQM + ExtremeValues output on chunked data with a stored external result."""
    period = slice("1950", "2009")
    dsim = open_dataset("sdba/CanESM2_1950-2100.nc").chunk()
    dref = open_dataset("sdba/ahccd_1950-2013.nc").chunk()
    dexp = open_dataset("sdba/adjusted_external.nc")

    ref = convert_units_to(dref.sel(time=period).pr, "mm/d")
    hist = convert_units_to(dsim.sel(time=period).pr, "mm/d")

    eqm = EmpiricalQuantileMapping(
        group=Grouper("time.dayofyear", window=31),
        nquantiles=np.linspace(0.01, 0.99, num=50),
    )

    # Add a small uniform noise to both series, keeping their attributes.
    with xr.set_options(keep_attrs=True):
        ref = ref + uniform_noise_like(ref, low=1e-6, high=1e-3)
        hist = hist + uniform_noise_like(hist, low=1e-6, high=1e-3)

    eqm.train(ref, hist)
    scen = eqm.adjust(hist, interp="linear", extrapolation="constant")

    extremes = ExtremeValues(cluster_thresh="1 mm/day", q_thresh=0.97)
    extremes.train(ref, hist)
    new_scen = extremes.adjust(scen, hist, frac=1e-9)
    new_scen.load()

    # Only compare the points that the extreme-value adjustment modified.
    modified = new_scen != scen
    exp_scen = dexp.extreme_values_julia
    xr.testing.assert_allclose(
        new_scen.where(modified).transpose("time", "location"),
        exp_scen.where(modified).transpose("time", "location"),
        atol=0.005,
        rtol=2e-3,
    )
def test_grouper_apply(tas_series, use_dask, group, n):
    """Exercise ``Grouper.apply`` with reductions, windows, functions and mixed datasets.

    ``tas1`` is an increasing series and ``tas0`` a series of zeros; ``tas``
    stacks both along a new "lat" dimension. ``n`` is the expected chunk length
    of the reduced output along the grouping property when dask is used.
    """
    tas1 = tas_series(np.arange(366), start="2000-01-01")
    tas0 = tas_series(np.zeros(366), start="2000-01-01")
    tas = xr.concat((tas1, tas0), dim="lat")

    grouper = Grouper(group)
    if not group.startswith("time"):
        tas = tas.rename(time=grouper.dim)
        tas1 = tas1.rename(time=grouper.dim)
        tas0 = tas0.rename(time=grouper.dim)

    if use_dask:
        tas = tas.chunk({"lat": 1, grouper.dim: -1})
        # Chunk each series from itself. These two assignments used to be
        # swapped, which turned both variables into the chunked ``tas1`` and
        # made the dask runs use different data than the non-dask runs.
        tas0 = tas0.chunk({grouper.dim: -1})
        tas1 = tas1.chunk({grouper.dim: -1})

    # Normal monthly mean
    out_mean = grouper.apply("mean", tas)
    if grouper.prop:
        exp = tas.groupby(group).mean()
    else:
        exp = tas.mean(dim=grouper.dim)
    np.testing.assert_array_equal(out_mean, exp)

    # With additional dimension included
    grouper = Grouper(group, add_dims=["lat"])
    out = grouper.apply("mean", tas)
    assert out.ndim == int(grouper.prop is not None)
    np.testing.assert_array_equal(out, exp.mean("lat"))
    assert out.attrs["group"] == group
    assert out.attrs["group_compute_dims"] == [grouper.dim, "lat"]
    assert out.attrs["group_window"] == 1

    # Additional dimension, but main_only
    out = grouper.apply("mean", tas, main_only=True)
    np.testing.assert_array_equal(out, out_mean)

    # With window
    win_grouper = Grouper(group, window=5)
    out = win_grouper.apply("mean", tas)
    rolld = tas.rolling({win_grouper.dim: 5}, center=True).construct(
        window_dim="window"
    )
    if grouper.prop:
        exp = rolld.groupby(group).mean(dim=[win_grouper.dim, "window"])
    else:
        exp = rolld.mean(dim=[grouper.dim, "window"])
    np.testing.assert_array_equal(out, exp)

    # With function + nongrouping-grouped
    grouper = Grouper(group)

    def normalize(grp, dim):
        return grp / grp.mean(dim=dim)

    normed = grouper.apply(normalize, tas)
    assert normed.shape == tas.shape
    if use_dask:
        assert normed.chunks == ((1, 1), (366,))

    # With window + nongrouping-grouped
    out = win_grouper.apply(normalize, tas)
    assert out.shape == tas.shape

    # Mixed output: one reduced variable and one with the input's shape.
    def mixed_reduce(grdds, dim=None):
        tas1 = grdds.tas1.mean(dim=dim)
        tas0 = grdds.tas0 / grdds.tas0.mean(dim=dim)
        tas1.attrs["_group_apply_reshape"] = True
        return xr.Dataset(data_vars={"tas1_mean": tas1, "norm_tas0": tas0})

    out = grouper.apply(mixed_reduce, {"tas1": tas1, "tas0": tas0})
    if grouper.prop:
        assert grouper.prop not in out.norm_tas0.dims
        assert grouper.prop in out.tas1_mean.dims

    if use_dask:
        assert out.tas1_mean.chunks == (((n,),) if grouper.prop else tuple())
        assert out.norm_tas0.chunks == ((366,),)

    # Mixed input: a full-length variable and an already-reduced one.
    if grouper.prop:

        def normalize_from_precomputed(grpds, dim=None):
            return (grpds.tas / grpds.tas1_mean).mean(dim=dim)

        out = grouper.apply(
            normalize_from_precomputed, {"tas": tas, "tas1_mean": out.tas1_mean}
        ).isel(lat=0)
        exp = normed.groupby(group).mean().isel(lat=0)
        assert grouper.prop in out.dims
        np.testing.assert_array_equal(out, exp)
def test_grouper_get_index(tas_series, group, interp, val90):
    """Check the group index that ``Grouper.get_index`` assigns to one given day."""
    series = tas_series(np.ones(366), start="2000-01-01")
    index = Grouper(group, interp=interp).get_index(series)

    # Position 90 of a series starting on 2000-01-01 is March 31st.
    march_31 = 90
    assert index[march_31] == val90
def test_raise_on_multiple_chunks(tas_series):
    """Training on data split in several chunks along time must raise ``ValueError``."""
    values = np.arange(730).astype(float)
    chunked = tas_series(values).chunk({"time": 365})
    monthly = Grouper("time.month")

    with pytest.raises(ValueError):
        EmpiricalQuantileMapping.train(chunked, chunked, group=monthly)
class TestPrincipalComponents:
    """Tests of the ``PrincipalComponents`` adjustment on synthetic and real data."""

    @pytest.mark.parametrize(
        "group", (Grouper("time.month"), Grouper("time", add_dims=["lon"]))
    )
    def test_simple(self, group):
        """The adjustment makes the covariance of scen match the one of ref."""
        n = 15 * 365
        m = 2  # A dummy dimension to test vectorizing.
        size = (m, n)
        ref_y = norm.rvs(loc=10, scale=1, size=size)
        ref_x = norm.rvs(loc=3, scale=2, size=size)
        sim_x = norm.rvs(loc=4, scale=2, size=size)
        sim_y = sim_x + norm.rvs(loc=1, scale=1, size=size)

        dims = ("lat", "lon", "time")
        ref = xr.DataArray([ref_x, ref_y], dims=dims, attrs={"units": "degC"})
        ref["time"] = xr.cftime_range("1990-01-01", periods=n, calendar="noleap")
        sim = xr.DataArray([sim_x, sim_y], dims=dims, attrs={"units": "degC"})
        sim["time"] = ref["time"]

        trained = PrincipalComponents.train(ref, sim, group=group, crd_dim="lat")
        scen = trained.adjust(sim)

        def _covariance(da):
            return nancov(da.transpose("lat", "pt"))

        def _assert(ds):
            cov_ref = _covariance(ds.ref)
            cov_sim = _covariance(ds.sim)
            cov_scen = _covariance(ds.scen)

            # After adjustment, scen has the covariance of ref...
            np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
            # ...which was not the case for the raw sim.
            with pytest.raises(AssertionError):
                np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

        def _group_assert(ds, dim):
            if "lon" in dim:
                _assert(ds.stack(pt=dim))
            else:
                # "lon" is not among the grouped dims: check each slice.
                for lon in ds.lon:
                    _assert(ds.sel(lon=lon).stack(pt=dim))
            return ds

        group.apply(_group_assert, {"ref": ref, "sim": sim, "scen": scen})

    @pytest.mark.parametrize("use_dask", [True, False])
    @pytest.mark.parametrize("pcorient", ["full", "simple"])
    def test_real_data(self, atmosds, use_dask, pcorient):
        """Adjusting a shifted copy of the data brings it back close to ref."""
        ref_vars = xr.Dataset(
            {
                "tasmax": atmosds.tasmax,
                "tasmin": atmosds.tasmin,
                "tas": atmosds.tas,
            }
        )
        ref = stack_variables(ref_vars).isel(location=3)

        hist_vars = xr.Dataset(
            {
                "tasmax": 1.001 * atmosds.tasmax,
                "tasmin": atmosds.tasmin - 0.25,
                "tas": atmosds.tas + 1,
            }
        )
        hist = stack_variables(hist_vars).isel(location=3)

        with xr.set_options(keep_attrs=True):
            sim = hist + 5
            sim["time"] = sim.time + np.timedelta64(10, "Y").astype("<m8[ns]")

        if use_dask:
            ref = ref.chunk()
            hist = hist.chunk()
            sim = sim.chunk()

        trained = PrincipalComponents.train(
            ref, hist, crd_dim="multivar", best_orientation=pcorient
        )
        scen = trained.adjust(sim)

        def dist(ref, sim):
            """Pointwise distance between ref and sim in the PC space."""
            # Overwrite the time axis of sim with the one of ref before subtracting.
            sim["time"] = ref.time
            squared = (ref - sim) ** 2
            return np.sqrt(squared.sum("multivar"))

        # Most points are closer after transform.
        raw_distance = dist(ref, sim)
        adjusted_distance = dist(ref, scen)
        assert (raw_distance < adjusted_distance).mean() < 0.01

        ref = unstack_variables(ref)
        scen = unstack_variables(scen)
        # "Error" is very small
        mean_error = (ref - scen).mean()
        assert mean_error.tasmin < 5e-3
def test_raise_on_multiple_chunks(tas_series):
    """Training on data split in several chunks along time must raise ``ValueError``."""
    chunked = tas_series(np.arange(730)).chunk({"time": 365})
    # The object is built outside the ``raises`` block: only ``train`` may raise.
    adjustment = BaseAdjustment(group=Grouper("time.month"))

    with pytest.raises(ValueError):
        adjustment.train(chunked, chunked)
class TestPrincipalComponents:
    """Tests of the ``PrincipalComponents`` adjustment on synthetic and tutorial data."""

    @pytest.mark.parametrize(
        "group,crd_dims,pts_dims",
        (
            ["time", ["lat"], None],  # Lon as vectorizing dim
            ["time", None, None],  # Lon as second coord dims
            ["time", ["lat"], ["lon"]],  # Lon as a Points dim
            # Testing time grouping, vectorization on lon
            [Grouper("time.month"), ["lat"], None],
        ),
    )
    def test_simple(self, group, crd_dims, pts_dims):
        """Check that the PC adjustment makes the covariance of scen match that of ref.

        ``crd_dims`` and ``pts_dims`` are forwarded to ``PrincipalComponents``;
        any of "lat"/"lon" that is in neither is treated below as a vectorizing
        dimension and every slice along it is checked.
        """
        n = 15 * 365
        m = 2  # A dummy dimension to test vectorizing.
        ref_y = norm.rvs(loc=10, scale=1, size=(m, n))
        ref_x = norm.rvs(loc=3, scale=2, size=(m, n))
        sim_x = norm.rvs(loc=4, scale=2, size=(m, n))
        sim_y = sim_x + norm.rvs(loc=1, scale=1, size=(m, n))

        ref = xr.DataArray([ref_x, ref_y], dims=("lat", "lon", "time"))
        ref["time"] = xr.cftime_range("1990-01-01", periods=n, calendar="noleap")
        sim = xr.DataArray([sim_x, sim_y], dims=("lat", "lon", "time"))
        sim["time"] = ref["time"]

        PCA = PrincipalComponents(group=group, crd_dims=crd_dims, pts_dims=pts_dims)
        PCA.train(ref, sim)
        scen = PCA.adjust(sim)

        # Rebuild the grouping and the dimension roles used for the checks.
        group = group if isinstance(group, Grouper) else Grouper("time")
        crds = crd_dims or ["lat", "lon"]
        pts = (pts_dims or []) + ["time"]
        vec = list({"lat", "lon"} - set(crds) - set(pts))

        refs = ref.stack(crd=crds)
        sims = sim.stack(crd=crds)
        scens = scen.stack(crd=crds)

        def _assert(ds):
            cov_ref = nancov(ds.ref.transpose("crd", "pt"))
            cov_sim = nancov(ds.sim.transpose("crd", "pt"))
            cov_scen = nancov(ds.scen.transpose("crd", "pt"))

            # PC adjustment makes the covariance of scen match the one of ref.
            np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
            # ...while the unadjusted sim must not already match.
            with pytest.raises(AssertionError):
                np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

        def _group_assert(ds, dim):
            ds = ds.stack(pt=pts)
            if len(vec) == 1:
                # Check every slice along the vectorizing dimension. The loop
                # variable used to be ignored and index 0 was checked repeatedly.
                for v in ds[vec[0]]:
                    _assert(ds.sel({vec[0]: v}))
            else:
                _assert(ds)
            return ds.unstack("pt")

        group.apply(_group_assert, {"ref": refs, "sim": sims, "scen": scens})

    @pytest.mark.parametrize(
        "group", [Grouper("time"), Grouper("time.month", window=11)]
    )
    def test_real_data(self, group):
        """Adjust one pair of grid points of the tutorial dataset towards another pair."""
        ds = xr.tutorial.open_dataset("air_temperature")
        ref = ds.air.isel(lat=21, lon=[40, 52]).drop_vars(["lon", "lat"])
        sim = ds.air.isel(lat=18, lon=[17, 35]).drop_vars(["lon", "lat"])

        PCA = PrincipalComponents(group=group)
        PCA.train(ref, sim)
        scen = PCA.adjust(sim)

        def dist(ref, sim):
            """Pointwise distance between ref and sim in the PC space."""
            return np.sqrt(((ref - sim) ** 2).sum("lon"))

        # Most points are closer after transform.
        assert (dist(ref, sim) < dist(ref, scen)).mean() < 0.05

        # "Error" is very small
        assert (ref - scen).mean() < 5e-3