Example #1
def test_adapt_freq_add_dims(use_dask):
    time = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    prvals = np.random.randint(0, 100, size=(time.size, 3))
    pr = xr.DataArray(
        prvals,
        coords={
            "time": time,
            "lat": [0, 1, 2]
        },
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )

    if use_dask:
        pr = pr.chunk()
    group = Grouper("time.month", add_dims=["lat"])
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=group)
    assert set(sim_ad.dims) == set(prsim.dims)
    assert "lat" not in pth.dims

    group = Grouper("time.dayofyear", window=5)
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=group)
    assert set(sim_ad.dims) == set(prsim.dims)
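A minimal standalone sketch of the add_dims behaviour exercised above (assuming the xclim.sdba.base.Grouper API these tests target): dimensions listed in add_dims are reduced together with the time grouping, so they do not survive in the reduced output.

import numpy as np
import pandas as pd
import xarray as xr
from xclim.sdba.base import Grouper

time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
da = xr.DataArray(
    np.random.rand(time.size, 2),
    coords={"time": time, "lat": [0, 1]},
    dims=("time", "lat"),
)

# Without add_dims, only time is reduced: "lat" survives.
monthly = Grouper("time.month").apply("mean", da)
assert "lat" in monthly.dims

# With add_dims=["lat"], "lat" is pooled into the monthly reduction.
pooled = Grouper("time.month", add_dims=["lat"]).apply("mean", da)
assert "lat" not in pooled.dims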
Example #2
    def test_add_dims(self, use_dask):
        if use_dask:
            chunks = {"location": -1}
        else:
            chunks = None
        ref = (open_dataset(
            "sdba/ahccd_1950-2013.nc",
            chunks=chunks,
            drop_variables=["lat", "lon"],
        ).sel(time=slice("1981", "2010")).tasmax)
        ref = convert_units_to(ref, "K")
        ref = ref.isel(location=1, drop=True).expand_dims(location=["Amos"])

        dsim = open_dataset("sdba/CanESM2_1950-2100.nc",
                            chunks=chunks,
                            drop_variables=["lat", "lon"]).tasmax
        hist = dsim.sel(time=slice("1981", "2010"))
        sim = dsim.sel(time=slice("2041", "2070"))

        # With add_dims, "does it run" test
        group = Grouper("time.dayofyear", window=5, add_dims=["location"])
        EQM = EmpiricalQuantileMapping.train(ref, hist, group=group)
        EQM.adjust(sim).load()

        # Without add_dims, sanity test: training is per-location, so locations absent from ref come out all-NaN.
        group = Grouper("time.dayofyear", window=5)
        EQM2 = EmpiricalQuantileMapping.train(ref, hist, group=group)
        scen2 = EQM2.adjust(sim).load()
        assert scen2.sel(location=["Kugluktuk", "Vancouver"]).isnull().all()
Example #3
def test_grouper_group(tas_series, group, window, nvals):
    tas = tas_series(np.ones(366), start="2000-01-01")

    grouper = Grouper(group, window=window)
    grpd = grouper.group(tas)

    if window > 1:
        assert "window" in grpd.dims

    assert grpd.count().max() == nvals
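The window argument behaves like a centred rolling construct folded into the grouping; a rough standalone equivalent of the reduction it performs (an illustration based on the rolling construct used in Example #13, not the library's exact internals):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", periods=366, freq="D")
tas = xr.DataArray(np.ones(366), coords={"time": time}, dims="time")

# Materialize a centred 5-day window as an explicit dimension, then group:
# each group also reduces over the "window" dimension.
rolld = tas.rolling(time=5, center=True).construct(window_dim="window")
monthly = rolld.groupby("time.month").mean(dim=["time", "window"])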
Example #4
    def test_real_data(self):

        dsim = open_dataset("sdba/CanESM2_1950-2100.nc").chunk()
        dref = open_dataset("sdba/ahccd_1950-2013.nc").chunk()

        ref = convert_units_to(dref.sel(time=slice("1950", "2009")).pr, "mm/d")
        hist = convert_units_to(
            dsim.sel(time=slice("1950", "2009")).pr, "mm/d")

        quantiles = np.linspace(0.01, 0.99, num=50)

        with xr.set_options(keep_attrs=True):
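            # Tiny uniform noise breaks the ties created by the many identical
            # (near-zero) precipitation values before quantile mapping.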
            ref = ref + uniform_noise_like(ref, low=1e-6, high=1e-3)
            hist = hist + uniform_noise_like(hist, low=1e-6, high=1e-3)

        EQM = EmpiricalQuantileMapping.train(ref,
                                             hist,
                                             group=Grouper("time.dayofyear",
                                                           window=31),
                                             nquantiles=quantiles)

        scen = EQM.adjust(hist, interp="linear", extrapolation="constant")

        EX = ExtremeValues.train(ref,
                                 hist,
                                 cluster_thresh="1 mm/day",
                                 q_thresh=0.97)
        new_scen = EX.adjust(scen, hist, frac=0.000000001)
        new_scen.load()
Example #5
def test_param_class():
    gr = Grouper(group="time.month")
    in_params = dict(
        anint=4, abool=True, astring="a string", adict={"key": "val"}, group=gr
    )
    obj = Parametrizable(**in_params)

    assert obj.parameters == in_params

    assert repr(obj).startswith(
        "Parametrizable(anint=4, abool=True, astring='a string', adict={'key': 'val'}, "
        "group=Grouper(dim='time',"
    )
Example #6
def test_interp_on_quantiles_monthly():
    t = xr.cftime_range("2000-01-01",
                        "2030-12-31",
                        freq="D",
                        calendar="noleap")
    ref = xr.DataArray(
        (-20 * np.cos(2 * np.pi * t.dayofyear / 365) +
         2 * np.random.random_sample((t.size, )) + 273.15 + 0.1 *
         (t - t[0]).days / 365),  # "warming" of 1K per decade,
        dims=("time", ),
        coords={"time": t},
        attrs={"units": "K"},
    )
    sim = xr.DataArray(
        (-18 * np.cos(2 * np.pi * t.dayofyear / 365) +
         2 * np.random.random_sample((t.size, )) + 273.15 + 0.11 *
         (t - t[0]).days / 365),  # "warming" of 1.1K per decade
        dims=("time", ),
        coords={"time": t},
        attrs={"units": "K"},
    )

    ref = ref.sel(time=slice(None, "2015-01-01"))
    hist = sim.sel(time=slice(None, "2015-01-01"))

    group = Grouper("time.month")
    quantiles = u.equally_spaced_nodes(15, eps=1e-6)
    ref_q = group.apply(nbu.quantile, ref, main_only=True, q=quantiles)
    hist_q = group.apply(nbu.quantile, hist, main_only=True, q=quantiles)
    af = u.get_correction(hist_q, ref_q, "+")

    for interp in ["nearest", "linear", "cubic"]:
        afi = u.interp_on_quantiles(sim,
                                    hist_q,
                                    af,
                                    group="time.month",
                                    method=interp,
                                    extrapolation="constant")
        assert afi.isnull().sum("time") == 0, interp
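For reference, the additive correction convention used above, computed by hand (a sketch of the kind="+" semantics of get_correction/apply_correction as used in Examples #6 and #9, not the library's exact code):

import numpy as np

hist_q = np.array([0.5, 1.8, 2.4])  # simulated quantiles
ref_q = np.array([1.0, 2.0, 3.0])   # reference quantiles

af = ref_q - hist_q  # additive factor, i.e. get_correction(hist_q, ref_q, "+")
np.testing.assert_allclose(hist_q + af, ref_q)  # applying it recovers the reference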
Example #7
    def test_simple(self, group, crd_dims, pts_dims):
        n = 15 * 365
        m = 2  # A dummy dimension to test vectorizing.
        ref_y = norm.rvs(loc=10, scale=1, size=(m, n))
        ref_x = norm.rvs(loc=3, scale=2, size=(m, n))
        sim_x = norm.rvs(loc=4, scale=2, size=(m, n))
        sim_y = sim_x + norm.rvs(loc=1, scale=1, size=(m, n))

        ref = xr.DataArray([ref_x, ref_y], dims=("lat", "lon", "time"))
        ref["time"] = xr.cftime_range("1990-01-01",
                                      periods=n,
                                      calendar="noleap")
        sim = xr.DataArray([sim_x, sim_y], dims=("lat", "lon", "time"))
        sim["time"] = ref["time"]

        PCA = PrincipalComponents(group=group,
                                  crd_dims=crd_dims,
                                  pts_dims=pts_dims)
        PCA.train(ref, sim)
        scen = PCA.adjust(sim)

        group = group if isinstance(group, Grouper) else Grouper("time")
        crds = crd_dims or ["lat", "lon"]
        pts = (pts_dims or []) + ["time"]

        vec = list({"lat", "lon"} - set(crds) - set(pts))
        refs = ref.stack(crd=crds)
        sims = sim.stack(crd=crds)
        scens = scen.stack(crd=crds)

        def _assert(ds):
            cov_ref = nancov(ds.ref.transpose("crd", "pt"))
            cov_sim = nancov(ds.sim.transpose("crd", "pt"))
            cov_scen = nancov(ds.scen.transpose("crd", "pt"))

            # PC adjustment makes the covariance of scen match the one of ref.
            np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
            with pytest.raises(AssertionError):
                np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

        def _group_assert(ds, dim):
            ds = ds.stack(pt=pts)
            if len(vec) == 1:
                for i in range(ds.sizes[vec[0]]):
                    _assert(ds.isel({vec[0]: i}))
            else:
                _assert(ds)
            return ds.unstack("pt")

        group.apply(_group_assert, {"ref": refs, "sim": sims, "scen": scens})
Example #8
def test_interp_on_quantiles(shape, group, method):
    group = Grouper(group)
    raw = np.random.random_sample(shape)  # [0, 1]
    t = pd.date_range("2000-01-01", periods=shape[0], freq="D")
    # obs : [9, 11]
    obs = xr.DataArray(raw * 2 + 9,
                       dims=("time", "lat", "lon")[:len(shape)],
                       coords={"time": t})
    # sim [9, 11.4] (x1.2 + 0.2)
    sim = xr.DataArray(raw * 2.4 + 9,
                       dims=("time", "lat", "lon")[:len(shape)],
                       coords={"time": t})
    # fut [9.02, 11.38] (x1.18 + 0.2) In order to have every point of fut inside the range of sim
    fut_raw = raw * 2.36 + 9.02
    fut_raw[np.array([100, 300, 500,
                      700])] = 1000  # Points outside the sim range will be NaN
    fut = xr.DataArray(fut_raw,
                       dims=("time", "lat", "lon")[:len(shape)],
                       coords={"time": t})

    q = np.linspace(0, 1, 11)
    xq = group.apply("quantile", sim, q=q).rename(quantile="quantiles")
    yq = group.apply("quantile", obs, q=q).rename(quantile="quantiles")

    fut_corr = u.interp_on_quantiles(
        fut, xq, yq, group=group,
        method=method).transpose(*("time", "lat", "lon")[:len(shape)])

    if method == "nearest":
        np.testing.assert_allclose(fut_corr.values, obs.values, rtol=0.3)
        assert fut_corr.isnull().sum() == 0
    else:
        np.testing.assert_allclose(fut_corr.values,
                                   obs.where(fut != 1000).values,
                                   rtol=2e-3)
        xr.testing.assert_equal(fut_corr.isnull(), fut == 1000)
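Note the asymmetry checked at the end: with method="nearest" every point of fut receives a value, while for the linear and cubic methods the points set to 1000 above, falling outside the range of sim, come back as NaN since no extrapolation is requested.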
Example #9
    def test_add_dim(self, series, mon_series):
        n = 10000
        u = np.random.rand(n, 4)

        xd = uniform(loc=2, scale=1)
        x = xd.ppf(u)

        hist = sim = series(x, "tas")
        ref = mon_series(apply_correction(x, 2, "+"), "tas")

        group = Grouper("time.month", add_dims=["lon"])

        scaling = Scaling.train(ref, hist, group=group, kind="+")
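        # Pooling "lon" through add_dims at training time removes it from the
        # trained dataset; adjusting then broadcasts the factors back over it.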
        assert "lon" not in scaling.ds
        p = scaling.adjust(sim)
        assert "lon" in p.dims
        np.testing.assert_array_almost_equal(p.transpose(*ref.dims), ref)
Example #10
def test_param_class():
    gr = Grouper(group="time.month")
    in_params = dict(
        anint=4, abool=True, astring="a string", adict={"key": "val"}, group=gr
    )
    obj = Parametrizable(**in_params)

    assert obj.parameters == in_params

    assert repr(obj).startswith(
        "Parametrizable(anint=4, abool=True, astring='a string', adict={'key': 'val'}, "
        "group=Grouper("
    )

    s = jsonpickle.encode(obj)
    obj2 = jsonpickle.decode(s)
    assert obj.parameters == obj2.parameters
Example #11
def test_adapt_freq(use_dask):
    time = pd.date_range("1990-01-01", "2020-12-31", freq="D")
    prvals = np.random.randint(0, 100, size=(time.size, 3))
    pr = xr.DataArray(
        prvals,
        coords={
            "time": time,
            "lat": [0, 1, 2]
        },
        dims=("time", "lat"),
        attrs={"units": "mm d-1"},
    )

    if use_dask:
        pr = pr.chunk({"lat": 1})
    group = Grouper("time.month")
    with xr.set_options(keep_attrs=True):
        prsim = xr.where(pr < 20, pr / 20, pr)
        prref = xr.where(pr < 10, pr / 20, pr)
    sim_ad, pth, dP0 = adapt_freq(prref, prsim, thresh="1 mm d-1", group=group)

    # Where the input is considered zero
    input_zeros = sim_ad.where(prsim <= 1)

    # The proportion of corrected values (time.size * 3 * 0.2 is the theoretical number of values under 1 in prsim)
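    # (prvals is uniform over [0, 100), so prsim < 1 wherever pr < 20, i.e. on
    # ~20% of points, while prref < 1 only where pr < 10; the expected dP0 is
    # thus (0.2 - 0.1) / 0.2 = 0.5.)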
    dP0_out = (input_zeros > 1).sum() / (time.size * 3 * 0.2)
    np.testing.assert_allclose(dP0_out, 0.5, atol=0.1)

    # Assert that corrected values were generated in the open range (1, 20 + tol)
    corrected = (input_zeros.where(input_zeros > 1).stack(
        flat=["lat", "time"]).reset_index("flat").dropna("flat"))
    assert ((corrected < 20.1) & (corrected > 1)).all()

    # Assert that non-corrected values are untouched.
    # A tolerance above the threshold is kept because of randomness; with
    # integer inputs, both masks select the same points (pr >= 21).
    xr.testing.assert_equal(
        sim_ad.where(prsim > 20.1),
        prsim.where(prsim > 20.5).transpose("lat", "time"),
    )
    # Assert that Pth and dP0 are approx the good values
    np.testing.assert_allclose(pth, 20, rtol=0.05)
    np.testing.assert_allclose(dP0, 0.5, atol=0.14)
    assert sim_ad.units == "mm d-1"
    assert sim_ad.attrs["references"].startswith("Themeßl")
    assert pth.units == "mm d-1"
Example #12
    def test_dask_julia(self):

        dsim = open_dataset("sdba/CanESM2_1950-2100.nc").chunk()
        dref = open_dataset("sdba/ahccd_1950-2013.nc").chunk()
        dexp = open_dataset("sdba/adjusted_external.nc")

        ref = convert_units_to(dref.sel(time=slice("1950", "2009")).pr, "mm/d")
        hist = convert_units_to(
            dsim.sel(time=slice("1950", "2009")).pr, "mm/d")

        quantiles = np.linspace(0.01, 0.99, num=50)

        EQM = EmpiricalQuantileMapping(group=Grouper("time.dayofyear",
                                                     window=31),
                                       nquantiles=quantiles)

        with xr.set_options(keep_attrs=True):
            ref = ref + uniform_noise_like(ref, low=1e-6, high=1e-3)
            hist = hist + uniform_noise_like(hist, low=1e-6, high=1e-3)

        EQM.train(ref, hist)
        scen = EQM.adjust(hist, interp="linear", extrapolation="constant")

        EX = ExtremeValues(cluster_thresh="1 mm/day", q_thresh=0.97)
        EX.train(ref, hist)
        new_scen = EX.adjust(scen, hist, frac=0.000000001)

        new_scen.load()

        exp_scen = dexp.extreme_values_julia
        xr.testing.assert_allclose(
            new_scen.where(new_scen != scen).transpose("time", "location"),
            exp_scen.where(new_scen != scen).transpose("time", "location"),
            atol=0.005,
            rtol=2e-3,
        )
Example #13
def test_grouper_apply(tas_series, use_dask, group, n):
    tas1 = tas_series(np.arange(366), start="2000-01-01")
    tas0 = tas_series(np.zeros(366), start="2000-01-01")
    tas = xr.concat((tas1, tas0), dim="lat")

    grouper = Grouper(group)
    if not group.startswith("time"):
        tas = tas.rename(time=grouper.dim)
        tas1 = tas1.rename(time=grouper.dim)
        tas0 = tas0.rename(time=grouper.dim)

    if use_dask:
        tas = tas.chunk({"lat": 1, grouper.dim: -1})
        tas0 = tas0.chunk({grouper.dim: -1})
        tas1 = tas1.chunk({grouper.dim: -1})

    # Normal monthly mean
    out_mean = grouper.apply("mean", tas)
    if grouper.prop:
        exp = tas.groupby(group).mean()
    else:
        exp = tas.mean(dim=grouper.dim)
    np.testing.assert_array_equal(out_mean, exp)

    # With additional dimension included
    grouper = Grouper(group, add_dims=["lat"])
    out = grouper.apply("mean", tas)
    assert out.ndim == int(grouper.prop is not None)
    np.testing.assert_array_equal(out, exp.mean("lat"))
    assert out.attrs["group"] == group
    assert out.attrs["group_compute_dims"] == [grouper.dim, "lat"]
    assert out.attrs["group_window"] == 1

    # Additional dimension, but main_only
    out = grouper.apply("mean", tas, main_only=True)
    np.testing.assert_array_equal(out, out_mean)

    # With window
    win_grouper = Grouper(group, window=5)
    out = win_grouper.apply("mean", tas)
    rolld = tas.rolling({
        win_grouper.dim: 5
    }, center=True).construct(window_dim="window")
    if grouper.prop:
        exp = rolld.groupby(group).mean(dim=[win_grouper.dim, "window"])
    else:
        exp = rolld.mean(dim=[grouper.dim, "window"])
    np.testing.assert_array_equal(out, exp)

    # With a custom function whose output is not reduced (same shape as input)
    grouper = Grouper(group)

    def normalize(grp, dim):
        return grp / grp.mean(dim=dim)

    normed = grouper.apply(normalize, tas)
    assert normed.shape == tas.shape
    if use_dask:
        assert normed.chunks == ((1, 1), (366, ))

    # With window + a non-reducing function
    out = win_grouper.apply(normalize, tas)
    assert out.shape == tas.shape

    # Mixed output
    def mixed_reduce(grdds, dim=None):
        tas1 = grdds.tas1.mean(dim=dim)
        tas0 = grdds.tas0 / grdds.tas0.mean(dim=dim)
        tas1.attrs["_group_apply_reshape"] = True
        return xr.Dataset(data_vars={"tas1_mean": tas1, "norm_tas0": tas0})

    out = grouper.apply(mixed_reduce, {"tas1": tas1, "tas0": tas0})
    if grouper.prop:
        assert grouper.prop not in out.norm_tas0.dims
        assert grouper.prop in out.tas1_mean.dims

    if use_dask:
        assert out.tas1_mean.chunks == (((n, ), ) if grouper.prop else tuple())
        assert out.norm_tas0.chunks == ((366, ), )

    # Mixed input
    if grouper.prop:

        def normalize_from_precomputed(grpds, dim=None):
            return (grpds.tas / grpds.tas1_mean).mean(dim=dim)

        out = grouper.apply(normalize_from_precomputed, {
            "tas": tas,
            "tas1_mean": out.tas1_mean
        }).isel(lat=0)
        exp = normed.groupby(group).mean().isel(lat=0)
        assert grouper.prop in out.dims
        np.testing.assert_array_equal(out, exp)
Example #14
def test_grouper_get_index(tas_series, group, interp, val90):
    tas = tas_series(np.ones(366), start="2000-01-01")
    grouper = Grouper(group, interp=interp)
    indx = grouper.get_index(tas)
    # 90 is March 31st
    assert indx[90] == val90
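A concrete instance of the indexing checked above (a minimal sketch without the interp parametrization, assuming Grouper from xclim.sdba.base):

import numpy as np
import pandas as pd
import xarray as xr
from xclim.sdba.base import Grouper

time = pd.date_range("2000-01-01", periods=366, freq="D")
tas = xr.DataArray(np.ones(366), coords={"time": time}, dims="time")

idx = Grouper("time.month").get_index(tas)
assert idx[90] == 3  # index 90 is March 31st in the leap year 2000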
Example #15
def test_raise_on_multiple_chunks(tas_series):
    ref = tas_series(np.arange(730).astype(float)).chunk({"time": 365})
    with pytest.raises(ValueError):
        EmpiricalQuantileMapping.train(ref, ref, group=Grouper("time.month"))
Example #16
class TestPrincipalComponents:
    @pytest.mark.parametrize(
        "group", (Grouper("time.month"), Grouper("time", add_dims=["lon"])))
    def test_simple(self, group):
        n = 15 * 365
        m = 2  # A dummy dimension to test vectorizing.
        ref_y = norm.rvs(loc=10, scale=1, size=(m, n))
        ref_x = norm.rvs(loc=3, scale=2, size=(m, n))
        sim_x = norm.rvs(loc=4, scale=2, size=(m, n))
        sim_y = sim_x + norm.rvs(loc=1, scale=1, size=(m, n))

        ref = xr.DataArray([ref_x, ref_y],
                           dims=("lat", "lon", "time"),
                           attrs={"units": "degC"})
        ref["time"] = xr.cftime_range("1990-01-01",
                                      periods=n,
                                      calendar="noleap")
        sim = xr.DataArray([sim_x, sim_y],
                           dims=("lat", "lon", "time"),
                           attrs={"units": "degC"})
        sim["time"] = ref["time"]

        PCA = PrincipalComponents.train(ref, sim, group=group, crd_dim="lat")
        scen = PCA.adjust(sim)

        def _assert(ds):
            cov_ref = nancov(ds.ref.transpose("lat", "pt"))
            cov_sim = nancov(ds.sim.transpose("lat", "pt"))
            cov_scen = nancov(ds.scen.transpose("lat", "pt"))

            # PC adjustment makes the covariance of scen match the one of ref.
            np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
            with pytest.raises(AssertionError):
                np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

        def _group_assert(ds, dim):
            if "lon" not in dim:
                for lon in ds.lon:
                    _assert(ds.sel(lon=lon).stack(pt=dim))
            else:
                _assert(ds.stack(pt=dim))
            return ds

        group.apply(_group_assert, {"ref": ref, "sim": sim, "scen": scen})

    @pytest.mark.parametrize("use_dask", [True, False])
    @pytest.mark.parametrize("pcorient", ["full", "simple"])
    def test_real_data(self, atmosds, use_dask, pcorient):
        ref = stack_variables(
            xr.Dataset({
                "tasmax": atmosds.tasmax,
                "tasmin": atmosds.tasmin,
                "tas": atmosds.tas
            })).isel(location=3)
        hist = stack_variables(
            xr.Dataset({
                "tasmax": 1.001 * atmosds.tasmax,
                "tasmin": atmosds.tasmin - 0.25,
                "tas": atmosds.tas + 1,
            })).isel(location=3)
        with xr.set_options(keep_attrs=True):
            sim = hist + 5
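            # Shift sim ten years ahead; casting calendar years ("Y") to
            # nanoseconds relies on astype's default unsafe casting.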
            sim["time"] = sim.time + np.timedelta64(10, "Y").astype("<m8[ns]")

        if use_dask:
            ref = ref.chunk()
            hist = hist.chunk()
            sim = sim.chunk()

        PCA = PrincipalComponents.train(ref,
                                        hist,
                                        crd_dim="multivar",
                                        best_orientation=pcorient)
        scen = PCA.adjust(sim)

        def dist(ref, sim):
            """Pointwise distance between ref and sim in the PC space."""
            sim["time"] = ref.time
            return np.sqrt(((ref - sim)**2).sum("multivar"))

        # Most points are closer after transform.
        assert (dist(ref, sim) < dist(ref, scen)).mean() < 0.01

        ref = unstack_variables(ref)
        scen = unstack_variables(scen)
        # "Error" is very small
        assert (ref - scen).mean().tasmin < 5e-3
Example #17
def test_raise_on_multiple_chunks(tas_series):
    ref = tas_series(np.arange(730)).chunk({"time": 365})
    Adj = BaseAdjustment(group=Grouper("time.month"))
    with pytest.raises(ValueError):
        Adj.train(ref, ref)
Example #18
class TestPrincipalComponents:
    @pytest.mark.parametrize(
        "group,crd_dims,pts_dims",
        (
            ["time", ["lat"], None],  # Lon as vectorizing dim
            ["time", None, None],  # Lon as second coord dims
            ["time", ["lat"], ["lon"]],  # Lon as a Points dim
            # Testing time grouping, vectorization on lon
            [Grouper("time.month"), ["lat"], None],
        ),
    )
    def test_simple(self, group, crd_dims, pts_dims):
        n = 15 * 365
        m = 2  # A dummy dimension to test vectorizing.
        ref_y = norm.rvs(loc=10, scale=1, size=(m, n))
        ref_x = norm.rvs(loc=3, scale=2, size=(m, n))
        sim_x = norm.rvs(loc=4, scale=2, size=(m, n))
        sim_y = sim_x + norm.rvs(loc=1, scale=1, size=(m, n))

        ref = xr.DataArray([ref_x, ref_y], dims=("lat", "lon", "time"))
        ref["time"] = xr.cftime_range("1990-01-01",
                                      periods=n,
                                      calendar="noleap")
        sim = xr.DataArray([sim_x, sim_y], dims=("lat", "lon", "time"))
        sim["time"] = ref["time"]

        PCA = PrincipalComponents(group=group,
                                  crd_dims=crd_dims,
                                  pts_dims=pts_dims)
        PCA.train(ref, sim)
        scen = PCA.adjust(sim)

        group = group if isinstance(group, Grouper) else Grouper("time")
        crds = crd_dims or ["lat", "lon"]
        pts = (pts_dims or []) + ["time"]

        vec = list({"lat", "lon"} - set(crds) - set(pts))
        refs = ref.stack(crd=crds)
        sims = sim.stack(crd=crds)
        scens = scen.stack(crd=crds)

        def _assert(ds):
            cov_ref = nancov(ds.ref.transpose("crd", "pt"))
            cov_sim = nancov(ds.sim.transpose("crd", "pt"))
            cov_scen = nancov(ds.scen.transpose("crd", "pt"))

            # PC adjustment makes the covariance of scen match the one of ref.
            np.testing.assert_allclose(cov_ref - cov_scen, 0, atol=1e-6)
            with pytest.raises(AssertionError):
                np.testing.assert_allclose(cov_ref - cov_sim, 0, atol=1e-6)

        def _group_assert(ds, dim):
            ds = ds.stack(pt=pts)
            if len(vec) == 1:
                for i in range(ds.sizes[vec[0]]):
                    _assert(ds.isel({vec[0]: i}))
            else:
                _assert(ds)
            return ds.unstack("pt")

        group.apply(_group_assert, {"ref": refs, "sim": sims, "scen": scens})

    @pytest.mark.parametrize(
        "group",
        [Grouper("time"), Grouper("time.month", window=11)])
    def test_real_data(self, group):
        ds = xr.tutorial.open_dataset("air_temperature")

        ref = ds.air.isel(lat=21, lon=[40, 52]).drop_vars(["lon", "lat"])
        sim = ds.air.isel(lat=18, lon=[17, 35]).drop_vars(["lon", "lat"])

        PCA = PrincipalComponents(group=group)
        PCA.train(ref, sim)
        scen = PCA.adjust(sim)

        def dist(ref, sim):
            """Pointwise distance between ref and sim in the PC space."""
            return np.sqrt(((ref - sim)**2).sum("lon"))

        # Most points are closer after transform.
        assert (dist(ref, sim) < dist(ref, scen)).mean() < 0.05

        # "Error" is very small
        assert (ref - scen).mean() < 5e-3