Example #1
def test_wps_empirical_quantile_mapping(netcdf_sdba_ds, kind, name):
    client = client_for(
        Service(processes=[EmpiricalQuantileMappingProcess()],
                cfgfiles=CFG_FILE))

    sdba_ds, u = netcdf_sdba_ds

    datainputs = (
        f"ref=files@xlink:href=file://{sdba_ds[f'qdm_{name}_ref']};"
        f"hist=files@xlink:href=file://{sdba_ds[f'qdm_{name}_hist']};"
        f"sim=files@xlink:href=file://{sdba_ds[f'qdm_{name}_hist']};"
        "group=time;"
        f"kind={quote_plus(kind)};"
        "nquantiles=50;"
        "interp=linear;")

    resp = client.get(
        f"?service=WPS&request=Execute&version=1.0.0&identifier=empirical_quantile_mapping&datainputs={datainputs}"
    )
    print(resp.response)
    assert_response_success(resp)
    out = get_output(resp.xml)
    p = xr.open_dataset(out["output"][7:])[name]  # strip the "file://" prefix

    uc = convert_calendar(u, "noleap")
    # Mask out the distribution tails; compare only mid-range values.
    middle = ((uc > 1e-2) * (uc < 0.99)).data

    ref = xr.open_dataset(sdba_ds[f"qdm_{name}_ref"])[name]
    refc = convert_calendar(ref, "noleap")
    np.testing.assert_allclose(p[middle], refc[middle], rtol=0.03)
Example #2
    def test_bias_correction(self):

        ds_fut_sub = xr.open_dataset(
            get_local_testdata(
                "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp85_nex-gddp_2070-2071_subset.nc",
            )
        )
        ds_ref_sub = xr.open_dataset(
            get_local_testdata(
                "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc",
            )
        )
        ds_ref_sub = convert_calendar(ds_ref_sub, "noleap")

        ds_his_sub = xr.open_dataset(
            get_local_testdata("nrcan/NRCAN_1971-1972_subset.nc")
        )
        ds_his_sub = convert_calendar(ds_his_sub, "noleap")
        group = xclim.sdba.Grouper("time.month")
        # Train the model to find the correction factors
        Adj = sdba.DetrendedQuantileMapping.train(
            ref=ds_ref_sub["pr"],
            hist=ds_his_sub["pr"],
            nquantiles=50,
            kind="+",
            group=group,
        )

        # Apply the factors to the future data to bias-correct
        Adj.adjust(ds_fut_sub["pr"], interp="linear")

        # Repeat for temperature max
        Adj = sdba.DetrendedQuantileMapping.train(
            ref=ds_ref_sub["tasmax"],
            hist=ds_his_sub["tasmax"],
            nquantiles=50,
            kind="+",
            group=group,
        )

        # Apply the factors to the future data to bias-correct
        Adj.adjust(ds_fut_sub["tasmax"], interp="linear")

        # Repeat for tasmin
        Adj = sdba.DetrendedQuantileMapping.train(
            ref=ds_ref_sub["tasmin"],
            hist=ds_his_sub["tasmin"],
            nquantiles=50,
            kind="+",
            group=group,
        )

        Adj.adjust(ds_fut_sub["tasmin"], interp="linear")
Example #3
def test_convert_calendar_360_days(source, target, freq, align_on):
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(np.linspace(0, 1, src.size),
                          dims=("time", ),
                          coords={"time": src})

    conv = convert_calendar(da_src, target, align_on=align_on)

    assert get_calendar(conv) == target

    if align_on == "date":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30],
        )
    elif target == "360_day":
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29],
        )
    else:
        np.testing.assert_array_equal(
            conv.time.resample(time="M").last().dt.day,
            [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31],
        )
    if source == "360_day" and align_on == "year":
        assert conv.size == (360 if freq == "D" else 360 * 4)
    else:
        assert conv.size == (359 if freq == "D" else 359 * 4)
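The branching above encodes how the alignment mode changes the result; a minimal contrast for the 2004 case, assuming xclim's convert_calendar (names illustrative):

    import numpy as np
    import xarray as xr
    from xclim.core.calendar import convert_calendar

    times_360 = xr.cftime_range("2004-01-01", "2004-12-30", freq="D", calendar="360_day")
    da = xr.DataArray(np.arange(360), dims=("time",), coords={"time": times_360})

    # "year": the 360 source days are spread over the target year, none dropped.
    assert convert_calendar(da, "standard", align_on="year").size == 360
    # "date": days are matched one-to-one by date; dates absent from the target
    # calendar (here Feb 30) are dropped.
    assert convert_calendar(da, "standard", align_on="date").size == 359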
Example #4
def test_convert_calendar(source, target, target_as_str, freq):
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-31", freq=freq, calendar=source),
        dims=("time", ),
        name="time",
    )
    da_src = xr.DataArray(np.linspace(0, 1, src.size),
                          dims=("time", ),
                          coords={"time": src})
    tgt = xr.DataArray(
        date_range("2004-01-01", "2004-12-31", freq=freq, calendar=target),
        dims=("time", ),
        name="time",
    )

    conv = convert_calendar(da_src, target if target_as_str else tgt)

    assert get_calendar(conv) == target

    if target_as_str and max_doy[source] < max_doy[target]:
        assert conv.size == src.size
    elif not target_as_str:
        assert conv.size == tgt.size

        assert conv.isnull().sum() == max(max_doy[target] - max_doy[source], 0)
Example #5
    def test_season(self, tasmin_series, calendar):
        ts = tasmin_series(np.zeros(360))
        ts = convert_calendar(ts, calendar, missing=0, align_on="date")

        miss = missing.missing_any(ts, freq="YS", season="MAM")
        np.testing.assert_equal(miss, [False])

        miss = missing.missing_any(ts, freq="YS", season="JJA")
        np.testing.assert_array_equal(miss, [True, True])

        miss = missing.missing_any(ts, freq="YS", season="SON")
        np.testing.assert_equal(miss, [False])
Example #6
def xclim_remove_leapdays(ds):
    """Remove leap days by converting the dataset to a "noleap" calendar.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset with a time coordinate, on any calendar.

    Returns
    -------
    xr.Dataset
        The dataset on the "noleap" calendar; Feb 29 entries are dropped.
    """
    ds_noleap = convert_calendar(ds, target="noleap")
    return ds_noleap
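A minimal check of the behaviour Example #6 relies on, assuming xclim's convert_calendar drops dates absent from the target calendar (names illustrative):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from xclim.core.calendar import convert_calendar

    # 2004 is a leap year: 366 daily steps on the standard calendar.
    da = xr.DataArray(
        np.arange(366),
        dims=("time",),
        coords={"time": pd.date_range("2004-01-01", "2004-12-31", freq="D")},
    )
    assert convert_calendar(da, target="noleap").size == 365  # Feb 29 was dropped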
Example #7
def test_convert_calendar_360_days_random():
    da_std = xr.DataArray(
        np.linspace(0, 1, 366 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default"
            )
        },
    )
    da_360 = xr.DataArray(
        np.linspace(0, 1, 360 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day"
            )
        },
    )

    conv = convert_calendar(da_std, "360_day", align_on="random")
    assert get_calendar(conv) == "360_day"
    assert conv.size == 720
    # align_on="random" picks different days on each call, so two conversions should differ.
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    conv = convert_calendar(da_360, "default", align_on="random")
    assert get_calendar(conv) == "default"
    assert conv.size == 720
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "default", align_on="random")
    assert (conv2 != conv).any()

    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan)
    # Keep only the inserted (NaN) entries; with 12H data each inserted day appears twice.
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[::2]
    # Each of the five inserted days must fall within its own block of the year.
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))
Example #8
def test_convert_calendar_missing(source, target, freq):
    src = xr.DataArray(
        date_range(
            "2004-01-01",
            "2004-12-31" if source != "360_day" else "2004-12-30",
            freq=freq,
            calendar=source,
        ),
        dims=("time",),
        name="time",
    )
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
    )
    out = convert_calendar(da_src, target, missing=np.nan, align_on="date")
    assert xr.infer_freq(out.time) == freq
    if source == "360_day":
        assert out.time[-1].dt.day == 31
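A concrete instance of the `missing` behaviour tested above, assuming xclim's convert_calendar (names illustrative): converting a "noleap" year to the standard calendar with missing=np.nan re-adds Feb 29 as NaN:

    import numpy as np
    import xarray as xr
    from xclim.core.calendar import convert_calendar

    noleap_time = xr.cftime_range("2004-01-01", "2004-12-31", freq="D", calendar="noleap")
    da = xr.DataArray(np.arange(noleap_time.size), dims=("time",), coords={"time": noleap_time})

    out = convert_calendar(da, "standard", missing=np.nan)
    assert out.size == 366                # Feb 29 was inserted...
    assert int(out.isnull().sum()) == 1   # ...and filled with NaN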
Example #9
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Aggregate the data over a period between start and end dates and apply the operator on the aggregated data.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End (as day-of-year) dates for the aggregation periods.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Operator.
    freq : str, optional
      Resampling frequency. If None, it is inferred from the time coordinates of `start` and `end`.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data between the start and end dates. If the end date is before the start date, returns np.nan.
      If there is no start and/or end date, returns np.nan.
    """
    def _get_days(_bound, _group, _base_time):
        """Get bound in number of days since base_time. Bound can be a days_since array or a DayOfYearStr."""
        if isinstance(_bound, str):
            b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
            if not len(b_i):
                return None
            return (_group.time.isel(time=b_i[0]) -
                    _group.time.isel(time=0)).dt.days
        if _base_time in _bound.time:
            return _bound.sel(time=_base_time)
        return None

    if freq is None:
        frequencies = []
        for bound in [start, end]:
            try:
                frequencies.append(xr.infer_freq(bound.time))
            except AttributeError:
                frequencies.append(None)

        good_freq = set(frequencies) - {None}

        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually.")
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")

    if not isinstance(start, str):
        start = convert_calendar(start, cal)
        start.attrs["calendar"] = cal
        start = doy_to_days_since(start)
    if not isinstance(end, str):
        end = convert_calendar(end, cal)
        end.attrs["calendar"] = cal
        end = doy_to_days_since(end)

    out = list()
    for base_time, indexes in data.resample(time=freq).groups.items():
        # get group slice
        group = data.isel(time=indexes)

        start_d = _get_days(start, group, base_time)
        end_d = _get_days(end, group, base_time)

        # Aggregate only when both bounds are defined for this group.
        if start_d is not None and end_d is not None:

            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())),
                np.nan, res)
            # Re-add the time dimension with the period's base time.
            res = res.expand_dims(time=[base_time])
            out.append(res)
        else:
            # Build an all-NaN array of the right shape, stamped with the period's base time.
            res = (group.isel(time=0) * np.nan).expand_dims(time=[base_time])
            out.append(res)

    out = xr.concat(out, dim="time")
    return out
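A short usage sketch for aggregate_between_dates (the `pr` variable and the bounds are hypothetical): day-of-year bounds use the "MM-DD" form, and freq="YS" yields one aggregate per year:

    # `pr` is assumed to be a daily precipitation DataArray with a time coordinate.
    season_sum = aggregate_between_dates(pr, start="04-01", end="09-30", op="sum", freq="YS")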
Example #10
def xclim_convert_360day_calendar_interpolate(
    ds,
    target="noleap",
    align_on="random",
    interpolation="linear",
    return_indices=False,
    ignore_nans=True,
):
    """
    Parameters
    ----------
    ds : xr.Dataset
    target : str
        see xclim.core.calendar.convert_calendar
    align_on : str
        Determines which days of the target calendar get missing or interpolated values, if any:
        either the same calendar days every year, or days that change randomly from year to year.
        See xclim.core.calendar.convert_calendar.
    interpolation : None or str
        Passed to xr.Dataset.interpolate_na if not None.
    return_indices : bool
        On top of the converted dataset, also return a boolean mask marking the values that were
        inserted by the conversion. This assumes there were no NaNs before conversion.
    ignore_nans : bool
        If False, an assertion error is raised when any variable of `ds` contains NaNs;
        otherwise NaNs are ignored.

    Returns
    -------
    tuple(xr.Dataset, xr.Dataset) if return_indices is True, xr.Dataset otherwise.

    Notes
    -----
    The default values of `target`, `align_on` and `interpolation` make our default approach equivalent
    to the LOCA calendar conversion [1] for going from 360-day to "noleap" calendars. In that approach,
    five calendar days are added to each year ("noleap" years always have 365 days), but the inserted
    days are not necessarily the ones whose values are interpolated: the days to interpolate are chosen
    randomly within each block of 72 days, so they change from year to year.

    [1] http://loca.ucsd.edu/loca-calendar/
    """

    if get_calendar(ds) != "360_day":
        raise ValueError(
            "Tried to use 360-day calendar conversion on a dataset whose calendar is not '360_day'."
        )

    if not ignore_nans:
        for var in ds:
            assert (
                ds[var].isnull().sum() == 0
            ), "360-day calendar conversion with interpolation: the input contains NaNs!"

    ds_converted = convert_calendar(
        ds, target=target, align_on=align_on, missing=np.nan
    )

    if interpolation:
        ds_out = ds_converted.interpolate_na("time", interpolation)
    else:
        ds_out = ds_converted

    if return_indices:
        return (ds_out, ds_converted.isnull())
    else:
        return ds_out
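A usage sketch (`ds_360` is hypothetical): convert a 360-day dataset and keep a mask of the interpolated positions:

    # `ds_360` is assumed to be an xr.Dataset on a "360_day" calendar without NaNs.
    ds_noleap, was_inserted = xclim_convert_360day_calendar_interpolate(
        ds_360,
        target="noleap",
        align_on="random",
        interpolation="linear",
        return_indices=True,
        ignore_nans=False,
    )
    # `was_inserted` is True exactly where values were filled in by the conversion.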
Example #11
def _ens_align_datasets(
    datasets: List[Union[xr.Dataset, Path, str, List[Union[Path, str]]]],
    mf_flag: bool = False,
    resample_freq: Optional[str] = None,
    calendar: str = "default",
    **xr_kwargs,
) -> List[xr.Dataset]:
    """Create a list of aligned xarray Datasets for ensemble Dataset creation.

    Parameters
    ----------
    datasets : List[Union[xr.Dataset, xr.DataArray, Path, str, List[Union[Path, str]]]]
      List of netCDF file paths or xarray Dataset/DataArray objects. If mf_flag is True, `datasets` should be a list of lists where
      each sublist contains the input .nc files of one xarray multi-file Dataset. DataArrays must have a name so they can be converted to Datasets.
    mf_flag : bool
      If True climate simulations are treated as xarray multifile datasets before concatenation.
      Only applicable when datasets is a sequence of file paths.
    resample_freq : Optional[str]
      If the members of the ensemble share the same frequency but not the same offset, they cannot be properly aligned.
      If resample_freq is set, the time coordinate of each member is modified to fit this frequency.
    calendar : str
      The calendar of the time coordinate of the ensemble. For conversions involving '360_day', the align_on='date' option is used.
      See `xclim.core.calendar.convert_calendar`. 'default' is the standard calendar using np.datetime64 objects.
    xr_kwargs :
      Any keyword arguments to be given to xarray when opening the files.

    Returns
    -------
    List[xr.Dataset]
    """
    xr_kwargs.setdefault("chunks", "auto")
    xr_kwargs.setdefault("decode_times", False)

    ds_all = []
    for i, n in enumerate(datasets):
        logging.info(f"Accessing {n} of {len(datasets)}")
        if mf_flag:
            ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
        else:
            if isinstance(n, xr.Dataset):
                ds = n
            elif isinstance(n, xr.DataArray):
                ds = n.to_dataset()
            else:
                ds = xr.open_dataset(n, **xr_kwargs)

        if "time" in ds.coords:
            time = xr.decode_cf(ds).time

            if resample_freq is not None:
                counts = time.resample(time=resample_freq).count()
                if any(counts > 1):
                    raise ValueError(
                        f"Alignment of dataset #{i:02d} failed : its time axis cannot be resampled to freq {resample_freq}."
                    )
                time = counts.time

            ds["time"] = time

            cal = get_calendar(time)
            ds = convert_calendar(
                ds,
                calendar,
                align_on="date" if "360_day" in [cal, calendar] else None,
            )

        ds_all.append(ds)

    return ds_all
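A usage sketch (file names hypothetical): align two members onto a common "noleap" calendar, then concatenate them along a new dimension:

    aligned = _ens_align_datasets(
        ["member1.nc", "member2.nc"], resample_freq="D", calendar="noleap"
    )
    ens = xr.concat(aligned, dim="realization")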
Example #12
    def _handler(self, request, response):
        def _log(message, percentage):
            write_log(self, message, subtask_percentage=percentage)

        res = {}
        group = {}
        train = {}
        adj = {}

        variable = request.inputs.pop(wpsio.variable.identifier, None)

        for key in request.inputs:
            if key in resources:
                ds = try_opendap(request.inputs[key][0])
                name = variable or list(ds.data_vars)[0]

                # Force calendar to noleap
                res[key] = convert_calendar(ds[name], "noleap")

            elif key in group_args:
                group[key] = single_input_or_none(request.inputs, key)

            elif key in adjust_args:
                adj[key] = single_input_or_none(request.inputs, key)

            else:
                train[key] = single_input_or_none(request.inputs, key)

        _log("Successfully read inputs from request.", 1)

        group = xclim.sdba.Grouper(**group)
        _log("Grouper object created.", 2)

        bc = xclim.sdba.EmpiricalQuantileMapping.train(res["ref"],
                                                       res["hist"],
                                                       **train,
                                                       group=group)

        _log("Training object created.", 3)

        # `name` is the variable name determined while reading the resource inputs above.
        out = bc.adjust(res["sim"], **adj).to_dataset(name=name)
        _log("Adjustment object created.", 5)

        filename = valid_filename(
            single_input_or_none(request.inputs, "output_name")
            or "bias_corrected")
        out_fn = Path(self.workdir) / f"{filename}.nc"
        with FinchProgressBar(
                logging_function=_log,
                start_percentage=5,
                end_percentage=98,
                width=15,
                dt=1,
        ):
            dataset_to_netcdf(out, out_fn)

        metalink = make_metalink_output(self, [out_fn])

        response.outputs["output"].file = str(out_fn)
        response.outputs["output_log"].file = str(log_file_path(self))
        response.outputs["ref"].data = metalink.xml

        write_log(self,
                  "Processing finished successfully",
                  process_step="done")

        return response