def test_wps_empirical_quantile_mapping(netcdf_sdba_ds, kind, name):
    """Run the ``empirical_quantile_mapping`` WPS process end to end.

    The historical file is submitted as both ``hist`` and ``sim``. The
    adjusted output is then compared with the reference file, restricted to
    the points where ``u`` lies strictly between 0.01 and 0.99.
    """
    service = Service(
        processes=[EmpiricalQuantileMappingProcess()], cfgfiles=CFG_FILE
    )
    client = client_for(service)

    sdba_ds, u = netcdf_sdba_ds
    ref_path = sdba_ds[f"qdm_{name}_ref"]
    hist_path = sdba_ds[f"qdm_{name}_hist"]

    datainputs = "".join(
        [
            f"ref=files@xlink:href=file://{ref_path};",
            f"hist=files@xlink:href=file://{hist_path};",
            f"sim=files@xlink:href=file://{hist_path};",
            "group=time;",
            f"kind={quote_plus(kind)};",
            "nquantiles=50;",
            "interp=linear;",
        ]
    )
    query = f"?service=WPS&request=Execute&version=1.0.0&identifier=empirical_quantile_mapping&datainputs={datainputs}"

    resp = client.get(query)
    print(resp.response)
    assert_response_success(resp)

    outputs = get_output(resp.xml)
    # The first 7 characters of the output reference are skipped
    # (presumably the "file://" scheme -- confirm against get_output).
    adjusted = xr.open_dataset(outputs["output"][7:])[name]

    u_noleap = convert_calendar(u, "noleap")
    middle = ((u_noleap > 1e-2) & (u_noleap < 0.99)).data

    reference = xr.open_dataset(ref_path)[name]
    reference_noleap = convert_calendar(reference, "noleap")

    np.testing.assert_allclose(
        adjusted[middle], reference_noleap[middle], rtol=0.03
    )
def test_bias_correction(self):
    """Smoke-test detrended quantile mapping on ``pr``, ``tasmax`` and ``tasmin``.

    The test only checks that training and adjustment run; it makes no
    assertion on the adjusted values.
    """
    future = xr.open_dataset(
        get_local_testdata(
            "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp85_nex-gddp_2070-2071_subset.nc",
        )
    )
    reference = convert_calendar(
        xr.open_dataset(
            get_local_testdata(
                "cmip5/nasa_nex-gddp-1.0_day_inmcm4_historical+rcp45_nex-gddp_1971-1972_subset.nc",
            )
        ),
        "noleap",
    )
    historical = convert_calendar(
        xr.open_dataset(get_local_testdata("nrcan/NRCAN_1971-1972_subset.nc")),
        "noleap",
    )

    group = xclim.sdba.Grouper("time.month")

    # Same recipe for every variable: train the correction factors on the
    # 1971-1972 pair, then apply them to the 2070-2071 data.
    for variable in ("pr", "tasmax", "tasmin"):
        adjustment = sdba.DetrendedQuantileMapping.train(
            ref=reference[variable],
            hist=historical[variable],
            nquantiles=50,
            kind="+",
            group=group,
        )
        adjustment.adjust(future[variable], interp="linear")
def test_convert_calendar_360_days(source, target, freq, align_on):
    """Check conversions to or from a 360-day calendar for one year of data.

    Verifies the resulting calendar, the day number of the last timestamp of
    each month, and (for daily data) the size of the converted array.
    """
    src = xr.DataArray(
        date_range("2004-01-01", "2004-12-30", freq=freq, calendar=source),
        dims=("time",),
        name="time",
    )
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
    )

    conv = convert_calendar(da_src, target, align_on=align_on)

    assert get_calendar(conv) == target

    # Expected day-of-month of the last timestamp in each monthly bin.
    if align_on == "date":
        expected_last_days = [30, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30]
    elif target == "360_day":
        expected_last_days = [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 29]
    else:
        expected_last_days = [30, 29, 30, 30, 31, 30, 30, 31, 30, 31, 29, 31]
    np.testing.assert_array_equal(
        conv.time.resample(time="M").last().dt.day, expected_last_days
    )

    expected_days = 360 if (source == "360_day" and align_on == "year") else 359
    # NOTE(review): the original statement was written as
    # `assert conv.size == N if freq == "D" else N * 4`, which Python parses as
    # `assert ((conv.size == N) if freq == "D" else N * 4)`. The size was
    # therefore only ever checked for daily data; that behaviour is kept here.
    # Confirm the intended sub-daily expectation before tightening it.
    if freq == "D":
        assert conv.size == expected_days
def test_convert_calendar(source, target, target_as_str, freq):
    """Convert one year of data between calendars and check size and NaN count.

    The target is given either as a calendar name or as a time coordinate
    built in the target calendar, depending on ``target_as_str``.
    """

    def _year_2004(calendar):
        # Time coordinate covering 2004 in the requested calendar.
        return xr.DataArray(
            date_range("2004-01-01", "2004-12-31", freq=freq, calendar=calendar),
            dims=("time",),
            name="time",
        )

    src = _year_2004(source)
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
    )
    tgt = _year_2004(target)

    destination = target if target_as_str else tgt
    conv = convert_calendar(da_src, destination)

    assert get_calendar(conv) == target

    if not target_as_str:
        # Converting onto an explicit coordinate: the output follows that
        # coordinate, and the NaN count must equal the number of extra days
        # in the target calendar (zero when the target is not longer).
        assert conv.size == tgt.size
        assert conv.isnull().sum() == max(max_doy[target] - max_doy[source], 0)
    elif max_doy[source] < max_doy[target]:
        assert conv.size == src.size
def test_season(self, tasmin_series, calendar):
    """Check ``missing_any`` per season on a 360-value series in ``calendar``."""
    series = convert_calendar(
        tasmin_series(np.zeros(360)), calendar, missing=0, align_on="date"
    )

    # (season, comparison function, expected flags) -- the comparison
    # function used for each season is the same as in the original test.
    cases = (
        ("MAM", np.testing.assert_equal, [False]),
        ("JJA", np.testing.assert_array_equal, [True, True]),
        ("SON", np.testing.assert_equal, [False]),
    )
    for season, compare, expected in cases:
        miss = missing.missing_any(series, freq="YS", season=season)
        compare(miss, expected)
def xclim_remove_leapdays(ds):
    """Convert a dataset to the ``"noleap"`` calendar.

    Thin wrapper around ``convert_calendar(ds, target="noleap")``.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset to convert.

    Returns
    -------
    xr.Dataset
        The result of the calendar conversion.
    """
    return convert_calendar(ds, target="noleap")
def test_convert_calendar_360_days_random():
    """Check ``align_on="random"`` conversions to and from a 360-day calendar.

    Covers three things: the size and calendar of the converted arrays, the
    fact that two successive conversions differ, and the placement of the
    inserted missing days when going from 360_day to noleap.
    """
    da_std = xr.DataArray(
        np.linspace(0, 1, 366 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-31T23:59:59", freq="12H", calendar="default"
            )
        },
    )
    da_360 = xr.DataArray(
        np.linspace(0, 1, 360 * 2),
        dims=("time",),
        coords={
            "time": date_range(
                "2004-01-01", "2004-12-30T23:59:59", freq="12H", calendar="360_day"
            )
        },
    )

    # default -> 360_day: 360 days at two steps per day.
    conv = convert_calendar(da_std, "360_day", align_on="random")
    assert get_calendar(conv) == "360_day"
    assert conv.size == 720
    conv2 = convert_calendar(da_std, "360_day", align_on="random")
    assert (conv != conv2).any()

    # 360_day -> default: without a `missing` value the size is unchanged and
    # Feb 29th is never produced.
    conv = convert_calendar(da_360, "default", align_on="random")
    assert get_calendar(conv) == "default"
    assert conv.size == 720
    assert np.datetime64("2004-02-29") not in conv.time
    conv2 = convert_calendar(da_360, "default", align_on="random")
    assert (conv2 != conv).any()

    # 360_day -> noleap with NaN filling. `np.nan` is used because the
    # `np.NaN` alias no longer exists in NumPy 2.
    conv = convert_calendar(da_360, "noleap", align_on="random", missing=np.nan)
    # Keep only the inserted (NaN) entries; there are two timesteps per day,
    # so every second one gives one day-of-year per inserted day.
    conv = conv.where(conv.isnull(), drop=True)
    nandoys = conv.time.dt.dayofyear[::2]
    # One inserted day is expected in each successive 73-day block.
    assert all(nandoys < np.array([74, 147, 220, 293, 366]))
    assert all(nandoys > np.array([0, 73, 146, 219, 292]))
def test_convert_calendar_missing(source, target, freq):
    """Check that NaN-filled conversion keeps a regular time axis.

    With ``missing=np.nan`` the output frequency must still be inferable as
    ``freq``; for a 360-day source the last timestamp must fall on day 31.
    """
    end_date = "2004-12-30" if source == "360_day" else "2004-12-31"
    src = xr.DataArray(
        date_range("2004-01-01", end_date, freq=freq, calendar=source),
        dims=("time",),
        name="time",
    )
    da_src = xr.DataArray(
        np.linspace(0, 1, src.size), dims=("time",), coords={"time": src}
    )

    out = convert_calendar(da_src, target, missing=np.nan, align_on="date")

    assert xr.infer_freq(out.time) == freq
    if source == "360_day":
        assert out.time[-1].dt.day == 31
def aggregate_between_dates(
    data: xr.DataArray,
    start: Union[xr.DataArray, DayOfYearStr],
    end: Union[xr.DataArray, DayOfYearStr],
    op: str = "sum",
    freq: Optional[str] = None,
) -> xr.DataArray:
    """Apply a reduction to the data lying between start and end dates of each period.

    Parameters
    ----------
    data : xr.DataArray
      Data to aggregate between start and end dates.
    start : xr.DataArray or DayOfYearStr
      Start dates (as day-of-year) for the aggregation periods.
    end : xr.DataArray or DayOfYearStr
      End dates (as day-of-year) for the aggregation periods. Values whose
      day offset equals the end bound are not included in the aggregation.
    op : {'min', 'max', 'sum', 'mean', 'std'}
      Name of the reduction method called on the masked data.
    freq : str
      Resampling frequency. When omitted, it is inferred from the time
      coordinate of `start` and/or `end`.

    Returns
    -------
    xarray.DataArray, [dimensionless]
      Aggregated data, one value per resampling period. It is np.nan where the
      end date is before the start date, or where the start and/or end date is
      missing.

    Raises
    ------
    ValueError
      If `freq` is not given and the bounds do not yield exactly one inferred
      frequency.
    """

    def _bound_in_days(_bound, _group, _base_time):
        """Return the bound as a day offset for this group, or None if unavailable.

        A string bound is located in the group's time axis and expressed in
        days since the group's first timestamp. An array bound is looked up at
        `_base_time`.
        """
        if not isinstance(_bound, str):
            if _base_time in _bound.time:
                return _bound.sel(time=_base_time)
            return None

        b_i = rl.index_of_date(_group.time, _bound, max_idxs=1)  # noqa
        if not len(b_i):
            return None
        first_step = _group.time.isel(time=0)
        return (_group.time.isel(time=b_i[0]) - first_step).dt.days

    def _as_days_since(_bound, _calendar):
        """Convert an array bound to `_calendar` and to days-since; pass strings through."""
        if isinstance(_bound, str):
            return _bound
        _bound = convert_calendar(_bound, _calendar)
        _bound.attrs["calendar"] = _calendar
        return doy_to_days_since(_bound)

    if freq is None:
        frequencies = []
        for bound in (start, end):
            # String bounds have no `.time` attribute, hence the AttributeError.
            try:
                inferred = xr.infer_freq(bound.time)
            except AttributeError:
                inferred = None
            frequencies.append(inferred)

        good_freq = {f for f in frequencies if f is not None}
        if len(good_freq) != 1:
            raise ValueError(
                f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {frequencies}."
                " Please consider providing `freq` manually."
            )
        freq = good_freq.pop()

    cal = get_calendar(data, dim="time")
    start = _as_days_since(start, cal)
    end = _as_days_since(end, cal)

    pieces = []
    for base_time, indexes in data.resample(time=freq).groups.items():
        # Slice of the data belonging to this resampling period.
        group = data.isel(time=indexes)

        start_d = _bound_in_days(start, group, base_time)
        end_d = _bound_in_days(end, group, base_time)

        if start_d is None or end_d is None:
            # No usable bound: build an all-NaN array with the right shape.
            res = group.isel(time=0) * np.nan
        else:
            days = (group.time - base_time).dt.days
            days[days < 0] = np.nan

            masked = group.where((days >= start_d) & (days <= end_d - 1))
            res = getattr(masked, op)(dim="time", skipna=True)
            res = xr.where(
                ((start_d > end_d) | (start_d.isnull()) | (end_d.isnull())),
                np.nan,
                res,
            )

        # Re-add the time dimension with the period's base time.
        pieces.append(res.expand_dims(time=[base_time]))

    return xr.concat(pieces, dim="time")
def xclim_convert_360day_calendar_interpolate(
    ds,
    target="noleap",
    align_on="random",
    interpolation="linear",
    return_indices=False,
    ignore_nans=True,
):
    """Convert a 360-day calendar dataset and fill the inserted days by interpolation.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset on a "360_day" calendar.
    target : str
        see xclim.core.calendar.convert_calendar
    align_on : str
        this determines which days in the calendar will have missing values or will be the product
        of interpolation, if there is. It could be every year the same calendar days, or the days
        could randomly change. see xclim.core.calendar.convert_calendar
    interpolation : None or str
        passed to xr.Dataset.interpolate_na if not None
    return_indices : bool
        on top of the converted dataset, return a boolean dataset that is True where the
        converted (not yet interpolated) dataset is null. This identifies the inserted values
        only if there were no NaNs before conversion.
    ignore_nans : bool
        if False and there are any NaNs in `ds` variables, an assertion error will be raised.
        NaNs are ignored otherwise.

    Returns
    -------
    tuple(xr.Dataset, xr.Dataset) if return_indices is True, xr.Dataset otherwise.

    Raises
    ------
    ValueError
        If the calendar of `ds` is not "360_day".
    AssertionError
        If `ignore_nans` is False and a variable of `ds` contains NaNs.

    Notes
    -----
    The default values of `target`, `align_on` and `interpolation` mean that our default approach
    is equivalent to that of the LOCA calendar conversion [1] for conversion from 360 days
    calendars to noleap calendars. In that approach, 5 calendar days are added (noleap calendars
    always have 365 days) to each year. But those calendar days are not necessarily those that
    will have their value be the product of interpolation. The days for which we interpolate are
    selected randomly every block of 72 days, so that they change every year.

    [1] http://loca.ucsd.edu/loca-calendar/
    """
    if get_calendar(ds) != "360_day":
        raise ValueError(
            "tried to use 360 day calendar conversion for a non-360-day calendar dataset"
        )

    if not ignore_nans:
        for var in ds:
            # Raised explicitly (rather than with `assert`) so the check is
            # still performed when Python runs with optimizations enabled.
            if ds[var].isnull().sum() != 0:
                raise AssertionError(
                    "360 days calendar conversion with interpolation : there are nans !"
                )

    # `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.
    ds_converted = convert_calendar(
        ds, target=target, align_on=align_on, missing=np.nan
    )

    if interpolation:
        ds_out = ds_converted.interpolate_na("time", interpolation)
    else:
        ds_out = ds_converted

    if return_indices:
        # The `isnull` method is used instead of `xr.ufuncs.isnan`, since the
        # `xarray.ufuncs` module is not available in every xarray release.
        return (ds_out, ds_converted.isnull())
    return ds_out
def _ens_align_datasets(
    datasets: List[Union[xr.Dataset, Path, str, List[Union[Path, str]]]],
    mf_flag: bool = False,
    resample_freq: str = None,
    calendar: str = "default",
    **xr_kwargs,
) -> List[xr.Dataset]:
    """Create a list of aligned xarray Datasets for ensemble Dataset creation.

    Parameters
    ----------
    datasets : List[Union[xr.Dataset, xr.DataArray, Path, str, List[Path, str]]]
      List of netcdf file paths or xarray Dataset/DataArray objects . If mf_flag is True, ncfiles should be a list of lists
      where each sublist contains input .nc files of an xarray multifile Dataset.
      DataArrays should have a name so they can be converted to datasets.
      Dataset objects passed in are not modified; a shallow copy is used.
    mf_flag : bool
      If True climate simulations are treated as xarray multifile datasets before concatenation.
      Only applicable when datasets is a sequence of file paths.
    resample_freq : Optional[str]
      If the members of the ensemble have the same frequency but not the same offset, they cannot be properly aligned.
      If resample_freq is set, the time coordinate of each members will be modified to fit this frequency.
    calendar : str
      The calendar of the time coordinate of the ensemble. For conversions involving '360_day',
      the align_on='date' option is used.
      See `xclim.core.calendar.convert_calendar`. 'default' is the standard calendar using np.datetime64 objects.
    xr_kwargs :
      Any keyword arguments to be given to xarray when opening the files.
      `chunks` defaults to "auto" and `decode_times` to False.

    Returns
    -------
    List[xr.Dataset]

    Raises
    ------
    ValueError
      If `resample_freq` is set and more than one timestamp of a dataset falls in the same resampling bin.
    """
    xr_kwargs.setdefault("chunks", "auto")
    xr_kwargs.setdefault("decode_times", False)

    ds_all = []
    for i, n in enumerate(datasets):
        # Log the position in the list; in-memory objects are named by type
        # so that their full repr does not end up in the log.
        label = type(n).__name__ if isinstance(n, (xr.Dataset, xr.DataArray)) else n
        logging.info(f"Accessing {label} ({i + 1} of {len(datasets)})")

        if mf_flag:
            ds = xr.open_mfdataset(n, combine="by_coords", **xr_kwargs)
        elif isinstance(n, xr.Dataset):
            # Shallow copy: the time coordinate is reassigned below and the
            # caller's object must not be altered by that assignment.
            ds = n.copy()
        elif isinstance(n, xr.DataArray):
            ds = n.to_dataset()
        else:
            ds = xr.open_dataset(n, **xr_kwargs)

        if "time" in ds.coords:
            # Files are opened with decode_times=False by default, so the time
            # axis is decoded here.
            time = xr.decode_cf(ds).time

            if resample_freq is not None:
                counts = time.resample(time=resample_freq).count()
                if (counts > 1).any():
                    raise ValueError(
                        f"Alignment of dataset #{i:02d} failed : its time axis cannot be resampled to freq {resample_freq}."
                    )
                time = counts.time

            ds["time"] = time

            cal = get_calendar(time)
            ds = convert_calendar(
                ds,
                calendar,
                align_on="date" if "360_day" in [cal, calendar] else None,
            )

        ds_all.append(ds)

    return ds_all
def _handler(self, request, response):
    """Run empirical quantile mapping on the request inputs and write the result.

    Resource inputs are opened and converted to the "noleap" calendar; the
    remaining inputs are split into grouper, adjustment and training
    arguments. The adjusted data is written to a netCDF file in the process
    working directory and referenced in the response outputs.
    """

    def _log(message, percentage):
        write_log(self, message, subtask_percentage=percentage)

    variable = request.inputs.pop(wpsio.variable.identifier, None)

    res = {}
    group_kwargs = {}
    train_kwargs = {}
    adjust_kwargs = {}

    for key, values in request.inputs.items():
        if key in resources:
            ds = try_opendap(values[0])
            name = variable or list(ds.data_vars)[0]
            # Force calendar to noleap
            res[key] = convert_calendar(ds[name], "noleap")
            continue

        # NOTE(review): any key that is neither a resource, a group argument
        # nor an adjust argument lands in the training arguments -- this would
        # include "output_name", which is read again below. Confirm intended.
        if key in group_args:
            bucket = group_kwargs
        elif key in adjust_args:
            bucket = adjust_kwargs
        else:
            bucket = train_kwargs
        bucket[key] = single_input_or_none(request.inputs, key)

    _log("Successfully read inputs from request.", 1)

    grouper = xclim.sdba.Grouper(**group_kwargs)
    _log("Grouper object created.", 2)

    bc = xclim.sdba.EmpiricalQuantileMapping.train(
        res["ref"], res["hist"], **train_kwargs, group=grouper
    )
    _log("Training object created.", 3)

    # `name` is the variable name resolved for the last resource read above.
    adjusted = bc.adjust(res["sim"], **adjust_kwargs)
    out = adjusted.to_dataset(name=name)
    _log("Adjustment object created.", 5)

    requested_name = single_input_or_none(request.inputs, "output_name")
    filename = valid_filename(requested_name or "bias_corrected")
    out_fn = Path(self.workdir) / f"{filename}.nc"

    progress = FinchProgressBar(
        logging_function=_log,
        start_percentage=5,
        end_percentage=98,
        width=15,
        dt=1,
    )
    with progress:
        dataset_to_netcdf(out, out_fn)

    metalink = make_metalink_output(self, [out_fn])

    response.outputs["output"].file = str(out_fn)
    response.outputs["output_log"].file = str(log_file_path(self))
    response.outputs["ref"].data = metalink.xml

    write_log(self, "Processing finished successfully", process_step="done")
    return response