def netcdf_datasets(request) -> Dict[str, Path]:
    """Create the test netcdf files and map each variable name to its file path."""
    paths = {}
    # For every described variable, write one complete dataset and one
    # variant containing missing values.
    for var_name, props in variable_descriptions.items():
        paths[var_name] = _create_and_write_dataset(var_name, **props, seed=1)
        paths[var_name + "_missing"] = _create_and_write_dataset(
            var_name, **props, seed=1, missing=True
        )

    tasmin = xr.open_dataset(paths["tasmin"]).tasmin
    tas = xr.open_dataset(paths["tas"]).tas

    # Day-of-year percentile climatologies derived from the base variables.
    for name, source, quantile in (
        ("tn10", tasmin, 0.1),
        ("t10", tas, 0.1),
        ("t90", tas, 0.9),
    ):
        per_ds = percentile_doy(source, per=quantile).to_dataset(name=name)
        paths[name] = _write_dataset(name, per_ds)

    # One file holding two variables at once.
    two_vars = ["pr", "discharge"]
    merged = xr.merge(
        [_create_test_dataset(k, **variable_descriptions[k], seed=1) for k in two_vars]
    )
    paths["pr_discharge"] = _write_dataset("pr_discharge", merged)
    return paths
def test_percentile_doy_invalid():
    """Sub-daily (hourly) input must be rejected with a ValueError."""
    hourly_times = pd.date_range("2000-01-01", periods=2, freq="H")
    tas = xr.DataArray([0, 1], dims=("time",), coords={"time": hourly_times})
    with pytest.raises(ValueError):
        percentile_doy(tas)
def test_simple(seldf, tas_series, pr_series):
    # NOTE(review): `seldf` looks like a mangled `self` — confirm against the
    # enclosing class / fixture list.
    # GIVEN: four years of constant temperature with a 20-day warm anomaly
    base_temp = np.full(365 * 4, 20) + K2C
    base_temp[10:30] += 10
    ts = tas_series(base_temp)
    ts_per = percentile_doy(ts, 5, 75).sel(percentiles=75)

    # ...and a matching 20-day wet anomaly over the same days
    base_prec = np.full(365 * 4, 10)
    base_prec[10:30] += 20
    pr = pr_series(base_prec)
    pr_per = percentile_doy(pr, 5, 75).sel(percentiles=75)

    # WHEN
    result = atmos.warm_and_wet_days(ts, ts_per, pr, pr_per, freq="MS")

    # THEN january has 20 warm and wet days
    assert result.data[0] == 20
def test_simple(seldf, tas_series, pr_series):
    # NOTE(review): `seldf` looks like a mangled `self` — confirm against the
    # enclosing class / fixture list.
    # GIVEN: four years of constant temperature with a 10-day cold anomaly
    base_temp = np.full(365 * 4, 20) + K2C
    base_temp[10:20] -= 10
    ts = tas_series(base_temp)
    ts_per = percentile_doy(ts, 5, 25).sel(percentiles=25)

    # ...and zero precipitation over the same days
    base_prec = np.full(365 * 4, 10)
    base_prec[10:20] = 0
    pr = pr_series(base_prec)
    pr_per = percentile_doy(pr, 5, 25).sel(percentiles=25)

    # WHEN
    result = atmos.cold_and_dry_days(ts, ts_per, pr, pr_per, freq="MS")

    # THEN january has 10 cold and dry days
    assert result.data[0] == 10
def test_percentile_doy_nan(tas_series):
    """NaN on one day-of-year must not corrupt neighbouring percentiles."""
    series = tas_series(np.arange(365), start="1/1/2001")
    # Mask out day-of-year 2 so the rolling window has to cope with NaN.
    series = series.where(series.time.dt.dayofyear != 2)
    stacked = xr.concat((series, series), "dim0")
    pnan = percentile_doy(stacked, window=5, per=0.5)
    assert pnan.sel(dayofyear=3, dim0=0).data == 2.5
    assert pnan.attrs["units"] == "K"
def test_multi_per(self):
    """Passing a list of percentiles keeps a `percentiles` dim in the output."""
    tas = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").tas
    base_period = tas.sel(time=slice("1990-01-01", "1991-12-31"))
    t90 = percentile_doy(base_period, window=5, per=[90, 91])
    res = tg90p(tas=tas, t90=t90, freq="YS", bootstrap=True)
    np.testing.assert_array_equal([90, 91], res.percentiles)
def test_tx10p_simple(self, tasmax_series):
    """tx10p gives identical results for Kelvin and Celsius inputs."""
    n = 366
    values = np.asarray(np.arange(n), "float")
    tas = tasmax_series(values, start="1/1/2000")
    tasC = tas.copy()
    tasC -= K2C
    tasC.attrs["units"] = "C"
    t10 = percentile_doy(tas, per=0.1)

    # create cold spell in june
    tas[175:180] = 1
    tasC[175:180] = 1 - K2C

    out = atmos.tx10p(tas, t10, freq="MS")
    outC = atmos.tx10p(tasC, t10, freq="MS")
    np.testing.assert_array_equal(out, outC)
    assert out[0] == 1
    assert out[5] == 5

    # nan treatment: a missing day invalidates its month
    tas[33] = np.nan
    tasC[33] = np.nan
    out = atmos.tx10p(tas, t10, freq="MS")
    outC = atmos.tx10p(tasC, t10, freq="MS")
    np.testing.assert_array_equal(out, outC)
    assert out[0] == 1
    assert np.isnan(out[1])
    assert out[5] == 5
def test_percentile_doy(tas_series, use_dask):
    """Median day-of-year percentile on an extra leading dim, dask or not."""
    series = tas_series(np.arange(365), start="1/1/2001")
    if use_dask:
        series = series.chunk(dict(time=10))
    stacked = xr.concat((series, series), "dim0")
    p1 = percentile_doy(stacked, window=5, per=50)
    assert p1.sel(dayofyear=3, dim0=0).data == 2
    assert p1.attrs["units"] == "K"
def test_doctest_ndims(self):
    """Replicates doctest to facilitate debugging."""
    tas = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").tas
    base_period = tas.sel(time=slice("1990-01-01", "1991-12-31"))
    t90 = percentile_doy(base_period, window=5, per=90)
    # Both a squeezed (no percentiles dim) and a full threshold must work.
    tg90p(tas=tas, t90=t90.isel(percentiles=0), freq="YS", bootstrap=True)
    tg90p(tas=tas, t90=t90, freq="YS", bootstrap=True)
def test_percentile_doy_no_copy(tas_series, use_dask):
    """With copy=False, percentile_doy may mutate its input in place."""
    tas = tas_series(np.arange(365), start="1/1/2001")
    if use_dask:
        tas = tas.chunk(dict(time=10))
    tas = xr.concat((tas, tas), "dim0")
    original_tas = tas.copy(deep=True)

    p1 = percentile_doy(tas, window=5, per=50, copy=False)
    assert p1.sel(dayofyear=3, dim0=0).data == 2
    assert p1.attrs["units"] == "K"

    # BUG FIX: the old check was `assert not np.testing.assert_array_equal(...)`.
    # That helper returns None (falsy) when the arrays are equal and raises
    # AssertionError when they differ, so the old line PASSED when the input was
    # NOT mutated and errored when it was — the opposite of the intent.
    # Assert that the input really was modified:
    with pytest.raises(AssertionError):
        np.testing.assert_array_equal(original_tas, tas)
def test_bootstrap_full_overlap(self, tas_series):
    # Bootstrapping is pointless when the in-base and out-of-base periods
    # fully overlap; the index is expected to raise in that case.
    # -- GIVEN
    tas = tas_series(self.ar1(alpha=0.8, n=int(4 * 365.25)), start="2000-01-01")
    per = percentile_doy(tas, per=90)
    # -- THEN
    with pytest.raises(KeyError):
        # -- WHEN
        tg10p(tas, per, freq="YS", bootstrap=True)
def test_days_over_precip_doy_thresh():
    pr = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").pr
    per = percentile_doy(pr, window=5, per=80)

    out1 = atmos.days_over_precip_doy_thresh(pr, per)
    np.testing.assert_array_equal(out1[1, :, 0], np.array([81, 61, 69, 78]))

    # Adding a 2 mm/d wet-day floor reduces some of the counts.
    out2 = atmos.days_over_precip_thresh(pr, per, thresh="2 mm/d")
    np.testing.assert_array_equal(out2[1, :, 0], np.array([81, 61, 66, 78]))
    assert "only days with at least 2 mm/d are counted." in out2.description
def test_simple(self, tasmax_series):
    """A 10-day warm anomaly yields a warm-spell duration of 10 days."""
    n = 3650
    amplitude = 10.0
    # FIX: the noise was drawn from an unseeded np.random.rand, making the
    # test nondeterministic; seed the generator for reproducible runs.
    rng = np.random.default_rng(42)
    tx = amplitude * np.sin(np.arange(n) / 365.0 * 2 * np.pi) + 0.1 * rng.random(n)
    tx[10:20] += 2  # 10-day warm spell
    tx = tasmax_series(tx)
    tx90 = percentile_doy(tx, per=0.9)
    out = xci.warm_spell_duration_index(tx, tx90, freq="YS")
    assert out[0] == 10
def test_convert_units(self, tasmin_series):
    """CSDI handles a series labelled in Celsius."""
    n = 3650
    amplitude = 10.0
    # FIX: seed the noise generator so the test is reproducible run to run.
    rng = np.random.default_rng(42)
    tn = amplitude * np.sin(np.arange(n) / 365.0 * 2 * np.pi) + 0.1 * rng.random(n)
    tn[10:20] -= 2  # 10-day cold spell
    tn = tasmin_series(tn + K2C)
    tn.attrs["units"] = "C"
    tn10 = percentile_doy(tn, per=0.1)
    out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL")
    assert out[0] == 10
def test_simple(self, tasmin_series):
    """A 10-day cold anomaly yields a cold-spell duration of 10 days."""
    n = 3650
    amplitude = 10.0
    # FIX: seed the noise generator so the test is reproducible run to run.
    rng = np.random.default_rng(42)
    tn = amplitude * np.sin(np.arange(n) / 365.0 * 2 * np.pi) + 0.1 * rng.random(n)
    tn[10:20] -= 2  # 10-day cold spell
    tn = tasmin_series(tn)
    tn10 = percentile_doy(tn, per=0.1)
    out = xci.cold_spell_duration_index(tn, tn10, freq="YS")
    assert out[0] == 10
    assert out.units == "days"
def test_tn10p_simple(self, tas_series):
    n = 366
    values = np.array(range(n))
    tas = tas_series(values, start="1/1/2000")
    t10 = percentile_doy(tas, per=0.1)
    # create cold spell in june
    tas[175:180] = 1
    out = xci.tn10p(tas, t10, freq="MS")
    assert out[0] == 1
    assert out[5] == 5
def test_warm_spell_duration_index():
    tasmax = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").tasmax
    tx90 = percentile_doy(tasmax, window=5, per=90)
    out = atmos.warm_spell_duration_index(
        tasmax=tasmax, tx90=tx90, window=3, freq="AS-JUL"
    )
    # NaNs at the edges: presumably the first/last AS-JUL periods are only
    # partially covered by the 1990-1993 data.
    np.testing.assert_array_equal(out[0, :, 0], np.array([np.nan, 3, 0, 0, np.nan]))
    assert (
        "Annual total number of days within spells of at least 3 days"
        in out.description
    )
def test_bootstrap_no_overlap(self, tas_series):
    # Bootstrapping is unnecessary when the in-base and out-of-base periods do
    # NOT overlap at all; the index is expected to raise in that case.
    # (The old comment, copied from the full-overlap test, said the opposite.)
    # -- GIVEN
    tas = tas_series(self.ar1(alpha=0.8, n=int(4 * 365.25)), start="2000-01-01")
    tas_in_base = tas.sel(time=slice("2000-01-01", "2001-12-31"))
    # FIX: the slice previously ended at "2001-12-31" — before its own start —
    # selecting an empty period; the intended out-of-base window is 2002-2003.
    tas_out_base = tas.sel(time=slice("2002-01-01", "2003-12-31"))
    per = percentile_doy(tas_in_base, per=90)
    # -- THEN
    with pytest.raises(KeyError):
        # -- WHEN
        tg10p(tas_out_base, per, freq="MS", bootstrap=True)
def test_simple(self, tasmin_series):
    """A 10-day cold anomaly yields a cold-spell duration of 10 days."""
    n = 3650
    amplitude = 10.0
    # FIX: seed the noise generator so the test is reproducible run to run.
    rng = np.random.default_rng(42)
    tn = amplitude * np.sin(np.arange(n) / 365.0 * 2 * np.pi) + 0.1 * rng.random(n)
    tn += K2C
    tn[10:20] -= 2  # 10-day cold spell
    tn = tasmin_series(tn)
    tn10 = percentile_doy(tn, per=10).sel(percentiles=10)
    out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL")
    assert out[0] == 10
def test_nan_presence(self, tasmin_series):
    """A NaN inside the spell window propagates to the resampled output."""
    n = 3650
    amplitude = 10.0
    # FIX: seed the noise generator so the test is reproducible run to run.
    rng = np.random.default_rng(42)
    tn = K2C + amplitude * np.sin(np.arange(n) / 365.0 * 2 * np.pi) + 0.1 * rng.random(n)
    tn[10:20] -= 2  # 10-day cold spell...
    tn[9] = np.nan  # ...immediately preceded by a missing value
    tn = tasmin_series(tn)
    tn10 = percentile_doy(tn, per=0.1)
    out = atmos.cold_spell_duration_index(tn, tn10, freq="AS-JUL")
    assert np.isnan(out[0])
def test_fraction_over_precip_doy_thresh():
    pr = open_dataset("ERA5/daily_surface_cancities_1990-1993.nc").pr
    per = percentile_doy(pr, window=5, per=80)

    out = atmos.fraction_over_precip_doy_thresh(pr, per)
    np.testing.assert_allclose(
        out[1, :, 0], np.array([0.809, 0.770, 0.748, 0.807]), atol=0.001
    )

    # Restricting to days with at least 0.002 m/d raises the fractions.
    out = atmos.fraction_over_precip_thresh(pr, per, thresh="0.002 m/d")
    np.testing.assert_allclose(
        out[1, :, 0], np.array([0.831, 0.803, 0.774, 0.833]), atol=0.001
    )
    assert "only days with at least 0.002 m/d are included" in out.description
def test_doy_interpolation(self):
    pytest.importorskip("xarray", "0.11.4")
    # Smoke test only: apply a climatology from a 365-day-calendar file to a
    # 360-day-calendar series and make sure nothing blows up.
    fn_clim = os.path.join(
        TESTS_DATA,
        "CanESM2_365day",
        "tasmin_day_CanESM2_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
    )
    fn = os.path.join(
        TESTS_DATA,
        "HadGEM2-CC_360day",
        "tasmin_day_HadGEM2-CC_rcp85_r1i1p1_na10kgrid_qm-moving-50bins-detrend_2095.nc",
    )

    with xr.open_dataset(fn_clim) as ds:
        t10 = percentile_doy(ds.tasmin.isel(lat=0, lon=0), per=0.1)

    with xr.open_dataset(fn) as ds:
        xci.tn10p(ds.tasmin.isel(lat=0, lon=0), t10, freq="MS")
def bootstrap_testor(
    self,
    series,
    per: int,
    index_fun: Callable[[DataArray, DataArray, bool], DataArray],
    positive_values=False,
    use_dask=False,
):
    """Check that bootstrapping raises the index within the in-base period
    while leaving the out-of-base period unchanged."""
    # -- GIVEN
    raw = self.ar1(alpha=0.8, n=int(4 * 365.25), positive_values=positive_values)
    climat_variable = series(raw, start="2000-01-01")
    if use_dask:
        climat_variable = climat_variable.chunk(dict(time=50))
    in_base_slice = slice("2000-01-01", "2001-12-31")
    out_base_slice = slice("2002-01-01", "2003-12-31")
    per = percentile_doy(climat_variable.sel(time=in_base_slice), per=per)
    # -- WHEN
    plain = index_fun(climat_variable, per, False)
    plain_in_base = plain.sel(time=in_base_slice)
    plain_out_base = plain.sel(time=out_base_slice)
    boot = index_fun(climat_variable, per, True)
    boot_in_base = boot.sel(time=in_base_slice)
    boot_out_base = boot.sel(time=out_base_slice)
    # -- THEN
    # Bootstrapping should increase the computed index values within the
    # overlapping period. However, this will not work on unrealistic values
    # such as a constant temperature. Besides, bootstrapping is particularly
    # effective on extreme percentiles, but the closer the target percentile
    # is to the median, the less bootstrapping is necessary. The following
    # assertions may even fail if the chosen percentile is close to 50.
    increased = np.count_nonzero(boot_in_base > plain_in_base)
    decreased = np.count_nonzero(boot_in_base < plain_in_base)
    assert increased > decreased
    # Bootstrapping should leave the out-of-base period unchanged, though
    # precision beyond the 15th decimal may differ.
    np.testing.assert_array_almost_equal(plain_out_base, boot_out_base, 15)
(xar_year.lat > ymin) & (xar_year.lat < ymax), drop=True) data.append(sliced) x_ar = xr.concat(data, dim=concat_dim) x_ar = x_ar[variable] print("Data successfully imported") return x_ar ostia_sst = readXarrayData( pathIn="dataPath", yearsList=[ "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019" ], #10 years of data concat_dim="time", variable="analysed_sst", xmin=-10.4, xmax=10.4, ymin=44.8, ymax=65.6) ####xclim method to calculate percentiles### ds_qt10 = percentile_doy(ostia_sst, window=1, per=0.1) print("10th percentile was computed") ds_qt90 = percentile_doy(ostia_sst, window=1, per=0.9) print("90th percentile was computed")
def _percentile_doy(var: xr.DataArray, perc: int) -> xr.DataArray:
    """Day-of-year percentile of *var* with the singleton percentile axis dropped."""
    thresholds = percentile_doy(var, per=perc)
    return thresholds.sel(percentiles=perc, drop=True)
def test_percentile_doy(tas_series):
    """Median day-of-year percentile over a 5-day window, extra leading dim."""
    series = tas_series(np.arange(365), start="1/1/2001")
    stacked = xr.concat((series, series), "dim0")
    p1 = percentile_doy(stacked, window=5, per=0.5)
    assert p1.sel(dayofyear=3, dim0=0).data == 2
    assert p1.attrs["units"] == "K"