def _adjust(
    self,
    sim: xr.DataArray,
    scen: xr.DataArray,
    *,
    frac: float = 0.25,
    power: float = 1.0,
    interp: str = "linear",
    extrapolation: str = "constant",
):
    """Run the extremes adjustment on `scen` using the trained dataset.

    The trained dataset (`self.ds`) is combined with `sim` and `scen` and
    handed to `extremes_adjust` together with the generalized Pareto
    distribution and the stored cluster threshold.

    Parameters
    ----------
    sim : xr.DataArray
        Stored as the `sim` variable of the dataset passed to `extremes_adjust`.
    scen : xr.DataArray
        Stored as the `scen` variable of the dataset passed to `extremes_adjust`.
    frac, power, interp, extrapolation
        Forwarded unchanged to `extremes_adjust`.

    Returns
    -------
    The object returned by `extremes_adjust`.
    """
    # The quantiles coordinate is a placeholder: overwrite it with evenly
    # spaced values strictly inside (0, 1) so that `extrapolate_qm` can be used.
    n_quantiles = self.ds.quantiles.size
    fake_quantiles = (np.arange(n_quantiles) + 1) / (n_quantiles + 1)
    training = self.ds.assign(quantiles=fake_quantiles)

    return extremes_adjust(
        training.assign(sim=sim, scen=scen),
        cluster_thresh=self.cluster_thresh,
        dist=stats.get_dist("genpareto"),
        frac=frac,
        power=power,
        interp=interp,
        extrapolation=extrapolation,
        group="time",
    )
def ts_fit_graph(ts, params):
    """Create graphic showing an histogram of the data and the distribution fitted to it.

    One panel is drawn per entry along the `nbasins` dimension.

    Parameters
    ----------
    ts : xr.DataArray
        Time series with `time` and `nbasins` dimensions. Its `long_name` and
        `units` attributes are used to label the x axis.
    params : xr.DataArray
        Fitted distribution parameters with an `nbasins` dimension and a
        `scipy_dist` attribute naming the distribution.

    Returns
    -------
    fig
        Matplotlib figure with one histogram/pdf panel per basin.
    """
    from xclim.indices.stats import get_dist

    n = ts.nbasins.size
    dist = params.attrs["scipy_dist"]

    fig, axes = plt.subplots(n, figsize=(10, 6), squeeze=False)

    for i in range(n):
        ax = axes.flat[i]
        ax2 = plt.twinx(ax)
        p = params.isel(nbasins=i)
        # Select and clean the series once; it feeds both histograms below.
        t = ts.isel(nbasins=i).dropna(dim="time")

        # Plot histogram of time series as density then as a normal count.
        # The leading underscore in the label keeps it out of the legend.
        _, bins, _ = ax.hist(
            t,
            alpha=0.5,
            density=True,
            bins="auto",
            label="__nolabel__",
        )
        # Reuse the bin edges so both histograms line up on the twin axis.
        ax2.hist(
            t,
            bins=bins,
            facecolor=(1, 1, 1, 0.01),
            edgecolor="gray",
            linewidth=1,
        )

        # Plot pdf of distribution between its 1st and 99th percentiles.
        dc = get_dist(dist)(*p)
        mn = dc.ppf(0.01)
        mx = dc.ppf(0.99)
        q = np.linspace(mn, mx, 200)
        pdf = dc.pdf(q)

        ps = ", ".join([f"{x:.1f}" for x in p.values])
        ax.plot(q, pdf, "-", label=f"{dist}({ps})")

        # Labels
        ax.set_xlabel(f"{ts.long_name} (${units2pint(ts.units):~P}$)")
        ax.set_ylabel("Probability density")
        ax2.set_ylabel("Histogram count")

        ax.legend(frameon=False)

    plt.tight_layout()
    return fig
def test_get_lm3_dist(self, dist):
    """Verify that the lmoments3 and scipy distributions use the same parameterization.

    Both distribution objects are built from the same parameter dictionary and
    their pdf values over `self.inputs_pdf` are compared. The test is skipped
    when lmoments3 is not installed.
    """
    pytest.importorskip("lmoments3")

    scipy_dist = stats.get_dist(dist)
    lm3_dist = stats.get_lm3_dist(dist)
    kwargs = self.params[dist]

    pdf_scipy = scipy_dist(**kwargs).pdf(self.inputs_pdf)
    pdf_lm3 = lm3_dist(**kwargs).pdf(self.inputs_pdf)

    np.testing.assert_array_almost_equal(pdf_lm3, pdf_scipy)
def robust_data(request):
    """Build a reference and a future ensemble of normally distributed series.

    Both outputs have dimensions ("lon", "realization", "time") with 4 x 4 x 40
    values and are named "tas". The reference holds four seeded realizations
    (loc=274, scale=0.8) repeated along "lon". Each "lon" entry of the future
    ensemble uses a different set of (loc, scale, seed) triplets.

    When `request.param` is truthy, both arrays are chunked along "lon" and
    converted to datasets.

    Returns
    -------
    tuple
        (ref, fut)
    """
    norm = get_dist("norm")

    def _draw(loc, scale, seed):
        # One seeded 40-value sample; the seed makes every call reproducible.
        return norm.rvs(loc=loc, scale=scale, size=(40,), random_state=seed)

    ref_seeds = [101083, 19377, 473820, 483625]
    ref_members = np.array([_draw(274, 0.8, seed) for seed in ref_seeds])
    ref = np.tile(ref_members, (4, 1, 1))

    # One list of (loc, scale, seed) per "lon" entry.
    fut_specs = (
        # 3 no change, 1 positive change
        [
            (274.0, 0.7, 176378),
            (274.0, 0.6, 839789),
            (274.0, 0.7, 393239),
            (275.6, 1.1, 747390),
        ],
        # 2 neg change
        [
            (272.5, 1.2, 743920),
            (272.4, 0.8, 138489),
            (275.5, 0.8, 673683),
            (275.6, 1.1, 969383),
        ],
        # All pos change
        [
            (275.6, 0.8, 696857),
            (275.8, 1.2, 379949),
            (276.5, 0.8, 268395),
            (277.6, 1.1, 456544),
        ],
        # Some NaN
        [
            (np.nan, 0.3, 746323),
            (np.nan, 1.2, 5643723),
            (275.5, 0.8, 118294),
            (275.6, 1.1, 574732),
        ],
    )
    fut = np.array(
        [[_draw(loc, sc, seed) for loc, sc, seed in specs] for specs in fut_specs]
    )

    dims = ("lon", "realization", "time")

    ref = xr.DataArray(ref, dims=dims, name="tas")
    ref["time"] = xr.cftime_range("2000-01-01", periods=40, freq="YS")

    fut = xr.DataArray(fut, dims=dims, name="tas")
    fut["time"] = xr.cftime_range("2040-01-01", periods=40, freq="YS")

    if request.param:
        ref = ref.chunk({"lon": 1}).to_dataset()
        fut = fut.chunk({"lon": 1}).to_dataset()

    return ref, fut
def _train(
    cls,
    ref: xr.DataArray,
    hist: xr.DataArray,
    *,
    cluster_thresh: str,
    ref_params: xr.Dataset = None,
    q_thresh: float = 0.95,
):
    """Train the extremes adjustment from `ref` and `hist`.

    Parameters
    ----------
    ref : xr.DataArray
        Reference series. `cluster_thresh` is converted to its units and the
        size of its `time` dimension sets the length of the quantiles axis.
    hist : xr.DataArray
        Historical series, trained against `ref`.
    cluster_thresh : str
        Threshold above which values are considered large; converted to the
        units of `ref` before use.
    ref_params : optional
        Stored as the `ref_params` variable of the dataset passed to
        `extremes_train`. When None, a float32 NaN placeholder is used instead.
    q_thresh : float
        Quantile level used for the extreme value threshold.

    Returns
    -------
    tuple
        The dataset returned by `extremes_train` (with documented `px_hist`,
        `af` and `thresh` variables and without the `quantiles` variable), and
        a dict holding the converted `cluster_thresh`.
    """
    cluster_thresh = convert_units_to(cluster_thresh, ref)

    # Approximation of how many "quantiles" values we will get:
    n_quantiles = int((1 - q_thresh) * ref.time.size)

    # ref_params: cast nan to f32 not to interfere with map_blocks dtype parsing
    #   ref and hist are f32, we want to have f32 in the output.
    # Compare against None explicitly: taking the truth value of an xarray
    # object is ambiguous (and raises for multi-element arrays).
    if ref_params is None:
        ref_params = np.float32(np.nan)

    ds = extremes_train(
        xr.Dataset(
            {
                "ref": ref,
                "hist": hist,
                "ref_params": ref_params,
            }
        ),
        q_thresh=q_thresh,
        cluster_thresh=cluster_thresh,
        dist=stats.get_dist("genpareto"),
        quantiles=np.arange(n_quantiles),
        group="time",
    )

    ds.px_hist.attrs.update(
        long_name="Probability of extremes in hist",
        description="Parametric probabilities of extremes in the common domain of hist and ref.",
    )
    ds.af.attrs.update(
        long_name="Extremes adjustment factor",
        description="Multiplicative adjustment factor of extremes from hist to ref.",
    )
    ds.thresh.attrs.update(
        long_name=f"{q_thresh * 100}th percentile extreme value threshold",
        description=f"Mean of the {q_thresh * 100}th percentile of large values (x > {cluster_thresh}) of ref and hist.",
    )

    # The quantiles variable is only a placeholder axis; drop it from the output.
    return ds.drop_vars(["quantiles"]), {"cluster_thresh": cluster_thresh}
def test_pwm_fit(self, dist):
    """Test that the PWM fit matches lmoments3's own fit on the same random sample.

    A sample of 500 values is drawn from the distribution using the class
    parameters, fitted with ``stats.fit(..., method="PWM")`` and compared,
    parameter by parameter, with ``lmom_fit`` from lmoments3. The test is
    skipped when lmoments3 is not installed.
    """
    pytest.importorskip("lmoments3")
    n = 500
    dc = stats.get_dist(dist)
    par = self.params[dist]
    da = xr.DataArray(
        dc(**par).rvs(size=n),
        dims=("time",),
        coords={"time": xr.cftime_range("1980-01-01", periods=n)},
    )
    out = stats.fit(da, dist=dist, method="PWM").compute()

    # Check that values are identical to lmoments3's output dict
    l3dc = stats.get_lm3_dist(dist)
    expected = l3dc.lmom_fit(da.values)
    for key, val in expected.items():
        # No third positional argument: it would be bound to `err_msg`, and a
        # non-string there can mask the real assertion failure.
        np.testing.assert_array_equal(out.sel(dparams=key), val)
def adjust(
    self,
    scen: xr.DataArray,
    sim: xr.DataArray,
    frac: float = 0.25,
    power: float = 1.0,
):
    """Return second order bias-adjusted data.

    Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    scen : DataArray
        Bias-adjusted time series.
    sim : DataArray
        Time series to be bias-adjusted, source of scen.
    frac : float
        Divides the normalized position of each cluster maximum (0 at the
        smallest maximum, 1 at the largest) to obtain the transition weight.
    power : float
        Exponent applied to the transition weight before it is capped at 1.

    Returns
    -------
    DataArray
        Copy of `scen` where the cluster maxima of `sim` are replaced by a
        blend of `scen` and the corrected values.

    Raises
    ------
    ValueError
        If the object has not been trained.
    """
    if not self._trained:
        raise ValueError("train() must be called before adjusting.")

    def _adjust_extremes_1d(scen, sim, ref_params, thresh, *, dist, cluster_thresh):
        # Positions and values of the maxima of clusters of large values of sim.
        _, _, peak_idx, peak_vals = get_clusters_1d(sim, thresh, cluster_thresh)

        if peak_idx.size == 0:
            # Happens if everything is under `cluster_thresh`: nothing to adjust.
            return scen.copy()

        # Fit the dist on sim's cluster maxima, forcing the location at thresh.
        fitted = stats._fitfunc_1d(
            peak_vals,
            dist=dist,
            nparams=len(ref_params),
            method="ML",
            floc=thresh,
        )

        # Probability of each maximum in sim's fitted distribution...
        probs = dist.cdf(peak_vals, *fitted)
        # ...mapped to the value with the same probability in ref's distribution.
        corrected = dist.ppf(probs, *ref_params) + thresh

        # Transition weights based on frac and power, capped at 1.
        lowest = peak_vals.min()
        spread = peak_vals.max() - lowest
        weights = (((peak_vals - lowest) / spread) / frac) ** power
        np.clip(weights, None, 1, out=weights)

        # Smooth transition between scen and the corrected values.
        blended = (corrected * weights) + (scen[peak_idx] * (1.0 - weights))

        # Only the cluster maxima are modified in the output copy.
        adjusted = scen.copy()
        adjusted[peak_idx] = blended
        return adjusted

    ufunc_kwargs = {
        "dist": stats.get_dist("genpareto"),
        "cluster_thresh": convert_units_to(self.cluster_thresh, sim),
    }
    new_scen = xr.apply_ufunc(
        _adjust_extremes_1d,
        scen,
        sim,
        self.ds.fit_params,
        self.ds.thresh,
        input_core_dims=[["time"], ["time"], ["dparams"], []],
        output_core_dims=[["time"]],
        vectorize=True,
        kwargs=ufunc_kwargs,
        dask="parallelized",
        output_dtypes=[scen.dtype],
    )

    params = f"frac={frac}, power={power}"
    new_scen.attrs["xclim_history"] = update_history(
        f"Second order bias-adjustment with {str(self)}.adjust(sim, {params})",
        sim,
    )
    return new_scen
def ts_fit_graph(ts, params):
    """Create graphic showing an histogram of the data and the distribution fitted to it.

    Only a single watershed is supported: `NotImplementedError` is raised when
    the `nbasins` dimension holds more than one entry.

    Parameters
    ----------
    ts : xr.DataArray
        Stream flow time series with dimensions (time, nbasins). Its name is
        used as the histogram dimension and its `long_name` attribute as the
        x-axis label.
    params : xr.DataArray
        Fitted distribution parameters returned by `xclim.land.fit` indicator.
        Must carry a `scipy_dist` attribute and a `dparams` coordinate.

    Returns
    -------
    fig
        Overlay of a histogram and the parameterized pdf.

    Raises
    ------
    NotImplementedError
        If `ts` holds more than one basin.
    """
    # Note: The hover tool could be customized to show the histogram count in addition to the frequency.
    from xclim.indices.stats import get_dist

    n = ts.nbasins.size
    if n > 1:
        raise NotImplementedError("Only a single basin is supported.")

    ts = ts.isel(nbasins=0)
    params = params.isel(nbasins=0)

    # Using matplotlib's default binning strategy
    hist, bins, _ = plt.hist(ts, bins="auto", density=True)

    # Histogram graphic object
    h = hv.Histogram((hist, bins), kdims=ts.name, label="Histogram")

    # PDF domain
    mn = np.min(bins)
    mx = np.max(bins)
    q = np.linspace(mn, mx, 200)

    # Compute PDF
    dist = params.attrs["scipy_dist"]
    dc = get_dist(dist)(*params)  # Works because dparams is the first dimension.
    pdf = xr.DataArray(
        data=dc.pdf(q),
        dims=(ts.name,) + params.dims[1:],
        coords={ts.name: q},
        name="pdf",
    )

    # PDF line label; the f-string already interpolates, no `.format` needed.
    ps = ", ".join(
        f"{key}={x:.1f}" for key, x in zip(params.dparams.data, params.values)
    )
    label = f"{dist}({ps})"

    # PDF graphic object
    p = pdf.hvplot.line(label=label, xlabel=ts.attrs["long_name"], color="orange")

    # Layout
    return (h * p).opts(hv.opts.Histogram(tools=["hover"]))
def ts_fit_graph(ts, params):
    """Draw, for each basin, a histogram of the series and the pdf fitted to it.

    The graphic contains one panel per watershed.

    Parameters
    ----------
    ts : xr.DataArray
        Stream flow time series with dimensions (time, nbasins).
    params : xr.DataArray
        Fitted distribution parameters returned by `xclim.land.fit` indicator.

    Returns
    -------
    fig
        Figure showing a histogram and the parameterized pdf.
    """
    from xclim.indices.stats import get_dist

    n_basins = ts.nbasins.size
    dist_name = params.attrs["scipy_dist"]

    fig, axes = plt.subplots(n_basins, figsize=(10, 6), squeeze=False)

    for idx, density_ax in enumerate(axes.flat):
        count_ax = plt.twinx(density_ax)
        basin_params = params.isel(nbasins=idx)
        series = ts.isel(nbasins=idx).dropna(dim="time")

        # Density histogram on the main axis; only the bin edges are reused.
        _, edges, _ = density_ax.hist(
            series,
            alpha=0.5,
            density=True,
            bins="auto",
            label="__nolabel__",
        )
        # Raw counts on the twin axis, sharing the same bin edges.
        count_ax.hist(
            series,
            bins=edges,
            facecolor=(1, 1, 1, 0.01),
            edgecolor="gray",
            linewidth=1,
        )

        # Fitted pdf, evaluated between its 1st and 99th percentiles.
        frozen = get_dist(dist_name)(*basin_params)
        x = np.linspace(frozen.ppf(0.01), frozen.ppf(0.99), 200)
        formatted = ", ".join([f"{value:.1f}" for value in basin_params.values])
        density_ax.plot(x, frozen.pdf(x), "-", label=f"{dist_name}({formatted})")

        # Labels
        density_ax.set_xlabel(f"{ts.long_name} (${units2pint(ts.units):~P}$)")
        density_ax.set_ylabel("Probability density")
        count_ax.set_ylabel("Histogram count")

        density_ax.legend(frameon=False)

    plt.tight_layout()
    return fig