def test_fit_nan(self):
    """A NaN at one grid point is ignored: the fit matches a fit on the censored series."""
    data = self.da.copy()
    data[0, 0, 0] = np.nan
    with_nan = stats.fit(data, "lognorm")
    # Dropping the first time step removes the NaN entirely.
    censored = stats.fit(data[1:], "lognorm")
    np.testing.assert_array_equal(
        with_nan.values[:, 0, 0], censored.values[:, 0, 0]
    )
def test_fit(self):
    """Fitted params expose a leading 'dparams' dim and agree with scipy's lognorm.fit."""
    params = stats.fit(self.da, "lognorm")

    # "dparams" must be the first dimension of the output.
    assert params.dims[0] == "dparams"
    assert params.get_axis_num("dparams") == 0

    # Point-wise values match a direct scipy fit on the same 1-D series.
    reference = lognorm.fit(self.da.values[:, 0, 0])
    np.testing.assert_array_equal(params[:, 0, 0], reference)

    # Check that we can reuse the parameters with scipy distributions.
    cdf = lognorm.cdf(0.99, *params.values)
    assert cdf.shape == (self.nx, self.ny)

    assert params.attrs["estimator"] == "Maximum likelihood"
def test_pwm_fit(self, dist):
    """Test that the fitted parameters match parameters used to generate a random sample."""
    pytest.importorskip("lmoments3")
    n = 500
    dc = stats.get_dist(dist)
    par = self.params[dist]
    da = xr.DataArray(
        dc(**par).rvs(size=n),
        dims=("time",),
        coords={"time": xr.cftime_range("1980-01-01", periods=n)},
    )
    out = stats.fit(da, dist=dist, method="PWM").compute()

    # Check that values are identical to lmoments3's output dict.
    l3dc = stats.get_lm3_dist(dist)
    expected = l3dc.lmom_fit(da.values)
    for key, val in expected.items():
        # Fix: the original passed a stray `1` as the third positional
        # argument, which is `err_msg` in assert_array_equal (not a
        # tolerance) — it only garbled the failure message.
        np.testing.assert_array_equal(out.sel(dparams=key), val)
def test_synth(self):
    """Parametric quantiles from a fitted normal recover the analytical ppf."""
    loc, scale = 23, 2
    size = 10000
    prob = 0.9
    dist = norm(loc=loc, scale=scale)
    sample = xr.DataArray(
        dist.rvs(size),
        dims=("time",),
        coords={"time": xr.cftime_range(start="1980-01-01", periods=size)},
        attrs={"history": "Mosquito bytes per minute"},
    )
    theoretical = dist.ppf(prob)

    params = stats.fit(sample, dist="norm")
    quantile = stats.parametric_quantile(p=params, q=prob)

    np.testing.assert_array_almost_equal(quantile, theoretical, 1)
    assert "quantile" in quantile.coords
def train(self, ref, hist, ref_params=None):
    """Train the second-order adjustment object. Refer to the class documentation for the algorithm details.

    Parameters
    ----------
    ref : DataArray
        Training target, usually a reference time series drawn from observations.
    hist : DataArray
        Training data, usually a model output whose biases are to be adjusted.
    ref_params: DataArray, optional
        Distribution parameters to use inplace of a Generalized Pareto fitted on `ref`.
        Must be similar to the output of `xclim.indices.stats.fit` called on `ref`.
        If the `scipy_dist` attribute is missing, `genpareto` is assumed.
        Only `genextreme` and `genpareto` are accepted as scipy_dist.
    """
    if self._trained:
        warn("train() was already called, overwriting old results.")

    # Align units of the threshold and of hist with ref before comparing values.
    cluster_thresh = convert_units_to(self.cluster_thresh, ref)
    hist = convert_units_to(hist, ref)

    # Extreme value threshold computed relative to "large values".
    # We use the mean between ref and hist here.
    thresh = (
        ref.where(ref >= cluster_thresh).quantile(self.q_thresh, dim="time")
        + hist.where(hist >= cluster_thresh).quantile(self.q_thresh, dim="time")
    ) / 2

    if ref_params is None:
        # All large value clusters
        ref_clusters = get_clusters(ref, thresh, cluster_thresh)
        # Parameters of a genpareto (or other) distribution, we force the location at thresh.
        # Fitting exceedances (maximum - thresh) with floc=0 is equivalent to
        # fitting the maxima with the location fixed at thresh.
        fit_params = stats.fit(
            ref_clusters.maximum - thresh, "genpareto", dim="cluster", floc=0
        )
        # Param "loc" was fitted with 0, put thresh back
        fit_params = fit_params.where(fit_params.dparams != "loc", fit_params + thresh)
    else:
        # User-supplied parameters; distribution defaults to genpareto when unlabelled.
        dist = ref_params.attrs.get("scipy_dist", "genpareto")
        fit_params = ref_params.copy().transpose(..., "dparams")
        if dist == "genextreme":
            # Convert genextreme parameters to genpareto ones.
            # NOTE: `(thresh - fit_params)` keeps the "dparams" dim, so the
            # branch taken where dparams == "loc" evaluates to
            # scale + c * (thresh - loc); the other entries are untouched.
            fit_params = xr.where(
                fit_params.dparams == "loc",
                fit_params.sel(dparams="scale")
                + fit_params.sel(dparams="c") * (thresh - fit_params),
                fit_params,
            )
        elif dist != "genpareto":
            raise ValueError(f"Unknown conversion from {dist} to genpareto.")

    # Store the fitted parameters and the threshold as the trained dataset.
    ds = xr.Dataset(dict(fit_params=fit_params, thresh=thresh))
    ds.fit_params.attrs.update(
        long_name="Generalized Pareto distribution parameters of ref",
    )
    ds.thresh.attrs.update(
        long_name=f"{self.q_thresh * 100}th percentile extreme value threshold",
        description=f"Mean of the {self.q_thresh * 100}th percentile of large values (x > {self.cluster_thresh}) of ref and hist.",
    )
    self.set_dataset(ds)
def test_dims_order(self):
    """On a transposed input, 'dparams' lands at the end of the output dims."""
    transposed = self.da.transpose()
    fitted = stats.fit(transposed)
    assert fitted.dims[-1] == "dparams"
def test_empty(self):
    """An all-NaN series yields NaN for every fitted parameter at that point."""
    data = self.da.copy()
    data[:, 0, 0] = np.nan
    fitted = stats.fit(data, "lognorm").values
    assert np.isnan(fitted[:, 0, 0]).all()
def test_genextreme_fit(self):
    """Check ML fit with a series that leads to poor values without good initial conditions."""
    fitted = stats.fit(self.genextreme, "genextreme")
    expected = (0.20949, 297.954091, 75.7911863)
    np.testing.assert_allclose(fitted, expected, 1e-5)
def test_weibull_min_fit(self):
    """Check ML fit with a series that leads to poor values without good initial conditions."""
    fitted = stats.fit(self.weibull_min, "weibull_min")
    expected = (1.7760067, -322.092552, 4355.262679)
    np.testing.assert_allclose(fitted, expected, 1e-5)
def frequency_analysis_method(ds, *, dim, method):
    """Resample/fit/quantile pipeline: return the `1 - 1/period` return level as a dataset.

    NOTE(review): `op` and `period` are free names resolved from the enclosing
    scope, and `dim` is not used in the body — confirm against the caller.
    """
    selected = select_resample_op(ds.x, op=op)
    fitted = fit(selected, dist="genextreme", method=method)
    quantiles = parametric_quantile(fitted, q=1 - 1.0 / period)
    result = quantiles.isel(quantile=0, drop=True)
    return result.rename("out").to_dataset()