示例#1
0
 def test_fit_nan(self):
     """Check that a single NaN sample is ignored by the lognorm fit.

     The first sample of the series at index ``[0, 0]`` is set to NaN; the
     parameters fitted there must equal those obtained when the first step
     of the series is dropped altogether.
     """
     with_nan = self.da.copy()
     with_nan[0, 0, 0] = np.nan

     params_nan = stats.fit(with_nan, "lognorm")
     # Dropping the first step removes exactly the sample that was masked above.
     params_dropped = stats.fit(with_nan[1:], "lognorm")

     actual = params_nan.values[:, 0, 0]
     desired = params_dropped.values[:, 0, 0]
     np.testing.assert_array_equal(actual, desired)
示例#2
0
    def test_fit(self):
        """Check layout, values and metadata of a lognorm fit on ``self.da``."""
        params = stats.fit(self.da, "lognorm")

        # The distribution parameters live on the leading "dparams" dimension.
        assert params.dims[0] == "dparams"
        assert params.get_axis_num("dparams") == 0

        # The first series must match a direct scipy fit on the raw values.
        scipy_params = lognorm.fit(self.da.values[:, 0, 0])
        np.testing.assert_array_equal(params[:, 0, 0], scipy_params)

        # Check that we can reuse the parameters with scipy distributions
        cdf_values = lognorm.cdf(0.99, *params.values)
        assert cdf_values.shape == (self.nx, self.ny)
        assert params.attrs["estimator"] == "Maximum likelihood"
示例#3
0
    def test_pwm_fit(self, dist):
        """Test that the PWM fit returns exactly the parameters computed by lmoments3.

        A random sample of size 500 is drawn from `dist` using the parameters
        stored in ``self.params[dist]``, fitted with ``method="PWM"``, and each
        fitted parameter is compared with the entry of the same name in the
        dict returned by the lmoments3 distribution's ``lmom_fit``.

        The test is skipped when lmoments3 is not installed.
        """
        pytest.importorskip("lmoments3")
        n = 500
        dc = stats.get_dist(dist)
        par = self.params[dist]
        da = xr.DataArray(
            dc(**par).rvs(size=n),
            dims=("time", ),
            coords={"time": xr.cftime_range("1980-01-01", periods=n)},
        )
        out = stats.fit(da, dist=dist, method="PWM").compute()

        # Check that values are identical to lmoments3's output dict
        l3dc = stats.get_lm3_dist(dist)
        expected = l3dc.lmom_fit(da.values)
        for key, val in expected.items():
            # No third positional argument here: for assert_array_equal it is
            # `err_msg`, not a tolerance, so a stray number would not loosen
            # the comparison and only pollutes the failure message.
            np.testing.assert_array_equal(out.sel(dparams=key), val)
示例#4
0
    def test_synth(self):
        """Check the parametric quantile of a normal fit on a synthetic sample.

        A sample of 10000 values is drawn from N(23, 2); the 0.9 quantile
        computed from the fitted parameters must agree with the theoretical
        one to one decimal.
        """
        loc, scale = 23, 2
        size = 10000
        level = 0.9

        true_dist = norm(loc=loc, scale=scale)
        time = xr.cftime_range(start="1980-01-01", periods=size)
        sample = xr.DataArray(
            true_dist.rvs(size),
            dims=("time", ),
            coords={"time": time},
            attrs={"history": "Mosquito bytes per minute"},
        )
        theoretical = true_dist.ppf(level)

        fitted = stats.fit(sample, dist="norm")
        quantile = stats.parametric_quantile(p=fitted, q=level)

        np.testing.assert_array_almost_equal(quantile, theoretical, decimal=1)
        assert "quantile" in quantile.coords
示例#5
0
    def train(self, ref, hist, ref_params=None):
        """Train the second-order adjustment object.

        The algorithm itself is described in the class documentation. This
        method computes the extreme value threshold and the Generalized Pareto
        parameters describing `ref`, then stores both with `set_dataset`.

        Parameters
        ----------
        ref : DataArray
          Training target, usually a reference time series drawn from observations.
        hist : DataArray
          Training data, usually a model output whose biases are to be adjusted.
          It is converted to the units of `ref`.
        ref_params: DataArray, optional
          Distribution parameters to use in place of a Generalized Pareto fitted on `ref`.
          Must be similar to the output of `xclim.indices.stats.fit` called on `ref`.
          If the `scipy_dist` attribute is missing, `genpareto` is assumed.
          Only `genextreme` and `genpareto` are accepted as scipy_dist.

        Raises
        ------
        ValueError
          If `ref_params` declares a `scipy_dist` other than `genextreme` or `genpareto`.
        """
        if self._trained:
            warn("train() was already called, overwriting old results.")

        cluster_thresh = convert_units_to(self.cluster_thresh, ref)
        hist = convert_units_to(hist, ref)

        # The extreme value threshold is defined relative to "large values"
        # (>= cluster_thresh): it is the mean of the `q_thresh` quantile of
        # those values in ref and in hist.
        ref_large_q = ref.where(ref >= cluster_thresh).quantile(
            self.q_thresh, dim="time")
        hist_large_q = hist.where(hist >= cluster_thresh).quantile(
            self.q_thresh, dim="time")
        thresh = (ref_large_q + hist_large_q) / 2

        if ref_params is not None:
            dist = ref_params.attrs.get("scipy_dist", "genpareto")
            fit_params = ref_params.copy().transpose(..., "dparams")
            if dist == "genextreme":
                # Only the "loc" entry is replaced; other entries are kept.
                # NOTE(review): `thresh - fit_params` subtracts the whole
                # parameter array rather than a single selected parameter -
                # confirm this is the intended genextreme -> genpareto conversion.
                is_loc = fit_params.dparams == "loc"
                converted = (fit_params.sel(dparams="scale") +
                             fit_params.sel(dparams="c") *
                             (thresh - fit_params))
                fit_params = xr.where(is_loc, converted, fit_params)
            elif dist != "genpareto":
                raise ValueError(
                    f"Unknown conversion from {dist} to genpareto.")
        else:
            # All large value clusters
            ref_clusters = get_clusters(ref, thresh, cluster_thresh)
            # Fit a genpareto on the cluster maxima in excess of the threshold,
            # with the location forced to 0.
            excess = ref_clusters.maximum - thresh
            fit_params = stats.fit(excess, "genpareto", dim="cluster", floc=0)
            # Param "loc" was fitted with 0, put thresh back
            not_loc = fit_params.dparams != "loc"
            fit_params = fit_params.where(not_loc, fit_params + thresh)

        ds = xr.Dataset({"fit_params": fit_params, "thresh": thresh})
        ds.fit_params.attrs.update(
            {"long_name": "Generalized Pareto distribution parameters of ref"})
        ds.thresh.attrs.update({
            "long_name":
            f"{self.q_thresh * 100}th percentile extreme value threshold",
            "description":
            f"Mean of the {self.q_thresh * 100}th percentile of large values (x > {self.cluster_thresh}) of ref and hist.",
        })
        self.set_dataset(ds)
示例#6
0
 def test_dims_order(self):
     """Check that "dparams" is the last output dimension for a transposed input."""
     transposed = self.da.transpose()
     params = stats.fit(transposed)
     last_dim = params.dims[-1]
     assert last_dim == "dparams"
示例#7
0
 def test_empty(self):
     """Check that fitting an all-NaN series yields all-NaN parameters."""
     masked = self.da.copy()
     # Blank out the whole series at index [0, 0].
     masked[:, 0, 0] = np.nan

     fitted = stats.fit(masked, "lognorm")
     column = fitted.values[:, 0, 0]
     assert np.isnan(column).all()
示例#8
0
 def test_genextreme_fit(self):
     """Check ML fit with a series that leads to poor values without good initial conditions."""
     expected = (0.20949, 297.954091, 75.7911863)
     fitted = stats.fit(self.genextreme, "genextreme")
     # Compare with a relative tolerance of 1e-5.
     np.testing.assert_allclose(fitted, expected, rtol=1e-5)
示例#9
0
 def test_weibull_min_fit(self):
     """Check ML fit with a series that leads to poor values without good initial conditions."""
     expected = (1.7760067, -322.092552, 4355.262679)
     fitted = stats.fit(self.weibull_min, "weibull_min")
     # Compare with a relative tolerance of 1e-5.
     np.testing.assert_allclose(fitted, expected, rtol=1e-5)
示例#10
0
 def frequency_analysis_method(ds, *, dim, method):
     """Fit a genextreme distribution on ``ds.x`` and return one quantile as a dataset.

     ``ds.x`` is passed through `select_resample_op`, fitted with
     ``dist="genextreme"`` using `method`, and the parametric quantile at
     ``1 - 1 / period`` is returned in a dataset under the variable name
     "out", with the "quantile" dimension dropped.

     `op` and `period` are not parameters; they are read from the enclosing
     scope (presumably a closure or module globals - confirm against the
     surrounding code). `dim` is accepted but not used in this body.
     """
     resampled = select_resample_op(ds.x, op=op)
     gev_params = fit(resampled, dist="genextreme", method=method)
     quantiles = parametric_quantile(gev_params, q=1 - 1.0 / period)
     first_quantile = quantiles.isel(quantile=0, drop=True)
     return first_quantile.rename("out").to_dataset()