示例#1
0
def test_prefix_attrs():
    source = {"units": "mm/s", "name": "pr"}
    dest = fmt.prefix_attrs(source, ["units"], "original_")
    assert "original_units" in dest

    out = fmt.unprefix_attrs(dest, ["units"], "original_")
    assert out == source

    # Check that the "naked" units will be overwritten.
    dest["units"] = ""

    out = fmt.unprefix_attrs(dest, ["units"], "original_")
    assert out == source
示例#2
0
def fit(
    da: xr.DataArray,
    dist: str = "norm",
    method: str = "ML",
    dim: str = "time",
    **fitkwargs,
) -> xr.DataArray:
    """Fit an array to a univariate distribution along the time dimension.

    Parameters
    ----------
    da : xr.DataArray
      Time series to be fitted along the time dimension.
    dist : str
      Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
      (see scipy.stats for full list). If the PWM method is used, only the following distributions are
      currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
      Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
      The PWM method is usually more robust to outliers.
    dim : str
      The dimension upon which to perform the indexing (default: "time").
    **fitkwargs
      Other arguments passed directly to :py:func:`_fitstart` and to the distribution's `fit`.

    Returns
    -------
    xr.DataArray
      An array of fitted distribution parameters.

    Notes
    -----
    Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array
    still contains NaNs, the distribution parameters will be returned as NaNs.
    """
    method_name = {"ML": "maximum likelihood", "PWM": "probability weighted moments"}

    # Get the distribution
    dc = get_dist(dist)
    if method == "PWM":
        lm3dc = get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]

    # xarray.apply_ufunc does not yet support multiple outputs with dask parallelism.
    duck = dask.array if isinstance(da.data, dask.array.Array) else np
    data = duck.apply_along_axis(
        _fitfunc_1d,
        da.get_axis_num(dim),
        da,
        dist=dc if method == "ML" else lm3dc,
        nparams=len(dist_params),
        method=method,
        **fitkwargs,
    )

    # Coordinates for the distribution parameters
    coords = dict(da.coords.items())
    if dim in coords:
        coords.pop(dim)
    coords["dparams"] = dist_params

    # Dimensions for the distribution parameters
    dims = [d if d != dim else "dparams" for d in da.dims]

    out = xr.DataArray(data=data, coords=coords, dims=dims)
    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"], "original_"
    )
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        xclim_history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method along dimension {dim}.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out
示例#3
0
文件: stats.py 项目: Ouranosinc/xclim
def fit(
    da: xr.DataArray,
    dist: str = "norm",
    method: str = "ML",
    dim: str = "time",
    **fitkwargs,
) -> xr.DataArray:
    """Fit an array to a univariate distribution along the time dimension.

    Parameters
    ----------
    da : xr.DataArray
      Time series to be fitted along the time dimension.
    dist : str
      Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
      (see scipy.stats for full list). If the PWM method is used, only the following distributions are
      currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
      Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
      The PWM method is usually more robust to outliers.
    dim : str
      The dimension upon which to perform the indexing (default: "time").
    fitkwargs
      Other arguments passed directly to :py:func:`_fitstart` and to the distribution's `fit`.

    Returns
    -------
    xr.DataArray
      An array of fitted distribution parameters.

    Notes
    -----
    Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array
    still contains NaNs, the distribution parameters will be returned as NaNs.
    """
    method_name = {
        "ML": "maximum likelihood",
        "PWM": "probability weighted moments"
    }

    # Get the distribution
    dc = get_dist(dist)
    if method == "PWM":
        lm3dc = get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]

    data = xr.apply_ufunc(
        _fitfunc_1d,
        da,
        input_core_dims=[[dim]],
        output_core_dims=[["dparams"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        keep_attrs=True,
        kwargs=dict(
            dist=dc if method == "ML" else lm3dc,
            nparams=len(dist_params),
            method=method,
            **fitkwargs,
        ),
        dask_gufunc_kwargs={"output_sizes": {
            "dparams": len(dist_params)
        }},
    )

    # Add coordinates for the distribution parameters and transpose to original shape (with dim -> dparams)
    dims = [d if d != dim else "dparams" for d in da.dims]
    out = data.assign_coords(dparams=dist_params).transpose(*dims)

    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"],
        "original_")
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method along dimension {dim}.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out
示例#4
0
def fit(da: xr.DataArray, dist: str = "norm", method="ML"):
    """Fit an array to a univariate distribution along the time dimension.

    Parameters
    ----------
    da : xr.DataArray
      Time series to be fitted along the time dimension.
    dist : str
      Name of the univariate distribution, such as beta, expon, genextreme, gamma, gumbel_r, lognorm, norm
      (see scipy.stats for full list). If the PWM method is used, only the following distributions are
      currently supported: 'expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'.
    method : {"ML", "PWM"}
      Fitting method, either maximum likelihood (ML) or probability weighted moments (PWM), also called L-Moments.
      The PWM method is usually more robust to outliers.

    Returns
    -------
    xr.DataArray
      An array of fitted distribution parameters.

    Notes
    -----
    Coordinates for which all values are NaNs will be dropped before fitting the distribution. If the array
    still contains NaNs, the distribution parameters will be returned as NaNs.
    """
    method_name = {"ML": "maximum likelihood", "PWM": "probability weighted moments"}

    # Get the distribution
    dc = get_dist(dist)
    if method == "PWM":
        lm3dc = get_lm3_dist(dist)

    shape_params = [] if dc.shapes is None else dc.shapes.split(",")
    dist_params = shape_params + ["loc", "scale"]

    # Fit the parameters.
    # This would also be the place to impose constraints on the series minimum length if needed.
    def fitfunc(arr):
        """Fit distribution parameters."""
        x = np.ma.masked_invalid(arr).compressed()

        # Return NaNs if array is empty.
        if len(x) <= 1:
            return [np.nan] * len(dist_params)

        # Estimate parameters
        if method == "ML":
            args, kwargs = _fit_start(x, dist)
            params = dc.fit(x, *args, **kwargs)
        elif method == "PWM":
            params = list(lm3dc.lmom_fit(x).values())

        # Fill with NaNs if one of the parameters is NaN
        if np.isnan(params).any():
            params[:] = np.nan

        return params

    # xarray.apply_ufunc does not yet support multiple outputs with dask parallelism.
    duck = dask.array if isinstance(da.data, dask.array.Array) else np
    data = duck.apply_along_axis(fitfunc, da.get_axis_num("time"), da)

    # Coordinates for the distribution parameters
    coords = dict(da.coords.items())
    coords.pop("time")
    coords["dparams"] = dist_params

    # Dimensions for the distribution parameters
    dims = [d if d != "time" else "dparams" for d in da.dims]

    out = xr.DataArray(data=data, coords=coords, dims=dims)
    out.attrs = prefix_attrs(
        da.attrs, ["standard_name", "long_name", "units", "description"], "original_"
    )
    attrs = dict(
        long_name=f"{dist} parameters",
        description=f"Parameters of the {dist} distribution",
        method=method,
        estimator=method_name[method].capitalize(),
        scipy_dist=dist,
        units="",
        xclim_history=update_history(
            f"Estimate distribution parameters by {method_name[method]} method.",
            new_name="fit",
            data=da,
        ),
    )
    out.attrs.update(attrs)
    return out