def test_estimate_ar_params_yw_localized():
    R = _create_data_univariate()

    for p in range(1, 4):
        gamma = correlation.temporal_autocorrelation(R[-(p + 1):],
                                                     window="gaussian",
                                                     window_radius=50)
        phi = autoregression.estimate_ar_params_yw_localized(gamma)
        assert len(phi) == p + 1
        for i in range(len(phi)):
            assert phi[i].shape == R.shape[1:]
Exemplo n.º 2
0
def forecast(
    vil,
    velocity,
    n_timesteps,
    rainrate=None,
    n_cascade_levels=8,
    extrap_method="semilagrangian",
    ar_order=2,
    ar_window_radius=50,
    r_vil_window_radius=3,
    fft_method="numpy",
    apply_rainrate_mask=True,
    num_workers=1,
    extrap_kwargs=None,
    filter_kwargs=None,
    measure_time=False,
):
    """Generate a nowcast by using the autoregressive nowcasting using VIL
    (ANVIL) method. ANVIL is built on top of an extrapolation-based nowcast.
    The key features are:

    1) Growth and decay: implemented by using a cascade decomposition and
       a multiscale autoregressive integrated ARI(p,1) model. Instead of the
       original time series, the ARI model is applied to the differenced one
       corresponding to time derivatives.
    2) Originally designed for using integrated liquid (VIL) as the input data.
       In this case, the rain rate (R) is obtained from VIL via an empirical
       relation. This implementation is more general so that the input can be
       any two-dimensional precipitation field.
    3) The parameters of the ARI model and the R(VIL) relation are allowed to
       be spatially variable. The estimation is done using a moving window.

    Parameters
    ----------
    vil : array_like
        Array of shape (ar_order+2,m,n) containing the input fields ordered by
        timestamp from oldest to newest. The inputs are expected to contain VIL
        or rain rate. The time steps between the inputs are assumed to be regular.
    velocity : array_like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time step
        between the inputs. All values are required to be finite.
    n_timesteps : int
        Number of time steps to forecast.
    rainrate : array_like
        Array of shape (m,n) containing the most recently observed rain rate
        field. If set to None, no R(VIL) conversion is done and the outputs
        are in the same units as the inputs.
    n_cascade_levels : int, optional
        The number of cascade levels to use.
    extrap_method : str, optional
        Name of the extrapolation method to use. See the documentation of
        pysteps.extrapolation.interface.
    ar_order : int, optional
        The order of the autoregressive model to use. The recommended values
        are 1 or 2. Using a higher-order model is strongly discouraged because
        the stationarity of the AR process cannot be guaranteed.
    ar_window_radius : int, optional
        The radius of the window to use for determining the parameters of the
        autoregressive model. Set to None to disable localization.
    r_vil_window_radius : int, optional
        The radius of the window to use for determining the R(VIL) relation.
        Applicable if rainrate is not None.
    fft_method : str, optional
        A string defining the FFT method to use (see utils.fft.get_method).
        Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
        the recommended method is 'pyfftw'.
    apply_rainrate_mask : bool
        Apply mask to prevent producing precipitation to areas where it was not
        originally observed. Defaults to True. Disabling this may improve some
        verification metrics but increases the number of false alarms. Applicable
        if rainrate is None.
    num_workers : int, optional
        The number of workers to use for parallel computation. Applicable if
        dask is installed or pyFFTW is used for computing the FFT.
        When num_workers>1, it is advisable to disable OpenMP by setting
        the environment variable OMP_NUM_THREADS to 1.
        This avoids slowdown caused by too many simultaneous threads.
    extrap_kwargs : dict, optional
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of pysteps.extrapolation.
    filter_kwargs : dict, optional
        Optional dictionary containing keyword arguments for the filter method.
        See the documentation of pysteps.cascade.bandpass_filters.py.
    measure_time : bool, optional
        If True, measure, print and return the computation time.

    Returns
    -------
    out : ndarray
        A three-dimensional array of shape (n_timesteps,m,n) containing a time
        series of forecast precipitation fields. The time series starts from
        t0+timestep, where timestep is taken from the input VIL/rain rate
        fields. If measure_time is True, the return value is a three-element
        tuple containing the nowcast array, the initialization time of the
        nowcast generator and the time used in the main loop (seconds).

    References
    ----------
    :cite:`PCLH2020`
    """
    if len(vil.shape) != 3:
        raise ValueError(
            "vil.shape = %s, but a three-dimensional array expected" % str(vil.shape)
        )

    if rainrate is not None:
        if len(rainrate.shape) != 2:
            raise ValueError(
                "rainrate.shape = %s, but a two-dimensional array expected"
                % str(rainrate.shape)
            )

    if vil.shape[0] != ar_order + 2:
        raise ValueError(
            "vil.shape[0] = %d, but vil.shape[0] = ar_order + 2 = %d required"
            % (vil.shape[0], ar_order + 2)
        )

    if len(velocity.shape) != 3:
        raise ValueError(
            "velocity.shape = %s, but a three-dimensional array expected"
            % str(velocity.shape)
        )

    if extrap_kwargs is None:
        extrap_kwargs = dict()

    if filter_kwargs is None:
        filter_kwargs = dict()

    print("Computing ANVIL nowcast:")
    print("------------------------")
    print("")

    print("Inputs:")
    print("-------")
    print("input dimensions: %dx%d" % (vil.shape[1], vil.shape[2]))
    print("")

    print("Methods:")
    print("--------")
    print("extrapolation:   %s" % extrap_method)
    print("FFT:             %s" % fft_method)
    print("")

    print("Parameters:")
    print("-----------")
    print("number of time steps:        %d" % n_timesteps)
    print("parallel threads:            %d" % num_workers)
    print("number of cascade levels:    %d" % n_cascade_levels)
    print("order of the ARI(p,1) model: %d" % ar_order)
    if type(ar_window_radius) == int:
        print("ARI(p,1) window radius:      %d" % ar_window_radius)
    else:
        print("ARI(p,1) window radius:      none")

    print("R(VIL) window radius:        %d" % r_vil_window_radius)

    if measure_time:
        starttime_init = time.time()

    m, n = vil.shape[1:]
    vil = vil.copy()

    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = vil[-1, :] < 0.1

    if rainrate is not None:
        # determine the coefficients fields of the relation R=a*VIL+b by
        # localized linear regression
        r_vil_a, r_vil_b = _r_vil_regression(vil[-1, :], rainrate, r_vil_window_radius)

    # transform the input fields to Lagrangian coordinates by extrapolation
    extrapolator = extrapolation.get_method(extrap_method)
    res = list()

    def worker(vil, i):
        return (
            i,
            extrapolator(
                vil[i, :],
                velocity,
                vil.shape[0] - 1 - i,
                allow_nonfinite_values=True,
                **extrap_kwargs,
            )[-1],
        )

    for i in range(vil.shape[0] - 1):
        if not DASK_IMPORTED or num_workers == 1:
            vil[i, :, :] = worker(vil, i)[1]
        else:
            res.append(dask.delayed(worker)(vil, i))

    if DASK_IMPORTED and num_workers > 1:
        num_workers_ = len(res) if num_workers > len(res) else num_workers
        vil_e = dask.compute(*res, num_workers=num_workers_)
        for i in range(len(vil_e)):
            vil[vil_e[i][0], :] = vil_e[i][1]

    # compute the final mask as the intersection of the masks of the advected
    # fields
    mask = np.isfinite(vil[0, :])
    for i in range(1, vil.shape[0]):
        mask = np.logical_and(mask, np.isfinite(vil[i, :]))

    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = np.logical_and(rainrate_mask, mask)

    # apply cascade decomposition to the advected input fields
    bp_filter_method = cascade.get_method("gaussian")
    bp_filter = bp_filter_method((m, n), n_cascade_levels, **filter_kwargs)

    fft = utils.get_method(fft_method, shape=vil.shape[1:], n_threads=num_workers)

    decomp_method, recomp_method = cascade.get_method("fft")

    vil_dec = np.empty((n_cascade_levels, vil.shape[0], m, n))
    for i in range(vil.shape[0]):
        vil_ = vil[i, :].copy()
        vil_[~np.isfinite(vil_)] = 0.0
        vil_dec_i = decomp_method(vil_, bp_filter, fft_method=fft)
        for j in range(n_cascade_levels):
            vil_dec[j, i, :] = vil_dec_i["cascade_levels"][j, :]

    # compute time-lagged correlation coefficients for the cascade levels of
    # the advected and differenced input fields
    gamma = np.empty((n_cascade_levels, ar_order, m, n))
    for i in range(n_cascade_levels):
        vil_diff = np.diff(vil_dec[i, :], axis=0)
        vil_diff[~np.isfinite(vil_diff)] = 0.0
        for j in range(ar_order):
            gamma[i, j, :] = _moving_window_corrcoef(
                vil_diff[-1, :], vil_diff[-(j + 2), :], ar_window_radius
            )

    if ar_order == 2:
        # if the order of the ARI model is 2, adjust the correlation coefficients
        # so that the resulting process is stationary
        for i in range(n_cascade_levels):
            gamma[i, 1, :] = autoregression.adjust_lag2_corrcoef2(
                gamma[i, 0, :], gamma[i, 1, :]
            )

    # estimate the parameters of the ARI models
    phi = []
    for i in range(n_cascade_levels):
        if ar_order > 2:
            phi_ = autoregression.estimate_ar_params_yw_localized(gamma[i, :], d=1)
        elif ar_order == 2:
            phi_ = _estimate_ar2_params(gamma[i, :])
        else:
            phi_ = _estimate_ar1_params(gamma[i, :])
        phi.append(phi_)

    vil_dec = vil_dec[:, -(ar_order + 1) :, :]

    if measure_time:
        init_time = time.time() - starttime_init

    print("Starting nowcast computation.")

    if measure_time:
        starttime_mainloop = time.time()

    r_f = []
    dp = None
    for t in range(n_timesteps):
        print("Computing nowcast for time step %d... " % (t + 1), end="", flush=True)

        if measure_time:
            starttime = time.time()

        # iterate the ARI models for each cascade level
        for i in range(n_cascade_levels):
            vil_dec[i, :] = autoregression.iterate_ar_model(vil_dec[i, :], phi[i])

        # recompose the cascade to obtain the forecast field
        vil_dec_dict = {}
        vil_dec_dict["cascade_levels"] = vil_dec[:, -1, :]
        vil_dec_dict["domain"] = "spatial"
        vil_dec_dict["normalized"] = False
        vil_f = recomp_method(vil_dec_dict)
        vil_f[~mask] = np.nan

        if rainrate is not None:
            # convert VIL to rain rate
            r_f_ = r_vil_a * vil_f + r_vil_b
        else:
            r_f_ = vil_f
            if apply_rainrate_mask:
                r_f_[rainrate_mask] = 0.0

        r_f_[r_f_ < 0.0] = 0.0

        # extrapolate to the current nowcast lead time
        extrap_kwargs.update(
            {
                "displacement_prev": dp,
                "return_displacement": True,
                "allow_nonfinite_values": True,
            }
        )
        r_f_, dp = extrapolator(r_f_, velocity, 1, **extrap_kwargs)

        if measure_time:
            print("%.2f seconds." % (time.time() - starttime))
        else:
            print("done.")

        r_f.append(r_f_[-1])

    if measure_time:
        mainloop_time = time.time() - starttime_mainloop

    if measure_time:
        return np.stack(r_f), init_time, mainloop_time
    else:
        return np.stack(r_f)