def test_estimate_ar_params_yw_localized():
    """Check the output layout of the localized Yule-Walker estimator.

    For AR orders 1, 2 and 3, the temporal autocorrelation of the newest
    ``order + 1`` fields (Gaussian window of radius 50) is passed to
    ``autoregression.estimate_ar_params_yw_localized``. The estimator is
    expected to return ``order + 1`` parameter fields, each having the shape
    of a single input field.
    """
    fields = _create_data_univariate()
    # shape of one field, i.e. the input shape without the leading axis
    expected_shape = fields.shape[1:]

    for order in (1, 2, 3):
        newest_fields = fields[-(order + 1):]
        acf = correlation.temporal_autocorrelation(
            newest_fields, window="gaussian", window_radius=50
        )
        params = autoregression.estimate_ar_params_yw_localized(acf)

        assert len(params) == order + 1
        for param_field in params:
            assert param_field.shape == expected_shape

def forecast(
    vil,
    velocity,
    n_timesteps,
    rainrate=None,
    n_cascade_levels=8,
    extrap_method="semilagrangian",
    ar_order=2,
    ar_window_radius=50,
    r_vil_window_radius=3,
    fft_method="numpy",
    apply_rainrate_mask=True,
    num_workers=1,
    extrap_kwargs=None,
    filter_kwargs=None,
    measure_time=False,
):
    """Generate a nowcast by using the autoregressive nowcasting using VIL
    (ANVIL) method.

    ANVIL is built on top of an extrapolation-based nowcast. The key features
    are:

    1) Growth and decay: implemented by using a cascade decomposition and
       a multiscale autoregressive integrated ARI(p,1) model. Instead of the
       original time series, the ARI model is applied to the differenced one
       corresponding to time derivatives.
    2) Originally designed for using integrated liquid (VIL) as the input
       data. In this case, the rain rate (R) is obtained from VIL via an
       empirical relation. This implementation is more general so that the
       input can be any two-dimensional precipitation field.
    3) The parameters of the ARI model and the R(VIL) relation are allowed to
       be spatially variable. The estimation is done using a moving window.

    Parameters
    ----------
    vil : array_like
        Array of shape (ar_order+2,m,n) containing the input fields ordered by
        timestamp from oldest to newest. The inputs are expected to contain
        VIL or rain rate. The time steps between the inputs are assumed to be
        regular. The array is copied, so the caller's data is left untouched.
    velocity : array_like
        Array of shape (2,m,n) containing the x- and y-components of the
        advection field. The velocities are assumed to represent one time step
        between the inputs. All values are required to be finite.
    n_timesteps : int
        Number of time steps to forecast.
    rainrate : array_like, optional
        Array of shape (m,n) containing the most recently observed rain rate
        field. If set to None, no R(VIL) conversion is done and the outputs
        are in the same units as the inputs.
    n_cascade_levels : int, optional
        The number of cascade levels to use.
    extrap_method : str, optional
        Name of the extrapolation method to use. See the documentation of
        pysteps.extrapolation.interface.
    ar_order : int, optional
        The order of the autoregressive model to use. Must be at least 1.
        The recommended values are 1 or 2. Using a higher-order model is
        strongly discouraged because the stationarity of the AR process
        cannot be guaranteed.
    ar_window_radius : int, optional
        The radius of the window to use for determining the parameters of the
        autoregressive model. Set to None to disable localization.
    r_vil_window_radius : int, optional
        The radius of the window to use for determining the R(VIL) relation.
        Applicable if rainrate is not None.
    fft_method : str, optional
        A string defining the FFT method to use (see utils.fft.get_method).
        Defaults to 'numpy' for compatibility reasons. If pyFFTW is installed,
        the recommended method is 'pyfftw'.
    apply_rainrate_mask : bool, optional
        Apply mask to prevent producing precipitation to areas where it was
        not originally observed. Defaults to True. Disabling this may improve
        some verification metrics but increases the number of false alarms.
        Applicable if rainrate is None.
    num_workers : int, optional
        The number of workers to use for parallel computation. Applicable if
        dask is installed or pyFFTW is used for computing the FFT. When
        num_workers>1, it is advisable to disable OpenMP by setting the
        environment variable OMP_NUM_THREADS to 1. This avoids slowdown caused
        by too many simultaneous threads.
    extrap_kwargs : dict, optional
        Optional dictionary containing keyword arguments for the extrapolation
        method. See the documentation of pysteps.extrapolation. The
        dictionary is not modified by this function.
    filter_kwargs : dict, optional
        Optional dictionary containing keyword arguments for the filter
        method. See the documentation of pysteps.cascade.bandpass_filters.py.
    measure_time : bool, optional
        If True, measure, print and return the computation time.

    Returns
    -------
    out : ndarray
        A three-dimensional array of shape (n_timesteps,m,n) containing a time
        series of forecast precipitation fields. The time series starts from
        t0+timestep, where timestep is taken from the input VIL/rain rate
        fields. If measure_time is True, the return value is a three-element
        tuple containing the nowcast array, the initialization time of the
        nowcast generator and the time used in the main loop (seconds).

    Raises
    ------
    ValueError
        If vil or velocity is not three-dimensional, if rainrate is given but
        is not two-dimensional, if ar_order is smaller than one or if
        vil.shape[0] differs from ar_order + 2.

    References
    ----------
    :cite:`PCLH2020`
    """
    if len(vil.shape) != 3:
        raise ValueError(
            "vil.shape = %s, but a three-dimensional array expected" % str(vil.shape)
        )

    if rainrate is not None:
        if len(rainrate.shape) != 2:
            raise ValueError(
                "rainrate.shape = %s, but a two-dimensional array expected"
                % str(rainrate.shape)
            )

    # an ARI(p,1) model needs at least one lag; fail early with a clear message
    if ar_order < 1:
        raise ValueError("ar_order = %d, but ar_order >= 1 required" % ar_order)

    if vil.shape[0] != ar_order + 2:
        raise ValueError(
            "vil.shape[0] = %d, but vil.shape[0] = ar_order + 2 = %d required"
            % (vil.shape[0], ar_order + 2)
        )

    if len(velocity.shape) != 3:
        raise ValueError(
            "velocity.shape = %s, but a three-dimensional array expected"
            % str(velocity.shape)
        )

    if extrap_kwargs is None:
        extrap_kwargs = dict()

    if filter_kwargs is None:
        filter_kwargs = dict()

    print("Computing ANVIL nowcast:")
    print("------------------------")
    print("")

    print("Inputs:")
    print("-------")
    print("input dimensions: %dx%d" % (vil.shape[1], vil.shape[2]))
    print("")

    print("Methods:")
    print("--------")
    print("extrapolation:   %s" % extrap_method)
    print("FFT:             %s" % fft_method)
    print("")

    print("Parameters:")
    print("-----------")
    print("number of time steps:        %d" % n_timesteps)
    print("parallel threads:            %d" % num_workers)
    print("number of cascade levels:    %d" % n_cascade_levels)
    print("order of the ARI(p,1) model: %d" % ar_order)
    # None is the documented way of disabling localization, so test for that
    # instead of the exact type (NumPy integers are valid radii as well)
    if ar_window_radius is not None:
        print("ARI(p,1) window radius:      %d" % ar_window_radius)
    else:
        print("ARI(p,1) window radius:      none")

    print("R(VIL) window radius:        %d" % r_vil_window_radius)

    if measure_time:
        starttime_init = time.time()

    m, n = vil.shape[1:]
    # the input fields are advected in place below, so work on a copy
    vil = vil.copy()

    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = vil[-1, :] < 0.1

    if rainrate is not None:
        # determine the coefficients fields of the relation R=a*VIL+b by
        # localized linear regression
        r_vil_a, r_vil_b = _r_vil_regression(vil[-1, :], rainrate, r_vil_window_radius)

    # transform the input fields to Lagrangian coordinates by extrapolation
    extrapolator = extrapolation.get_method(extrap_method)

    def worker(vil, i):
        # advect the i-th field forward to the time of the newest field
        return (
            i,
            extrapolator(
                vil[i, :],
                velocity,
                vil.shape[0] - 1 - i,
                allow_nonfinite_values=True,
                **extrap_kwargs,
            )[-1],
        )

    # a single flag decides between the sequential and the dask code path so
    # that queued tasks can never be left uncomputed
    use_dask = DASK_IMPORTED and num_workers > 1

    res = []
    for i in range(vil.shape[0] - 1):
        if use_dask:
            res.append(dask.delayed(worker)(vil, i))
        else:
            vil[i, :, :] = worker(vil, i)[1]

    if use_dask:
        # no point in starting more workers than there are tasks
        num_workers_ = min(num_workers, len(res))
        vil_e = dask.compute(*res, num_workers=num_workers_)
        for index, advected in vil_e:
            vil[index, :] = advected

    # compute the final mask as the intersection of the masks of the advected
    # fields
    mask = np.all(np.isfinite(vil), axis=0)

    if rainrate is None and apply_rainrate_mask:
        rainrate_mask = np.logical_and(rainrate_mask, mask)

    # apply cascade decomposition to the advected input fields
    bp_filter_method = cascade.get_method("gaussian")
    bp_filter = bp_filter_method((m, n), n_cascade_levels, **filter_kwargs)

    fft = utils.get_method(fft_method, shape=vil.shape[1:], n_threads=num_workers)

    decomp_method, recomp_method = cascade.get_method("fft")

    vil_dec = np.empty((n_cascade_levels, vil.shape[0], m, n))
    for i in range(vil.shape[0]):
        vil_ = vil[i, :].copy()
        # non-finite values are replaced with zeros before the decomposition
        vil_[~np.isfinite(vil_)] = 0.0
        vil_dec_i = decomp_method(vil_, bp_filter, fft_method=fft)
        for j in range(n_cascade_levels):
            vil_dec[j, i, :] = vil_dec_i["cascade_levels"][j, :]

    # compute time-lagged correlation coefficients for the cascade levels of
    # the advected and differenced input fields
    gamma = np.empty((n_cascade_levels, ar_order, m, n))
    for i in range(n_cascade_levels):
        vil_diff = np.diff(vil_dec[i, :], axis=0)
        vil_diff[~np.isfinite(vil_diff)] = 0.0
        for j in range(ar_order):
            gamma[i, j, :] = _moving_window_corrcoef(
                vil_diff[-1, :], vil_diff[-(j + 2), :], ar_window_radius
            )

    if ar_order == 2:
        # if the order of the ARI model is 2, adjust the correlation coefficients
        # so that the resulting process is stationary
        for i in range(n_cascade_levels):
            gamma[i, 1, :] = autoregression.adjust_lag2_corrcoef2(
                gamma[i, 0, :], gamma[i, 1, :]
            )

    # estimate the parameters of the ARI models
    phi = []
    for i in range(n_cascade_levels):
        if ar_order > 2:
            phi_ = autoregression.estimate_ar_params_yw_localized(gamma[i, :], d=1)
        elif ar_order == 2:
            phi_ = _estimate_ar2_params(gamma[i, :])
        else:
            phi_ = _estimate_ar1_params(gamma[i, :])
        phi.append(phi_)

    # only the ar_order + 1 most recent fields are kept for the iteration
    vil_dec = vil_dec[:, -(ar_order + 1) :, :]

    if measure_time:
        init_time = time.time() - starttime_init

    print("Starting nowcast computation.")

    if measure_time:
        starttime_mainloop = time.time()

    # keyword arguments for the per-step extrapolation; kept in a private
    # dictionary so that the caller's extrap_kwargs is never modified
    step_kwargs = dict(extrap_kwargs)
    step_kwargs.update({"return_displacement": True, "allow_nonfinite_values": True})

    r_f = []
    dp = None
    for t in range(n_timesteps):
        print("Computing nowcast for time step %d... " % (t + 1), end="", flush=True)

        if measure_time:
            starttime = time.time()

        # iterate the ARI models for each cascade level
        for i in range(n_cascade_levels):
            vil_dec[i, :] = autoregression.iterate_ar_model(vil_dec[i, :], phi[i])

        # recompose the cascade to obtain the forecast field
        vil_dec_dict = {
            "cascade_levels": vil_dec[:, -1, :],
            "domain": "spatial",
            "normalized": False,
        }
        vil_f = recomp_method(vil_dec_dict)
        vil_f[~mask] = np.nan

        if rainrate is not None:
            # convert VIL to rain rate
            r_f_ = r_vil_a * vil_f + r_vil_b
        else:
            r_f_ = vil_f
            if apply_rainrate_mask:
                r_f_[rainrate_mask] = 0.0

        r_f_[r_f_ < 0.0] = 0.0

        # extrapolate to the current nowcast lead time, continuing from the
        # displacement accumulated during the previous time steps
        step_kwargs["displacement_prev"] = dp
        r_f_, dp = extrapolator(r_f_, velocity, 1, **step_kwargs)

        if measure_time:
            print("%.2f seconds." % (time.time() - starttime))
        else:
            print("done.")

        r_f.append(r_f_[-1])

    if measure_time:
        mainloop_time = time.time() - starttime_mainloop
        return np.stack(r_f), init_time, mainloop_time

    return np.stack(r_f)