def save_MCMC_sampling(df, column, trace, pastdays, interval=0.95, start=0):
    """Write the mean and HDI bounds of the sampled ``r_t`` into ``df``.

    Three columns are added to (or overwritten in) ``df`` in place; nothing
    is returned. Every stored array is first passed through ``padnan`` with
    the pad widths ``(start, pastdays)``.

    Args:
        df: DataFrame receiving the new columns.
        column: Prefix used to build the new column names.
        trace: Mapping-like sampling result; only ``trace['r_t']`` is read.
        pastdays: Right-hand pad width; also embedded, zero-padded to three
            digits, in the column names.
        interval: Probability mass forwarded as ``hdi_prob``.
        start: Left-hand pad width.
    """
    samples = trace['r_t']
    pad_width = (start, pastdays)
    # int() truncates toward zero: the label is the integer part of
    # interval * 100 (95 for the default interval).
    interval_pct = int(interval * 100)

    posterior_mean = np.mean(samples, axis=0)
    df[f'{column}_Rt_MCMC_pastdays_{pastdays:03d}'] = padnan(
        posterior_mean, pad_width)

    # credible interval: column 0 is stored as "min", column 1 as "max"
    hdi_bounds = pm.stats.hpd(samples, hdi_prob=interval)
    lower_bound = hdi_bounds[:, 0]
    df[f'{column}_Rt_MCMC_HDI_{interval_pct}_min_pastdays_{pastdays:03d}'] = padnan(
        lower_bound, pad_width)
    upper_bound = hdi_bounds[:, 1]
    df[f'{column}_Rt_MCMC_HDI_{interval_pct}_max_pastdays_{pastdays:03d}'] = padnan(
        upper_bound, pad_width)
def predict_next_value(X, use_last_values=None, search_steps=100):
    """Extrapolate ``X`` one step ahead from a Tikhonov-smoothed tail.

    The last ``use_last_values`` entries of ``X`` are smoothed with
    ``TikhonovRegularization`` for ``search_steps`` evenly spaced ``alpha``
    values in ``[0, 10]``. Each candidate is scored by the SMAPE between the
    tail and its one-step-shifted ``next_from_taylor`` forecast, ignoring
    the first three positions. The alpha with the lowest score is then used
    for the final forecast.

    Args:
        X: 1-D array-like exposing ``.shape`` and slicing.
        use_last_values: Length of the tail to use; any falsy value
            (``None`` or ``0``) selects the whole series.
        search_steps: Number of alpha candidates to evaluate.

    Returns:
        ``next_from_taylor`` of the smoothed tail, passed through ``padnan``
        with pad widths ``(X.shape[0] - use_last_values + 1, 0)``.
    """
    window = use_last_values if use_last_values else X.shape[0]
    candidates = np.linspace(0, 10., search_steps)
    skip = 3  # leading positions excluded from the error measure
    tail = X[-window:]
    tail_len = len(tail)

    def _smoothed(alpha):
        # Smooth the tail with the given regularisation strength.
        regularizer = TikhonovRegularization(timesteps=tail_len, alpha=alpha)
        return regularizer.stat_smooth_data(tail, verbose=False)

    def _score(alpha):
        # Percent SMAPE between the tail and its shifted forecast.
        forecast = padnan(next_from_taylor(_smoothed(alpha)), (1, 0))
        return smape(tail[skip:], forecast[skip:-1]) * 100

    errors = [_score(candidate) for candidate in candidates]
    best_alpha = candidates[np.argmin(errors)]

    left_pad = X.shape[0] - window + 1
    return padnan(next_from_taylor(_smoothed(best_alpha)), (left_pad, 0))
def RSVD_smooth_data(df, alpha, beta, season_period=7, trend_alpha=100.,
                     difference_degree=2):
    """Deseason ``newCasesByPublishDate`` and add the derived columns to ``df``.

    For the source column a ``LogSeasonalRegularizer`` is fitted and six
    columns, all prefixed with ``<column>_deseason``, are written to ``df``
    in place: the deseasoned series, its seasonality, the smoothed trend,
    the residuals, the relative residuals and the Rt computed on the
    smoothed trend. Progress and the result of ``adfuller()`` are printed;
    nothing is returned.

    Args:
        df: DataFrame holding the source column; modified in place.
        alpha: Forwarded to ``naive.compute_Rt``.
        beta: Forwarded to ``naive.compute_Rt``.
        season_period: Passed to the regularizer both as ``season_period``
            and as ``max_r``.
        trend_alpha: Passed to the regularizer.
        difference_degree: Passed to the regularizer.
    """
    columns_before = df.columns

    filter_columns = ['newCasesByPublishDate']
    prettyprint.pprint(filter_columns)

    for source_col in filter_columns:
        smoothcol = source_col + '_deseason'
        print(smoothcol)

        regularizer = LogSeasonalRegularizer(
            df[source_col],
            season_period=season_period,
            max_r=season_period,
            trend_alpha=trend_alpha,
            difference_degree=difference_degree,
            verbose=True,
        )
        model = regularizer.fit()
        print(f'patterns: {model.final_r}')

        df[f'{smoothcol}'] = model.deseasoned
        df[f'{smoothcol}_seasonality'] = model.season_svd
        df[f'{smoothcol}_smoothed'] = model.trend
        df[f'{smoothcol}_residuals'] = model.residuals
        df[f'{smoothcol}_relative_residuals'] = model.relative_residuals

        # Rt is computed on the non-null part of the trend and then passed
        # through padnan with the model's left padding.
        valid_trend = df[f'{smoothcol}_smoothed'].dropna()
        trend_rt = naive.compute_Rt(valid_trend, alpha=alpha, beta=beta)
        df[f'{smoothcol}_smoothed_Rt'] = padnan(
            trend_rt, (model.padding_left, 0))

        prettyprint.pprint(regularizer.adfuller())

    print('new columns generated:')
    added_columns = [c for c in df.columns if c not in columns_before]
    prettyprint.pprint(added_columns)
def draw_expanded_series(X, draws, season_period, trend_alpha,
                         difference_degree, truncate, alpha, beta,
                         method='future_range', lower_ratio=0.2,
                         upper_ratio=1.2, res_window=None, verbose=False):
    """Return ``draws`` copies of ``X``, each extended by one sampled value.

    ``X`` is decomposed with ``LogSeasonalRegularizer``. The next trend
    value comes from ``predict_next_case`` and the next seasonal value from
    ``predict_next_value``; they are combined with a random component
    chosen by ``method``.

    Args:
        X: Input series; a ``pd.Series`` (or subclass) is converted to a
            NumPy array.
        draws: Number of sampled continuations (rows of the result).
        season_period: Passed to the regularizer as ``season_period`` and
            ``max_r``; also the default for ``res_window``.
        trend_alpha: Passed to the regularizer.
        difference_degree: Passed to the regularizer.
        truncate: If truthy, the last element of the trend, the
            seasonality, the relative residuals and of ``X`` itself is
            dropped after the fit.
        alpha: Forwarded to ``Rt.naive.compute_Rt`` and
            ``predict_next_case``.
        beta: Forwarded to ``Rt.naive.compute_Rt`` and
            ``predict_next_case``.
        method: ``'future_range'`` draws the trend from a truncated normal
            centred on the predicted trend; ``'residuals'`` adds normal
            noise derived from the most recent relative residuals.
        lower_ratio: Lower truncation bound as a fraction of the predicted
            trend (``'future_range'`` only).
        upper_ratio: Upper truncation bound as a fraction of the predicted
            trend (``'future_range'`` only).
        res_window: Number of trailing relative residuals used by
            ``'residuals'``; any falsy value selects ``season_period``.
        verbose: Passed to the regularizer.

    Returns:
        2-D array with ``draws`` rows; each row is the (possibly truncated)
        ``X`` followed by one drawn next value.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Explicit validation instead of ``assert``: asserts are stripped under
    # ``python -O`` and an invalid method would otherwise only surface at
    # the very end as an unbound ``X_next``.
    if method not in ('future_range', 'residuals'):
        raise ValueError(
            "method must be 'future_range' or 'residuals', "
            f"got {method!r}")

    # isinstance also catches Series subclasses, which would otherwise
    # reach the ``reshape`` call below unconverted.
    if isinstance(X, pd.Series):
        X = X.to_numpy()

    # res_window
    if not res_window:
        res_window = season_period

    # deseason:
    lrsvd = LogSeasonalRegularizer(X,
                                   season_period=season_period,
                                   max_r=season_period,
                                   trend_alpha=trend_alpha,
                                   difference_degree=difference_degree,
                                   verbose=verbose)
    m = lrsvd.fit()

    # truncate means that, AFTER deseasoning, we drop the last element:
    # in this way, deseasoning is affected by the additional element in
    # the original series, while we drop the last result as it is in the
    # future
    sl = np.s_[:-1] if truncate else np.s_[:]

    T, S, eps_rel = m.trend[sl], m.season_svd[sl], m.relative_residuals[sl]
    _, _, S_hat = LogSeasonalRegularizer.periods_to_matrix(S, season_period)

    # compute Rt on T
    rt = padnan(Rt.naive.compute_Rt(T[m.padding_left:], alpha, beta),
                (m.padding_left, 0))

    # predict next T value
    T_next = predict_next_case(T, rt, alpha, beta)[-1]

    # predict next S value
    # we need the season of tomorrow
    # the season of today is the last column in S_hat
    # hence -> the season of tomorrow is the first column, as seasons are
    # periodic
    S_tomorrow = S_hat[:, 0]
    # predict the next value of S_tomorrow
    S_tomorrow_next = predict_next_value(S_tomorrow, use_last_values=15)[-1]

    # compute the next X value
    if method == 'future_range':
        # draws multiple X_next based on range applied to last T_next
        lower, upper = T_next * lower_ratio, T_next * upper_ratio
        mu, sigma = T_next, T_next
        # truncnorm takes its bounds in units of ``scale`` relative to
        # ``loc``, hence the standardisation of lower/upper
        possible_T_next = stats.truncnorm((lower - mu) / sigma,
                                          (upper - mu) / sigma,
                                          loc=mu,
                                          scale=sigma)
        X_next = S_tomorrow_next + possible_T_next.rvs((draws, 1))
    else:  # method == 'residuals'
        # draw multiple eps based on eps_rel
        if res_window == 1:
            # a single residual has no spread: centre on zero and use its
            # magnitude as the standard deviation
            eps_mean = 0.
            eps_sigma = np.abs(eps_rel[-1])
        else:
            eps_mean = eps_rel[-res_window:].mean()
            eps_sigma = eps_rel[-res_window:].std()
        eps_rel_draw = np.random.normal(loc=eps_mean,
                                        scale=eps_sigma,
                                        size=(draws, 1))
        # relative residuals are scaled by the predicted trend
        eps_draw = eps_rel_draw * T_next
        X_next = T_next + S_tomorrow_next + eps_draw

    # expand the original X series and return it
    X_expanded = np.repeat(X[sl].reshape((1, -1)), draws, axis=0)
    X_expanded = np.append(X_expanded, X_next, axis=1)
    return X_expanded
def predict_next_case(cases, rt, alpha, beta):
    """Predict cases as infectious charge times the extrapolated ``rt``.

    ``cases`` is passed through ``padnan`` with pad widths ``(0, 1)`` and
    every NaN in the result is replaced by ``0.``. The infectious charge of
    that series, with its first element dropped, is multiplied by
    ``next_from_taylor(rt)``.

    Args:
        cases: Case series.
        rt: Rt series to extrapolate.
        alpha: Forwarded to ``Rt.naive.infectious_charge``.
        beta: Forwarded to ``Rt.naive.infectious_charge``.

    Returns:
        The elementwise product described above.
    """
    extended_cases = np.nan_to_num(padnan(cases, (0, 1)), nan=0.)
    charge = Rt.naive.infectious_charge(extended_cases,
                                        alpha=alpha,
                                        beta=beta)
    shifted_charge = charge[1:]
    return shifted_charge * next_from_taylor(rt)
def next_from_taylor(x):
    """Combine ``x`` with two lagged copies as ``2.5*x - 2*lag1 + 0.5*lag2``.

    ``lag1`` is ``x`` without its last element and ``lag2`` is ``x`` without
    its last two elements, each passed through ``padnan`` with 1 and 2
    leading pad entries respectively.

    NOTE(review): the weights match a second-order Taylor step built from
    backward differences, x[n] + (x[n] - x[n-1]) + 0.5 * (x[n] - 2*x[n-1]
    + x[n-2]), assuming ``padnan`` left-pads so the lags align with ``x``
    — confirm against ``padnan``.
    """
    lag_one = padnan(x[:-1], (1, 0))
    lag_two = padnan(x[:-2], (2, 0))
    return 2.5 * x - 2. * lag_one + 0.5 * lag_two