def __init__(self, variable: str, param: op.Param, dloader: xp.DataLoader):
    """Store the inputs, fetch the data, then train and validate.

    Args:
        variable: stored unchanged on the instance.
        param: stored unchanged on the instance.
        dloader: stored unchanged; its ``table`` attribute is also kept.

    The annotations name the classes themselves. Written as calls
    (``op.Param()``, ``xp.DataLoader()``) they were evaluated once, when
    the ``def`` statement executed, building objects that were then
    discarded.
    """
    self.__variable = variable
    self.__param = param
    self.__dloader = dloader
    #set these
    self.__data = self.fetch_data()
    self.__table = self.__dloader.table
    # placeholders; nothing in this method assigns them a real value
    self.__model = None
    self.__forecast = None
    self.__cv_metrics = cv.CVMetrics()
    # trainer() runs before validator(); both results are kept
    self.__trained = self.trainer()
    self.__validated = self.validator()
df[col + 'k'] = smooth(df[col + 'k'], w, 3) #doubling time df[col + 'T'] = np.log(2) / df[col + 'k'] #df.loc[df[col+'T'] > 100, col+'T'] = 0 # 2nd Derivative df[col + 'D2'] = np.gradient(np.gradient(df[col])) df[col + 'D2'] = smooth(df[col + 'D2'], w, 5) #growth factor df[col + 'GF'] = growth_factor(df[col]) df[col + 'GF'] = smooth(df[col + 'GF'], w, 3) # df[i]/df[i-1] = growth ratio df[col + 'GR'] = growth_ratio(df[col]) df[col + 'GR'] = smooth(df[col + 'GR'], w, 5) return df ## country = "Germany" top = 10 ld = xp.DataLoader(top=top) df = ld.covid_data cv = country_view(df, country) cv = augment_view(cv, "Confirmed") plot_view(cv, "Confirmed", country, True) print(country, "\n", cv)
import numpy as np import proc as xp import optparam as op import prophet_trainer as pt ds = xp.DataLoader().train_ds_confirmed tb = xp.DataLoader().table floor_points = [0]#, 10e3, 20e3, 50e3] cap_lower = 700e3 cap_upper = 1200e3 cap_step = 100e3 cap_benchpoints = np.arange(cap_lower, cap_upper+1, cap_step).tolist() seasonality_modes = ['multiplicative', 'additive'] future_periods = [21] changepoint_prior_scales = [0.05, 0.5] interval_widths = [0.90, 0.95] print("Logistic scan points:", cap_benchpoints) #create set of params optparams = [] for cap in cap_benchpoints: for floor in floor_points: for smode in seasonality_modes: for periods in future_periods: for cpps in changepoint_prior_scales: for iw in interval_widths:
import proc as xp
import xquery as xq

# Query labelled "All Period": rows with Confirmed > 0 dated before 2021-01-01.
q0 = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")
# Construct the loader with that query and invoke its reporter.
# NOTE(review): presumably reporter() prints a summary of the loaded data — confirm.
ld = xp.DataLoader(query=q0)
ld.reporter()
# Selection handed to the data loader.
query_raw = xq.Query("Base", "Date > '2020-03-01'")
# Optional second cut applied to the loaded frame; an empty expression
# (as here) disables it.  Example expression: "RecentDays <=15"
query_derived = xq.Query("Recent", "")
y_data_label = 'Confirmed All'

asia = ['Mainland China', 'South Korea', 'Iran']
europe = ['Germany', 'UK', 'Italy', 'Spain', 'France']  # , 'Greece', 'Cyprus'
amerika = ['US']
countries = europe + amerika  # + asia
#countries = ['Germany']

fig, ax = plt.subplots(figsize=(15, 7))
for country in countries:
    dloader = xp.DataLoader(query=query_raw, countries=[country])
    df = dloader.leaders
    if query_derived.query:
        df = df.query(query_derived.query)

    x = df['Days'].to_numpy()
    y = df[y_data_label].to_numpy()
    if len(x) < 2:
        # The sample spacing below needs two points; without this guard
        # x[1] raises IndexError and aborts the whole plot.
        print("Skipping", country, "- fewer than two data points")
        continue

    # Spacing is taken from the first two samples and used for the whole series.
    dx = x[1] - x[0]
    dy = np.gradient(y, dx)
    # Relative growth rate k = y'/y and doubling time T = ln(2)/k.
    # Zero counts or a zero rate yield inf/nan here; silence the
    # corresponding RuntimeWarnings, the values themselves are unchanged.
    with np.errstate(divide='ignore', invalid='ignore'):
        k = dy / y
        T = np.log(2) / k
    # A negative doubling time (shrinking series) is shown as 0.
    T[T < 0] = 0
    ax.plot(x, T, label=country)

axes = plt.gca()
import proc as xp
import pandas as pd
import tools as xt
import xquery as xq

# Output directory for prediction plots.
odir = 'images/predictions'

#query
qAll = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")
qGerm = xq.Query("Germany", "Confirmed > 0 and Country == 'Germany'")
# Active selection is Germany; the commented line switches to the full period.
#query = qAll; tag = ""
query = qGerm
tag = "Germany"

#data loader
dloader = xp.DataLoader(query=query, arima=True)
# The 'Confirmed' column of the training set is the series being modelled.
df = dloader.train_ds_confirmed['Confirmed']

# NOTE(review): `pm` is not bound by any import visible in this fragment —
# presumably `import pmdarima as pm`; confirm it is imported.
model = pm.auto_arima(
    df.values,
    start_p=1,
    start_q=1,
    test='adf',  # unit-root test; NOTE(review): likely unused while d is fixed below — confirm
    max_p=4,  # maximum p
    max_q=4,  # maximum q
    m=1,  # frequency of series
    d=1,  # differencing order is fixed at 1 here, not selected by the model
    seasonal=False,  # No Seasonality
    start_P=1,
    D=0,
def fit(country='', query_raw=None, query_der=None,
        do_1st_order=True, do_2nd_order=False, show=True):
    """Fit the 'Confirmed All' series of one country and plot the result.

    The series is loaded through ``xp.DataLoader`` for ``country``, both
    arrays are reversed with ``np.flip``, and ``fitfunc1`` and/or
    ``fitfunc2`` are fitted with ``optimize.curve_fit``.  Each fit is drawn
    with a +/- ``nstd`` sigma band; the figure is saved under
    ``images/doubling_time`` and optionally shown.

    Args:
        country: country name handed to the loader; with spaces replaced
            by underscores it also names the saved figure.
        query_raw: query object forwarded to ``xp.DataLoader``.
        query_der: optional object whose ``query`` attribute is an
            expression applied to the loaded frame.  ``None`` or an empty
            expression leaves the frame unfiltered.
        do_1st_order: run the two-parameter fit (``fitfunc1``).
        do_2nd_order: run the three-parameter fit (``fitfunc2``).
        show: display the figure before closing it.

    Returns:
        ``(r, dr)``: the first-order doubling time ``ln(2) / b`` and its
        propagated uncertainty.  Both are ``None`` when ``do_1st_order``
        is false (this used to raise ``UnboundLocalError``).
    """
    odir = 'images/doubling_time'
    y_data_label = 'Confirmed All'

    _countries = [country]
    print(_countries)

    dloader = xp.DataLoader(query=query_raw, countries=_countries)
    df = dloader.leaders
    # Apply the derived cut only when one was supplied: the default is
    # None, and an empty expression is not a valid DataFrame.query input.
    if query_der is not None and query_der.query:
        df = df.query(query_der.query)
    print("Fit:", df.head())

    # Both arrays are reversed together, so pairs stay aligned.
    x_data = np.flip(df['Days'].to_numpy())
    y_data = np.flip(df[y_data_label].to_numpy())
    for x_val, y_val in zip(x_data, y_data):
        print(x_val, y_val)

    nstd = 1  # half-width of the plotted band, in standard deviations

    # Defined up front so the return value exists even when the
    # first-order fit is skipped.
    r = dr = None

    #plot
    fig, ax = plt.subplots(figsize=(15, 7))

    if do_1st_order:
        params_opt1, params_cov1 = optimize.curve_fit(
            f=fitfunc1, xdata=x_data, ydata=y_data, p0=[1, 0.1])
        b1 = params_opt1[1]
        params_err1 = perrors(params_cov1)
        da1, db1 = errors(params_cov1)

        params_opt_up1 = params_opt1 + nstd * params_err1
        params_opt_down1 = params_opt1 - nstd * params_err1

        fit_nom1 = fitfunc1(x_data, *params_opt1)
        fit_up1 = fitfunc1(x_data, *params_opt_up1)
        fit_down1 = fitfunc1(x_data, *params_opt_down1)

        #doubling times
        r = np.log(2) / b1
        # relative error of r equals the relative error of b1
        dr = r * db1 / b1

        print("1st order")
        print("Opt params", params_opt1)
        print("Opt param errors", da1, db1)
        print("Opt params up", params_opt_up1)
        print("Opt params down", params_opt_down1)
        print("Doubling time with 1st order")
        print("%.2f +/- %.2f" % (r, dr))

        # Raw string: "\s" is not a valid escape in a normal literal.
        ax.fill_between(x=x_data, y1=np.array(fit_up1), y2=np.array(fit_down1),
                        alpha=.25, color='red',
                        label=r"%d-$\sigma$ interval" % nstd)
        ax.plot(x_data, fit_nom1,
                label='fit: a=%5.3f, b=%5.3f' % tuple(params_opt1),
                color='red')

    if do_2nd_order:
        params_opt2, params_cov2 = optimize.curve_fit(
            f=fitfunc2, xdata=x_data, ydata=y_data,
            p0=[1, 0.1, 0.001], maxfev=1000)
        a2, b2 = params_opt2[0], params_opt2[1]
        params_err2 = perrors(params_cov2)
        da2, db2, dc2 = errors(params_cov2)

        params_opt_up2 = params_opt2 + nstd * params_err2
        params_opt_down2 = params_opt2 - nstd * params_err2

        fit_nom2 = fitfunc2(x_data, *params_opt2)
        fit_up2 = fitfunc2(x_data, *params_opt_up2)
        fit_down2 = fitfunc2(x_data, *params_opt_down2)

        # Two roots of b2*t**2 + a2*t - ln(2) = 0.
        # NOTE(review): whether a2 and b2 are the right coefficients
        # depends on how fitfunc2 orders its parameters — confirm.
        discriminant = np.sqrt(a2**2 + 4 * b2 * np.log(2))
        r1 = (-a2 + discriminant) / (2 * b2)
        r2 = (-a2 - discriminant) / (2 * b2)

        print("2nd order")
        print("Opt params", params_opt2)
        print("Opt param errors", da2, db2, dc2)
        print("Opt params up", params_opt_up2)
        print("Opt params down", params_opt_down2)
        print("Doubling times with 2nd order")
        print(r1)
        print(r2)

        ax.fill_between(x=x_data, y1=np.array(fit_up2), y2=np.array(fit_down2),
                        alpha=.25, color='blue',
                        label=r"%d-$\sigma$ interval" % nstd)
        ax.plot(x_data, fit_nom2,
                label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(params_opt2),
                color='blue')

    ax.scatter(x_data, y_data, label="Data", color='black')
    ax.set_ylabel(y_data_label)
    ax.set_xlabel('Day')
    # Single legend call; an earlier loc='best' legend was immediately
    # replaced by this one.
    ax.legend(loc='upper left', fontsize=18)
    plt.tight_layout()

    xt.save(fig, xt.name(odir, country.replace(" ", "_")))
    if show:
        plt.show()
    plt.close('all')

    return r, dr
# Directory that receives the prediction plots.
odir = 'images/predictions'

# Selections (cuts) available to the loader; every one requires Confirmed > 0.
q1 = xq.Query(
    "Subperiod",
    "Confirmed > 0 and Date > '2020-02-15' and Date < '2021-01-01'",
)
qMort = xq.Query(
    "Subperiod",
    "Confirmed > 0 and Date > '2020-02-20' and Date < '2021-01-01'",
)
qAll = xq.Query("All Period", "Confirmed > 0 and Date < '2021-01-01'")
qGerm = xq.Query("Germany", "Confirmed > 0 and Country == 'Germany'")

# Active configuration: full period with the global logistic parameters.
# Alternatives kept for quick switching:
#   tag = ""; query = qMort; logparams = logparamsGlobal
#   tag = "Germany"; query = qGerm; logparams = logparamsGerm
tag = ""
query = qAll
logparams = logparamsGlobal

# Loader built from the active selection and logistic parameters.
dloader = xp.DataLoader(
    query=query,
    logistic_params=logparams,
    prophet=True,
)

# Forecast horizon handed to op.Param as `periods`.
periods = 21

# Confirmed cases with logistic growth; floor and cap come from the
# 'Confirmed' entry of the active logistic parameters.
if confirmed_logistic:
    param = op.Param(
        growth='logistic',
        floor=logparams['Confirmed'].floor,
        cap=logparams['Confirmed'].cap,
        smode="additive",
        periods=periods,
        cpps=0.05,
        iw=0.95,
    )
    train = pt.ProphetTrainer("Confirmed", param, dloader)
plot_acf(df, ax=axes[0, 1]) # 1st Differencing axes[1, 0].plot(df.diff()) axes[1, 0].set_title('1st Order Differencing') plot_acf(df.diff().dropna(), ax=axes[1, 1]) # 2nd Differencing axes[2, 0].plot(df.diff().diff()) axes[2, 0].set_title('2nd Order Differencing') plot_acf(df.diff().diff().dropna(), ax=axes[2, 1]) return fig, axes odir = 'images/predictions' ld = xp.DataLoader(arima=True) data = ld.train_ds_confirmed print("arima data:\n", data) ## test statistic stationary_test_stat(data['Confirmed']) ## data data_train = data.iloc[ : int(data.shape[0]*0.90) ] data_valid = data.iloc[ int(data.shape[0]*0.90) : ] print('Training %d, Validation %d' % (len(data_train), len(data_valid))) data_train_log = np.log(data_train["Confirmed"]) data_pred = data_valid.copy()