def VAR_IRF(df, n=10, future=20):
    """Fit a VAR model interactively and plot its impulse-response functions.

    Parameters
    ----------
    df : pandas.DataFrame
        Endogenous time series, one column per variable.
    n : int
        Maximum lag order shown in the order-selection table.
    future : int
        Horizon (number of periods) for the impulse-response analysis.
    """
    m = VAR(df)
    # Display the information-criteria table so the user can choose an order.
    m.select_order(n)
    # Bug fix: the chosen order used to overwrite the parameter `n`; use a
    # separate local so the function's inputs stay intact.
    order = int(input('order:'))
    model = m.fit(maxlags=order)
    print('\n\n', model.summary())
    # Bug fix: the `future` horizon parameter was silently ignored — the
    # IRF horizon was hard-coded to 10.
    model.irf(future).plot()
def decide_degree_best(self):
    """Choose the best VAR lag order for self.X (up to 15 lags, by AIC) and print the fit.

    Prints the information-criteria table for orders 1..15, then fits the
    model with the AIC-selected order and prints its summary.
    """
    # make a VAR model
    model = VAR(self.X)
    # Bug fix: select_order(15) was called twice and the first result was
    # discarded; compute the lag-order table once and print it.
    print(model.select_order(15))
    results = model.fit(maxlags=15, ic='aic')
    print(results.summary())
def vector_autoregression_example(): mdata = sm.datasets.macrodata.load_pandas().data # Prepare the dates index. dates = mdata[['year', 'quarter']].astype(int).astype(str) quarterly = dates['year'] + 'Q' + dates['quarter'] quarterly = dates_from_str(quarterly) mdata = mdata[['realgdp', 'realcons', 'realinv']] mdata.index = pd.DatetimeIndex(quarterly) data = np.log(mdata).diff().dropna() # Make a VAR model. model = VAR(data) results = model.fit(2) print(results.summary()) # Plots input time series. results.plot() # Plots time series autocorrelation function. results.plot_acorr() # Lag order selection. model.select_order(15) results = model.fit(maxlags=15, ic='aic') # Forecast. lag_order = results.k_ar results.forecast(data.values[-lag_order:], 5) results.plot_forecast(10) # Impulse response analysis. # Impulse responses are the estimated responses to a unit impulse in one of the variables. # They are computed in practice using the MA(infinity) representation of the VAR(p) process. irf = results.irf(10) irf.plot(orth=False) irf.plot(impulse='realgdp') irf.plot_cum_effects(orth=False) # Forecast error variance decomposition (FEVD). fevd = results.fevd(5) print(fevd.summary()) results.fevd(20).plot() # Statistical tests. # Granger causality. results.test_causality('realgdp', ['realinv', 'realcons'], kind='f') # Normality. results.test_normality() # Whiteness of residuals. results.test_whiteness()
def VARprocess(df, log=False): # Log transformation, relative difference and drop NULL values if (log): df = np.log(df + 0.1).diff().dropna() # Vector Autoregression Process generation maxAttr = len(df.columns) # Find the right lag order orderFound = False while orderFound != True: try: model = VAR(df.ix[:, 0:maxAttr]) order = model.select_order() orderFound = True except: exc_type, exc_obj, exc_tb = sys.exc_info() if str(exc_obj) == "data already contains a constant.": maxAttr = maxAttr - 1 else: maxAttr = int(str(exc_obj).split("-th")[0]) - 1 print "Exception, reducing to n_attributes ", maxAttr orderFound = False n_lags = max(order.iteritems(), key=operator.itemgetter(1))[1] method = max(order.iteritems(), key=operator.itemgetter(1))[0] print "n_lags ", n_lags print "method ", method results = model.fit(maxlags=n_lags, ic=method) return results
def VARprocess(df, log=False):
    """Fit a VAR model to df, shrinking the column set until statsmodels accepts it.

    (Python 2 code.) Returns the fitted VARResults object.
    NOTE(review): if maxAttr ever reaches 0 while VAR keeps raising, this
    loop never terminates — there is no lower bound on maxAttr.
    """
    # Log transformation, relative difference and drop NULL values
    if (log):
        df = np.log(df+0.1).diff().dropna()
    # Vector Autoregression Process generation
    maxAttr = len(df.columns)
    # Find the right lag order
    orderFound = False
    while orderFound!=True:
        try:
            # NOTE(review): .ix is deprecated; .iloc would be the modern equivalent.
            model = VAR(df.ix[:,0:maxAttr])
            order = model.select_order()
            orderFound = True
        except:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            # statsmodels signals a redundant column either with a fixed message
            # or a "<k>-th ..." message naming the offending column.
            if str(exc_obj)=="data already contains a constant.":
                maxAttr = maxAttr - 1
            else:
                maxAttr = int(str(exc_obj).split("-th")[0])-1
            print "Exception, reducing to n_attributes ",maxAttr
            orderFound = False
    # select_order() here returns a dict {criterion: order}; take the entry
    # with the largest selected order.
    n_lags = max(order.iteritems(), key=operator.itemgetter(1))[1]
    method = max(order.iteritems(), key=operator.itemgetter(1))[0]
    print "n_lags ",n_lags
    print "method ",method
    results = model.fit(maxlags=n_lags, ic=method)
    return results
def compute_pair_metrics(security, candidates):
    """Score candidate pairs against one security.

    Normalizes both sides to 1.0 at the first observation, then computes per
    candidate: spread drift (slope of an OLS fit on a time trend), spread
    volatility, price and return correlations, the AIC-selected VAR lag
    order, Johansen trace statistics, and Engle-Granger tests in both
    directions. Returns one DataFrame indexed by candidate ticker.
    """
    # Rebase to 1.0 at the first observation.
    security = security.div(security.iloc[0])
    ticker = security.name
    candidates = candidates.div(candidates.iloc[0])
    spreads = candidates.sub(security, axis=0)
    n, m = spreads.shape
    # Design matrix [1, t] for the trend regression; row 1 of the solution is the slope.
    design = np.ones(shape=(n, 2))
    design[:, 1] = np.arange(1, n + 1)
    drift = (np.linalg.inv(design.T @ design) @ design.T @ spreads).iloc[1].to_frame('drift')
    vol = spreads.std().to_frame('vol')
    corr_ret = candidates.pct_change().corrwith(security.pct_change()).to_frame('corr_ret')
    corr = candidates.corrwith(security).to_frame('corr')
    metrics = drift.join(vol).join(corr).join(corr_ret).assign(n=n)
    rows = []
    for candidate, prices in candidates.items():
        pair = pd.DataFrame({'s1': security, 's2': prices})
        # select VAR order by AIC
        lag_selection = VAR(pair.values).select_order()
        k_ar_diff = lag_selection.selected_orders['aic']
        # Johansen test with constant term and the estimated lag order.
        johansen = coint_johansen(pair, det_order=0, k_ar_diff=k_ar_diff)
        # Engle-Granger tests, both orderings.
        t1, p1 = coint(security, prices, trend='c')[:2]
        t2, p2 = coint(prices, security, trend='c')[:2]
        rows.append([ticker, candidate, t1, p1, t2, p2, k_ar_diff, *johansen.lr1])
    test_cols = [
        's1', 's2', 't1', 'p1', 't2', 'p2', 'k_ar_diff', 'trace0', 'trace1'
    ]
    tests = pd.DataFrame(rows, columns=test_cols).set_index('s2')
    return metrics.join(tests)
def deocmpose(self):
    """Plot Raotbl6 series, fit a VAR(1), forecast 24 quarters, and run auto_arima.

    NOTE(review): the name is misspelled ("decompose") but is kept since
    callers may depend on it. Side effects only: reads 'Raotbl6.csv' from the
    working directory, prints summaries, and shows matplotlib figures.
    """
    dataset = pd.read_csv('Raotbl6.csv', index_col=0)
    num_vars = dataset.shape[1]
    # One subplot per variable: raw series.
    fig, axes = pyplot.subplots(num_vars, 1, figsize=(16, 12))
    for i in range(num_vars):
        col = dataset.columns[i]
        axes[i].plot(dataset[col])
        axes[i].set_xticks([], [])
        axes[i].set_title(col)
    # Apply VAR model to data set, summarize
    model = VAR(dataset)
    var_selected = model.select_order(maxlags=10)
    print(var_selected.summary())
    # NOTE(review): the comment below says "highest AIC" but the order is
    # hard-coded to 1 regardless of the selection printed above — confirm intent.
    # Select model with highest AIC value
    model_fitted = model.fit(1)
    print(model_fitted.summary())
    # Seed the forecast with the last k_ar observations.
    lag_order = model_fitted.k_ar
    forecast_input = dataset.values[-lag_order:]
    quarters_to_predict = 24
    predicted_values = model_fitted.forecast(forecast_input, quarters_to_predict)
    # Build quarterly date labels continuing from the last observed date.
    dataset_dates = [
        datetime.datetime.strptime(date, '%Y-%m-%d') for date in dataset.index
    ]
    last_date = dataset_dates[-1]
    predicted_dates = []
    for _ in range(quarters_to_predict):
        next_date = last_date + dateutil.relativedelta.relativedelta(
            months=3)
        predicted_dates.append(next_date)
        last_date = next_date
    predicted_index = [
        date.strftime('%Y-%m-%d') for date in predicted_dates
    ]
    # Second figure: history (blue) plus forecast (green) per variable.
    fig, axes = pyplot.subplots(num_vars, 1, figsize=(16, 12))
    for i in range(num_vars):
        col = dataset.columns[i]
        axes[i].plot(dataset[col], color='blue')
        axes[i].plot(predicted_index, predicted_values[:, i], color='green')
        axes[i].set_xticks([], [])
        axes[i].set_title(col)
    pyplot.show()
    # Univariate seasonal ARIMA on one column for comparison.
    fit = pm.auto_arima(dataset['gdfce'], seasonal=True, stepwise=True,
                        error_action='ignore', m=12, max_order=6)
    print(fit.summary())
def causality_VAR(post_ts, max_order):
    # Fit a VAR on post_ts with the AIC-selected lag order (up to max_order).
    # (Python 2; uses the old select_order API that returns a {criterion: lag}
    # dict.) Returns (fitted VARResults, the lag-order dict).
    model = VAR(post_ts)
    best_lag = model.select_order(max_order, verbose= False)
    print 'best lag: ', best_lag
    result = model.fit(best_lag['aic'])
    return result, best_lag
def forecast_DNS_VARm(ts, pred):
    """Forecast `pred` steps ahead with a VAR whose order is chosen by BIC (max 3 lags).

    A BIC choice of 0 is bumped to 1 so the model does not collapse to a
    random walk. Returns the forecast array from VARResults.forecast.
    """
    var_model = VAR(ts)
    selection = var_model.select_order(maxlags=3)
    # Best order per BIC, constrained to be at least 1.
    order = max(selection.selected_orders["bic"], 1)
    fitted = var_model.fit(order)
    return fitted.forecast(ts.values[-order:], pred)
def var(self, df, host):
    """Difference df to stationarity, fit a VAR, forecast a hold-out window, and plot.

    Splits the differenced data into train/test (test size = len/10 + 2),
    selects the lag order by AIC, forecasts the test window, un-differences
    the forecast via Helper.reverse_diff, then prints RMSE/means and plots
    actual vs. forecast per column.

    Parameters
    ----------
    df : pandas.DataFrame of raw (undifferenced) series.
    host : unused here; kept for interface compatibility with callers.
    """
    df_diffed, no_diffs = Helper.diff_test(df)
    print(df_diffed)
    # Bug fix: DataFrame.replace is NOT in-place — the original discarded the
    # result, so +/-inf values survived into the model. Assign it back.
    df_diffed = df_diffed.replace([np.inf, -np.inf], np.nan)
    cols = df_diffed.columns
    df_diffed = df_diffed.dropna()
    print("Length : " + str(len(df_diffed)))
    # Hold out ~10% (plus 2) of the rows as a test window.
    nobs = int(len(df_diffed) / 10) + 2
    train = df_diffed[:-nobs]
    test = df_diffed[-nobs:]
    model = VAR(train)
    maxlags = int(nobs / 2) + 1
    aic = model.select_order(maxlags).selected_orders['aic']
    results = model.fit(aic)
    print(results.summary())
    # Seed the forecast with the last `maxlags` training rows.
    lagged_values = train.values[-maxlags:]
    forecast = results.forecast(y=lagged_values, steps=nobs)
    idx = pd.date_range(test.first_valid_index(), periods=nobs)
    df_forecast = pd.DataFrame(data=forecast, index=idx, columns=cols)
    # Invert the differencing to get forecasts on the original scale.
    df_fixed = Helper.reverse_diff(df_forecast, df, nobs, no_diffs)
    test_range = df[-nobs:]
    print("-- TEST Result -- \n")
    print(test_range)
    print("-- TEST Result END -- \n")
    print("-- Forecast Result -- \n")
    print(df_fixed)
    print("-- Forecast Result END -- \n")
    for col in df.columns:
        print("-- RMSE --")
        print(rmse(test_range[col], df_fixed[col + '_forecast']))
        print("-- Mean --")
        print(test_range[col].mean())
        df[col].plot(legend=True)
        df_fixed[col + '_forecast'].plot(legend=True)
        plt.show()
def get_optimal_lag_exper(p_src_index, src_neighbor_indices,
                          normalized_cells_response_curve):
    """For each neighbor of p_src_index, pick the best VAR lag for the (src, dst) pair.

    For every destination index, fits a bivariate VAR on the two response
    curves, takes the minimum order over the AIC/BIC/FPE/HQIC selections,
    refits with that order, and requires white (uncorrelated) residuals.

    Returns
    -------
    dict mapping p_dst_index -> chosen lag order.

    Raises
    ------
    ValueError if the whiteness test p-value is NaN or below 0.05.
    """
    from statsmodels.tsa.api import VAR
    optimal_lag_vector = dict()
    for p_dst_index in src_neighbor_indices:
        src_dst_data = None
        try:
            # Rows = time, columns = (src, dst) after the transpose.
            src_dst_data = normalized_cells_response_curve[
                [p_src_index, p_dst_index], :]
            src_dst_data = np.transpose(src_dst_data)
            model = VAR(src_dst_data)
            maxlags = None
            lag_order_results = model.select_order(maxlags=maxlags)
            lags = [
                lag_order_results.aic, lag_order_results.bic,
                lag_order_results.fpe, lag_order_results.hqic
            ]
            min_i = np.argmin(lags)
            model = model.fit(maxlags=lags[min_i], ic=None)
            p_value_whiteness = model.test_whiteness(nlags=lags[min_i]).pvalue
            # Bug fix: `p_value_whiteness == float('nan')` is ALWAYS False
            # (NaN never compares equal to itself), so NaN p-values slipped
            # through. np.isnan performs the intended check.
            if np.isnan(p_value_whiteness) or p_value_whiteness < 0.05:
                raise ValueError('found autocorrelation in residuals.')
            optimal_lag_vector[p_dst_index] = lags[min_i]
        except Exception:
            # Log which pair failed, dump the data for debugging, and re-raise.
            print('src index: ' + str(p_src_index) + ' dst index: ' +
                  str(p_dst_index))
            if src_dst_data is not None:
                print(src_dst_data)
            raise
    return optimal_lag_vector
def get_optimal_lag(p_src_index, neighbor_indices, normalized_cells_response_curve):
    """Pick the best VAR lag between p_src_index and its first neighbor.

    NOTE(review): the unconditional `break` at the bottom of the loop means
    only the FIRST destination in src_neighbor_indices is ever processed —
    confirm this is intentional. `disjoint_neighbours` is computed but never
    used. Returns a vector (len = number of points) of selected lags, zero
    everywhere except the processed destination.
    Raises ValueError if the residual whiteness test fails (p < 0.05).
    """
    #get the src neighbors
    number_of_points = len(neighbor_indices)
    src_neighbor_indices = neighbor_indices[p_src_index]
    optimal_lag_vector = np.zeros((number_of_points))
    for p_dst_index in src_neighbor_indices:
        #find the common neighbours
        dst_neighbor_indices = neighbor_indices[p_dst_index]
        disjoint_neighbours = get_disjoint_neighbours(p_src_index, p_dst_index,
                                                      neighbor_indices)
        # Rows = time, columns = (src, dst) after the transpose.
        src_dst_data = normalized_cells_response_curve[
            [p_src_index, p_dst_index], :]
        src_dst_data = np.transpose(src_dst_data)
        model = VAR(src_dst_data)
        maxlags = None
        lag_order_results = model.select_order(maxlags=maxlags)
        # Minimum selected order across the four criteria.
        lags = [
            lag_order_results.aic, lag_order_results.bic,
            lag_order_results.fpe, lag_order_results.hqic
        ]
        min_i = np.argmin(lags)
        model = model.fit(maxlags=lags[min_i], ic=None)
        if model.test_whiteness(nlags=lags[min_i]).pvalue < 0.05:
            raise ValueError('found autocorrelation in residuals.')
        # Commented-out experiment (searching for a whiteness-passing order),
        # preserved verbatim from the original:
        #i = models[min_i].k_ar + 1
        #while i < 12 * (models[min_i].nobs/100.)**(1./4):
        #    result_auto_co = model._estimate_var(i, trend='c')
        #    if result_auto_co.test_whiteness(nlags=i).pvalue > 0.05:
        #        break
        #    i += 1
        # print 'error order:' + str(models[min_i].k_ar)
        # print 'found correlation ' + str(i)
        optimal_lag_vector[p_dst_index] = lags[min_i]
        break
    return optimal_lag_vector
def get_optimal_lag(p_src_index, neighbor_indices, normalized_cells_response_curve):
    """For every neighbor of p_src_index, select the best VAR lag for the pair.

    Fits a bivariate VAR per (src, dst) pair, takes the minimum order over the
    AIC/BIC/FPE/HQIC selections, refits with that order, and verifies residual
    whiteness. Returns an array (len = number of points) with the chosen lag
    at each destination index, zero elsewhere.

    Raises ValueError when the Portmanteau whiteness test rejects (p < 0.05).
    """
    #get the src neighbors
    number_of_points = len(neighbor_indices)
    src_neighbor_indices = neighbor_indices[p_src_index]
    optimal_lag_vector = np.zeros((number_of_points))
    for p_dst_index in src_neighbor_indices:
        # Pair the two response curves: rows = time, columns = (src, dst).
        pair_data = np.transpose(
            normalized_cells_response_curve[[p_src_index, p_dst_index], :])
        var_model = VAR(pair_data)
        selection = var_model.select_order(maxlags=None)
        candidate_lags = [
            selection.aic, selection.bic, selection.fpe, selection.hqic
        ]
        best = np.argmin(candidate_lags)
        chosen_lag = candidate_lags[best]
        var_result = var_model.fit(maxlags=chosen_lag, ic=None)
        portmanteau_test = var_result.test_whiteness(chosen_lag).pvalue
        if portmanteau_test < 0.05:
            raise ValueError('found autocorrelation in residuals.' +
                             str(portmanteau_test))
        # (An experimental loop that incremented the order until the whiteness
        # test passed used to live here as dead, quoted-out code; removed.)
        optimal_lag_vector[p_dst_index] = chosen_lag
    return optimal_lag_vector
def select_order_of_VAR_model(self):
    """Print VAR information criteria for orders 1-9, then the select_order summary.

    First fits the model at each order and prints AIC/BIC/FPE/HQIC manually,
    then shows statsmodels' own selection table up to self.max_lags.
    Side effects only (prints); returns nothing.
    """
    model = VAR(self.df)
    print("\n*********checking different orders of lag************\n")
    # Idiom: range(1, 10) instead of spelling out the literal list [1..9].
    for i in range(1, 10):
        result = model.fit(i)
        print('Lag Order =', i)
        print('AIC : ', result.aic)
        print('BIC : ', result.bic)
        print('FPE : ', result.fpe)
        print('HQIC: ', result.hqic, '\n')
    # Alternative: let statsmodels tabulate the criteria in one call.
    print("\n*********select_order method used: ************\n")
    x = model.select_order(maxlags=self.max_lags)
    print(x.summary())
def VARprocess(df, log=False): """ Description: This function applies Vector Auto Regression Input: dataframe Output: VARresults object """ # Log transformation, relative difference and drop NULL values if (log): df = np.log(df + 0.1).diff().dropna() # Vector Autoregression Process generation maxAttr = len(df.columns) # Find the right lag order orderFound = False print "7.1.0 ----- Finding an order for the VAR" maxIter = 0 while orderFound != True and maxIter < 15: maxIter = maxIter + 1 try: model = VAR(df) order = model.select_order() orderFound = True print " !!! loop stuck" except: exc_type, exc_obj, exc_tb = sys.exc_info() #if str(exc_obj)=="data already contains a constant.": maxAttr = maxAttr - 1 #else: #maxAttr = int(str(exc_obj).split("-th")[0])-1 #print "Exception, reducing to n_attributes ",maxAttr orderFound = False print "7.1.1 ----- Model fitting" if orderFound: n_lags = max(order.iteritems(), key=operator.itemgetter(1))[1] method = max(order.iteritems(), key=operator.itemgetter(1))[0] results = model.fit(maxlags=n_lags, ic=method) else: results = model.fit() return results
def temporal_detect_individual(target_idx, dta, maxlag):
    """Select a VAR lag order between the target series and every other series.

    Parameters
    ----------
    dta : sequence of rows, one row per time step, one column per series.
    target_idx : column index of the target series.
    maxlag : maximum lag offered to select_order.

    Returns
    -------
    list with one select_order result per non-target series, in column order.
    """
    num_ts = len(dta[0])
    len_ts = len(dta)
    # Column extraction: the target series across all time steps.
    tmp_target = [dta[j][target_idx] for j in range(len_ts)]
    res_lag = []
    for i in range(num_ts):
        if i != target_idx:
            tmp_ts = [dta[j][i] for j in range(len_ts)]
            # Bug fix: on Python 3, zip() returns a lazy iterator that VAR
            # cannot consume as a data matrix; materialize the (target, other)
            # pairs as a list (also valid, and a no-op shape-wise, on Python 2).
            tmp_x = list(zip(tmp_target, tmp_ts))
            model = VAR(tmp_x)
            best_lag = model.select_order(maxlag, verbose=False)
            res_lag.append(best_lag)
    return res_lag
def multi_forecast(
        sd_log, variables, n_period):  # sd_log object, variables list of feature column names
    """
    :param sd_log: sd_log object
    :param variables: features you would like to use for the multivariate forecast
    :param n_period: steps you would like to predict
    :return: DataFrame of forecast values
    """
    max_lag = 5
    # Use the raw data when already stationary, otherwise the differenced copy.
    if sd_log.isStationary:
        data = sd_log.data[variables]
    else:
        data = sd_log.data_diff[0][variables]
        ndiff = sd_log.data_diff[1]
    # Split into train (0.9) and test (0.1)
    #data_train = data[:int(0.9*(len(data)))]
    #data_test = data[int(0.9*(len(data))):]
    var_model = VAR(data)
    # Most conservative lag: the minimum over all information criteria.
    chosen_lag = min(var_model.select_order(maxlags=max_lag).selected_orders.values())
    fitted = var_model.fit(chosen_lag)
    print(fitted.summary())
    var_diagnostic(fitted)
    fitted.plot_forecast(n_period)
    k_ar = fitted.k_ar
    forecast_values = fitted.forecast(data.values[-k_ar:], n_period)
    forecast_df = pd.DataFrame(forecast_values, index=data.index[-n_period:])
    # TODO inverting forecast
    #inv_diff(sd_log.data[sd_log.finish_rate], data[sd_log.finish_rate], ndiff)
    plt.show()
    return forecast_df
import pandas as pd from statsmodels.tsa.api import VAR from statsmodels.tsa.vector_ar.vecm import select_order series = ( pd.read_csv("../dados/series_log.csv", parse_dates=["date"], index_col=["date"]) .loc[:, ["spread", "selic", "inad", "ibc"]] .dropna() ) var = VAR(endog=series) var_model = var.fit(maxlags=4, verbose=True) print(var_model.test_whiteness(nlags=12).summary()) print(var.select_order(12).summary()) print(" & ".join(series.columns)) for linha in series.columns: resultados = [] for coluna in series.columns: test = var_model.test_causality(caused=linha, causing=coluna, kind="wald") if coluna == linha: resultados.append(" - & - ") else: resultados.append(f"{test.test_statistic:.3f} & {test.pvalue:.3f}") print(linha + " & " + " & ".join(resultados)) total = [] for linha in series.columns: resultados_total = var_model.test_causality( causing=[serie for serie in series.columns if serie != linha],
# Script fragment: Granger-causality matrix, cointegration test, VAR order
# scan, fit, and Durbin-Watson residual check. Relies on names defined
# elsewhere in the file: dataFrame, df_differenced, grangers_causation_matrix,
# cointegration_test, durbin_watson, adjust.
print(grangers_causation_matrix(dataFrame, variables=dataFrame.columns))
cointegration_test(dataFrame)
# select the order of VAR model: print the criteria for each order 1..9.
model = VAR(df_differenced)
for i in range(1, 10):
    result = model.fit(i)
    print('Lag Order =', i)
    print('AIC : ', result.aic)
    print('BIC : ', result.bic)
    print('FPE : ', result.fpe)
    print('HQIC: ', result.hqic, '\n')
# statsmodels' own selection table up to 10 lags.
x = model.select_order(maxlags=10)
print(x.summary())
# Fit at the maximum considered order (10).
model_fitted = model.fit(10)
print(model_fitted.summary())
# use Durbin Watson Statistic to check for serial correlation in the residuals
# (values near 2 indicate no autocorrelation).
out = durbin_watson(model_fitted.resid)
for col, val in zip(dataFrame.columns, out):
    print(adjust(col), ':', round(val, 2))
# get the lag order actually used by the fitted model
lag_order = model_fitted.k_ar
print(lag_order)
pred_garch.residual_variance[-1:] # ### Multvariate Regression Model # In[59]: from statsmodels.tsa.api import VAR # In[60]: df_ret = df[['ret_spx', 'ret_dax', 'ret_ftse', 'ret_nikkei']][1:] # In[61]: model_var_ret = VAR(df_ret) model_var_ret.select_order(20) results_var_ret = model_var_ret.fit(ic='aic') # In[62]: results_var_ret.summary() # In[63]: lag_order_ret = results_var_ret.k_ar var_pred_ret = results_var_ret.forecast(df_ret.values[-lag_order_ret:], len(df_test[start_date:end_date])) df_ret_pred = pd.DataFrame(data=var_pred_ret, index=df_test[start_date:end_date].index, columns=df_test[start_date:end_date].columns[4:8])
def optimal_lag(data, maxlag):
    """Return the AIC-selected VAR lag order for `data`, searching up to `maxlag`."""
    return VAR(data).select_order(maxlag).aic
return model, loss.data, loss_rmse, training_hist def predict(model, inputs): outputs = model.forward(inputs) return outputs #QuantAR import statsmodels.formula.api as smf mod = smf.quantreg('y ~ x', data) res = mod.fit(q=.5) print(res.summary()) #VAR import statsmodels.api as sm from statsmodels.tsa.api import VAR, DynamicVAR model = VAR(data) results = model.fit(2) model.select_order(15) results = model.fit(maxlags=15, ic='aic') lag_order = results.k_ar results.forecast(data.values[-lag_order:], 5)
obs trainset.tail() # testset.head() #using trainset to modeling var from statsmodels.tsa.api import VAR help(VAR) modelvar = VAR(trainset) bestmodelaic =modelvar.fit(maxlags =15,ic = 'aic') bestmodelaic.summary() bestmodelbic = modelvar.fit(maxlags =15,ic = 'bic') bestmodelbic.summary() bestmodelhqic = modelvar.fit(maxlags =15,ic ='hqic') bestmodelhqic.summary() modelvar.select_order(15) #using CV to choose the best model def rolling_forecast(trainset,testset,lags): Pmse = [] forecastreturn = [] accuracys = [] ntest = len(testset) for i in range(0,ntest): if i == 0: X_in = trainset else: X_in = trainset.append(testset.iloc[:i,:]) X_out = testset.iloc[i,0]
columns=a.columns) # re-applying column names # plt.figure(figsize=(15, 5)) # plt.ylabel("Returns") # plt.plot(a_returns) # plt.show() # plt.figure(figsize=(15, 5)) # plt.ylabel("Log Value") # plt.plot(a_ts) # plt.show() from statsmodels.tsa.api import VAR model = VAR(a_diff[:'2016-01-01']) model.select_order( ) # uses information criteria to select the order of the model reg = model.fit(5) # number of AR terms to include sample = a_diff[:'2016-01-04'].values fcast = reg.forecast(y=sample, steps=10) # plt.plot(fcast[:,3]) reg.plot_forecast(20) def dediff(end, forecast): future = np.copy(forecast) for i in range(np.shape(forecast)[0]): if (i == 0): future[i] = end + forecast[0] else:
regression="c", autolag="AIC") if p_val < 0.05: unit_roots.append(fw_1w_prices) i = 1 for fw_1w_prices_1 in unit_roots: ## Set up nested for loop like this to avoid testing twice for cointegration on any two pairs for fw_1w_prices_2 in unit_roots[i:]: if not fw_1w_prices_1.equals(fw_1w_prices_2): grouped_fw_1w_prices = (pd.concat( (fw_1w_prices_1, fw_1w_prices_2), axis=1)) ## VAR model model = VAR(grouped_fw_1w_prices) ## Optimal VAR(p) lag structure p = find_max_lag_var(model.select_order(5).summary().data[1:]) ## Include p-1 lags in cointegration test for control for any existing autocorrelation in u_t (disturbance term) coint_result = coint_johansen(grouped_fw_1w_prices, 1, max(p - 1, 0)) ## Statistically significant proof of cointegration at 5% level if find_coint_relationship(trace_stats=coint_result.lr1, crit_vals=coint_result.cvm) == 1: coint_relationships.append(grouped_fw_1w_prices) i += 1 if coint_relationships: for relationship in coint_relationships: fw_1w_prices_1, fw_1w_prices_2 = relationship.iloc[:, 0], relationship.iloc[:, 1] N = len(fw_1w_prices_1.index)
# Script fragment: ETF/stock correlation screen, then per-candidate VAR order
# selection and cointegration tests. Relies on names, stocks, etfs,
# coint_johansen, and coint defined elsewhere in the file.
pd.Series(names).to_hdf('data.h5', 'tickers')
# Correlation of every stock with every ETF.
corr = pd.DataFrame(index=stocks.columns)
for etf, data in etfs.items():
    corr[etf] = stocks.corrwith(data)
#cmap = sns.diverging_palette( 220, 10, as_cmap = True)
#sns.clustermap( corr, cmap= cmap, center = 0)
#stocks.shape
security = etfs['AAXJ.US'].loc['2010':'2020']
candidates = stocks.loc['2010':'2020']
# Rebase both sides to 1.0 at the first observation.
security = security.div(security.iloc[0])
candidates = candidates.div(candidates.iloc[0])
spreads = candidates.sub(security, axis=0)
n, m = spreads.shape
# Design matrix [1, t] for a trend regression (built but not used below —
# presumably consumed by later code outside this chunk).
X = np.ones(shape=(n, 2))
X[:, 1] = np.arange(1, n + 1)
for candidate, prices in candidates.items():
    df = pd.DataFrame({'s1': security, 's2': prices})
    # AIC-selected VAR order feeds the Johansen lag parameter.
    var = VAR(df.values)
    lags = var.select_order()
    k_ar_diff = lags.selected_orders['aic']
    # NOTE(review): these three test results are discarded — confirm whether
    # collection was intended here (a sibling function stores them).
    coint_johansen(df, det_order=0, k_ar_diff=k_ar_diff)
    coint(security, prices, trend='c')[:2]
    coint(prices, security, trend='c')[:2]
def cross_validation_VAR(train):
    # Build a VAR on the training data and compute its lag-order selection
    # table (up to 24 lags).
    # NOTE(review): `x` is neither used nor returned, so this function
    # currently returns None — it looks truncated; confirm against the
    # original source.
    model_VAR = VAR(train)
    x = model_VAR.select_order(maxlags=24)
def create_var_model(training_set):
    """Fit a VAR on training_set using the AIC-selected lag order and return the results."""
    model = VAR(training_set)
    aic_lags = model.select_order().selected_orders['aic']
    return model.fit(aic_lags)
def var_forecast(coin, data_stats, train_data, actual_df, nobs, verbose=False):
    """
    This function performs the following:
    - forecast the time-series using VAR
    - durbin watson testing on the residual from the model
    - obtain normaltest, kurtosis and skewness of the residual from the model
    - compute the forecast accuracy

    The number of days forecast is the minimum value between the lag order and the nobs.

    Args:
        coin: The cryptocurrency the time-series belongs to
        data_stats: The data_stats dataframe for storing the durbin_watson, norm_stat,
            norm_p, kurtosis and skewness value (mutated in place)
        train_data: Train data containing the features for VAR forecast
        actual_df: The actual value to be compared against the forecasted results
        nobs: Number of observations to forecast
        verbose: To print the debugging statements

    Returns:
        fitted_df: Dataframe containing residual of the features
        data_stats: Dataframe containing durbin_watson, norm_stat, norm_p, kurtosis
            and skewness value
        accuracy_prod: measures of accuracy for the forecast
        pred_df: predicted results

    NOTE(review): when the fitted lag order is 0, `accuracy_prod` and `pred_df`
    are never assigned and the final return would raise UnboundLocalError —
    confirm callers guarantee k_ar > 0.
    """
    # standardizing features (VAR works better on comparable scales)
    scal = StandardScaler()
    df_scaled = pd.DataFrame(
        scal.fit_transform(train_data.values),
        columns=train_data.columns,
        index=train_data.index,
    )
    # Daily frequency; lag order chosen by AIC.
    mod = VAR(df_scaled, freq="D")
    selected_orders = mod.select_order().selected_orders
    max_lag = selected_orders["aic"]
    res = mod.fit(maxlags=max_lag, ic="aic")
    if verbose:
        print(coin, res.summary())
    fitted_df = res.resid.rename(columns={"Returns": "Returns residual"})[
        "Returns residual"
    ]
    # check for auto-correlation of the residual
    out = durbin_watson(res.resid)
    # collect the auto-correlation results to be loaded into atoti later on
    for col, val in zip(df_scaled.columns, out):
        # get the residual values and their distribution statistics
        metric = res.resid[col]
        stat, p = stats.normaltest(metric)
        kurtosis = stats.kurtosis(metric)
        skewness = stats.skew(metric)
        # Store the per-(coin, metric) statistics into the shared data_stats frame.
        data_stats.loc[
            (data_stats["coin_symbol"] == coin)
            & (data_stats["metric_name"] == col),
            ["durbin_watson", "norm_stat", "norm_p", "kurtosis", "skewness"],
        ] = [val, stat, p, kurtosis, skewness]
        if verbose:
            print(
                "+++++++++++++ data_stats",
                data_stats.loc[
                    (data_stats["coin_symbol"] == coin)
                    & (data_stats["metric_name"] == col)
                ],
            )
        autocorrelation(val)
    # Get the lag order
    lag_order = res.k_ar
    if lag_order > 0:
        # Forecasting: seed with the last lag_order standardized rows.
        input_data = df_scaled.values[-lag_order:]
        # take the minimal forecast between the lag order and the number of observations required
        forecast_steps = lag_order if lag_order < nobs else nobs
        pred = res.forecast(y=input_data, steps=forecast_steps)
        # Undo the standardization so predictions are on the original scale.
        pred_transform = scal.inverse_transform(pred)
        # we generate index from the last date for a period equivalent to the size of the forecast
        last_date = df_scaled.tail(1).index.get_level_values("date").to_pydatetime()[0]
        date_indices = pd.date_range(
            start=last_date, periods=(forecast_steps + 1), closed="right"
        )
        pred_df = pd.DataFrame(
            pred_transform,
            index=date_indices,
            columns=df_scaled.columns,
        ).reset_index()
        accuracy_prod = forecast_accuracy(
            pred_df["Returns"].values, actual_df["Returns"][:forecast_steps]
        )
        accuracy_prod = pd.DataFrame(accuracy_prod, index=[coin])
        accuracy_prod["lag_order"] = lag_order
        accuracy_prod["Observations"] = forecast_steps
        if verbose:
            for k, v in accuracy_prod.items():
                print(adjust(k), ": ", v)
        # Tag the forecast rows for downstream concatenation.
        pred_df["coin_symbol"] = coin
        pred_df["Subset"] = "Test"
        pred_df.rename(columns={"index": "date"}, inplace=True)
    # Tag the residual (training) rows similarly.
    fitted_df = fitted_df.reset_index()
    fitted_df["coin_symbol"] = coin
    fitted_df["Subset"] = "Train"
    fitted_df["date"] = fitted_df["date"].apply(lambda x: x.strftime("%Y-%m-%d"))
    return (
        fitted_df,
        data_stats.loc[~data_stats["norm_stat"].isnull()],
        accuracy_prod,
        pred_df,
    )
# VAR Vector variables = [ model_data['total_sales'], model_data['inflation'], model_data['interest_rate'], ] vector = np.column_stack(variables) # Fit a VAR regression. model = VAR(vector) results = model.fit(1) print(results.summary()) # Fit the best in-sample predicting VAR. model.select_order(6) results = model.fit(maxlags=6, ic='bic') print('Best lag order:', results.k_ar) # Create a forecast. (FIXME:) # forecast = results.forecast(data.values[-lag_order:], 5) # Show the data! forecasts = results.plot_forecast(9) #----------------------------------------------------------------------- # Drafts #----------------------------------------------------------------------- # Local imports. # from VAR import VAR, VAR_forecast
results.plot() # In[8]: # Autocorrelation results.plot_acorr() # ## Lag order selection # In[9]: # Adding maximum lag model.select_order(5) # Estimate model with maximum lag according to the AIC criterion results = model.fit(maxlags=5, ic='aic') results.summary() # ## Forecasting # In[10]: lag_order = results.k_ar # Specify the initial value of forecast results.forecast(data.values[-lag_order:], 5)