def lead_lag(dfx, maxlags=10, steps_to_forecast=1, preprocess=0, display=True):
    if preprocess == 0:
        df = dfx  # preprocess:0 --> do nothing
    elif preprocess == 1:
        # preprocess:1 --> returns (diff of the log of the values)
        df = diff_df(dfx)
    elif preprocess == 2:
        # preprocess:2 --> normalize using mean and standard deviation
        df = normalize_df(dfx, display=display)

    model = smt.VAR(df)
    model.select_order(maxlags=maxlags, verbose=display)
    results = model.fit(maxlags=maxlags, ic='aic')
    if display:
        print(results.summary())

    lag_order = results.k_ar
    dfz = df[-lag_order:]
    forecast = results.forecast(dfz.values, steps_to_forecast)
    if display:
        print("forecast (lag_order={0}):\n{1}\n".format(lag_order, forecast))

    # Keep only the rows for which fitted values exist.
    size = results.fittedvalues.shape[0]
    df = df[-size:].copy()
    #df[['Forecast_x', 'Forecast_y']] = results.fittedvalues
    return results, forecast, df.dropna(), lag_order
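# lead_lag relies on two preprocessing helpers, diff_df and normalize_df, that
# are not shown in this collection. A minimal sketch of what they plausibly do,
# inferred only from the comments above (both bodies are assumptions):
import numpy as np
import pandas as pd

def diff_df(dfx):
    # Assumed helper: log returns, i.e. the first difference of log values.
    return np.log(dfx).diff().dropna()

def normalize_df(dfx, display=True):
    # Assumed helper: z-score each column with its mean and standard deviation.
    normed = (dfx - dfx.mean()) / dfx.std()
    if display:
        print(normed.describe())
    return normed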
def VarSimul(data, H):
    model = sm.VAR(data)
    results = model.fit(H)

    # Reshape the stacked coefficient table (intercept row dropped) into
    # (n_lags, n_vars, n_vars) arrays of point estimates and standard errors.
    n_vars = len(data.columns)
    VARcoeff = np.array(results.params[1:]).reshape(-1, n_vars, n_vars)
    VARstd = np.array(results.stderr[1:]).reshape(-1, n_vars, n_vars)

    test = []
    for i in range(1000):
        # Draw each coefficient from a normal centered on its point estimate
        # with its estimated standard error.
        VarSim = np.random.normal(VARcoeff, VARstd)
        # ma_rep: from statsmodels.tsa.vector_ar.var_model import ma_rep
        marep = ma_rep(VarSim, 10)
        test.append(marep[1][0, 0])
    print(np.std(test))
    seaborn.distplot(test, norm_hist=True)
    plt.show()
def EstimateVAR(data, H):
    """
    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """
    model = sm.VAR(data)
    results = model.fit(maxlags=10, ic='aic')
    SIGMA = np.cov(results.resid.T)
    ma_rep = results.ma_rep(maxn=H)

    GVD = np.zeros_like(SIGMA)
    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            GVD[i, j] = 1 / np.sqrt(SIGMA[i, i]) * sum(
                [ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]
            ) / sum(
                [ma_rep[h, i, :].dot(SIGMA).dot(ma_rep[h, i, :]) for h in range(H)]
            )
        GVD[i] /= GVD[i].sum()  # row-normalize so each row sums to one

    return pd.DataFrame(GVD), SIGMA, ma_rep, results.resid.T
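# A minimal usage sketch for EstimateVAR on simulated data; the series names
# and the spillover structure are made up for illustration:
import numpy as np
import pandas as pd

# Simulate two return series where B loads on lagged A, so a spillover A -> B exists.
rng = np.random.default_rng(0)
a = rng.normal(0, 0.01, 500)
b = 0.4 * np.roll(a, 1) + rng.normal(0, 0.01, 500)
rets = pd.DataFrame({'A': a[1:], 'B': b[1:]})

gvd, sigma, ma, resid = EstimateVAR(rets, H=10)
print(gvd.round(3))  # row i decomposes asset i's forecast-error variance; rows sum to 1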
def SOI(days=50):
    H = 15
    data = pd.read_csv('data/TData9313_final6.csv', index_col=0)
    data = np.log(data).diff()[1:]
    data.index = pd.to_datetime(data.index)

    ddate = datetime.datetime(1994, 12, 27)
    soidf = pd.DataFrame()
    print(days)
    while ddate < datetime.datetime(2014, 1, 1):
        datestr2 = ddate.strftime('%Y%m%d')
        datestr1 = (ddate - datetime.timedelta(days)).strftime('%Y%m%d')
        print(datestr1, datestr2, "\t", end=' ')

        td = data[datestr1:datestr2].dropna(axis=1, how='any')
        model = sm.VAR(td)
        results = model.fit(maxlags=H, ic='aic')
        SIGMA = np.cov(results.resid.T)
        _ma_rep = results.ma_rep(maxn=H)

        GVD = np.empty_like(SIGMA)
        r, c = GVD.shape
        for i in range(r):
            for j in range(c):
                GVD[i, j] = 1 / np.sqrt(SIGMA[i, i]) * sum(
                    [_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]
                ) / sum(
                    [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)]
                )
            GVD[i] /= GVD[i].sum()

        # Total spillover: share of forecast-error variance coming from other assets.
        soi = (len(GVD) - np.trace(np.array(GVD))) / len(GVD)
        print(soi)

        datestr = td.index[-1].strftime("%Y%m%d")
        soidf.loc[datestr, 'SOI'] = soi
        soidf.loc[datestr, 'LL'] = int((len(results.params) - 1) / len(td.columns))
        ddate += datetime.timedelta(1)

    soidf.to_csv('SOI_%s_days.csv' % (days,), mode='w', header=True)
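# The soi line implements the Diebold-Yilmaz total spillover index: since each
# row of the normalized GVD matrix sums to one, the off-diagonal mass equals N
# minus the trace. In LaTeX notation:
#
#   \mathrm{SOI} \;=\; \frac{1}{N}\sum_{i \neq j} \mathrm{GVD}_{ij}
#               \;=\; \frac{N - \operatorname{tr}(\mathrm{GVD})}{N}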
def EstimateVAR(data, H, sparse_method=False, GVD_output=True):
    """
    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """
    model = sm.VAR(data)
    results = model.fit(maxlags=H, ic='aic')
    SIGMA = np.cov(results.resid.T)

    if sparse_method:
        # Translated from Danish: "THE METHOD USES RESULTS.COEFS RATHER THAN PARAMS"
        exit("THE METHOD USES RESULTS.COEFS RATHER THAN PARAMS")
        _nAssets = results.params.shape[1]
        _nLags = results.params.shape[0] // results.params.shape[1]
        # Keep only coefficients significant at the 5% level; zero the rest.
        custom_params = np.where(abs(results.params / results.stderr) > 1.96,
                                 results.params, 0)[1:].reshape((_nLags, _nAssets, _nAssets))
        _ma_rep = ma_rep(custom_params, maxn=H)
    else:
        _ma_rep = results.ma_rep(maxn=H)

    GVD = np.empty_like(SIGMA)
    if GVD_output:
        r, c = GVD.shape
        for i in range(r):
            for j in range(c):
                GVD[i, j] = 1 / np.sqrt(SIGMA[j, j]) * sum(
                    [_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]
                ) / sum(
                    [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)]
                )
            GVD[i] /= GVD[i].sum()

    return pd.DataFrame(GVD), SIGMA, _ma_rep, results.resid
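# The sparse_method branch zeroes every coefficient whose t-statistic is below
# 1.96 in absolute value before building the MA representation. A tiny
# self-contained illustration of that thresholding step (the numbers are made up):
import numpy as np

params = np.array([[0.50, 0.02], [-0.30, 0.10]])
stderr = np.array([[0.10, 0.05], [0.08, 0.20]])

# Zero out coefficients that are not significant at the 5% level (|t| <= 1.96).
sparse = np.where(np.abs(params / stderr) > 1.96, params, 0)
print(sparse)
# [[ 0.5  0. ]
#  [-0.3  0. ]]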
def VarSimul2(data, H):
    model = sm.VAR(data)
    results = model.fit(H)

    n_vars = len(data.columns)
    VARcoeff = np.array(results.params[1:]).reshape(-1, n_vars, n_vars)
    VARstd = np.array(results.stderr[1:]).reshape(-1, n_vars, n_vars)

    test = []
    for i in range(10000):
        # Perturb every coefficient with its estimated standard error, then
        # record the (9, 0) MA entry at every horizon of the draw.
        VarSim = np.random.normal(VARcoeff, VARstd)
        marep = ma_rep(VarSim, 15)
        tlist = [marep[j][9, 0] for j in range(marep.shape[0])]
        test.append(tlist)

    mean = [np.mean([j[i] for j in test]) for i in range(len(test[0]))]
    up = [np.percentile([j[i] for j in test], 97.5) for i in range(len(test[0]))]
    down = [np.percentile([j[i] for j in test], 2.5) for i in range(len(test[0]))]

    plt.plot(mean, color=seaborn.xkcd_rgb['cornflower blue'], alpha=1, linestyle='-')
    plt.plot(up, color=seaborn.xkcd_rgb['indian red'], alpha=0.5, linestyle='--')
    plt.plot(down, color=seaborn.xkcd_rgb['indian red'], alpha=0.5, linestyle='--')
    plt.fill_between(range(len(mean)), up, down, alpha=0.5)
    plt.xlim(1)
    plt.show()
def MetropolisHastingMCMC(data, H):
    model = sm.VAR(data)
    results = model.fit(H)

    n_vars = len(data.columns)
    VARcoeff = np.array(results.params[1:]).reshape(-1, n_vars, n_vars)
    VARstd = np.array(results.stderr[1:]).reshape(-1, n_vars, n_vars)

    # Target: the normal implied by one coefficient's point estimate and stderr.
    mean = VARcoeff[1][1, 1]
    std = VARstd[1][1, 1]

    N = 200000  # number of MCMC iterations
    s = 10      # thinning interval
    r = np.random.normal(mean, std)
    # The +1 inflates the initial density so the first proposals get accepted.
    p = scipy.stats.norm.pdf(r, mean, std) + 1

    samples = []
    for i in range(N):
        if i % 10000 == 0:
            print(i)
        rn = r + np.random.normal()  # random-walk proposal
        pn = scipy.stats.norm.pdf(rn, mean, std)
        if pn >= p:
            p = pn
            r = rn
        else:
            u = np.random.rand()
            if u < pn / p:  # accept with probability pn / p
                p = pn
                r = rn
        if i % s == 0:
            samples.append(r)

    samples = samples[int(N / 200):]  # discard burn-in
    normdata = np.random.normal(mean, std, len(samples))
    seaborn.distplot(samples, norm_hist=True, label='MCMC')
    seaborn.distplot(normdata, norm_hist=True, label='normdata')
    plt.vlines(mean, 0, 7.5)
    plt.legend()
    plt.show()
def main():
    # Step signal: level 50 for the first 100 days, then 40.
    signal = np.ones(150)
    signal[:100] *= 50
    signal[100:] *= 40
    dates = pd.date_range('1/1/2017', periods=150, freq='D')
    ts = pd.Series(signal, index=dates)
    df = pd.DataFrame(ts)
    data = df.to_numpy()  # .as_matrix() is deprecated

    model = tsa.VAR(data)
    results = model.fit()
    residuals = [np.matrix(e).T for e in list(results.resid)]
def EstimateVAR(df):
    # Note: the df argument is ignored; the data is reloaded from data.csv.
    df = pd.read_csv('data.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.dropna().ffill().set_index('Date')
    data = np.log(df).diff().dropna()
    df.to_csv('data.csv')

    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')
    fevd = results.fevd(10)
    print(fevd.summary())

    # Write one decomposition table per variable.
    for i, name in zip(fevd.decomp, fevd.names):
        print(name)
        tempdf = pd.DataFrame(i, columns=fevd.names)
        tempdf.to_csv('test/' + name + '.csv')
    exit()
def EstimateVARTest(data, H, sparse_method=False):
    """
    :param data: A numpy array of log returns
    :param H: integer, size of step ahead forecast
    :return: a dataframe of connectivity or concentration parameters
    """
    model = sm.VAR(data)
    results = model.fit(maxlags=H, ic='aic')
    SIGMA = np.cov(results.resid.T)

    if sparse_method:
        _nAssets = results.params.shape[1]
        _nLags = results.params.shape[0] // results.params.shape[1]
        custom_params = np.where(abs(results.params / results.stderr) > 1.96,
                                 results.params, 0)[1:].reshape((_nLags, _nAssets, _nAssets))
        _ma_rep = ma_rep(custom_params, maxn=H)
    else:
        _ma_rep = results.ma_rep(maxn=H)

    GVD = np.zeros_like(SIGMA)
    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            # Unscaled, un-normalized variant of the decomposition (for testing);
            # the 1/sqrt(SIGMA[i, i]) scaling and row normalization are left out.
            GVD[i, j] = sum(
                [_ma_rep[h, i].dot(SIGMA[j]) ** 2 for h in range(H)]
            ) / sum(
                [_ma_rep[h, i, :].dot(SIGMA).dot(_ma_rep[h, i, :]) for h in range(H)]
            )

    print(pd.DataFrame(SIGMA) * 10000000)
    print(pd.DataFrame(GVD) * 10000000)
    print(pd.DataFrame(SIGMA) - pd.DataFrame(GVD))
    return pd.DataFrame(GVD), SIGMA, _ma_rep, results.resid
def EstimateVAR_slow():
    df = pd.read_csv(
        'C:/Users/thoru_000/Dropbox/Pers/PyCharmProjects/Speciale/data.csv',
        sep=";")
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.dropna().ffill().set_index('Date')
    data = np.log(df).diff().dropna()

    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')
    coeff = results.coefs
    SIGMA = np.cov(results.resid.T)
    ma_rep = results.ma_rep(maxn=10)
    mse = results.mse(10)

    GVD = np.zeros_like(SIGMA)
    r, c = GVD.shape
    for i in range(r):
        for j in range(c):
            # Selection vectors pick out variable i (response) and j (shock).
            sel_j = np.zeros(r)
            sel_j[j] = 1
            sel_i = np.zeros(r)
            sel_i[i] = 1
            AuxSum = 0
            AuxSum_den = 0
            for h in range(10):
                AuxSum += (sel_i.T.dot(ma_rep[h]).dot(SIGMA).dot(sel_j)) ** 2
                AuxSum_den += (sel_i.T.dot(ma_rep[h]).dot(SIGMA)
                               .dot(ma_rep[h].T).dot(sel_i))
            GVD[i, j] = (AuxSum * (1 / SIGMA[i, i])) / AuxSum_den
        GVD[i] /= GVD[i].sum()

    pd.DataFrame(GVD).to_csv('GVD.csv', index=False, header=False)
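# The selector-vector loop spells out the generalized forecast-error variance
# decomposition. With MA matrices A_h, residual covariance \Sigma, and unit
# vectors e_i, Diebold and Yilmaz (2012) define, in LaTeX notation:
#
#   \theta_{ij}(H) = \frac{\sigma_{jj}^{-1}\sum_{h=0}^{H-1}
#                          \left(e_i^{\top} A_h \Sigma e_j\right)^{2}}
#                         {\sum_{h=0}^{H-1} e_i^{\top} A_h \Sigma A_h^{\top} e_i},
#   \qquad
#   \widetilde{\theta}_{ij}(H) = \frac{\theta_{ij}(H)}{\sum_{j=1}^{N}\theta_{ij}(H)}
#
# Note that the snippets in this collection scale variously by 1/\sigma_{ii}
# (here), 1/\sqrt{\sigma_{ii}}, and 1/\sqrt{\sigma_{jj}}; the published formula
# uses \sigma_{jj}^{-1}, the variance of the shocked variable, so the choice is
# worth double-checking against the source.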
def Main():
    H = 10
    data = pd.read_csv("Data/Index_data.csv", sep=';')
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

    model = sm.VAR(data)
    results = model.fit(maxlags=5, ic='aic')
    eps = results.resid
    SIGMA = np.cov(eps.T)
    ma_rep = results.ma_rep(maxn=H)

    nn = 0  # index of the shocked variable
    df = pd.DataFrame()
    print(pd.DataFrame(SIGMA))
    for j in range(H):
        # Scaled response of all variables at horizon j to a shock in variable nn.
        tt = SIGMA[nn, nn] ** (-0.5) * (ma_rep[j, nn]).dot(SIGMA)
        print(ma_rep[j, nn])
        for nr, i in enumerate(tt):
            df.loc[j, nr] = i

    for nr, j in enumerate(df.columns):
        plt.plot(df[j], label=data.columns[nr])
    plt.legend()
    plt.show()
data = sm.datasets.macrodata.load()
mdata = data.data
df = DataFrame.from_records(mdata)
quarter_end = frequencies.BQuarterEnd()
df.index = [quarter_end.rollforward(datetime(int(y), int(q) * 3, 1))
            for y, q in zip(df.pop('year'), df.pop('quarter'))]
logged = np.log(df.loc[:, ['m1', 'realgdp', 'cpi']])  # .ix is deprecated
logged.plot(subplots=True)

log_difference = logged.diff().dropna()
plot_acf_multiple(log_difference.values)

# Example TSA VAR
model = tsa.VAR(log_difference, freq='BQ')  # business-quarter-end, matching the index
print(model.select_order())
res = model.fit(2)
print(res.summary())
print(res.is_stable())

irf = res.irf(20)
irf.plot()

fevd = res.fevd()
fevd.plot()

#print(res.test_whiteness())
print(res.test_causality('m1', 'realgdp'))
#print(res.test_normality())  # exception
def VARmodel(dataset):
    """Fit a VAR with a BIC-selected lag order and a constant trend."""
    VARmodel = sm.VAR(dataset)
    VARmodel_fit = VARmodel.fit(ic='bic', trend='c')
    return VARmodel_fit
def VecAutoReg_fit(endog, maxlag=None, method='ols', trend='c'):
    """Thin wrapper around statsmodels' VAR fit."""
    return tsa.VAR(endog).fit(maxlags=maxlag, method=method, trend=trend)
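# A quick usage sketch for the two wrappers above, assuming the sm/tsa
# statsmodels aliases they use are in scope; the synthetic data is made up:
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
endog = pd.DataFrame(rng.normal(size=(300, 2)), columns=['y1', 'y2'])

fit_bic = VARmodel(endog)                 # BIC-selected order, constant trend
fit_p2 = VecAutoReg_fit(endog, maxlag=2)  # fits a VAR(2) (no information criterion)
print(fit_bic.k_ar, fit_p2.k_ar)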
#from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
from statsmodels.tsa.arima_model import ARIMA

np.random.seed(7)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings("ignore")

df = pd.read_hdf('DeepLearning.h5', 'Data_Gold')
for c in df.columns:
    df[c + '_ret'] = df[c].pct_change(2).fillna(0)
data = df.loc[:, ['Gold_ret', 'DJI_ret', 'Inflation_ret']]

var = sm.VAR(data)
# selecting VAR order
print(var.select_order(10))
results = var.fit(4)

x_pred = results.fittedvalues
df.loc[:, 'Pred_ret'] = x_pred.loc[:, 'Gold_ret']

# Rebuild the predicted price path from the predicted 2-period returns.
df['Pred'] = df.Gold
for j in range(len(df.index)):
    if j < 4:
        continue
    i = df.index[j]
    prev = df.index[j - 2]
    df.loc[i, 'Pred'] = df.loc[prev, 'Pred'] * (1 + df.loc[i, 'Pred_ret'])

df.to_hdf('DeepLearning.h5', 'Pred_VAR')
X = np.concatenate(
    [pc[1:, None], e[1:, None], cpi[1:, None], y[1:, None], m[1:, None]],
    axis=1)
var_names = ['pc', 'e', 'log cpi', 'log r gdp', 'log Dm2m']
shock_names = ['com. price', 'infl.-target', 'cost-push', 'demand', 'mon. pol']

dates = pd.date_range(start='1992-01', periods=len(X), freq='M')
endog = pd.DataFrame(X, columns=var_names)
endog = endog.set_index(dates)  # set_index returns a copy; assign it back
print('\nAre there any missings?', pd.isnull(endog).any().any(), '.')

# select lag order by criteria
# (newer statsmodels returns a LagOrderResults object; use lag_crit.aic there)
lag_crit = sm.VAR(endog, dates=dates, freq='m').select_order()
p = lag_crit['aic']
h = 48
sample_split = '2007'

recVar = sm.VAR(endog, dates=dates, freq='m').fit(maxlags=p)

# gather some infos about the specification for saving
sample = [str(recVar.dates.min()).split()[0], str(recVar.dates.max()).split()[0]]
sample = '_to_'.join(sample)
lags = ': '.join(['lags', str(p)])
info = ', '.join([sample, lags])
# Cleaned from an interactive session; the trial-and-error repeats and the
# `forcast` typo are removed.
import statsmodels.tsa.api as smt
from statsmodels.tsa.api import VAR

# Johansen test output: pick the critical-value column for the chosen alpha.
alpha = 0.05
cvt = out.cvt[:, int(np.round((0.1 - alpha) / 0.05))]
traces = out.lr1

mod = smt.VAR(pdf)
res = mod.fit(maxlags=5, ic='aic')
print(res.summary())

lag_order = res.k_ar
res.forecast(pdf.values[-lag_order:], 5)
res.plot_forecast(10)

# Forecast from the preceding window for comparison.
pred = res.forecast(pdf.values[-2 * lag_order: -lag_order], 5)
# plt.show()
nobs = 15
X_train, X_test = dataset[0:-nobs], dataset[-nobs:]
print(X_train.shape)
print(X_test.shape)

# First-difference the training data to make it (closer to) stationary.
transform_data = X_train.diff().dropna()
print(transform_data.head())
print(transform_data.describe())

# TODO Stationarity check

mod = smt.VAR(transform_data)
res = mod.fit(maxlags=15, ic='aic')
print(res.summary())

# TODO Durbin-Watson Statistic

# forecast() needs the last k_ar observations of the differenced series.
pred = res.forecast(transform_data.values[-res.k_ar:], nobs)
pred_df = pd.DataFrame(pred, index=dataset.index[-nobs:], columns=dataset.columns)
print(pred_df)

# Undo the differencing: cumulative sum anchored at the last training level
# (the original added X_test here, which mixes forecasts with actuals).
pred_inverse = pred_df.cumsum()
f = pred_inverse + X_train.iloc[-1]
print(f)
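# Inverting a first difference only works if the cumulative sum is anchored at
# the last observed training level. A small helper capturing that step (the
# function name is my own, not from the source):
import pandas as pd

def invert_first_difference(diff_forecast: pd.DataFrame, last_levels: pd.Series) -> pd.DataFrame:
    # Cumulatively sum the forecast differences, then shift each column up by
    # the last observed level of the training data (aligned on column names).
    return diff_forecast.cumsum() + last_levels

# e.g. levels = invert_first_difference(pred_df, X_train.iloc[-1])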
def cusum_algorithm(data, critical_value):
    num_dimensions = len(data[0])
    total_time = len(data)

    ################
    #### STEP 1 ####
    ################
    # Estimate a VAR(p) model and compute the residuals.
    model = tsa.VAR(data)
    results = model.fit()
    residuals = [np.matrix(e).T for e in list(results.resid)]

    # Minimum spacing between changepoints (number of free parameters + 1).
    d = int(num_dimensions * (results.k_ar + 0 + 1)
            + (num_dimensions * (num_dimensions + 1)) / 2 + 1)

    possible_changepoints = [0, total_time - 1]

    # Initialize h_first and h_last to the endpoints +/- d.
    h_first = d
    h_last = total_time - 1 - d

    while True:
        if h_first >= h_last:
            break

        ################
        #### STEP 2 ####
        ################
        # Find the most likely changepoint (if any) between h_first and h_last.
        Gamma_max, h_max, Cs = max_Cs(h_first, h_last, residuals, num_dimensions)
        '''
        plt.plot(range(h_first, h_last+1), Cs)
        plt.axhline(critical_value, color='red')
        plt.axvline(h_max, color='gray', linestyle='--')
        plt.show()
        '''

        # If there are none between h_first and h_last, skip to step 4.
        if Gamma_max < critical_value:
            break
        # Otherwise, look for more changepoints.
        else:
            old_Gamma_max = Gamma_max
            old_h_max = h_max

            #################
            #### STEP 3a ####
            #################
            # Find the leftmost possible changepoint (i.e., leftmost point
            # with a significant test statistic). Make it the new h_first.
            while Gamma_max > critical_value:
                t_2 = h_max - 1
                Gamma_max, h_max, Cs = max_Cs(h_first, t_2, residuals, num_dimensions)
            h_first = t_2

            #################
            #### STEP 3b ####
            #################
            # Find the rightmost possible changepoint (i.e., rightmost point
            # with a significant test statistic). Make it the new h_last.
            Gamma_max = old_Gamma_max
            h_max = old_h_max
            while Gamma_max > critical_value:
                t_1 = h_max + 1
                Gamma_max, h_max, Cs = max_Cs(t_1, h_last, residuals, num_dimensions)
            h_last = t_1

            #################
            #### STEP 3c ####
            #################
            # If the distance between h_first and h_last exceeds our resolution d,
            # record them and repeat steps 2 and 3 in the narrower interval.
            if np.abs(h_last - h_first) > d:
                possible_changepoints.append(h_first)
                possible_changepoints.append(h_last)
                h_first = h_first + d
                h_last = h_last - d
            # Otherwise, record the most likely changepoint from before and then
            # go to step 4.
            else:
                possible_changepoints.append(old_h_max)
                break

    ################
    #### STEP 4 ####
    ################
    possible_changepoints.sort()

    # Delete possible changepoints until convergence.
    converged = False
    while not converged:
        # For every ith and (i+2)th changepoint, check if the open interval
        # between them is statistically significant. If not, drop the (i+1)th.
        for i in range(len(possible_changepoints) - 2):
            Gamma_max, h_max, Cs = max_Cs(possible_changepoints[i] + 1,
                                          possible_changepoints[i + 2] - 1,
                                          residuals, num_dimensions)
            if Gamma_max < critical_value:
                # Mark for deletion.
                possible_changepoints[i + 1] = -1
        converged = True
        # Delete the marked ones.
        for i in reversed(range(len(possible_changepoints))):
            if possible_changepoints[i] == -1:
                del possible_changepoints[i]
                converged = False

    # Also delete the endpoints.
    del possible_changepoints[0]
    del possible_changepoints[-1]

    changepoints = [point + 1 for point in possible_changepoints]
    return tuple(changepoints)
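# max_Cs is not shown anywhere in this collection. Judging from the call sites,
# it returns the maximum test statistic, its location, and the whole statistic
# path over [h_first, h_last]. A sketch under assumptions, using a simple
# CUSUM-of-squares on residual norms; the original statistic may well be a
# different (e.g. covariance-based) form:
import numpy as np

def max_Cs(h_first, h_last, residuals, num_dimensions):
    # num_dimensions is unused in this sketch; kept for interface compatibility.
    sq = np.array([float(e.T @ e) for e in residuals])  # squared residual norms
    total = sq.sum()
    n = len(sq)
    Cs = []
    for h in range(h_first, h_last + 1):
        partial = sq[: h + 1].sum()
        # Centered, scaled CUSUM-of-squares value at candidate changepoint h.
        Cs.append(abs(partial / total - (h + 1) / n) * np.sqrt(n / 2.0))
    Cs = np.array(Cs)
    k = int(np.argmax(Cs))
    return Cs[k], h_first + k, Cs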
comboMV.columns = ["RDA", "RDB", "Total", "Repsol", "Centrica"]
model = statsm.ols(formula="RDA~RDB+Total+Repsol+Centrica", data=comboMV)
resultMV = model.fit()
resultMV.params
resultMV.summary()
statsgraph.plot_fit(resultMV, 1)

####### Partial autocorrelation function PACF & ACF
residualsMV = resultMV.resid
# plot_acf/plot_pacf take the series itself, not precomputed acf/pacf values
# (and the acf() output is an ndarray, which has no .summary()).
plot_acf(residualsMV)
plot_pacf(residualsMV)

####################
# Lag 2
model = mod.VAR(comboMV)
model.select_order(10)
varlag = 2
results = model.fit(varlag)
#results.summary()
coefs = results.coefs[varlag - 1]
residuals = results.resid
stationary_test(residuals, name='Stationarity Test on the Spread')

plt.plot(residuals)
plt.legend(loc='best', fontsize=8,
           labels=["Royal Dutch A", "Royal Dutch B", "Total", "Repsol", "Centrica"])
plt.xlabel('3 Year Time Series in Days', fontsize=10)
# Fragment: body of a Granger-causality p-value matrix builder. The enclosing
# function header and the outer loop over columns `c` are not shown; a
# self-contained wrapper is sketched after this block.
    for r in X_train.index:
        test_result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
        # p-value of the chosen test statistic at each lag 1..maxlag
        p_values = [round(test_result[i + 1][0][test][1], 4) for i in range(maxlag)]
        if verbose:
            print(f'Y = {r}, X = {c}, P-Values = {p_values}')
        min_p_value = np.min(p_values)
        X_train.loc[r, c] = min_p_value
    X_train.columns = [var + '_x' for var in variables]
    X_train.index = [var + '_y' for var in variables]
    return X_train


def cointegration_test(transform_data, alpha=0.05):
    """Johansen test: is each trace statistic above its critical value?"""
    out = coint_johansen(transform_data, -1, 5)
    d = {'0.90': 0, '0.95': 1, '0.99': 2}
    traces = out.lr1
    cvts = out.cvt[:, d[str(1 - alpha)]]

    def adjust(val, length=6):
        return str(val).ljust(length)

    print('Name :: Test Stat > C(95%) => Signif \n', '--' * 20)
    for col, trace, cvt in zip(transform_data.columns, traces, cvts):
        print(adjust(col), ':: ', adjust(round(trace, 2), 9), ">",
              adjust(cvt, 8), ' => ', trace > cvt)


mod = smt.VAR(X_train)
res = mod.fit(maxlags=maxlag, ic='aic')
print(res.summary())
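# A self-contained wrapper consistent with the Granger-causality fragment above.
# The function name, the `test` key 'ssr_chi2test', and the defaults are
# assumptions, not from the source:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests

def grangers_causation_matrix(data, variables, test='ssr_chi2test', maxlag=12, verbose=False):
    # Cell (r, c) holds the minimum p-value over lags 1..maxlag for the null
    # "column c does not Granger-cause row r".
    mat = pd.DataFrame(np.zeros((len(variables), len(variables))),
                       columns=variables, index=variables)
    for c in mat.columns:
        for r in mat.index:
            result = grangercausalitytests(data[[r, c]], maxlag=maxlag, verbose=False)
            p_values = [round(result[i + 1][0][test][1], 4) for i in range(maxlag)]
            if verbose:
                print(f'Y = {r}, X = {c}, P-Values = {p_values}')
            mat.loc[r, c] = np.min(p_values)
    mat.columns = [v + '_x' for v in variables]
    mat.index = [v + '_y' for v in variables]
    return mat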
data = data_raw.loc['1992-02':'2016-07']
nobs = data.shape[0]
var_list = data.columns
# The next line was garbled in the source; given the stray `axis=1`, it was
# most likely a pd.concat of selected columns with the monetary-base series
# `mb` (the full column selection may have been longer than shown):
X = pd.concat([data[['l_cpi', 'u']], mb], axis=1)
var_names = [price_name, rea_name, info_name, div_name, uc_name, 'mb']
shock_names = ['cost push', 'demand', 'pcom/infl.target', 'mon. pol',
               'finance cost', 'mon. base']

# select lag order by criteria
#p = sm.VAR(X, dates=dates, freq='m').select_order(disp=0)['aic']
#print(p)

h = 48
recVar = sm.VAR(X, dates=X.index, freq='m').fit(maxlags=15, ic='aic')
ASigma = np.linalg.cholesky(recVar.sigma_u)

# save the lag length
p = recVar.k_ar

# gather some infos about the specification for saving
sample = [str(recVar.dates.min()).split()[0], str(recVar.dates.max()).split()[0]]
sample = '_to_'.join(sample)
lags = ': '.join(['lags', str(p)])
info_string = ', '.join([sample, lags])

irfs_analysis = recVar.irf(h)
ci_lower, ci_upper = recVar.irf_errband_mc(T=48)

from plotirfs import plot_irfs
rec_irfs = plot_irfs(irfs_analysis.irfs, ci_lower, ci_upper,
                     imps=[0, 1, 3], resps=[0, 1, 2, 3, 4, 5])
data = data.diff().dropna()
data.head()
# Out[101]:
#                 blue      goog
# 2011-01-16  0.000358 -0.074634
# 2011-01-23  0.001575  0.000000
# 2011-01-30  0.000335  0.026329
# 2011-02-06 -0.002415  0.024824
# 2011-02-13  0.001094 -0.051153

# In[122]:
# make a VAR model
model = api.VAR(data)
# check on order of variables
model.select_order(8)
# Out[122]:
# VAR Order Selection
# =====================================================
#            aic          bic          fpe         hqic
# -----------------------------------------------------
# 0       -14.51       -14.48    4.972e-07       -14.50
# 1       -14.71       -14.61    4.080e-07       -14.67
# 2       -14.79       -14.63    3.780e-07       -14.72
# 3      -14.88*      -14.65*   3.447e-07*      -14.79*
# 4       -14.88       -14.59    3.454e-07       -14.76
#irf.plot(response='PES10 Index')
#irf.plot_cum_effects(orth=False)
#results.test_causality('PES10 Index', ['PES05 Index', 'PES07 Index', 'PES02 Index'], kind='f')

#var = sm.DynamicVAR(data, lag_order=2, window_type='expanding')
#var.coefs
#var.forecast(2)
#var.plot_forecast(2)

# Analyzing the impact of the monetary policy rate (TPM) on the curve
tpmRatesData = b.getHistDataFromBloomberg(
    ['CHOVCHOV Index', 'PES02 Index', 'PES05 Index', 'PES07 Index', 'PES10 Index'],
    init=dt.datetime(2006, 12, 20),
    end=dt.datetime(2016, 12, 20),
    freq='MONTHLY')

data = tpmRatesData.diff().dropna()
model = sm.VAR(data)
results = model.fit(1)
results.summary()

irf = results.irf(10)
irf.plot(impulse='CHOVCHOV Index')
irf.plot_cum_effects(orth=False)
irf.plot_cum_effects(impulse='CHOVCHOV Index')
results.test_causality('PES10 Index', ['CHOVCHOV Index'], kind='f')
# In[34]:
for each in df_train.columns:
    res = adfuller(df_train[each].diff(1).dropna())
    if res[1] < .05:
        print("Series " + each + " is Stationary")
    else:
        print("Series " + each + " is Non-Stationary")

# ### VAR model
# Now we fit a VAR model for the variables. To pick the order, we loop over
# candidate orders and keep the model with the lowest AIC; here order 2 gives
# the lowest AIC, so the VAR is fitted with order 2.

# In[35]:
df_tdiff = df_train.diff(1).dropna()
var_model = smt.VAR(df_tdiff)

# In[36]:
best_aic = np.inf
order = None
fitted_model = None
for i in range(7):
    res = var_model.fit(i + 1)
    print(res.aic)
    if res.aic < best_aic:
        best_aic = res.aic
        order = i + 1
        fitted_model = res
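# With the lowest-AIC model selected, a natural continuation is to forecast
# the differenced series and undo the differencing; a short sketch reusing the
# names above (the 5-step horizon is an arbitrary choice for illustration):
lag_order = fitted_model.k_ar
forecast = fitted_model.forecast(df_tdiff.values[-lag_order:], steps=5)
forecast_df = pd.DataFrame(forecast, columns=df_tdiff.columns)

# Undo the first difference: cumulative sum anchored at the last training level.
levels = forecast_df.cumsum() + df_train.iloc[-1].values
print(levels)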