def test_select_order(self):
    """Smoke test for IC-based lag selection on the fitted VAR model.

    Fits with 'aic' and 'fpe' information criteria, then exercises
    ``select_order`` with its default ``maxlags`` on a fresh model.
    """
    # Fix: the original bound both fit results to an unused local `result`;
    # these are smoke calls, so the return values are intentionally dropped.
    self.model.fit(10, ic='aic', verbose=True)
    self.model.fit(10, ic='fpe', verbose=True)  # bug (marker kept from original)
    # Re-wrap the endog data and run order selection with default maxlags.
    model = VAR(self.model.endog)
    model.select_order()
def test_lag_order_selection():
    """Compare VAR lag-order selection (aic/fpe/hqic/bic) against reference results."""
    if debug_mode:
        # Skip entirely unless this test group was requested.
        if "lag order" not in to_test:
            return
        print("\n\nLAG ORDER SELECTION", end="")

    for ds in datasets:
        for dt_s in dt_s_list:
            if debug_mode:
                print("\n" + dt_s_tup_to_string(dt_s) + ": ", end="")

            # Build the model: endog from the dataset, exog from seasonal dummies.
            endog_tot = data[ds]
            seasonal_exog = generate_exog_from_season(dt_s[1], len(endog_tot))
            var_model = VAR(endog_tot, seasonal_exog)
            selected = var_model.select_order(10, trend=dt_s[0])

            # Check each information criterion against the stored reference value.
            for criterion in ["aic", "fpe", "hqic", "bic"]:
                msg = build_err_msg(
                    ds, dt_s, "LAG ORDER SELECTION - " + criterion.upper()
                )
                got = getattr(selected, criterion)
                want = results_ref[ds][dt_s]["lagorder"][criterion]
                assert_allclose(got, want, rtol, atol, False, msg)
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pylab
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.vector_ar.var_model import VAR

# Load the macrodata set and build a quarterly DatetimeIndex from the
# year/quarter columns (e.g. "1959Q1").
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp', 'realcons', 'realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()  # log difference

# make a VAR model
model = VAR(data)
results = model.fit(2)
# Fix: `print results.summary()` is Python 2 print-statement syntax and a
# SyntaxError under Python 3; the rest of the script is Py3-compatible.
print(results.summary())
results.plot()
results.plot_acorr()  # autocorrelation
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')
irf = results.irf(10)
irf.plot(orth=True)  # Orthogonalization
pylab.show()
# Fix: `print mn_bm, ...` is Python 2 print-statement syntax and a
# SyntaxError under Python 3.
print(mn_bm, sd_bm, sr_bm)

# calc beta's alpha's
# do forecast of returns, correlation. Use to weight
rets.iloc[:, 0:10].plot()

### DETOUR TO VAR FORECASTING
from statsmodels.tsa.vector_ar.var_model import VAR, VARResults, VARProcess
import statsmodels
statsmodels.version.version

# Check for NA's in data - have to reduce number of series used as full 30
# gave singular matrix
v1 = VAR(rets_train[series_red], freq='D')
v1.select_order(maxlags=30)
results = v1.fit(5)

# From fitted
# results.summary()
results.plot()
# results.plot_acorr()
# plt.show()

# Make forecast for 3 months
test_index = rets_test.index
fc_range = pd.date_range(start=test_index[0], periods=2, freq='3M')
fc_periods = len(rets_test[fc_range[0]:fc_range[1]])
lag_order = results.k_ar
fc = results.forecast(rets_train[series_red].values, fc_periods)
fc.shape
fc[:, -1]
# Fix: `columns` was given rets_train[series_red] (a 2-D DataFrame); the
# DataFrame constructor expects the column labels themselves, which here
# are the names in series_red.
df_fc = pd.DataFrame(fc, index=rets.index[0:fc_periods], columns=series_red)
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pylab
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.vector_ar.var_model import VAR

# Load the macrodata set and build a quarterly DatetimeIndex from the
# year/quarter columns (e.g. "1959Q1").
mdata = sm.datasets.macrodata.load_pandas().data
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)
mdata = mdata[['realgdp', 'realcons', 'realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()  # log difference

# make a VAR model
model = VAR(data)
results = model.fit(2)
# Fix: `print results.summary()` is Python 2 print-statement syntax and a
# SyntaxError under Python 3; the rest of the script is Py3-compatible.
print(results.summary())
results.plot()
results.plot_acorr()  # autocorrelation
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')
irf = results.irf(10)
irf.plot(orth=True)  # Orthogonalization
pylab.show()
# Per-hour leaderboards: top-3 predicted kWh and the matching household names.
top_3_24hour_kwh = [[0 for x in range(3)] for y in range(24)]
# define list of list to store top electric consumption household names
top_3_24hour_names = [["1" for x in range(3)] for y in range(24)]

for names, data in df:
    # NOTE(review): `hours` is unused in this chunk — kept in case later
    # (unseen) code relies on it.
    hours = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
             '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
             '23', '24']
    data = data.drop(['name'], axis=1)
    # Drop columns that are constant (every row equal to row 1).
    data = data.loc[:, (data != data.iloc[1]).any()]
    print(names)
    namelist.append(names)

    # Hold out the final row for validation, fit a VAR on the rest.
    train = data[:-1]
    valid = data[-1:]
    model = VAR(endog=train)
    model.select_order(3)
    model_fit = model.fit(maxlags=3, ic='aic')
    prediction = model_fit.forecast(model_fit.y, steps=len(valid))
    mse.append(mean_squared_error(valid, prediction))
    prediction = prediction[0]

    # Find the top three households for every hour.
    for i in range(len(prediction)):
        cand_score = prediction[i]
        cand_name = names
        for j in range(3):
            if cand_score > top_3_24hour_kwh[i][j]:
                # Fix: the original saved the displaced entry into
                # top_temp_score/top_temp_name but never reused it, so the
                # new value overwrote every smaller slot and prior leaders
                # were lost. Swap instead, so the displaced entry keeps
                # competing for the lower-ranked slots.
                top_3_24hour_kwh[i][j], cand_score = (
                    cand_score, top_3_24hour_kwh[i][j])
                top_3_24hour_names[i][j], cand_name = (
                    cand_name, top_3_24hour_names[i][j])
# Commented-out Granger-causality diagnostics, kept for reference: each call
# tests whether one driver series Granger-causes 'pollution'.
# print(grangercausalitytests(train_diff[['temp', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))
# print(grangercausalitytests(train_diff[['press', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))
# print(grangercausalitytests(train_diff[['wnd_dir', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))
# print(grangercausalitytests(train_diff[['wnd_spd', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))
# print(grangercausalitytests(train_diff[['snow', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))
# print(grangercausalitytests(train_diff[['rain', 'pollution']],
#                             maxlag=15, addconst=True, verbose=True))

# Initialize the model on the differenced training data.
model = VAR(endog=train_diff)
res = model.select_order(15)
res.summary()

# Fit with a maximum of 3 lags.
model_fit = model.fit(maxlags=3)
model_fit.summary()

# Lag order actually chosen by the fit.
lag_order = model_fit.k_ar
print("lag order为:", lag_order)

# Use the last `lag_order` observations as the forecast seed.
input_data = train_diff.values[-lag_order:]
print("预测数据为:", input_data)

# Forecast `nobs` steps ahead.
pred = model_fit.forecast(y=input_data, steps=nobs)