def testing_var():
    """Fit a VAR on an 80/20 train/validation split, report per-column RMSE,
    then refit on the full data and print a one-step-ahead forecast.

    NOTE(review): relies on module-level ``path`` and ``transform_dataframe``.
    """
    df = transform_dataframe(path)
    # Chronological 80/20 split (no shuffling for time series).
    split = int(0.8 * len(df))
    train = df.iloc[:split]
    valid = df.iloc[split:]

    model = VAR(endog=train)
    model_fit = model.fit()

    # Forecast over the whole validation horizon.
    prediction = model_fit.forecast(model_fit.y, steps=len(valid))

    cols = df.columns.values
    # BUG FIX: build the frame in one shot. The old cell-by-cell loop used
    # chained indexing (pred.iloc[i][j] = ...), which assigns into a temporary
    # and may silently not write, and it hard-coded 10 columns.
    pred = pd.DataFrame(prediction, index=range(len(prediction)), columns=cols)

    # Per-column RMSE on the validation set.
    for col in cols:
        print('rmse value for', col, 'is : ',
              np.sqrt(mean_squared_error(pred[col], valid[col])))

    # Refit on the full series for the final one-step forecast.
    model = VAR(endog=df)
    model_fit = model.fit()
    yhat = model_fit.forecast(model_fit.y, steps=1)
    print(yhat)
def test_irf_trend():
    # Regression test for IRFs under different trend specs, see GH#1636.
    # Rough comparison: demeaning / adding a trend to the data should yield
    # similar AR coefficients and therefore similar IRF standard errors.
    data = get_macrodata().view((float, 3), type=np.ndarray)

    results = VAR(data).fit(4)  # default trend='c'
    irf = results.irf(10)

    # No-constant fit on demeaned data should match closely.
    demeaned = data - data.mean(0)
    irf_nc = VAR(demeaned).fit(4, trend="n").irf(10)
    assert_allclose(irf_nc.stderr()[1:4], irf.stderr()[1:4], rtol=0.01)

    # Linear-trend fit on trended data should match a bit more loosely.
    slope = 1e-3 * np.arange(len(data)) / (len(data) - 1)
    # pandas version, currently not used, if data is a pd.DataFrame:
    # data_t = pd.DataFrame(data.values + slope[:, None], index=data.index, columns=data.columns)
    trended = data + slope[:, None]
    irf_t = VAR(trended).fit(4, trend="ct").irf(10)
    assert_allclose(irf_t.stderr()[1:4], irf.stderr()[1:4], rtol=0.03)
def test_var_trend():
    # see GH#2271: an invalid trend specification must raise ValueError.
    data = get_macrodata().view((float, 3), type=np.ndarray)
    model = VAR(data)
    results = model.fit(4)  # default trend='c'
    irf = results.irf(10)

    # Fitting demeaned data without a constant must still succeed.
    demeaned = data - data.mean(0)
    VAR(demeaned).fit(4, trend="n")

    with pytest.raises(ValueError):
        model.fit(4, trend="t")
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """VAR forecasting over several horizons with a chronological train/test
    split; also dumps the last horizon's predictions and the test frame to CSV.

    Returns ([DataFrame per horizon], df_test).
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    # NOTE(review): this StandardScaler takes mean=/std= kwargs, so it is a
    # project-local scaler rather than sklearn's — confirm its definition.
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    scaled_train = scaler.transform(df_train.values)
    var_result = VAR(scaled_train).fit(n_lags)

    max_n_forwards = np.max(n_forwards)
    # Slide a window over the series; place each horizon's forecast at its
    # matching position inside the test segment.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        window = scaler.transform(df.values[input_ind:input_ind + n_lags])
        prediction = var_result.forecast(window, max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]),
                                  index=df_test.index, columns=df_test.columns)
        df_predicts.append(df_predict)

    # Only the final horizon's frame is written (loop variable after the loop).
    df_predict.to_csv("./df_predict.csv", sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
def fit_model(self):
    """Fit a Vector Autoregression on the training set and store the fitted
    result on ``self.model_fit``."""
    self.model_fit = VAR(endog=self.train).fit()
def control_lqr(env, agent, model_fit, data, lag=4):
    """Run an LQR controller on a gym-style ``env``, using the lag-1
    coefficient matrix of a fitted VAR as the linear dynamics matrix A.

    model_fit : fitted VAR result; ``coefs[0]`` is used as A.
    data      : observation history, grown online and used to refit the VAR.
    lag       : VAR order used for the online refits.
    """
    # Fixed input matrix B and state cost Q — assumes a 4-dimensional state
    # (looks like CartPole; TODO confirm against the caller).
    B = np.array([[0], [0], [-.01], [-.01]])
    Q = np.diag((10., 1., 10., 1.))
    print(model_fit.coefs)
    # Initial gain from the offline model; scalar control cost R = 1.
    K = lqr(model_fit.coefs[0], B, Q, 1)
    print("K=")
    print(K)
    obs = env.reset()
    action = agent.begin_episode(obs)
    for i in range(500):
        env.render()
        time.sleep(0.15)  # slows down process to make it more visible
        # Append the latest observation; recompute K every 10 steps.
        data = np.vstack([data, obs])
        if (i % 10 == 0):
            model_next = VAR(data)
            model_fit_next = model_next.fit(lag)
            K = lqr(model_fit_next.coefs[0], B, Q, 1)
            # print("K=")
            # print(K)
        action = get_control(K, obs)
        # Get the next action from the learner, given our new state.
        obs, reward, done, info = env.step(action)
        if done:
            print("Final episode: lasted {} timesteps, data: {}".format(
                i + 1, obs))
            break
def generate_final_predictions(df_coords, lag_order=3, display=False):
    """Train a VAR on the full coordinate history with the supplied lag order
    (the best one found by testing_harness) and forecast mean coordinates for
    2022 and 2023.

    Returns a DataFrame with columns year / future_latitude / future_longitude.
    """
    fitted = VAR(endog=df_coords).fit(lag_order)
    forecast = fitted.forecast(fitted.y, steps=2)
    df_forecast = pd.DataFrame(
        forecast, columns=['future_latitude', 'future_longitude'])
    df_forecast['year'] = [2022, 2023]
    # Put the year first for readability.
    df_forecast = df_forecast[['year', 'future_latitude', 'future_longitude']]
    if display:
        print()
        print('Final model information:')
        print()
        print(fitted.summary())
        print()
        print('Future hotspot forecasts:')
        print()
        print(df_forecast)
    return df_forecast
def extract_model(self, input, save_status=False):
    """Fit a VAR on the cleaned history and forecast ``input`` months ahead.

    Returns the forecast rows (columns Q1/Q2/Q3) indexed by month start, or a
    usage-hint string when no cleaned data is available.
    """
    total = self._model_clean()
    if total is False:
        return "Try to find available area by:\n sh casa.sh --find aptartment name\n"
    n = int(input)
    # Month-start index for the n periods following the observed history.
    new_index = pd.date_range(start=total.index[-1], periods=n + 1, freq='MS')[1:]
    fitted = VAR(total).fit()
    forecast = fitted.forecast(y=total.values, steps=n)
    pred = pd.DataFrame(forecast, columns=['Q1', 'Q2', 'Q3'], index=new_index)
    final_df = pd.concat([total, pred], axis=0)
    final = final_df.loc[new_index]
    if save_status is True:
        self._save_image_model(eval_model=final_df, pred_model=pred, pred_num=input)
    return final
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using Vector Auto-Regressive Model.

    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples held out for testing.
    :return: [list of prediction in different horizon], dt_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    var_result = VAR(scaler.transform(df_train.values)).fit(n_lags)
    max_n_forwards = np.max(n_forwards)

    # Slide a window over the series; map each (window, horizon) pair back to
    # its position inside the test segment.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        inputs = scaler.transform(df.values[input_ind:input_ind + n_lags])
        prediction = var_result.forecast(inputs, max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    df_predicts = [
        pd.DataFrame(scaler.inverse_transform(result[i]),
                     index=df_test.index, columns=df_test.columns)
        for i in range(len(n_forwards))
    ]
    return df_predicts, df_test
def var(data):
    """Fit a VAR(9) on scaled (optionally PCA-reduced) data using a 70/30
    chronological split, and forecast both segments.

    Returns (trainPredict, testPredict, elapsed_seconds, trainY, testY); the
    predictions are the -999 sentinel when the inverse transform fails.

    NOTE(review): relies on module-level ``DOpca`` and ``n_pca`` flags.
    """
    start_time_ = time.time()
    # Fill gaps first; VAR cannot handle NaNs.
    data = data.interpolate(limit=30000000, limit_direction='both').astype('float32')

    # Scaling (and optional PCA) pipeline, fit on the full series.
    if DOpca:
        steps = [('scale', StandardScaler()), ('pca', PCA(n_components=n_pca))]
    else:
        steps = [('scale', StandardScaler())]
    pipe = Pipeline(steps=steps)
    pipe.fit(data)

    # Chronological 70/30 split.
    train, test = data[:int(0.7 * (len(data)))], data[int(0.7 * (len(data))):]
    sc_train, sc_test = pipe.transform(train), pipe.transform(test)

    model_fit = VAR(endog=sc_train).fit(9)
    trainPredict = model_fit.forecast(sc_train, steps=len(sc_train))
    testPredict = model_fit.forecast(sc_test, steps=len(sc_test))
    try:
        trainPredict = pipe.inverse_transform(trainPredict)
        testPredict = pipe.inverse_transform(testPredict)
        trainPredict = pd.Series(data=(trainPredict[:, 0]), index=train.index)
        testPredict = pd.Series(data=(testPredict[:, 0]), index=test.index)
    except Exception:
        # BUG FIX: narrowed from a bare ``except`` so KeyboardInterrupt /
        # SystemExit propagate. Best-effort sentinel behavior is preserved.
        trainPredict, testPredict = -999, -999
    trainY = pd.Series(data=(train.iloc[:, 0]), index=train.index)
    testY = pd.Series(data=(test.iloc[:, 0]), index=test.index)
    time_ = time.time() - start_time_
    return trainPredict, testPredict, time_, trainY, testY
def var(flow, target):
    """Per-cell VAR baseline: forecast each (line, channel) series one step
    ahead and score against masked ground truth.

    flow   : array (n_lines, n_timesteps, n_channels).
    target : array whose columns 0/1 hold the in/out ground truth.
    Returns (in_rmse, out_rmse, in_mae, out_mae), masked by mask_threshold.
    """
    warnings.filterwarnings("ignore")
    in_mask = np.greater(target[:, 0], mask_threshold)
    out_mask = np.greater(target[:, 1], mask_threshold)
    result = np.zeros((flow.shape[0], flow.shape[-1]))
    for i in range(flow.shape[0]):
        if verbose:
            if (i + 1) % 10000 == 0:
                print("VAR: line {} of {}".format(i + 1, flow.shape[0]))
        for j in range(flow.shape[-1]):
            # Pair each timestep with its successor -> bivariate series.
            data = list()
            for k in range(flow.shape[1] - 1):
                data.append([flow[i, k, j], flow[i, k + 1, j]])
            model = VAR(data)
            try:
                model_fit = model.fit()
                result[i, j] = model_fit.forecast(model_fit.y, steps=1)[0][1]
            except Exception:
                # BUG FIX: narrowed from a bare ``except`` (which also
                # swallowed KeyboardInterrupt). Degenerate series (e.g.
                # constant) still fall back to 0.
                result[i, j] = 0.0
    in_rmse = np.sqrt(
        np.mean(np.square(target[:, 0][in_mask] - result[:, 0][in_mask])))
    out_rmse = np.sqrt(
        np.mean(np.square(target[:, 1][out_mask] - result[:, 1][out_mask])))
    in_mae = np.mean(np.abs(target[:, 0][in_mask] - result[:, 0][in_mask]))
    out_mae = np.mean(np.abs(target[:, 1][out_mask] - result[:, 1][out_mask]))
    return in_rmse, out_rmse, in_mae, out_mae
def _run_varLiNGAM(self, xt, verbose=False):
    """
    Run the VarLiNGAM algorithm on data.

    Args:
        xt : time series matrix with size n*m (length*num_variables)
    Returns:
        Tuple: (Bo, Bhat) Instantaneous and lagged causal coefficients
    """
    ident = np.identity(xt.shape[1])

    # Step 1: VAR estimation; drop the intercept row from the coefficients.
    var_results = VAR(xt).fit(self.lag)
    lagged_coefs = var_results.params[1:, :]

    # Step 2: LiNGAM on the VAR residuals yields the instantaneous matrix Bo.
    lingam = LiNGAM(verbose=verbose)
    bo = lingam._run_LiNGAM(pd.DataFrame(var_results.resid))

    # Step 3/4: lagged matrix Bhat = (I - Bo) @ Mt.
    bhat = np.dot((ident - bo), lagged_coefs)
    return (bo, bhat)
def artificial_data(): N = 301 # x, y = n_hat(N, 6) x, y = sin_(N, 5) y = y + 0.01 * np.random.normal(0., .5, len(y)) z = y * y Y = np.matrix([y, z]).transpose().tolist() # ====================== title('single prediction') model = VAR(Y) model_fit = model.fit(maxlags=15, ic='aic') pred = model_fit.forecast(Y[-model_fit.k_ar:], N) xx = np.arange(N, N + len(pred)) assert (len(pred) == N) # print(model_fit.k_ar) # print(model_fit.params) plot(x, Y) plot(xx, pred, '--') show() # # ======================================= title('dynamic prediction') xx, pred = test_forecast(x, Y, len_for_prediction=100, n_pred=100, maxlags=15, ic='aic') plot(x, Y) plot(xx, pred, '--') show()
class TestVARResultsLutkepohl(object):
    """
    Verify calculations using results from Lutkepohl's book
    """

    def __init__(self):
        self.p = 2  # VAR lag order used throughout
        if not have_pandas():
            return
        sdata, dates = get_lutkepohl_data('e1')
        names = sdata.dtype.names
        data = data_util.struct_to_ndarray(sdata)
        # Log-difference the levels; the last 16 quarters are held out.
        adj_data = np.diff(np.log(data), axis=0)
        # est = VAR(adj_data, p=2, dates=dates[1:], names=names)
        self.model = VAR(adj_data[:-16], dates=dates[1:-16], names=names,
                         freq='Q')
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()  # published reference values

    def test_approx_mse(self):
        if not have_pandas():
            raise nose.SkipTest
        # 3.5.18, p. 99
        mse2 = np.array([[25.12, .580, 1.300],
                         [.580, 1.581, .586],
                         [1.300, .586, 1.009]]) * 1e-4
        assert_almost_equal(mse2, self.res.forecast_cov(3)[1], DECIMAL_3)

    def test_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        irf_stderr = self.irf.stderr(orth=False)
        for i in range(1, 1 + len(self.lut.irf_stderr)):
            assert_almost_equal(np.round(irf_stderr[i], 3),
                                self.lut.irf_stderr[i - 1])

    def test_cum_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.cum_effect_stderr(orth=False)
        for i in range(1, 1 + len(self.lut.cum_irf_stderr)):
            assert_almost_equal(np.round(stderr[i], 3),
                                self.lut.cum_irf_stderr[i - 1])

    def test_lr_effect_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.lr_effect_stderr(orth=False)
        # NOTE(review): orth_stderr is computed but never asserted against a
        # reference — confirm whether an orthogonalized check is missing.
        orth_stderr = self.irf.lr_effect_stderr(orth=True)
        assert_almost_equal(np.round(stderr, 3), self.lut.lr_stderr)
def load_results_statsmodels(dataset):
    """Fit a VAR (maxlags=4, OLS) for every deterministic-terms specification
    in ``dt_s_list`` and return the fitted results keyed by spec tuple."""
    fitted = dict.fromkeys(dt_s_list)
    for dt_s_tup in dt_s_list:
        endog = data[dataset]
        # Seasonal dummies are supplied as exogenous regressors.
        exog = generate_exog_from_season(dt_s_tup[1], len(endog))
        fitted[dt_s_tup] = VAR(endog, exog).fit(
            maxlags=4, trend=dt_s_tup[0], method="ols")
    return fitted
def var_simulate(data, n_simulate, pca_n=200):
    """Simulate surrogate data: PCA-reduce, fit a VAR(1) on the component
    scores, simulate from it, and project back into the original vertex space."""
    # PCA reduction before the VAR fit.
    reduced = pca(data, pca_n)
    fitted = VAR(reduced['pc_scores']).fit(maxlags=1)
    simulated_scores = fitted.simulate_var(n_simulate)
    # Project simulated PCA time courses into original vertex space.
    return simulated_scores @ reduced['Va']
def var_predict(train_data, num_out):
    """Fit a VAR(2) on each series in ``train_data`` and forecast ``num_out``
    steps ahead, keeping only the first output column of each forecast."""
    forecasts = []
    for series in train_data:
        fitted = VAR(series).fit(2)
        horizon = fitted.forecast(fitted.y, steps=num_out)
        forecasts.append(horizon[:, 0])
    return np.array(forecasts)
def extractCoeff(timeseries_data, lag_order):
    '''
    Takes in a 7680x16 array to fit a VAR model and obtain the coefficients
    @return: 5x16x16 VAR coefficients array
    '''
    # trend='nc' = no constant; .coefs holds one matrix per lag.
    fitted = VAR(timeseries_data).fit(lag_order, trend='nc')
    return fitted.coefs
def train(self, array_X, array_Y):
    """Fit a VAR on the concatenated [Y | X] matrix and return the fitted
    values of the target column, front-padded to match the input length."""
    self.train_X = array_X
    self.train_Y = array_Y
    # Target column first, features after it.
    combined = numpy.concatenate((numpy.matrix(array_Y).T, array_X), axis=1)
    fit = VAR(endog=pd.DataFrame(data=combined)).fit()
    fitted_target = fit.fittedvalues.values[:, 0]
    # Repeat the first fitted value to cover the observation lost to the lag.
    return numpy.hstack((fitted_target[0], fitted_target))
def VARmethod(paramsList=['pollution.csv', '0.93', 'pm', 'date'],
              specialParams=['2', '1', '4', '0', '1', '1', '7']):
    """Train/test-split VAR forecasting over the columns named in paramsList,
    writing the stitched result to result.csv.

    paramsList: [csv_path, train_ratio, value_col..., date_col].
    NOTE(review): mutable list defaults are shared across calls, and
    ``specialParams`` is never used — confirm both are intentional.
    """
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    # Chronological split. Note the row at int(allRows*trainRows) belongs to
    # neither split (test starts one past it) — possible off-by-one.
    train = df[0:int(allRows * trainRows)]
    test = df[int(allRows * trainRows) + 1:]
    # Resample everything to daily means, indexed by the parsed date column.
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()
    # NOTE(review): assigning into the ``train``/``test`` slices can trigger
    # pandas SettingWithCopyWarning.
    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()
    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()
    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # Everything above is generic boilerplate ----------------------------
    for i in range(2, len(paramsList) - 1):
        # https://blog.csdn.net/mooncrystal123/article/details/86736397
        # https://blog.csdn.net/qq_41518277/article/details/85101141
        # First-difference the training series to (roughly) stationarize it.
        var_data = train[paramsList[i]].diff(1).dropna()
        # model = VAR(endog=var_data, dates=pd.date_range(train.index[0], train.index[-1]), freq='M')
        model = VAR(endog=var_data)
        # Estimate the optimal lag order (disabled):
        # lag_order = model.select_order()
        # Print the result:
        # print(lag_order.summary())
        model_fit = model.fit(1)
        prediction = model_fit.forecast(model_fit.y, steps=len(test[paramsList[i]]))
        print(prediction)
        # NOTE(review): the forecast is of the *differenced* series, but it is
        # compared directly to the level series below — the RMSE likely mixes
        # scales; confirm whether an inverse-differencing step is missing.
        y_hat[paramsList[i]] = prediction
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
    # --------------------------------------
    # Stitch the (blanked) training span and the forecasts together, persist.
    y_hat['time'] = test.index
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray, yhat_naive), axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    for i in range(2, len(paramsList) - 1):
        # Blank out the training span so the CSV only shows forecasts.
        s[paramsList[i]][0:int(len(s) * trainRows)] = ""
    s.to_csv(saveto, index=False, header=True, float_format='%.2f')
def test_var_cov_params_pandas(bivariate_var_data):
    # cov_params on a DataFrame-backed VAR(2) must come back as a DataFrame
    # whose rows/columns carry an (exog term, equation) MultiIndex.
    frame = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
    res = VAR(frame).fit(2)
    cov = res.cov_params()
    assert isinstance(cov, pd.DataFrame)
    exog_names = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
    expected = pd.MultiIndex.from_product((exog_names, ('x', 'y')))
    assert_index_equal(cov.index, cov.columns)
    assert_index_equal(cov.index, expected)
def test_irf_err_bands():
    # smoke tests: exercise every IRF error-band method, no value checks
    data = get_macrodata()
    model = VAR(data)
    results = model.fit(maxlags=2)
    irf = results.irf()
    bands_sz1 = irf.err_band_sz1()  # Sims-Zha method 1
    bands_sz2 = irf.err_band_sz2()  # Sims-Zha method 2
    bands_sz3 = irf.err_band_sz3()  # Sims-Zha method 3
    bands_mc = irf.errband_mc()     # Monte Carlo bands
def baseline_VAR(flow_df, road_adj, hops=5, history_window=4,
                 prediction_window=1, test_ratio=0.25):
    """Per-road VAR baseline: each road is modelled jointly with its
    up-to-``hops``-hop neighbours, trained on the first (1 - test_ratio) of
    the timeline and evaluated on the remainder.

    Returns (Y_pred, Y_true), both of shape (n_sample, n_road).
    """
    n_timestamp, n_road = flow_df.shape
    n_timestamp_train = int(round(n_timestamp * (1 - test_ratio)))
    n_timestamp_test = n_timestamp - n_timestamp_train
    # find neighbors for each node: accumulate reachability up to ``hops`` hops
    symm_adj = road_adj + road_adj.transpose()
    neighbor_adj = symm_adj
    for hop in range(hops - 1):
        neighbor_adj = np.matmul(neighbor_adj, symm_adj) + symm_adj
    # NOTE(review): when hops == 1 the loop never runs, neighbor_adj still
    # aliases symm_adj, and fill_diagonal mutates symm_adj too — confirm.
    np.fill_diagonal(neighbor_adj, 0)  # exclude self
    train_data = np.array(
        flow_df.iloc[:n_timestamp_train])  # (n_timestamp_train, n_road)
    test_data = np.array(
        flow_df.iloc[n_timestamp_train:])  # (n_timestamp_test, n_road)
    Y_true = test_data[history_window +
                       (prediction_window - 1):n_timestamp_test]  # (n_sample, n_road)
    Y_pred = np.zeros(Y_true.shape)  # (n_sample, n_road)
    for road_index in range(n_road):
        # The modelled road itself plus every positive-weight neighbour.
        filtered_roads = [road_index] + list(
            np.where(neighbor_adj[road_index] > 0)[0])
        filtered_train_data = np.array(train_data[:, filtered_roads])
        filtered_test_data = np.array(test_data[:, filtered_roads])
        model = VAR(filtered_train_data)
        model_fitted = model.fit(history_window)
        # Stack sliding history windows: (n_sample, n_road, history_window).
        X_test = np.concatenate([
            np.expand_dims(
                filtered_test_data[i:(n_timestamp_test - history_window -
                                      prediction_window + 1 + i)],
                axis=2) for i in range(history_window)
        ], axis=2)
        for i in range(Y_pred.shape[0]):  # n_sample
            # Forecast prediction_window steps; keep the last step for the
            # target road (column 0 in the filtered ordering).
            Y_pred[i, road_index] = model_fitted.forecast(
                X_test[i].transpose(), steps=prediction_window)[-1, :][0]
    # max_value = Y_true.max()
    # print((Y_pred > max_value).sum())  # no super large values
    # print((Y_pred < 0).sum())  # negative values account for 0.2%
    Y_pred[Y_pred < 0] = 0  # correct negative values
    return Y_pred, Y_true
class TestVARResultsLutkepohl(object):
    """
    Verify calculations using results from Lutkepohl's book
    """

    def __init__(self):
        self.p = 2  # VAR lag order used throughout
        if not have_pandas():
            return
        sdata, dates = get_lutkepohl_data("e1")
        names = sdata.dtype.names
        data = data_util.struct_to_ndarray(sdata)
        # Log-difference the levels; the last 16 quarters are held out.
        adj_data = np.diff(np.log(data), axis=0)
        # est = VAR(adj_data, p=2, dates=dates[1:], names=names)
        self.model = VAR(adj_data[:-16], dates=dates[1:-16], names=names,
                         freq="Q")
        self.res = self.model.fit(maxlags=self.p)
        self.irf = self.res.irf(10)
        self.lut = E1_Results()  # published reference values

    def test_approx_mse(self):
        if not have_pandas():
            raise nose.SkipTest
        # 3.5.18, p. 99
        mse2 = np.array([[25.12, 0.580, 1.300],
                         [0.580, 1.581, 0.586],
                         [1.300, 0.586, 1.009]]) * 1e-4
        assert_almost_equal(mse2, self.res.forecast_cov(3)[1], DECIMAL_3)

    def test_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        irf_stderr = self.irf.stderr(orth=False)
        for i in range(1, 1 + len(self.lut.irf_stderr)):
            assert_almost_equal(np.round(irf_stderr[i], 3),
                                self.lut.irf_stderr[i - 1])

    def test_cum_irf_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.cum_effect_stderr(orth=False)
        for i in range(1, 1 + len(self.lut.cum_irf_stderr)):
            assert_almost_equal(np.round(stderr[i], 3),
                                self.lut.cum_irf_stderr[i - 1])

    def test_lr_effect_stderr(self):
        if not have_pandas():
            raise nose.SkipTest
        stderr = self.irf.lr_effect_stderr(orth=False)
        # NOTE(review): orth_stderr is computed but never asserted against a
        # reference — confirm whether an orthogonalized check is missing.
        orth_stderr = self.irf.lr_effect_stderr(orth=True)
        assert_almost_equal(np.round(stderr, 3), self.lut.lr_stderr)
def run(
    self, action: str = 'evaluate'
) -> Union[pd.DataFrame, List[Dict[str, Dict[str, Union[Union[float, str], Any]]]]]:
    """
    >>> from q3_time_series.model import VectorAutoRegression
    >>> # To Evaluate
    >>> evaluate_metrics = VectorAutoRegression().run("evaluate")
    >>> # To Predict
    >>> prediction = VectorAutoRegression().run("predict")
    """
    model_fit = VAR(endog=self.train).fit()
    if action == 'predict':
        # Use the full dataset for the final prediction. BUG FIX: the old code
        # called model_fit.forecast(model_fit_full.y, ...) — i.e. it fed the
        # full-data lags into the train-only fit. Forecast from the full fit.
        model_fit_full = VAR(endog=self.df).fit()
        return pd.DataFrame(model_fit_full.forecast(model_fit_full.y, steps=2),
                            index=['2008', '2009'],
                            columns=[self.df.columns])
    # Evaluate: compute the validation forecast once (the old code recomputed
    # it three times per column), then per-column RMSE / MAE / MAPE.
    pred = self._prediction(model_fit, self.valid)
    metrics = []
    for col in self.df.columns:
        metrics.append({
            col: {
                'rmse_val': sqrt(mean_squared_error(self.valid[col], pred[[col]])),
                'mae_val': mean_absolute_error(self.valid[col], pred[[col]]),
                'mape_val':
                    f'{self.mean_absolute_percentage_error(self.valid[col], pred[[col]])} %'
            }
        })
    return metrics
def sensitivity(df, col_name, ratio, percentage=0.9):
    """Scale the second-to-last value of ``col_name``, refit the VAR, and
    return the final 2-step-ahead forecast of column index 3.

    NOTE(review): this operates on (and mutates) the module-level ``df_sen``,
    not the ``df`` argument, and ``percentage`` is unused — confirm both.
    """
    df_sen[col_name].iloc[-2] = df_sen[col_name].iloc[-2] * ratio
    train = df_sen[:-1]  # drop the last row before fitting
    model_sen = VAR(endog=train)
    model_sen_fit = model_sen.fit()
    # Make prediction on validation
    yhat_sen_cal = model_sen_fit.forecast(model_sen_fit.y, steps=2)
    # Value of column 3 at the last forecast step.
    return yhat_sen_cal[:, 3][-1]
def _estimate_var_coefs(self, X):
    """Estimate coefficients of VAR"""
    # XXX: VAR.fit() is not searching lags correctly
    if self._criterion not in ['aic', 'fpe', 'hqic', 'bic']:
        # No information criterion requested: single fit at the maximum lag.
        result = VAR(X).fit(maxlags=self._lags, trend='nc')
    else:
        # Manual lag search: fit each order up to self._lags and keep the fit
        # that minimizes the chosen criterion.
        best_value = float('Inf')
        result = None
        for lag in range(1, self._lags + 1):
            candidate = VAR(X).fit(maxlags=lag, ic=None, trend='nc')
            score = getattr(candidate, self._criterion)
            if score < best_value:
                best_value = score
                result = candidate
    return result.coefs, result.k_ar, result.resid
def _VAR(train, test=None):
    """Fit a VAR on ``train`` and forecast from its last 10 rows.

    With ``test`` supplied, forecast len(test) steps and print the MSE;
    otherwise forecast len(train) steps. Returns the forecast array.
    """
    model_fit = VAR(train).fit()  # maxlags=299, ic='aic')
    print('Lag: %s' % model_fit.k_ar)
    # BUG FIX: ``test != None`` is an elementwise comparison for arrays /
    # DataFrames and raises "truth value is ambiguous"; identity check is the
    # correct None test.
    if test is not None:
        predictions = model_fit.forecast(train[-10:, :], len(test))
        error = mean_squared_error(test, predictions)
        print('Test MSE: %.3f' % error)
    else:
        predictions = model_fit.forecast(train[-10:, :], len(train))
    return predictions
def model_var(train_data, test_data, train_data1, test_data1):
    """Fit a bivariate VAR on the two training series and print the RMSE of a
    10-step forecast of the first series against test_data1.

    NOTE(review): the series length 372 is hard-coded; ``test_data`` is
    accepted for interface compatibility but unused.
    """
    x = train_data1.reshape((372, 1))
    x1 = train_data.reshape((372, 1))
    joint = np.concatenate((x, x1), axis=1)
    print(np.shape(joint))
    # forecast
    model_fit = VAR(endog=joint).fit()
    print(model_fit.summary())
    predictions = model_fit.forecast(model_fit.y, steps=10)
    # BUG FIX: the label said RMSE but the printed value was the plain MSE.
    print('VAR RMSE: ', np.sqrt(mean_squared_error(predictions[:, 0],
                                                   test_data1[0:10])))
def test_var_constant():
    # see GH#2043: fitting a VAR on a column with no variation must raise.
    import datetime
    from pandas import DataFrame, DatetimeIndex
    series = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    frame = DataFrame(series)
    # Attach a daily DatetimeIndex starting from "now".
    stamp = datetime.datetime.now()
    step = datetime.timedelta(days=1)
    stamps = []
    for _ in range(frame.shape[0]):
        stamps.append(stamp)
        stamp += step
    frame.index = DatetimeIndex(stamps)
    model = VAR(frame)
    with pytest.raises(ValueError):
        model.fit(1)
def VectorAutoRegressive(self, data, exodata):
    """Fit a VAR on ``data`` and forecast (self.end - self.start) steps.

    ``exodata`` is currently unused (kept for interface compatibility).
    """
    # FIX: removed the dead zero-filled DataFrame preallocation — it was
    # immediately overwritten by the forecast result below.
    # Fit one joint model over all axes and predict each of them.
    model_fit = VAR(data.values.tolist()).fit()
    datahat = model_fit.forecast(model_fit.y, steps=(self.end - self.start))
    return (datahat)
def vars_test():
    """Fit a VAR(2) on log-differenced balance/purchase/redeem series and
    produce the summary and diagnostic plots."""
    dt = get_dataframe()
    name_list = ["date", "tBalance_all", "total_purchase",
                 "total_redeem", "total_diff"]
    # print(dt["total_purchase"])
    time = dt["date"]
    mdata = dt[["tBalance_all", "total_purchase", "total_redeem"]]
    mdata.index = pandas.DatetimeIndex(time)
    # Log-difference to stabilize the variance and remove the trend.
    data = np.log(mdata).diff().dropna()
    results = VAR(data).fit(2)
    results.summary()
    results.plot()
def test_var_constant():
    # see GH#2043: fitting a VAR on a constant column must raise ValueError.
    import datetime
    from pandas import DataFrame, DatetimeIndex
    series = np.array([[2., 2.], [1, 2.], [1, 2.], [1, 2.], [1., 2.]])
    frame = DataFrame(series)
    now = datetime.datetime.now()
    one_day = datetime.timedelta(days=1)
    stamps = []
    for _ in range(frame.shape[0]):
        stamps.append(now)
        now += one_day
    frame.index = DatetimeIndex(stamps)
    # with pytest.warns(ValueWarning):  # does not silence warning in test output
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ValueWarning)
        model = VAR(frame)
    with pytest.raises(ValueError):
        model.fit(1)
def test2():
    """Fit a VAR(2) on log-differenced macro data, refit with AIC lag
    selection, and print a 30-step forecast. (Python 2 print syntax.)"""
    mdata = statsmodels.datasets.macrodata.load_pandas().data
    # Build quarterly labels like "1959Q1" for the index.
    dates = mdata[["year", "quarter"]].astype(int).astype(str)
    quarterly = dates["year"] + "Q" + dates["quarter"]
    mdata = mdata[["realgdp", "realcons", "realinv"]]
    mdata.index = pandas.DatetimeIndex(quarterly)
    data = np.log(mdata).diff().dropna()  # log-difference to stationarize
    model = VAR(data)
    results = model.fit(2)
    results.summary()
    # Refit letting AIC pick the lag order (up to 50).
    results = model.fit(maxlags=50, ic="aic")
    # print(results.summary())
    lag_order = results.k_ar
    # Forecast 30 steps from the last lag_order observations.
    print results.forecast(data.values[-lag_order:], 30)
    # print(results)
    # print model.select_order(15)
    # results.plot()
    # results.plot_acorr()
    pass
import pandas as pd
import numpy as np
import statsmodels.api as sm
import pylab
from statsmodels.tsa.base.datetools import dates_from_str
from statsmodels.tsa.vector_ar.var_model import VAR

# Demo script (Python 2 print syntax): fit a VAR(2) on log-differenced US
# macro data, plot diagnostics, and show orthogonalized impulse responses.
mdata = sm.datasets.macrodata.load_pandas().data

# Build a quarterly DatetimeIndex from the year/quarter columns.
dates = mdata[['year', 'quarter']].astype(int).astype(str)
quarterly = dates["year"] + "Q" + dates["quarter"]
quarterly = dates_from_str(quarterly)

mdata = mdata[['realgdp', 'realcons', 'realinv']]
mdata.index = pd.DatetimeIndex(quarterly)
data = np.log(mdata).diff().dropna()  # log difference

# make a VAR model
model = VAR(data)
results = model.fit(2)
print results.summary()

results.plot()
results.plot_acorr()  # autocorrelation

# AIC-based lag selection (up to 15 lags), then 10-period impulse responses.
model.select_order(15)
results = model.fit(maxlags=15, ic='aic')
irf = results.irf(10)
irf.plot(orth=True)  # Orthogonalization
pylab.show()
#calc beta's alpha's #do forecast of returns, correlation. Use to Weight rets.iloc[:,0:10].plot() ###DETOUR TO VAR FORECASTING from statsmodels.tsa.vector_ar.var_model import VAR, VARResults, VARProcess import statsmodels statsmodels.version.version #Check for NA's in data - have to reduce number of series used as full 30 #gave singular matrix v1 = VAR(rets_train[series_red], freq='D') v1.select_order(maxlags=30) results = v1.fit(5) #From fitted # results.summary() results.plot() # results.plot_acorr() # plt.show() #Make forecast for 3months test_index = rets_test.index fc_range = pd.date_range(start=test_index[0], periods=2, freq='3M') fc_periods = len(rets_test[fc_range[0]:fc_range[1]]) lag_order = results.k_ar fc = results.forecast(rets_train[series_red].values,fc_periods) fc.shape fc[:,-1] df_fc = pd.DataFrame(fc,index=rets.index[0:fc_periods],columns=rets_train[series_red]) df_fc.plot()
def test_constructor(self):
    # VAR must accept a plain ndarray view (i.e. no column names) and fit.
    unnamed = self.data.view((float, 3))
    model = VAR(unnamed)
    res = model.fit(self.p)