def test_irf_trend():
    # test for irf with different trend see #1636
    # this is a rough comparison by adding trend or subtracting mean to data
    # to get similar AR coefficients and IRF
    data = get_macrodata().view((float, 3), type=np.ndarray)
    model = VAR(data)
    results = model.fit(4)  # , trend = 'c')
    irf = results.irf(10)
    # demean the data and refit without a constant: the IRF standard errors
    # should be close to those from the default constant-trend fit
    data_nc = data - data.mean(0)
    model_nc = VAR(data_nc)
    results_nc = model_nc.fit(4, trend="n")
    irf_nc = results_nc.irf(10)
    assert_allclose(irf_nc.stderr()[1:4], irf.stderr()[1:4], rtol=0.01)
    # add a tiny linear trend and fit with trend="ct"; again a rough match
    # of the IRF standard errors is expected (looser tolerance)
    trend = 1e-3 * np.arange(len(data)) / (len(data) - 1)
    # for pandas version, currently not used, if data is a pd.DataFrame
    # data_t = pd.DataFrame(data.values + trend[:,None], index=data.index, columns=data.columns)
    data_t = data + trend[:, None]
    model_t = VAR(data_t)
    results_t = model_t.fit(4, trend="ct")
    irf_t = results_t.irf(10)
    assert_allclose(irf_t.stderr()[1:4], irf.stderr()[1:4], rtol=0.03)
def testing_var():
    """Fit a VAR on an 80/20 chronological split, report per-column RMSE on
    the validation set, then refit on the full data and print a one-step
    forecast.

    Fixes over the previous version:
    - the prediction-to-DataFrame copy used a hard-coded ``range(0, 10)``
      for the columns, silently dropping data when the frame does not have
      exactly 10 columns; the DataFrame is now built directly from the
      forecast array for however many columns exist
    - element-wise chained assignment ``pred.iloc[i][j] = ...`` (deprecated
      in pandas, may write to a copy) is removed
    """
    df = transform_dataframe(path)
    # creating the train and validation set (chronological, no shuffling)
    split = int(0.8 * len(df))
    train = df.iloc[:split]
    valid = df.iloc[split:]
    model = VAR(endog=train)
    model_fit = model.fit()
    # make prediction on validation
    prediction = model_fit.forecast(model_fit.y, steps=len(valid))
    cols = df.columns.values
    # converting predictions to dataframe in one shot
    pred = pd.DataFrame(prediction, index=range(len(prediction)), columns=cols)
    # check rmse
    for i in cols:
        p = pred[i]
        v = valid[i]
        print('rmse value for', i, 'is : ', np.sqrt(mean_squared_error(p, v)))
    # make final predictions
    model = VAR(endog=df)
    model_fit = model.fit()
    yhat = model_fit.forecast(model_fit.y, steps=1)
    print(yhat)
def setup_class(cls):
    """Load macro data, convert to annualized log-difference growth rates,
    and fit VAR models with maxlags 2 and 1 for use by the tests."""
    raw = macrodata.load_pandas().data
    raw = raw[["realgdp", "realcons", "realinv"]]
    levels = raw.values
    # annualized percent growth: 400 * diff(log(levels))
    growth = 400 * np.diff(np.log(levels), axis=0)
    cls.res0 = VAR(growth).fit(maxlags=2)
    cls.resl1 = VAR(growth).fit(maxlags=1)
    cls.data = growth
def test_exog(self):
    # check that trend and exog are equivalent for basics and varsim
    data = self.res0.model.endog
    # three equivalent specifications of a linear trend:
    #  1) trend="ct" built-in
    #  2) a 1-d exog ramp plus the default constant
    #  3) a 2-column exog [1, t] with no built-in trend
    res_lin_trend = VAR(data).fit(maxlags=2, trend="ct")
    ex = np.arange(len(data))
    res_lin_trend1 = VAR(data, exog=ex).fit(maxlags=2)
    ex2 = np.arange(len(data))[:, None]**[0, 1]
    res_lin_trend2 = VAR(data, exog=ex2).fit(maxlags=2, trend="n")
    # TODO: intercept differs by 4e-3, others are < 1e-12
    assert_allclose(res_lin_trend.params, res_lin_trend1.params, rtol=5e-3)
    assert_allclose(res_lin_trend.params, res_lin_trend2.params, rtol=5e-3)
    assert_allclose(res_lin_trend1.params, res_lin_trend2.params, rtol=1e-10)
    # simulate with a fixed seed; means must agree across specifications
    y1 = res_lin_trend.simulate_var(seed=987128)
    y2 = res_lin_trend1.simulate_var(seed=987128)
    y3 = res_lin_trend2.simulate_var(seed=987128)
    assert_allclose(y2.mean(0), y1.mean(0), rtol=1e-12)
    assert_allclose(y3.mean(0), y1.mean(0), rtol=1e-12)
    assert_allclose(y3.mean(0), y2.mean(0), rtol=1e-12)
    # point forecasts must agree; exog versions need the future exog values
    h = 10
    fc1 = res_lin_trend.forecast(res_lin_trend.endog[-2:], h)
    exf = np.arange(len(data), len(data) + h)
    fc2 = res_lin_trend1.forecast(res_lin_trend1.endog[-2:], h, exog_future=exf)
    # a too-short exog_future must raise
    with pytest.raises(ValueError, match="exog_future only has"):
        wrong_exf = np.arange(len(data), len(data) + h // 2)
        res_lin_trend1.forecast(res_lin_trend1.endog[-2:], h, exog_future=wrong_exf)
    exf2 = exf[:, None]**[0, 1]
    fc3 = res_lin_trend2.forecast(res_lin_trend2.endog[-2:], h, exog_future=exf2)
    assert_allclose(fc2, fc1, rtol=1e-12, atol=1e-12)
    assert_allclose(fc3, fc1, rtol=1e-12, atol=1e-12)
    assert_allclose(fc3, fc2, rtol=1e-12, atol=1e-12)
    # forecast intervals must also agree across the three specifications
    fci1 = res_lin_trend.forecast_interval(res_lin_trend.endog[-2:], h)
    exf = np.arange(len(data), len(data) + h)
    fci2 = res_lin_trend1.forecast_interval(res_lin_trend1.endog[-2:], h, exog_future=exf)
    exf2 = exf[:, None]**[0, 1]
    fci3 = res_lin_trend2.forecast_interval(res_lin_trend2.endog[-2:], h, exog_future=exf2)
    assert_allclose(fci2, fci1, rtol=1e-12, atol=1e-12)
    assert_allclose(fci3, fci1, rtol=1e-12, atol=1e-12)
    assert_allclose(fci3, fci2, rtol=1e-12, atol=1e-12)
def test_var_trend():
    """An unknown trend specification must raise ValueError (see gh-2271)."""
    macro = get_macrodata().view((float, 3), type=np.ndarray)
    model = VAR(macro)
    fitted = model.fit(4)  # default constant trend
    fitted.irf(10)  # smoke: IRFs computable on the default fit
    # fitting demeaned data without a constant must also work
    demeaned = macro - macro.mean(0)
    VAR(demeaned).fit(4, trend="n")
    with pytest.raises(ValueError):
        model.fit(4, trend="t")
def _run_varLiNGAM(self, xt, verbose=False):
    """
    Run the VarLiNGAM algorithm on data.

    Args:
        xt : time series matrix with size n*m (length*num_variables)
        verbose : bool, passed through to the LiNGAM stage

    Returns:
        Tuple: (Bo, Bhat) Instantaneous and lagged causal coefficients
    """
    Ident = np.identity(xt.shape[1])
    # Step 1: VAR estimation
    model = VAR(xt)
    results = model.fit(self.lag)
    # drop row 0 of params, i.e. the intercept terms
    # NOTE(review): assumes the default trend 'c' so that the first row is
    # the constant — confirm if the fit options ever change
    Mt_ = results.params[1:, :]
    # Step 2: LiNGAM on Residuals
    resid_VAR = results.resid
    model = LiNGAM(verbose=verbose)
    data = pd.DataFrame(resid_VAR)
    Bo_ = model._run_LiNGAM(data)
    # Step 3: Get instantaneous matrix Bo from LiNGAM
    # Bo_ = pd.read_csv("results.csv").values
    # Step 4: Calculation of lagged Bhat
    Bhat_ = np.dot((Ident - Bo_), Mt_)
    return (Bo_, Bhat_)
def artificial_data(): N = 301 # x, y = n_hat(N, 6) x, y = sin_(N, 5) y = y + 0.01 * np.random.normal(0., .5, len(y)) z = y * y Y = np.matrix([y, z]).transpose().tolist() # ====================== title('single prediction') model = VAR(Y) model_fit = model.fit(maxlags=15, ic='aic') pred = model_fit.forecast(Y[-model_fit.k_ar:], N) xx = np.arange(N, N + len(pred)) assert (len(pred) == N) # print(model_fit.k_ar) # print(model_fit.params) plot(x, Y) plot(xx, pred, '--') show() # # ======================================= title('dynamic prediction') xx, pred = test_forecast(x, Y, len_for_prediction=100, n_pred=100, maxlags=15, ic='aic') plot(x, Y) plot(xx, pred, '--') show()
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """Rolling VAR forecasts over a held-out test tail.

    Fits a VAR(n_lags) on the scaled training portion, then rolls a
    forecasting window across the sample, recording the n_forward-step-ahead
    prediction for every test index at each horizon in n_forwards.
    Returns ([DataFrame per horizon], df_test).
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]
    # NOTE(review): StandardScaler here takes mean/std constructor kwargs, so
    # it is presumably a project-local scaler, not sklearn's — confirm
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)
    # Do forecasting.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    # start early enough that the first test index is reachable at the
    # largest horizon
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = var_result.forecast(
            scaler.transform(df.values[input_ind:input_ind + n_lags]),
            max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]
    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]),
                                  index=df_test.index,
                                  columns=df_test.columns)
        df_predicts.append(df_predict)
    # NOTE(review): only the loop's final df_predict (the last horizon) is
    # written to disk, and index=False drops the time index — confirm intended
    df_predict.to_csv("./df_predict.csv", sep=',', index=False)
    df_test.to_csv("./df_test.csv", sep=',', index=False)
    return df_predicts, df_test
def test_select_order(self):
    # smoke test: fitting with different information criteria should run;
    # the results are intentionally discarded
    result = self.model.fit(10, ic='aic', verbose=True)
    result = self.model.fit(10, ic='fpe', verbose=True)
    # bug
    # NOTE(review): the "# bug" marker above predates this review; the call
    # below exercises select_order() with its defaults and discards the result
    model = VAR(self.model.endog)
    model.select_order()
def generate_final_predictions(df_coords, lag_order=3, display=False):
    """Train a VAR of order ``lag_order`` on the full coordinate history and
    forecast mean coordinates for the years 2022 and 2023.

    Parameters
    ----------
    df_coords : DataFrame of historical coordinates (two columns).
    lag_order : VAR lag order (use the best value from testing_harness).
    display : when True, print the fitted model summary and the forecasts.

    Returns
    -------
    DataFrame with columns [year, future_latitude, future_longitude].
    """
    fitted = VAR(endog=df_coords).fit(lag_order)
    predicted = fitted.forecast(fitted.y, steps=2)
    df_forecast = pd.DataFrame(predicted,
                               columns=['future_latitude', 'future_longitude'])
    df_forecast['year'] = [2022, 2023]
    # reorder so the year column comes first
    df_forecast = df_forecast[['year', 'future_latitude', 'future_longitude']]
    if display:
        print()
        print('Final model information:')
        print()
        print(fitted.summary())
        print()
        print('Future hotspot forecasts:')
        print()
        print(df_forecast)
    return df_forecast
def extract_model(self, input, save_status=False):
    """Forecast ``input`` future months with a VAR and return those rows.

    Returns a usage-hint string when no cleaned data is available.
    """
    history = self._model_clean()
    if history is False:
        # no cleaned data available for this area
        return "Try to find available area by:\n sh casa.sh --find aptartment name\n"
    steps = int(input)
    # month-start index beginning the month after the last observation
    future_index = pd.date_range(start=history.index[-1], periods=steps + 1,
                                 freq='MS')[1:]
    fitted = VAR(history).fit()
    forecast = pd.DataFrame(
        fitted.forecast(y=history.values, steps=steps),
        columns=['Q1', 'Q2', 'Q3'],
        index=future_index,
    )
    combined = pd.concat([history, forecast], axis=0)
    result = combined.loc[future_index]
    if save_status is True:
        self._save_image_model(eval_model=combined, pred_model=forecast,
                               pred_num=input)
    return result
def train(self, data):
    """Choose VAR for multivariate data and AR otherwise, then store the
    model and the AIC-optimal lag order (searched up to 30 lags)."""
    is_multivariate = len(data.columns) > 1
    if is_multivariate:
        self.model = VAR(data)
        self.opt_p = self.model.select_order(30).aic
    else:
        self.model = AR(data)
        self.opt_p = self.model.select_order(30, 'aic')
def var(flow, target):
    """Per-cell VAR one-step forecast baseline with masked RMSE/MAE metrics.

    For every (row, channel) pair a tiny two-column series of consecutive
    flow values is fit with a VAR and the one-step forecast of the second
    column is recorded; cells whose fit fails contribute 0.0 (deliberate
    best-effort). Metrics are computed only where the target exceeds the
    module-level ``mask_threshold``.

    Fix: the bare ``except:`` (which also swallowed KeyboardInterrupt /
    SystemExit) is narrowed to ``except Exception``, and the redundant
    ``pass`` is dropped.

    Returns
    -------
    (in_rmse, out_rmse, in_mae, out_mae)
    """
    warnings.filterwarnings("ignore")
    in_mask = np.greater(target[:, 0], mask_threshold)
    out_mask = np.greater(target[:, 1], mask_threshold)
    result = np.zeros((flow.shape[0], flow.shape[-1]))
    for i in range(flow.shape[0]):
        if verbose:
            if (i + 1) % 10000 == 0:
                print("VAR: line {} of {}".format(i + 1, flow.shape[0]))
        for j in range(flow.shape[-1]):
            # pairs of consecutive observations for this cell
            data = list()
            for k in range(flow.shape[1] - 1):
                data.append([flow[i, k, j], flow[i, k + 1, j]])
            model = VAR(data)
            try:
                model_fit = model.fit()
                result[i, j] = model_fit.forecast(model_fit.y, steps=1)[0][1]
            except Exception:
                # best-effort: unfittable series (e.g. constant data)
                # contribute a zero forecast
                result[i, j] = 0.0
    in_rmse = np.sqrt(
        np.mean(np.square(target[:, 0][in_mask] - result[:, 0][in_mask])))
    out_rmse = np.sqrt(
        np.mean(np.square(target[:, 1][out_mask] - result[:, 1][out_mask])))
    in_mae = np.mean(np.abs(target[:, 0][in_mask] - result[:, 0][in_mask]))
    out_mae = np.mean(np.abs(target[:, 1][out_mask] - result[:, 1][out_mask]))
    return in_rmse, out_rmse, in_mae, out_mae
def forecast_out_model(data, order=(3, 0)):
    """Forecast parameters for one model.

    Rolling one-step-ahead forecasts: the first half of the sample is kept
    as-is, then the window rolls forward one observation at a time and a
    one-step forecast is appended for each position. Univariate data uses
    ARMA (legacy statsmodels API); multivariate data uses a VAR whose
    maxlags is the AR order ``order[0]``.

    Parameters
    ----------
    data : DataFrame
        Parameters for one model only
    order : tuple of int, optional
        (p, q) ARMA order; only p is used for the VAR branch.

    Returns
    -------
    ndarray
        The unchanged first half stacked on top of the rolling forecasts
        (note: np.vstack returns an array, not a DataFrame).
    """
    window = data.shape[0] // 2
    maxlags = order[0]
    out = [data[:window]]
    nobs = data.shape[0]
    for first in range(nobs - window):
        last = window + first
        if data.shape[1] == 1:
            model = ARMA(data[first:last], order=order)
            res = model.fit(method='css', disp=False)
            forecast = res.forecast(1)[0]
        else:
            model = VAR(data[first:last])
            res = model.fit(maxlags=maxlags)
            forecast = res.forecast(np.atleast_2d(data[first:last]), 1)
        out.append(forecast)
    return np.vstack(out)
def var(data): start_time_ = time.time() # train,test = data[:int(0.7*(len(data)))],data[int(0.7*(len(data))):] data = data.interpolate(limit=30000000, limit_direction='both').astype('float32') #split_date = '2017-01-01' train, test = data[:split_date], data[split_date:] if DOpca: steps = [('scale', StandardScaler()), ('pca', PCA(n_components=n_pca))] else: steps = [('scale', StandardScaler())] pipe = Pipeline(steps=steps) pipe.fit(data) train, test = data[:int(0.7 * (len(data)))], data[int(0.7 * (len(data))):] sc_train, sc_test = pipe.transform(train), pipe.transform(test) model = VAR(endog=sc_train) model_fit = model.fit(9) trainPredict = model_fit.forecast(sc_train, steps=len(sc_train)) testPredict = model_fit.forecast(sc_test, steps=len(sc_test)) try: trainPredict = pipe.inverse_transform(trainPredict) testPredict = pipe.inverse_transform(testPredict) trainPredict = pd.Series(data=(trainPredict[:, 0]), index=train.index) testPredict = pd.Series(data=(testPredict[:, 0]), index=test.index) except: trainPredict, testPredict = -999, -999 trainY = pd.Series(data=(train.iloc[:, 0]), index=train.index) testY = pd.Series(data=(test.iloc[:, 0]), index=test.index) time_ = time.time() - start_time_ return trainPredict, testPredict, time_, trainY, testY
def control_lqr(env, agent, model_fit, data, lag=4):
    """Run an LQR controller in the environment, re-identifying the system
    dynamics with a VAR every 10 steps.

    NOTE(review): only the first lag matrix (coefs[0]) is used as the LQR
    state-transition matrix A — confirm higher lags are meant to be ignored.
    """
    # hand-tuned control input matrix and state cost for the LQR design
    B = np.array([[0], [0], [-.01], [-.01]])
    Q = np.diag((10., 1., 10., 1.))
    print(model_fit.coefs)
    K = lqr(model_fit.coefs[0], B, Q, 1)
    print("K=")
    print(K)
    obs = env.reset()
    action = agent.begin_episode(obs)
    for i in range(500):
        env.render()
        time.sleep(0.15)  # slows down process to make it more visible
        # recompute K every 10 steps
        data = np.vstack([data, obs])
        if (i % 10 == 0):
            model_next = VAR(data)
            model_fit_next = model_next.fit(lag)
            K = lqr(model_fit_next.coefs[0], B, Q, 1)
            # print("K=")
            # print(K)
        action = get_control(K, obs)
        # Get the next action from the learner, given our new state.
        obs, reward, done, info = env.step(action)
        if done:
            print("Final episode: lasted {} timesteps, data: {}".format(
                i + 1, obs))
            break
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using Vector Auto-Regressive Model.
    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of the sample (from the end) held out as test.
    :return: [list of prediction in different horizon], dt_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]
    # NOTE(review): StandardScaler here takes mean/std constructor kwargs, so
    # it is presumably a project-local scaler, not sklearn's — confirm
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)
    # Do forecasting.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    # start early enough that the first test index is reachable at the
    # largest horizon
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        prediction = var_result.forecast(scaler.transform(df.values[input_ind: input_ind + n_lags]), max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]
    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]), index=df_test.index, columns=df_test.columns)
        df_predicts.append(df_predict)
    return df_predicts, df_test
def fit_model(self):
    """Fit a Vector Autoregression on the training set and store the
    fitted results on ``self.model_fit``."""
    self.model_fit = VAR(endog=self.train).fit()
def var_fit(self, endog, maxlags=5, ic='aic', verbose=False, trend='c'):
    '''
    Find best VAR with best order and various lags

    Parameters
    ----------
    endog : array-like, (shape: (n_time_points, n_variables))
        2-d endogenous response variable. The independent variable.
    maxlags : int
        Maximum number of lags to check for order selection.
    ic : {'aic', 'fpe', 'hqic', 'bic', None}, optional, (default="aic")
        Information criterion to use for VAR order selection.
        aic : Akaike
        fpe : Final prediction error
        hqic : Hannan-Quinn
        bic : Bayesian a.k.a. Schwarz
    verbose : bool, default False
        Print order selection output to the screen
    trend : str {"c", "ct", "ctt", "nc"}, optional, (default="c")
        "c" - add constant
        "ct" - constant and trend
        "ctt" - constant, linear and quadratic trend
        "nc" - no constant, no trend
        Note that these are prepended to the columns of the dataset.
        NOTE(review): newer statsmodels spells the no-constant option "n";
        confirm which spelling the installed version accepts.

    Returns
    -------
    self (updating self.var_result)
    '''
    self.var_result = VAR(endog).fit(maxlags=maxlags, ic=ic,
                                     verbose=verbose, trend=trend)
def parametersAR(self, lag=1):
    """Fit a VAR(lag) to the historical beta factors (b0, b1, b2) and cache
    the fitted model on ``self._varModel``. Always returns True."""
    betas = self.parametersHistorical()[['b0', 'b1', 'b2']]
    self._varModel = VAR(betas).fit(lag)
    # summary() result is discarded, as in the original implementation
    self._varModel.summary()
    return True
def extract(self, instance):
    """Fit a VAR of order ``self.lags`` to the instance's EEG channels and
    return the coefficient matrix flattened (C order) as a 1-d feature
    vector."""
    assert (isinstance(instance, Instance))
    coeff_matrix = VAR(instance.eeg_data.T).fit(self.lags).params
    # flatten row-major into one vector, same values/order as the original
    # hstack-over-column-vector trick
    features = coeff_matrix.reshape(-1)
    self.assert_features(features)
    return features
def var_simulate(data, n_simulate, pca_n=200):
    """Simulate surrogate data: reduce with PCA, fit a VAR(1) on the
    component scores, simulate n_simulate samples, and project back to the
    original vertex space."""
    reduced = pca(data, pca_n)
    fitted = VAR(reduced['pc_scores']).fit(maxlags=1)
    simulated_scores = fitted.simulate_var(n_simulate)
    # map simulated component scores back through the PCA basis
    return simulated_scores @ reduced['Va']
def var_predict(train_data, num_out):
    """Fit a VAR(2) per sample and forecast ``num_out`` steps of the first
    series; returns an array of shape (len(train_data), num_out)."""
    forecasts = []
    for series in train_data:
        fitted = VAR(series).fit(2)
        horizon = fitted.forecast(fitted.y, steps=num_out)
        # keep only the first variable's forecast path
        forecasts.append(horizon[:, 0])
    return np.array(forecasts)
def extractCoeff(timeseries_data, lag_order):
    '''
    Takes in a 7680x16 array to fit a VAR model (no constant term) and
    obtain the coefficients
    @return: 5x16x16 VAR coefficients array
    '''
    fitted = VAR(timeseries_data).fit(lag_order, trend='nc')
    # one lag-coefficient matrix per lag
    return fitted.coefs
def train(self, array_X, array_Y):
    """Fit a VAR on the concatenation [Y | X] and return the in-sample
    fitted values for the Y column, with the first value duplicated at the
    front (presumably to realign after the lag-induced shortening — TODO
    confirm)."""
    self.train_X = array_X
    self.train_Y = array_Y
    combined = numpy.concatenate((numpy.matrix(array_Y).T, array_X), axis=1)
    fitted = VAR(endog=pd.DataFrame(data=combined)).fit()
    y_fitted = fitted.fittedvalues.values[:, 0]
    return numpy.hstack((y_fitted[0], y_fitted))
def load_results_statsmodels(dataset):
    """Fit a VAR for every (trend, seasons) specification in the module-level
    ``dt_s_list`` on ``data[dataset]`` and return a dict keyed by those
    specification tuples."""
    results_per_deterministic_terms = dict.fromkeys(dt_s_list)
    for dt_s_tup in dt_s_list:
        endog = data[dataset]
        # seasonal dummies as exogenous regressors for this specification
        exog = generate_exog_from_season(dt_s_tup[1], len(endog))
        model = VAR(endog, exog)
        results_per_deterministic_terms[dt_s_tup] = model.fit(
            maxlags=4, trend=dt_s_tup[0], method="ols")
    return results_per_deterministic_terms
def VARmethod(paramsList=['pollution.csv', '0.93','pm','date'], specialParams=['2','1','4','0','1', '1', '7']):
    """Read a CSV, split train/test by row fraction, resample daily, fit a
    VAR(1) on each differenced series, and write combined results to
    result.csv.

    NOTE(review): specialParams is accepted but never used; the per-column
    data passed to VAR is a single differenced series — confirm VAR (rather
    than a univariate AR) is intended here.
    """
    path = paramsList[0]
    trainRows = float(paramsList[1])
    saveto = 'result.csv'
    # only the value columns and the date column are loaded
    df = pd.read_csv(path, usecols=paramsList[2:])
    allRows = df.shape[0]
    train = df[0:int(allRows*trainRows)]
    test = df[int(allRows*trainRows)+1:]
    df['Timestamp'] = pd.to_datetime(df[paramsList[-1]], format='%Y/%m/%d %H:%M')
    df.index = df['Timestamp']
    df = df.resample('D').mean()
    # NOTE(review): the assignments below write to slices of df and may hit
    # pandas' SettingWithCopy behavior — confirm copies are intended
    train['Timestamp'] = pd.to_datetime(train[paramsList[-1]], format='%Y/%m/%d %H:%M')
    train.index = train['Timestamp']
    train = train.resample('D').mean()
    test['Timestamp'] = pd.to_datetime(test[paramsList[-1]], format='%Y/%m/%d %H:%M')
    test.index = test['Timestamp']
    test = test.resample('D').mean()
    y_hat = test.copy()
    nullArray = train.copy()
    nullArray['time'] = train.index
    # the code above is shared boilerplate across methods ------------------
    for i in range(2,len(paramsList)-1):
        #https://blog.csdn.net/mooncrystal123/article/details/86736397
        #https://blog.csdn.net/qq_41518277/article/details/85101141
        var_data = train[paramsList[i]].diff(1).dropna()
        #model = VAR(endog=var_data, dates=pd.date_range(train.index[0], train.index[-1]),freq='M')
        model = VAR(endog=var_data)
        # estimate the optimal lag order
        #lag_order = model.select_order()
        # print the selection results
        #print(lag_order.summary())
        model_fit = model.fit(1)
        prediction = model_fit.forecast(model_fit.y, steps=len(test[paramsList[i]]))
        print(prediction)
        y_hat[paramsList[i]] = prediction
        rms = sqrt(mean_squared_error(test[paramsList[i]], y_hat[paramsList[i]]))
        print(rms)
    # --------------------------------------
    y_hat['time'] = test.index
    yhat_naive = np.array(y_hat)
    nArray = np.array(nullArray)
    newArray = np.concatenate((nArray,yhat_naive),axis=0)
    s = pd.DataFrame(newArray, columns=paramsList[2:])
    # blank out the training portion of each predicted column
    for i in range(2,len(paramsList)-1):
        s[paramsList[i]][0:int(len(s)*trainRows)] = ""
    s.to_csv(saveto,index=False,header=True,float_format='%.2f')
def parametersVAR(self, tenors, yields, lag=1, steps=1, alpha=0.01):
    """Fit a VAR(lag) to the historical curve beta factors (b0, b1, b2) and
    return a forecast interval plus the latest observed betas.

    Returns
    -------
    tuple : (forecast_interval output, last observed [b0, b1, b2] row)
    """
    params = pd.DataFrame(data=self.calibrateCurveParametersHistorical(
        tenors, yields), columns=['tau', 'b0', 'b1', 'b2'], index=yields.index)
    self._varModel = VAR(params[['b0', 'b1', 'b2']]).fit(lag)
    # NOTE(review): summary() result is discarded
    self._varModel.summary()
    fparam = self._varModel.forecast_interval(
        params.tail(1)[['b0', 'b1', 'b2']].values, steps, alpha=alpha)
    return fparam, params.tail(1)[['b0', 'b1', 'b2']].values
def test_irf_err_bands():
    # smoke tests
    data = get_macrodata()
    model = VAR(data)
    results = model.fit(maxlags=2)
    irf = results.irf()
    # three Sims-Zha error-band variants plus Monte Carlo bands; the return
    # values are intentionally unused — this only checks that the calls run
    bands_sz1 = irf.err_band_sz1()
    bands_sz2 = irf.err_band_sz2()
    bands_sz3 = irf.err_band_sz3()
    bands_mc = irf.errband_mc()
def test_var_cov_params_pandas(bivariate_var_data):
    """cov_params on a DataFrame-backed VAR returns a square DataFrame whose
    index/columns are the (exog term, variable) MultiIndex."""
    frame = pd.DataFrame(bivariate_var_data, columns=['x', 'y'])
    res = VAR(frame).fit(2)
    cov = res.cov_params()
    assert isinstance(cov, pd.DataFrame)
    exog_names = ('const', 'L1.x', 'L1.y', 'L2.x', 'L2.y')
    expected = pd.MultiIndex.from_product((exog_names, ('x', 'y')))
    # symmetric labelling: rows and columns carry the same index
    assert_index_equal(cov.index, cov.columns)
    assert_index_equal(cov.index, expected)