def test_exog(self):
    """Check that a built-in trend and an equivalent ``exog`` agree.

    Three VAR(2) fits on the same endog are compared:

    * ``trend="ct"`` with no exog,
    * the default trend plus a time index passed as ``exog``,
    * ``trend="n"`` plus ``[1, t]`` columns passed as ``exog``.

    Parameters, seeded ``simulate_var`` means, forecasts and forecast
    intervals are compared pairwise. A too-short ``exog_future`` must
    raise ``ValueError``.
    """
    endog = self.res0.model.endog
    nobs = len(endog)
    lags = 2
    seed = 987128

    # Same deterministic terms, specified in three different ways.
    res_trend = VAR(endog).fit(maxlags=lags, trend="ct")
    time_index = np.arange(nobs)
    res_exog_t = VAR(endog, exog=time_index).fit(maxlags=lags)
    const_and_time = np.arange(nobs)[:, None] ** [0, 1]
    res_exog_ct = VAR(endog, exog=const_and_time).fit(maxlags=lags, trend="n")

    # TODO: intercept differs by 4e-3, others are < 1e-12
    assert_allclose(res_trend.params, res_exog_t.params, rtol=5e-3)
    assert_allclose(res_trend.params, res_exog_ct.params, rtol=5e-3)
    assert_allclose(res_exog_t.params, res_exog_ct.params, rtol=1e-10)

    # Simulations share a seed, so their column means must coincide.
    sim_trend = res_trend.simulate_var(seed=seed)
    sim_exog_t = res_exog_t.simulate_var(seed=seed)
    sim_exog_ct = res_exog_ct.simulate_var(seed=seed)
    sim_pairs = (
        (sim_exog_t, sim_trend),
        (sim_exog_ct, sim_trend),
        (sim_exog_ct, sim_exog_t),
    )
    for actual, desired in sim_pairs:
        assert_allclose(actual.mean(0), desired.mean(0), rtol=1e-12)

    horizon = 10
    tight = {"rtol": 1e-12, "atol": 1e-12}

    # Point forecasts.
    fc_trend = res_trend.forecast(res_trend.endog[-lags:], horizon)
    future_t = np.arange(nobs, nobs + horizon)
    fc_exog_t = res_exog_t.forecast(
        res_exog_t.endog[-lags:], horizon, exog_future=future_t
    )

    # exog_future covering only half of the horizon must be rejected.
    short_future = np.arange(nobs, nobs + horizon // 2)
    with pytest.raises(ValueError, match="exog_future only has"):
        res_exog_t.forecast(
            res_exog_t.endog[-lags:], horizon, exog_future=short_future
        )

    future_ct = future_t[:, None] ** [0, 1]
    fc_exog_ct = res_exog_ct.forecast(
        res_exog_ct.endog[-lags:], horizon, exog_future=future_ct
    )
    fc_pairs = (
        (fc_exog_t, fc_trend),
        (fc_exog_ct, fc_trend),
        (fc_exog_ct, fc_exog_t),
    )
    for actual, desired in fc_pairs:
        assert_allclose(actual, desired, **tight)

    # Forecast intervals.
    fci_trend = res_trend.forecast_interval(res_trend.endog[-lags:], horizon)
    future_t = np.arange(nobs, nobs + horizon)
    fci_exog_t = res_exog_t.forecast_interval(
        res_exog_t.endog[-lags:], horizon, exog_future=future_t
    )
    future_ct = future_t[:, None] ** [0, 1]
    fci_exog_ct = res_exog_ct.forecast_interval(
        res_exog_ct.endog[-lags:], horizon, exog_future=future_ct
    )
    fci_pairs = (
        (fci_exog_t, fci_trend),
        (fci_exog_ct, fci_trend),
        (fci_exog_ct, fci_exog_t),
    )
    for actual, desired in fci_pairs:
        assert_allclose(actual, desired, **tight)
def generate_final_predictions(df_coords, lag_order=3, display=False,
                               years=(2022, 2023)):
    """Fit a VAR on the full history and forecast the mean coordinates.

    Parameters
    ----------
    df_coords : pandas.DataFrame
        Endogenous series handed to ``VAR``. The forecast columns are
        labelled ``future_latitude`` and ``future_longitude``, so this is
        presumably a two-column (latitude, longitude) frame in that
        order -- confirm against the caller.
    lag_order : int, optional
        Lag order passed to ``VAR.fit`` (the best order found by
        ``testing_harness``).
    display : bool, optional
        When true, print the fitted model summary and the forecast table.
    years : sequence of int, optional
        Labels for the forecast rows. One step is forecast per entry, so
        the entries should be the consecutive periods that follow the
        data. Defaults to 2022 and 2023.

    Returns
    -------
    pandas.DataFrame
        One row per forecast year, with columns ``year``,
        ``future_latitude`` and ``future_longitude``.
    """
    years = list(years)

    # Keep the fit results under their own name rather than rebinding the
    # model variable.
    results = VAR(endog=df_coords).fit(lag_order)

    # ``results.y`` is a deprecated alias of ``results.endog``; the forecast
    # only needs the last ``k_ar`` observations as initial values.
    initial_values = results.endog[-results.k_ar:]
    forecast = results.forecast(initial_values, steps=len(years))

    df_forecast = pd.DataFrame(
        forecast, columns=['future_latitude', 'future_longitude'])
    df_forecast['year'] = years
    df_forecast = df_forecast[['year', 'future_latitude', 'future_longitude']]

    if display:
        print()
        print('Final model information:')
        print()
        print(results.summary())
        print()
        print('Future hotspot forecasts:')
        print()
        print(df_forecast)

    return df_forecast
# Express the futures series relative to the spot series.
df['Ibov_fut'] = df['Ibov_fut'] / df['Spot']

# Chronological split: first 70% of the rows for fitting, the rest held out.
train_size = int(len(df) * 0.7)
test_size = len(df) - train_size
train, test = df[0:train_size], df[train_size:len(df)]

# Fit once on the training window; the lag order is selected by AIC.
train2 = train.copy()
model = VAR(train2).fit(maxlags=15, ic='aic')
lag_order = model.k_ar

# Walk-forward one-step-ahead forecasts. The model is not refitted; only the
# history fed to ``forecast`` grows by one observed test row per step.
# ``DataFrame.append`` was removed in pandas 2.0 and re-copied the whole frame
# on every call, so the history is sliced from a single array instead and the
# per-step forecasts are concatenated once after the loop.
observed = df.values
step_frames = []
for i in range(len(test)):
    history = observed[:train_size + i]
    yhat = model.forecast(history[-lag_order:], 1)
    step_frames.append(pd.DataFrame(yhat))
    print(i, lag_order)

# Same final state the row-by-row appends used to produce.
train2 = pd.concat([train2, test])
pred = pd.concat(step_frames) if step_frames else pd.DataFrame()

# Score the last column only.
vol_pred = pred.iloc[:, -1]
vol_test = test.iloc[:, -1]
mean_squared_error(vol_test, vol_pred)

# Single one-step forecast from the end of the training window.
model = VAR(train).fit(maxlags=15, ic='aic')
lag_order = model.k_ar
yhat = model.forecast(train.values[-lag_order:], 1)
yhat

datas = df['Data']
# Fragment of a larger loop body: the enclosing function and loop are not
# visible here. Compares one-step forecasts from four fitted models against
# a later row of the current path.

# Track the largest ``lenght`` seen so far.
if lenght > max_lenght:
    max_lenght = lenght

# Target row used to score the predictions, without the two id columns.
# NOTE(review): ``head(lenght)`` below covers positions 0..lenght-1, so the
# row immediately after it is ``iloc[lenght]``; ``iloc[lenght + 1]`` skips
# one row -- confirm this offset is intended.
next_line = np.array(
    current_path_df.drop(
        columns=['id_line', 'index_path']).iloc[lenght + 1])
#print(current_path_df)

# First ``lenght`` rows of the path (same columns dropped), used as the
# initial values for every model's forecast.
array_route = np.array(
    current_path_df.head(lenght).drop(
        columns=['id_line', 'index_path']))
#print(array_route)

# One-row frame built from the same path prefix; not used in the visible
# part of this fragment.
current_train_df = pd.DataFrame(
    [create_training_path(current_path_df.head(lenght), size)])

# One-step-ahead forecast from each candidate model; ``[0]`` takes the
# single forecast row.
prediction_1 = model_1.forecast(array_route, steps=1)[0]
prediction_2 = model_2.forecast(array_route, steps=1)[0]
prediction_3 = model_3.forecast(array_route, steps=1)[0]
prediction_4 = model_4.forecast(array_route, steps=1)[0]

# Define closest prediction
# Elements 1 and 2 of each forecast are compared with elements 1 and 2 of
# the target row (presumably the two coordinate columns -- verify against
# the column order of ``current_path_df``).
dist_1 = distance_between_literal(prediction_1[1], prediction_1[2],
                                  next_line[1], next_line[2])
dist_2 = distance_between_literal(prediction_2[1], prediction_2[2],
                                  next_line[1], next_line[2])
dist_3 = distance_between_literal(prediction_3[1], prediction_3[2],
                                  next_line[1], next_line[2])
dist_4 = distance_between_literal(prediction_4[1], prediction_4[2],
                                  next_line[1], next_line[2])
# Use time as distance too, centering using the value of the day