def predict(self, prev_data, period, model_name='p'):
    """Forecast past the end of ``prev_data`` with pre-estimated parameters.

    A SARIMAX model is rebuilt on ``prev_data`` using the orders stored on
    the instance and is only filtered (not fitted) with one of the stored
    parameter vectors.

    Parameters
    ----------
    prev_data
        Observations the forecast is conditioned on.
    period
        Forwarded to ``forecast`` as the forecast horizon.
    model_name : str, optional
        ``'r'`` selects ``self.model_params_r``, ``'s'`` selects
        ``self.model_params_s``; any other value selects
        ``self.model_params_p``.

    Returns
    -------
    The forecast; clipped below at zero when ``self.pos`` is truthy.
    """
    sarimax = SARIMAX(
        prev_data,
        order=self.my_order,
        seasonal_order=self.my_seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )

    if model_name == 'r':
        params = self.model_params_r
    elif model_name == 's':
        params = self.model_params_s
    else:
        params = self.model_params_p

    forecast = sarimax.filter(params).forecast(period)
    if not self.pos:
        return forecast
    # Demand and solar values must never be predicted as negative.
    return forecast.clip(min=0)
def model_predict(y):
    """Predict using the SARIMAX model (multi-step ahead forecasting).

    ``y`` is cut into consecutive windows of ``steps_ahead`` observations.
    Each window from the second one onwards is forecast from a model that is
    filtered (with ``fitted_params``) on the window immediately before it.
    A trailing partial window is forecast the same way from the
    ``steps_ahead`` observations preceding it.  Positions that no window
    covers keep their initial value of zero.

    Parameters
    ----------
    y : numpy.ndarray
        Observed series; only ``y.shape[0]`` and slicing are used.

    Returns
    -------
    numpy.ndarray
        Array of length ``y.shape[0]`` holding the predicted means.
    """
    total = y.shape[0]
    forecasts = np.zeros(total)
    full_windows = int(np.floor(total / steps_ahead))
    remainder = total % steps_ahead

    def forecast_after(history_end, horizon):
        # Filter the `steps_ahead` observations that end at `history_end`,
        # then predict `horizon` steps beyond them.  The exogenous
        # "outlier" regressor is all zeros in and out of sample.
        history = y[history_end - steps_ahead:history_end]
        model = SARIMAX(history, order=order, seasonal_order=seasonal_order,
                        exog=np.zeros((steps_ahead, 1)), trend=trend)
        prediction = model.filter(fitted_params).get_prediction(
            start=steps_ahead,
            end=steps_ahead + horizon - 1,
            exog=np.zeros((horizon, 1)))
        return prediction.predicted_mean

    # Full windows, starting with the second one.
    for window in range(1, full_windows):
        begin = window * steps_ahead
        forecasts[begin:begin + steps_ahead] = forecast_after(
            begin, steps_ahead)

    # Trailing partial window, if any.
    if remainder > 0:
        begin = total - remainder
        forecasts[begin:total + 1] = forecast_after(begin, remainder)

    return forecasts
def test_innovations_algo_direct_filter_kalman_filter(ar_params, ma_params,
                                                      sigma2):
    """Compare the direct innovations routine with the Kalman filter.

    For an ARMA process both approaches must yield the same forecast errors
    and the same per-observation log-likelihood.
    """
    endog = np.random.normal(size=10)

    # Innovations algorithm approach
    raw_errors, raw_mse = arma_innovations.arma_innovations(
        endog, ar_params, ma_params, sigma2)
    errors = np.array(raw_errors)
    variances = np.array(raw_mse) * sigma2
    expected_llf_obs = (-0.5 * errors**2 / variances
                        - 0.5 * np.log(2 * np.pi * variances))

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    kalman = SARIMAX(endog, order=arma_order).filter(
        np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree
    assert_allclose(errors, kalman.forecasts_error[0])
    assert_allclose(expected_llf_obs, kalman.llf_obs)

    # The log-likelihood can also be obtained from the low-level routine
    direct_llf_obs = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(direct_llf_obs, kalman.llf_obs)
def test_innovations_algo_filter_kalman_filter(reset_randomstate):
    """Compare innovations algorithm + filter with the Kalman filter.

    Uses a fixed ARMA(1, 1) process with unit innovation variance and checks
    forecast errors, gains and per-observation log-likelihoods.
    """
    ar_params = np.array([0.5])
    ma_params = np.array([0.2])
    # TODO: could be generalized to sigma2 != 1 once #5324 is merged and
    # arma_acovf accepts a sigma2 argument (possibly not needed for the
    # purpose of this test).
    sigma2 = 1

    endog = np.random.normal(size=10)

    # Innovations algorithm approach
    autocov = arma_acovf(np.r_[1, -ar_params], np.r_[1, ma_params],
                         nobs=len(endog))
    theta, v = innovations_algo(autocov)
    u = innovations_filter(endog, theta)
    expected_llf_obs = (-0.5 * u ** 2 / (sigma2 * v)
                        - 0.5 * np.log(2 * np.pi * v))

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    res = SARIMAX(endog, order=arma_order).filter(
        np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree (an absolute tolerance is used on Windows)
    if PLATFORM_WIN:
        atol = 1e-6
    else:
        atol = 0.0
    assert_allclose(u, res.forecasts_error[0], rtol=1e-6, atol=atol)
    assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1],
                    atol=atol)
    assert_allclose(expected_llf_obs, res.llf_obs, atol=atol)
def test_innovations_algo_filter_kalman_filter(reset_randomstate):
    """Compare innovations algorithm + filter with the Kalman filter.

    Uses a fixed ARMA(1, 1) process with unit innovation variance and checks
    forecast errors, gains and per-observation log-likelihoods.
    """
    ar_params = np.array([0.5])
    ma_params = np.array([0.2])
    # TODO: could be generalized to sigma2 != 1 once #5324 is merged and
    # arma_acovf accepts a sigma2 argument (possibly not needed for the
    # purpose of this test).
    sigma2 = 1

    endog = np.random.normal(size=10)

    # Innovations algorithm approach
    autocov = arma_acovf(np.r_[1, -ar_params], np.r_[1, ma_params],
                         nobs=len(endog))
    theta, v = innovations_algo(autocov)
    u = innovations_filter(endog, theta)
    expected_llf_obs = (-0.5 * u**2 / (sigma2 * v)
                        - 0.5 * np.log(2 * np.pi * v))

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    res = SARIMAX(endog, order=arma_order).filter(
        np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree (an absolute tolerance is used on Windows)
    if PLATFORM_WIN:
        atol = 1e-6
    else:
        atol = 0.0
    assert_allclose(u, res.forecasts_error[0], atol=atol)
    assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1],
                    atol=atol)
    assert_allclose(expected_llf_obs, res.llf_obs, atol=atol)
def arima_forecast(dataset_series, dateset, start_index, end_index):
    """Forecast 301 steps past the data, plot the forecast, return its first value.

    The series is indexed by ``dateset`` and fed to a seasonal AR model
    (order ``(3, 0, 0)``, seasonal order ``(3, 0, 0, 20)``).  The model is
    not estimated: it is filtered at its own ``start_params``.

    Parameters
    ----------
    dataset_series
        Observed values.
    dateset
        Index assigned to the observations.
    start_index, end_index
        Unused; kept so existing callers keep working.

    Returns
    -------
    The first element of the forecast (``forecast[0]``).

    Notes
    -----
    Prints the forecast and its second element, and opens a blocking
    matplotlib window via ``pyplot.show()``.
    """
    series = pd.Series(dataset_series, index=dateset)

    model = SARIMA(series, order=(3, 0, 0), seasonal_order=(3, 0, 0, 20),
                   simple_differencing=True)
    # No fitting: run the filter at the model's default starting parameters.
    model_fit = model.filter(model.start_params)

    n_obs = len(dataset_series)
    forecast = model_fit.predict(start=n_obs, end=n_obs + 300)
    print(forecast)

    pyplot.plot(forecast, marker='o', label="forecast")
    pyplot.show()

    # NOTE(review): integer indexing on the forecast presumably relies on
    # positional access — confirm against the index type of `dateset`.
    print(forecast[1])
    return forecast[0]
def predict_with_sarima(self, order=(1, 0, 0), seasonal_order=(1, 0, 0, 24)):
    """Forecast ``self.prediction_length`` steps using a stored SARIMA model.

    The parameters of a previously saved results object are applied to a
    fresh model built on the last 48 entries of ``self.train``; nothing is
    re-estimated.

    Parameters
    ----------
    order : tuple, optional
        Non-seasonal order passed to ``SARIMAX``.
    seasonal_order : tuple, optional
        Seasonal order passed to ``SARIMAX``.

    Returns
    -------
    The prediction, which is also stored on ``self.prediction``.
    """
    model_path = '{}{}{}.pkl'.format(self.model_dir, self.name,
                                     self.sarima_models_suffix)
    stored_results = SARIMAXResults.load(model_path)

    recent = self.train[-48:]
    filtered = SARIMAX(endog=recent, order=order,
                       seasonal_order=seasonal_order).filter(
        params=stored_results.params)

    # Out-of-sample range begins right after the last observation used.
    first_step = len(recent)
    last_step = len(recent) + self.prediction_length - 1
    self.prediction = filtered.predict(start=first_step, end=last_step)
    return self.prediction
def arima_forecast(dataset_series):
    """Return a 6-step out-of-sample forecast for ``dataset_series``.

    A seasonal AR model (order ``(2, 0, 0)``, seasonal order
    ``(2, 0, 0, 7)``) is filtered at its own ``start_params`` instead of
    being estimated, and predictions are requested for positions
    ``len(dataset_series)`` through ``len(dataset_series) + 5``.
    """
    sarima = SARIMA(
        dataset_series,
        order=(2, 0, 0),
        seasonal_order=(2, 0, 0, 7),
        simple_differencing=True,
    )
    # No fitting: run the filter at the model's default starting parameters.
    filtered = sarima.filter(sarima.start_params)

    return filtered.predict(len(dataset_series), len(dataset_series) + 5)
def evaluate(md_path, flat_path_ts, flat_path_out):
    """Write the in-sample predictions of a stored model, one per line.

    Parameters
    ----------
    md_path
        Path of a pickle holding the parameter vector to filter with.
    flat_path_ts
        Path handed to ``load_flat`` to obtain the time series.
    flat_path_out
        Path of the text file to write; it gets one prediction per
        observation, each followed by a newline.
    """
    # NOTE: unpickling can execute arbitrary code; `md_path` must come from
    # a trusted source.
    with open(md_path, "rb") as model_file:
        prs = pk.load(model_file)

    ts = list(load_flat(flat_path_ts))

    md2 = SARIMAX(ts, order=(AR, 1 if D else 0, MA),
                  enforce_stationarity=False, enforce_invertibility=False)
    rs = md2.filter(prs)

    # assembling the results
    with open(flat_path_out, "w") as oh:
        for i in range(len(ts)):
            oh.write(str(rs.predict(i, i)[-1]) + "\n")
def arima_forecast(dataset):
    """Return a 24-step out-of-sample forecast for ``dataset``.

    A seasonal AR model (order ``(3, 0, 0)``, seasonal order
    ``(3, 0, 0, 7)``) is filtered at its own ``start_params`` instead of
    being estimated, and predictions are requested for positions
    ``len(dataset)`` through ``len(dataset) + 23``.
    """
    sarima = SARIMA(
        dataset,
        order=(3, 0, 0),
        seasonal_order=(3, 0, 0, 7),
        simple_differencing=True,
    )
    # No fitting: run the filter at the model's default starting parameters.
    filtered = sarima.filter(sarima.start_params)

    n_obs = len(dataset)
    return filtered.predict(start=n_obs, end=n_obs + 23)
def arima_forecast(dataset_series, dateset, start_index, end_index):
    """Predict up to ``end_index`` and return the slice at ``end_index``.

    The series is indexed by ``dateset`` and fed to a seasonal AR model
    (order ``(3, 0, 0)``, seasonal order ``(3, 0, 0, 7)``) that is filtered
    at its own ``start_params`` rather than estimated.

    Parameters
    ----------
    dataset_series
        Observed values.
    dateset
        Index assigned to the observations.
    start_index
        Unused.
    end_index
        Passed as ``end`` to ``predict`` and used for the returned slice.

    Returns
    -------
    ``predictions[end_index:end_index]``.
    """
    indexed = pd.Series(dataset_series, index=dateset)
    sarima = SARIMA(indexed, order=(3, 0, 0), seasonal_order=(3, 0, 0, 7),
                    simple_differencing=True)
    # No fitting: run the filter at the model's default starting parameters.
    filtered = sarima.filter(sarima.start_params)

    predictions = filtered.predict(end=end_index)
    # NOTE(review): with an integer `end_index` this slice is empty;
    # presumably `end_index` is an index label — confirm with callers.
    return predictions[end_index:end_index]
def model_predict(y):
    """Predict using the SARIMAX model (one-step ahead forecasting).

    A model is built on ``y`` with an all-zero exogenous regressor, filtered
    with ``fitted_params``, and the default in-sample prediction is taken.

    Parameters
    ----------
    y
        Observed series; ``y.shape[0]`` gives its length.

    Returns
    -------
    The ``predicted_mean`` of the default ``get_prediction()`` call.
    """
    zero_outliers = np.zeros(y.shape[0])
    model = SARIMAX(
        y,
        order=order,
        seasonal_order=seasonal_order,
        exog=zero_outliers,
        trend=trend,
    )
    filtered = model.filter(fitted_params)
    return filtered.get_prediction().predicted_mean
def predict_with_trained_SARIMA_model(self, df, game_id, dep_var, indep_var,
                                      X_out_of_sample, target, type,
                                      model_type):
    """Forecast ``dep_var`` with a stored SARIMA(X) model and bound the result.

    The saved results are loaded from the path given by
    ``util.build_model_save_path``; their parameters are applied to a model
    rebuilt on ``df`` (no re-estimation).  Predictions are limited to the
    observed range of ``dep_var`` widened by a 20% tolerance.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``dep_var``, ``indep_var`` and a ``'timeslot'`` column.
    game_id, target, type, model_type
        Used to locate the saved model; ``game_id``, ``target`` and ``type``
        are also copied into the result.
    dep_var
        Column used as endogenous variable.
    indep_var
        Column(s) used as exogenous variables.
    X_out_of_sample : pandas.DataFrame
        Exogenous values for the forecast.  Its index is overwritten in
        place with the 24 labels following the last label of ``df``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``target_timeslot``, ``prediction``,
        ``prediction_timeslot``, ``proximity``, ``game_id``, ``target`` and
        ``type``; ``None`` (after printing a message) when no saved model
        exists.
    """
    # NOTE(review): the original author flagged this routine as possibly
    # containing mistakes.
    save_path = util.build_model_save_path(game_id, target, type, model_type)
    if not util.check_for_model_existence(save_path):
        print("Cannot predict for {}. No model has been trained yet.".format(dep_var))
        return
    saved_model = SARIMAXResults.load(save_path)

    Y = df[dep_var]
    X = df[indep_var]

    latest_index = max(Y.index)
    X_out_of_sample.index = range(latest_index + 1, latest_index + 24 + 1)
    latest_timeslot = df.iloc[latest_index]['timeslot']

    # The seasonal order is only passed on when one is configured.
    model_kwargs = {'endog': Y, 'exog': X, 'order': self.order}
    if self.seasonal_order is not None:
        model_kwargs['seasonal_order'] = self.seasonal_order
    fitted_model = SARIMAX(**model_kwargs).filter(params=saved_model.params)

    first_step = latest_index + 1
    prediction = fitted_model.predict(
        exog=X_out_of_sample,
        start=first_step,
        end=first_step + self.forecast_length - 1)

    first_target = latest_timeslot + 1
    df_prediction = pd.DataFrame({
        'target_timeslot': range(first_target,
                                 first_target + self.forecast_length),
        'prediction': prediction,
    })

    # Bounds: widen the observed range by the tolerance, taking the sign of
    # the extreme value into account so the range always grows.
    tolerance = 0.2
    highest = max(Y)
    lowest = min(Y)
    if highest > 0:
        prediction_upper_bound = highest * (1 + tolerance)
    else:
        prediction_upper_bound = highest * (1 - tolerance)
    if lowest > 0:
        prediction_lower_bound = lowest * (1 - tolerance)
    else:
        prediction_lower_bound = lowest * (1 + tolerance)

    def clamp(value):
        # Restrict a single prediction to [lower bound, upper bound].
        return max(min(value, prediction_upper_bound), prediction_lower_bound)

    df_prediction['prediction'] = df_prediction['prediction'].apply(clamp)
    df_prediction['prediction_timeslot'] = latest_timeslot
    df_prediction['proximity'] = (df_prediction['target_timeslot']
                                  - df_prediction['prediction_timeslot'])
    df_prediction['game_id'] = game_id
    df_prediction['target'] = target
    df_prediction['type'] = type
    return df_prediction
def fit_arima(training_set, validation_set):
    """Fit ARIMA(9, 1, 1) on the training data and score it on validation data.

    Both frames are passed through ``clean_data`` and re-indexed from zero.
    The model is estimated on the ``"Wind average [m/s]"`` column of the
    training set; its parameters are then applied (filter only) to the same
    column of the validation set.  Windows of ``forecast_steps`` predictions
    are collected every ``skip_steps`` observations, both taken from
    ``get_config(None, None)``, and MSE, MAE and a relative error are
    printed.

    Returns
    -------
    list
        ``predictions[17434:17434 + 72]`` — a fixed slice of the collected
        predictions.
    """
    target_variable = "Wind average [m/s]"
    arima_order = (9, 1, 1)

    training_set = clean_data(training_set, [])
    validation_set = clean_data(validation_set, [])
    training_set.reset_index(drop=True, inplace=True)
    validation_set.reset_index(drop=True, inplace=True)

    history = list(training_set[target_variable])
    validation_set = validation_set[target_variable]

    # Estimate on the training data, then reuse the parameters unchanged on
    # the validation data.
    model_fit = SARIMAX(history, order=arima_order).fit(disp=0)
    res = SARIMAX(validation_set, order=arima_order).filter(model_fit.params)

    config = get_config(None, None)
    forecast_steps = config["forecast_steps"]
    skip_steps = config["skip_steps"]

    predictions = []
    observations = []
    n_windows = (len(validation_set) - forecast_steps) // skip_steps
    for n in range(n_windows):
        first = n * skip_steps
        output = res.get_prediction(
            start=first, dynamic=0,
            end=first + forecast_steps - 1).predicted_mean.to_numpy()
        for t in range(forecast_steps):
            predictions.append(output[t])
            observations.append(validation_set[first + t])

    error = mean_squared_error(observations, predictions)
    print('Test MSE: %.3f' % error)
    mae = mean_absolute_error(observations, predictions)
    print('Test MAE: %.3f' % mae)
    relative_error = abs((mae / validation_set.mean()) * 100)
    print("Error of order : %d%%" % relative_error)

    # The random offset was only used by a plotting call that is disabled;
    # the draw itself is kept so the random state advances as before.
    randrange(len(observations))
    return predictions[17434:17434 + 72]
def test_integrated_process(ar_params, diff, ma_params, sigma2):
    """Log-likelihoods must match when the model includes integration.

    The innovations-based log-likelihood of the explicitly differenced
    series is compared with the Kalman filter result of a SARIMAX model
    that uses ``simple_differencing=True``.
    """
    nobs = 100
    endog = np.cumsum(np.random.normal(size=nobs))

    # Innovations algorithm approach, on the differenced data
    differenced = np.diff(endog, diff)
    expected_llf_obs = arma_innovations.arma_loglikeobs(
        differenced, ar_params, ma_params, sigma2)

    # Kalman filter approach
    arima_order = (len(ar_params), diff, len(ma_params))
    kalman = SARIMAX(endog, order=arima_order,
                     simple_differencing=True).filter(
        np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree
    assert_allclose(expected_llf_obs, kalman.llf_obs)
def predictS(train_signal, test_signal, model_p, model_q, file=None,
             print_output=False):
    """Walk-forward one-step ARMA forecasts over ``test_signal``.

    An ARMA(``model_p``, ``model_q``) model is estimated once on
    ``train_signal``.  For every position ``i`` of ``test_signal`` a new
    model is built on ``test_signal[:i]``, filtered with the estimated
    parameters, and asked for a one-step forecast.

    Parameters
    ----------
    train_signal
        Data used to estimate the parameters.
    test_signal
        Data to forecast step by step; must support slicing and
        element-wise arithmetic with the predictions.
    model_p, model_q : int
        AR and MA orders.
    file : optional
        When not ``None``, the predictions are saved there with ``np.save``.
    print_output : bool, optional
        Print actual value, prediction and absolute error for every step.

    Returns
    -------
    tuple
        ``(predictions, MFE, MAE, MAPE)``: the flattened predictions, the
        mean residual, the mean absolute residual and the mean absolute
        relative residual rounded to 5 decimals.
    """
    arma_order = (model_p, 0, model_q)
    model_1 = SARIMAX(list(train_signal), order=arma_order,
                      enforce_stationarity=False, enforce_invertibility=False)
    res = model_1.fit()

    predictions = []
    for i, t in enumerate(test_signal):
        # NOTE(review): for i == 0 the model is built on an empty slice —
        # confirm this is intended.
        model_2 = SARIMAX(test_signal[:i], order=arma_order,
                          enforce_stationarity=False,
                          enforce_invertibility=False)
        pred = model_2.filter(res.params).forecast(1)
        if print_output:
            print(
                f"Actual value: {t}, predicted: {pred}, abs: {np.abs(t-pred)}")
        predictions.append(pred)

    # Every forecast is a length-1 sequence; flatten them into one array.
    predictions = np.array([x for sublist in predictions for x in sublist])

    # Evaluate the predictions
    resid = test_signal - predictions
    MFE = np.mean(resid)
    MAE = np.mean(np.abs(resid))
    # The tiny constant guards against division by zero.
    MAPE = np.round(np.mean(np.abs(resid / (test_signal + 1e-16))), 5)

    if file is not None:
        np.save(file, predictions)
    return predictions, MFE, MAE, MAPE
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """Compare the low-level innovations routines with the Kalman filter.

    For an ARMA process both approaches must yield the same forecast errors
    and the same per-observation log-likelihood.
    """
    ar_poly = np.r_[1, -ar_params]
    ma_poly = np.r_[1, ma_params]
    endog = np.random.normal(size=10)
    nobs = len(endog)

    # Innovations algorithm approach
    process_acovf = arma_acovf(ar_poly, ma_poly, nobs=nobs, sigma2=sigma2)
    transformed = _arma_innovations.darma_transformed_acovf_fast(
        ar_poly, ma_poly, process_acovf / sigma2)
    # Convert each returned buffer to an array on its own.
    acovf, acovf2 = [np.array(buf) for buf in transformed]
    theta, scaled_mse = _arma_innovations.darma_innovations_algo_fast(
        nobs, ar_params, ma_params, acovf, acovf2)
    raw_errors = _arma_innovations.darma_innovations_filter(
        endog, ar_params, ma_params, theta)

    errors = np.array(raw_errors)
    variances = np.array(scaled_mse) * sigma2
    expected_llf_obs = (-0.5 * errors**2 / variances
                        - 0.5 * np.log(2 * np.pi * variances))

    # Kalman filter approach
    arma_order = (len(ar_params), 0, len(ma_params))
    kalman = SARIMAX(endog, order=arma_order).filter(
        np.r_[ar_params, ma_params, sigma2])

    # Both approaches must agree
    assert_allclose(errors, kalman.forecasts_error[0])
    assert_allclose(expected_llf_obs, kalman.llf_obs)

    # The log-likelihood can also be obtained from the low-level routine
    direct_llf_obs = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(direct_llf_obs, kalman.llf_obs)
def predict_sequence(save_path, eval_data, order, input_start, input_size,
                     output_size, x_coord, y_coord):
    """Forecast ``output_size`` steps for one grid cell with a stored model.

    The results object saved at ``{save_path}/{x_coord}_{y_coord}.pickle``
    supplies the parameters; they are applied (filter only) to a model built
    on ``input_size`` observations of the cell starting at ``input_start``.

    Parameters
    ----------
    save_path
        Directory holding the per-cell pickles.
    eval_data
        Array indexed as ``[time, x, y]``.
    order
        Order passed to ``SARIMAX``.
    input_start, input_size : int
        Start and length of the input window along the first axis.
    output_size : int
        Number of trailing predictions to return.
    x_coord, y_coord
        Cell coordinates; also part of the pickle file name.

    Returns
    -------
    The last ``output_size`` entries of the predicted mean.
    """
    trained_model = sm.load(f"{save_path}/{x_coord}_{y_coord}.pickle")

    window = eval_data[input_start:input_start + input_size, x_coord, y_coord]
    model_fit = SARIMAX(window, order=order).filter(trained_model.params)

    # Predict across the input window and `output_size` steps beyond it;
    # dynamic prediction begins at offset `input_size`.
    prediction_wrapper = model_fit.get_prediction(
        start=0, end=input_size + output_size - 1, dynamic=input_size)
    return prediction_wrapper.predicted_mean[-output_size:]
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """Compare the low-level innovations routines with the Kalman filter.

    For an ARMA process both approaches must yield the same forecast errors
    and the same per-observation log-likelihood.
    """
    ar = np.r_[1, -ar_params]
    ma = np.r_[1, ma_params]
    endog = np.random.normal(size=10)
    nobs = len(endog)

    # Innovations algorithm approach
    arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2)
    transformed_acov = _arma_innovations.darma_transformed_acovf_fast(
        ar, ma, arma_process_acovf / sigma2)
    # Convert the two returned buffers one at a time: wrapping the pair in a
    # single np.array() call would need both to have the same shape.
    acovf, acovf2 = (np.array(mv) for mv in transformed_acov)
    theta, r = _arma_innovations.darma_innovations_algo_fast(
        nobs, ar_params, ma_params, acovf, acovf2)
    u = _arma_innovations.darma_innovations_filter(endog, ar_params,
                                                   ma_params, theta)

    v = np.array(r) * sigma2
    u = np.array(u)
    llf_obs = -0.5 * u**2 / v - 0.5 * np.log(2 * np.pi * v)

    # Kalman filter approach
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(u, res.forecasts_error[0])
    assert_allclose(llf_obs, res.llf_obs)

    # Get llf_obs directly
    llf_obs2 = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(llf_obs2, res.llf_obs)
def test_regression_with_arma_errors(ar_params, ma_params, sigma2):
    """Log-likelihoods must match when the model has regressors.

    The innovations-based log-likelihood of the OLS residuals is compared
    with the Kalman filter result of a SARIMAX model whose regression
    coefficients are fixed at the OLS estimates.
    """
    nobs = 100

    # One noise draw per observation (`size=`, not the positional `loc`).
    eps = np.random.normal(size=nobs)
    exog = np.c_[np.ones(nobs), np.random.uniform(size=nobs)]
    beta = [5, -0.2]
    endog = np.dot(exog, beta) + eps

    # Innovations algorithm approach
    beta_hat = np.squeeze(np.linalg.pinv(exog).dot(endog))
    demeaned = endog - np.dot(exog, beta_hat)
    llf_obs = arma_innovations.arma_loglikeobs(
        demeaned, ar_params, ma_params, sigma2)

    # Kalman filter approach
    # (this works since we impose here that the regression coefficients are
    # beta_hat - in practice, the MLE estimates will not necessarily match
    # the OLS estimates beta_hat)
    mod = SARIMAX(endog, exog=exog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[beta_hat, ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(llf_obs, res.llf_obs)
def test(self, test=None):
    """Produce rolling 36-step forecasts over a test series.

    The test series grows by 12 observations per iteration; each time a
    model with ``self.order`` is built on the data seen so far, filtered
    with the parameters of ``self.model_fit`` and asked for a 36-step
    forecast.

    Parameters
    ----------
    test : None, str or tuple, optional
        * ``None`` - the ``'total_power'`` values of 2015-08-15 and
          2015-10-18 are concatenated and re-indexed on a continuous
          5-minute grid starting at the first day.
        * ``str`` - label used to select rows of ``self.df``.
        * ``tuple`` - ``(start, end)`` labels of a slice of ``self.df``.
        Any other value is used as the test series as is.

    Notes
    -----
    Stores the list of forecasts on ``self.forecasts`` and the test series
    on ``self.test_set``; returns ``None``.
    """
    if test is None:
        sunny_test = self.df.loc['2015-08-15']['total_power']
        cloudy_test = self.df.loc['2015-10-18']['total_power']
        test = pd.concat([sunny_test, cloudy_test], axis=0)
        # Re-index so both days form one gap-free series.
        start = test.index[0]
        ts = pd.date_range(start.date(), periods=len(test), freq='5min')
        test.index = ts
    elif isinstance(test, str):
        test = self.df.loc[test]['total_power']
    elif isinstance(test, tuple):
        test = self.df.loc[test[0]:test[1]]['total_power']

    forecasts = []
    # Floor division keeps the batch count an integer on Python 3 as well.
    N = len(test) // 12
    for i in range(N - 1):
        next_batch = test.iloc[0:12 * (i + 1)]
        mod2 = SARIMAX(next_batch, order=self.order)
        test2 = mod2.filter(self.model_fit.params)
        forecasts.append(test2.forecast(36))

    self.forecasts = forecasts
    self.test_set = test
    return
def model_predict(y):
    """Predict using the SARIMAX model (dynamic forecasting).

    A model is built on ``y`` with an all-zero exogenous regressor and
    filtered with ``fitted_params``.  Dynamic predictions are requested from
    position ``s`` to the end of the series; earlier positions of the result
    stay zero.

    Parameters
    ----------
    y
        Observed series; ``y.shape[0]`` gives its length.

    Returns
    -------
    numpy.ndarray
        Array of length ``y.shape[0]`` holding the predicted means.
    """
    total = y.shape[0]
    zero_outliers = np.zeros(total)
    first, last = s, total - 1

    model = SARIMAX(
        y,
        order=order,
        seasonal_order=seasonal_order,
        exog=zero_outliers,
        trend=trend,
    )
    prediction = model.filter(fitted_params).get_prediction(
        start=first, end=last, exog=zero_outliers, dynamic=True)

    forecasts = np.zeros(total)
    forecasts[first:last + 1] = prediction.predicted_mean
    return forecasts
# evaluate parameters: grid of candidate (p, d, q) orders
p_values = [0, 1, 2]
d_values = [0, 1]
q_values = range(0, 2)
warnings.filterwarnings("ignore")
evaluate_models(tsv_log, p_values, d_values, q_values)

# In[76]:

from statsmodels.tsa.statespace.sarimax import SARIMAX

# Estimate ARIMA(2, 1, 1) on the log series, then apply the estimated
# parameters to a second, identical model and forecast 91 steps.
mod1 = SARIMAX(tsv_log, order=(2, 1, 1))
res1 = mod1.fit()
mod2 = SARIMAX(tsv_log, order=(2, 1, 1))
res2 = mod2.filter(res1.params)
pred_v = res2.forecast(91)

# In[77]:

# Undo the log transform.
pred_v = np.exp(pred_v)

# In[80]:

# np.round replaces the alias np.round_, which NumPy 2.0 removed.
pred_v = np.round(pred_v)

# In[38]:

p_values = [0, 1, 2]
d_values = [0, 1]
q_values = range(0, 2)