def test_innovations_algo_filter_kalman_filter(reset_randomstate):
    """Check that the innovations algorithm/filter and the Kalman filter
    agree on exact likelihood evaluation of an ARMA(1, 1) process."""
    ar_params = np.array([0.5])
    ma_params = np.array([0.2])
    # TODO could generalize to sigma2 != 1, if desired, after #5324 is merged
    # and there is a sigma2 argument to arma_acovf
    # (but maybe this is not really necessary for the point of this test)
    sigma2 = 1
    endog = np.random.normal(size=10)

    # Innovations-algorithm route: autocovariances -> (theta, v) -> innovations
    acovf = arma_acovf(np.r_[1, -ar_params], np.r_[1, ma_params],
                       nobs=len(endog))
    theta, v = innovations_algo(acovf)
    u = innovations_filter(endog, theta)
    llf_obs = -0.5 * u**2 / (sigma2 * v) - 0.5 * np.log(2 * np.pi * v)

    # Kalman-filter route via a state-space ARMA model
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # The two routes must agree (looser tolerance on Windows)
    atol = 1e-6 if PLATFORM_WIN else 0.0
    assert_allclose(u, res.forecasts_error[0], atol=atol)
    assert_allclose(theta[1:, 0], res.filter_results.kalman_gain[0, 0, :-1],
                    atol=atol)
    assert_allclose(llf_obs, res.llf_obs, atol=atol)
def test_standardized_forecasts_error():
    """
    Simple test that standardized forecasts errors are calculated correctly.

    Just uses a different calculation method on a univariate series.
    """
    # Load the univariate dataset used by the Kalman-filter result fixtures
    true = results_kalman_filter.uc_uni
    data = pd.DataFrame(
        true['data'],
        index=pd.date_range('1947-01-01', '1995-07-01', freq='QS'),
        columns=['GDP'],
    )
    data['lgdp'] = np.log(data['GDP'])

    # Fit an ARIMA(1,1,0) to log GDP
    mod = SARIMAX(data['lgdp'], order=(1, 1, 0))
    res = mod.fit(disp=-1)

    # Manual standardization: forecast error / sqrt(forecast error variance)
    expected = (res.filter_results.forecasts_error[0]
                / np.sqrt(res.filter_results.forecasts_error_cov[0, 0]))

    assert_allclose(res.filter_results.standardized_forecasts_error[0],
                    expected)
def test_innovations_algo_direct_filter_kalman_filter(ar_params, ma_params,
                                                      sigma2):
    """Compare arma_innovations against the Kalman filter for exact ARMA
    likelihood evaluation, using the direct convenience function."""
    endog = np.random.normal(size=10)

    # Innovations algorithm approach (direct function)
    u, r = arma_innovations.arma_innovations(endog, ar_params, ma_params,
                                             sigma2)
    u = np.array(u)
    v = np.array(r) * sigma2
    llf_obs = -0.5 * u**2 / v - 0.5 * np.log(2 * np.pi * v)

    # Kalman filter approach
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # The two approaches must be identical
    assert_allclose(u, res.forecasts_error[0])
    assert_allclose(llf_obs, res.llf_obs)

    # The fast Cython loglikelihood must also agree
    llf_obs2 = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(llf_obs2, res.llf_obs)
def test_integrated_process(ar_params, diff, ma_params, sigma2):
    """Loglikelihood computation must match the Kalman filter when the
    model includes integration (handled with simple differencing)."""
    nobs = 100
    endog = np.cumsum(np.random.normal(size=nobs))

    # Innovations algorithm on the differenced series
    llf_obs = arma_innovations.arma_loglikeobs(
        np.diff(endog, diff), ar_params, ma_params, sigma2)

    # Kalman filter with simple differencing on the integrated series
    mod = SARIMAX(endog, order=(len(ar_params), diff, len(ma_params)),
                  simple_differencing=True)
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(llf_obs, res.llf_obs)
def sarimax_eval(train_df, test_df, train_column, test_column, start, end,
                 p, d, q, S, P=0, D=0, Q=0):
    """Fit a SARIMA model on ``train_column`` and score it on ``test_column``.

    NOTE(review): ``train_df`` and ``test_df`` are accepted but never used;
    they are kept for interface compatibility with existing callers.

    Returns a dict with the MAE, AIC, and the (p, d, q)(P, D, Q, S) orders
    that produced them.
    """
    sarima = SARIMAX(endog=train_column,
                     order=(p, d, q),
                     seasonal_order=(P, D, Q, S),
                     enforce_stationarity=False,
                     enforce_invertibility=False)

    # Fit SARIMA model.
    model = sarima.fit()

    # Generate predictions over the requested window and evaluate them.
    preds = model.predict(start=start, end=end)
    mae = mean_absolute_error(test_column, preds)

    # Bundle the scores together with the parameters that produced them.
    return {
        'mae': mae,
        'AIC': model.aic,
        'p': p, 'd': d, 'q': q,
        'P': P, 'D': D, 'Q': Q, 'S': S,
    }
def test_small_sample_serial_correlation_test():
    """Ljung-Box serial correlation test with df adjustment, small sample.

    Statistic and p-value are compared to R's Arima() and checkresiduals()
    in the forecast package:
        library(forecast)
        fit <- Arima(y, order=c(1,0,1), include.constant=FALSE)
        checkresiduals(fit, lag=10)
    """
    from statsmodels.tsa.statespace.sarimax import SARIMAX

    niledata = nile.data.load_pandas().data
    niledata.index = pd.date_range('1871-01-01', '1970-01-01', freq='AS')

    mod = SARIMAX(endog=niledata['volume'], order=(1, 0, 1), trend='n',
                  freq=niledata.index.freq)
    res = mod.fit()

    # Last column holds the df-adjusted statistic/p-value at lag 10
    actual = res.test_serial_correlation(
        method='ljungbox', lags=10, df_adjust=True)[0, :, -1]
    assert_allclose(actual, [14.116, 0.0788], atol=1e-3)
def sarimax_model_fit(self, x_train, y_train, df_time):
    """Fit a SARIMAX(0,1,0) with exogenous regressors.

    Re-indexes both training frames on ``df_time`` before fitting, prints
    the fit summary, and returns the fitted results object.
    """
    x_train.index = df_time
    y_train.index = df_time

    model = SARIMAX(y_train, exog=x_train, order=(0, 1, 0),
                    seasonal_order=(0, 0, 0, 0))
    model_fit = model.fit(disp=-1)
    print(model_fit.summary())
    return model_fit
def test_integrated_process(ar_params, diff, ma_params, sigma2):
    """Innovations-based loglikelihood must equal the Kalman filter's when
    the model has integration (simple differencing)."""
    nobs = 100
    endog = np.cumsum(np.random.normal(size=nobs))

    # Direct approach on the differenced data
    llf_obs = arma_innovations.arma_loglikeobs(np.diff(endog, diff),
                                               ar_params, ma_params, sigma2)

    # State-space approach on the original (integrated) data
    mod = SARIMAX(endog, order=(len(ar_params), diff, len(ma_params)),
                  simple_differencing=True)
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    assert_allclose(llf_obs, res.llf_obs)
def order(self):
    """Grid-search (p, d, q) x seasonal (P, D, Q, 30) orders over {0, 1}
    and print the AIC of each SARIMAX fit on ``self.original.Mean``.

    Candidate fits that fail are skipped and the search continues.
    """
    from itertools import product

    from statsmodels.tsa.statespace.sarimax import SARIMAX

    p = d = q = range(0, 2)
    pdq = list(product(p, d, q))
    # Seasonal candidates reuse the same grid with a fixed period of 30
    seasonal_pdq = [(x[0], x[1], x[2], 30) for x in product(p, d, q)]

    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = SARIMAX(self.original.Mean,
                              order=param,
                              seasonal_order=param_seasonal,
                              enforce_stationarity=False,
                              enforce_invertibility=False)
                results = mod.fit()
                print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal,
                                                   results.aic))
            except Exception:
                # Bug fix: was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit. Skip only ordinary errors.
                continue
def seasonalAutoregressiveIntegratedMovingAverage2(day):
    """Fit a SARIMA(1,1,1)(1,1,1,1) on the 2018 per-movie daily gross up to
    ``day`` and print a one-step-ahead prediction."""
    col_daily = db['daily']

    # Build the average daily gross per tracked movie for 2018
    dailyGrossSet = []
    for record in col_daily.find({"Year": 2018}):
        movieNumber = record['MoviesTracked']
        gross = record['Gross($)'].replace(",", "")
        dailyGrossSet.append(int(gross) / int(movieNumber))

    print(dailyGrossSet[day])
    dailyGrossSet = dailyGrossSet[0:day]
    print(dailyGrossSet)

    # NOTE(review): a seasonal period of 1 is unusual — confirm intended.
    model = SARIMAX(dailyGrossSet, order=(1, 1, 1),
                    seasonal_order=(1, 1, 1, 1))
    model_fit = model.fit(disp=False)

    # Predict the next (out-of-sample) observation
    yhat = model_fit.predict(len(dailyGrossSet), len(dailyGrossSet))
    print(yhat)
def sarimax_forecast(train_data, sent_data, valid_sent_data, config):
    '''
    Returns a sarimax prediction, same as sarima with the addition of the
    exogenous reddit data
    '''
    # Unpack configuration: (order, seasonal order, trend)
    order, sorder, trend = config

    model = SARIMAX(endog=train_data,
                    exog=sent_data,
                    order=order,
                    seasonal_order=sorder,
                    trend=trend,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

    # Fit and return the first value of a one-step forecast
    fitted = model.fit(disp=0)
    return fitted.forecast(exog=valid_sent_data)[0]
def test_02(self):
    """Round-trip a seasonal ARIMA model through PMML and verify Zementis
    server predictions match statsmodels forecasts (single and batch)."""
    data = pd.read_csv("nyoka/tests/JohnsonJohnsonWithDate.csv")
    data['index'] = pd.to_datetime(data['index'], format='%Y-%m-%d')
    data.set_index(['index'], inplace=True)

    mod = SARIMAX(data, order=(1, 0, 0), seasonal_order=(1, 0, 0, 4))
    result = mod.fit(disp=False)
    ArimaToPMML(result, 'jnj_seasonal_arima.pmml')

    # Bug fix: the helper attribute was referenced inconsistently as both
    # ``self.adapaUtilities`` and ``self.adapa_utility``; use one name
    # throughout so the test does not fail on a missing attribute.
    model_name = self.adapa_utility.upload_to_zserver(
        'jnj_seasonal_arima.pmml')

    # Single-record scoring: one-step-ahead forecast must match
    z_pred = self.adapa_utility.score_single_record(model_name)
    model_pred = result.forecast()[0]
    self.assertEqual(model_pred, z_pred['predicted_value'])

    # Batch scoring: 5-step-ahead forecast must match
    z_pred = self.adapa_utility.score_in_zserver(
        model_name, 'nyoka/tests/test_jnj.csv', 'TS')
    model_pred = result.forecast(5)[-1]
    self.assertEqual(model_pred, z_pred)
def sarimax_forecast(train_data, sent_data, valid_sent_data, config):
    '''
    Returns a sarimax prediction
    '''
    # Configuration is nested one level deep: take the first entry
    order, sorder, trend = tuple(config[0])

    # NOTE(review): the exogenous inputs (sent_data, valid_sent_data) are
    # accepted but intentionally unused in this variant.
    model = SARIMAX(endog=train_data,
                    order=order,
                    seasonal_order=sorder,
                    trend=trend,
                    enforce_stationarity=False,
                    enforce_invertibility=False)

    # One-step-ahead prediction
    return model.fit(disp=0).forecast()[0]
def SARIMA_Forecast(data, config):
    """One-step SARIMA forecast.

    config is (order, seasonal_order, trend) where:
      - order: (p, d, q) parameters for the trend model
      - seasonal_order: (P, D, Q, m) parameters for the seasonality
      - trend: deterministic trend, one of 'n', 'c', 't', 'ct'
        (none / constant / linear / constant with linear trend)

    Returns (predicted_mean, se_mean) of the one-step forecast.
    """
    order, sorder, trend = config

    model = SARIMAX(data, order=order, seasonal_order=sorder, trend=trend,
                    enforce_stationarity=False, enforce_invertibility=False)
    model_fit = model.fit(disp=False)

    forecast = model_fit.get_forecast()
    return forecast.predicted_mean, forecast.se_mean
class Sarimax:
    """Thin wrapper around a SARIMAX(3,1,0)(0,0,0,12) model for a single
    target column of a dataframe."""

    def __init__(self, df, cfg):
        # Target series selected by the config's 'target_feature' key
        self.series = df[cfg['target_feature']]
        self.model = SARIMAX(self.series, order=(3, 1, 0),
                             seasonal_order=(0, 0, 0, 12))

    def fit_model(self):
        # Fit and replace the unfitted model with the results object
        self.model = self.model.fit(disp=0)
        print(self.model.summary())

    def plot_autocorrelation(self):
        # Plot the target series' autocorrelation
        autocorrelation_plot(self.series)
        plt.show()

    def predict_arima(self, series):
        # NOTE(review): ``series`` is forwarded as the first argument of
        # results.predict, which expects start/end-like values — confirm
        # callers' intent.
        return self.model.predict(series)
def test_seasonal_arima1(self):
    """Export a seasonal ARIMA(3,1,1)(3,1,1,12) fit to PMML and validate
    the resulting file against the schema."""
    ts_data = self.statsmodels_data_helper.getData5()
    f_name = 'seasonal_arima1.pmml'
    # Deliberately exercises many non-default SARIMAX options (Hamilton
    # representation, time-varying regression, simple differencing, ...)
    model = SARIMAX(endog=ts_data,
                    exog=None,
                    order=(3, 1, 1),
                    seasonal_order=(3, 1, 1, 12),
                    trend='t',
                    measurement_error=True,
                    time_varying_regression=True,
                    mle_regression=False,
                    simple_differencing=True,
                    enforce_stationarity=False,
                    enforce_invertibility=False,
                    hamilton_representation=True,
                    concentrate_scale=False)
    result = model.fit()
    ArimaToPMML(result, f_name)
    self.assertEqual(self.schema.is_valid(f_name), True)
def grid_search_sarima_param(data, S=shift_2, print_params=False):
    """
    Grid search for SARIMA optimal pdq and seasonal PDQ parameters.

    Scores each candidate by RMSE of its (exponentiated) forecast against
    the module-level ``test_data`` (NOTE(review): global dependency).

    Returns (optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse);
    the optima are None if every candidate fit failed.
    """
    p = d = q = range(0, 2)
    pdq = list(itertools.product(p, d, q))
    seasonal_PDQ = [(x[0], x[1], x[2], S) for x in pdq]

    warnings.filterwarnings("ignore")  # specify to ignore warning messages

    min_rmse = 10000
    # Bug fix: initialize the optimum trackers so the function does not
    # raise NameError when no candidate fit succeeds.
    optimal_pdq = optimal_seasonal_pdq = optimal_aic = None
    k = len(test_data)  # forecast horizon, hoisted out of the loop

    for param in pdq:
        for param_seasonal in seasonal_PDQ:
            try:
                model = SARIMAX(data, order=param,
                                seasonal_order=param_seasonal,
                                enforce_stationarity=False,
                                enforce_invertibility=False)
                results = model.fit()
                # Forecasts are in log space; exponentiate before scoring
                forecast = np.exp(results.forecast(k))
                rmse = np.sqrt(
                    sum((forecast - test_data['Airpass'])**2) / k)
                if rmse < min_rmse:
                    min_rmse = round(rmse, 2)
                    optimal_aic = round(results.aic, 2)
                    optimal_pdq = param
                    optimal_seasonal_pdq = param_seasonal
            except Exception:
                # Bug fix: was a bare ``except:``; skip only ordinary errors
                continue

    if print_params:
        print('SARIMA{}x{} - AIC:{} - RMSE:{}'.format(
            optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse))
    return optimal_pdq, optimal_seasonal_pdq, optimal_aic, min_rmse
def train_test_predict():
    """Fit a SARIMAX per series in ``indices`` using auto-ARIMA-selected
    orders.

    NOTE(review): relies on module-level ``indices``,
    ``transformed_system_forecasts`` and ``AA`` (auto_arima).
    """
    for i in range(len(indices)):
        print(
            f'-------------------Now analyzing {indices[i]} -------------------'
        )
        # Auto-ARIMA search to select (p,d,q)(P,D,Q,m) for this series
        stepwise_fit = AA(transformed_system_forecasts[indices[i]],
                          start_p=1,
                          start_q=1,
                          max_p=3,
                          max_q=3,
                          m=12,
                          start_P=0,
                          seasonal=True,
                          d=None,
                          D=1,
                          trace=True,
                          error_action='ignore',
                          suppress_warnings=True,
                          stepwise=True)
        # NOTE(review): iloc[[0, 12], [i, i]] selects only rows 0 and 12
        # and duplicates column i — this conflicts with the original
        # comment "selecting all but first 12 elements from ith column";
        # confirm the intended slice before relying on this training set.
        train = transformed_system_forecasts.iloc[[0, 12], [
            i, i
        ]]
        print(train)
        # Fit a SARIMAX using the auto-ARIMA-discovered orders
        model = SARIMAX(
            train,
            order=stepwise_fit.get_params()['order'],
            seasonal_order=stepwise_fit.get_params()['seasonal_order'])
        result = model.fit()
        result.summary()
def get(self, request, *args, **kwargs):
    """Return SARIMAX predictions for the next ``nsteps`` months.

    Reads all UnivarientData rows, fits a monthly SARIMAX(1,0,2)(1,2,1,6),
    and responds with a list of [date, prediction] pairs.
    """
    n_steps = int(self.request.query_params.get('nsteps', 10))
    # Anchor the forecast ~1 month after the latest stored observation
    last_date = UnivarientData.objects.latest(
        'date').date + datetime.timedelta(days=30)

    # Load the stored series into a date-indexed frame
    data = read_frame(UnivarientData.objects.all())
    data['date'] = pd.to_datetime(data['date'])
    data = data.drop('id', axis=1)
    data = data.set_index('date')

    arima = SARIMAX(data,
                    order=(1, 0, 2),
                    freq='M',
                    seasonal_order=(1, 2, 1, 6),
                    enforce_stationarity=False,
                    enforce_invertibility=False,
                    ).fit()

    # Predict over the requested month-end range
    date_index = pd.date_range(start=last_date, periods=n_steps, freq='M')
    data = pd.DataFrame()
    data['prediction'] = arima.predict(date_index.min(), date_index.max())
    # Fix: removed the redundant no-op self-assignment
    # (``data['date'] = data['date']``) that followed this line.
    data['date'] = date_index

    predicted_data = data[['date', 'prediction']].values.tolist()
    return Response({'predicted_data': predicted_data})
def predict_product(self, product_id):
    """ Receives a product id and predicts """
    product_ts = self.__get_product_ts(product_id)

    # Fit and forecast PREDICTION_TIME * 4 steps ahead
    model = SARIMAX(product_ts, order=(0, 1, 2),
                    time_varying_regression=True, mle_regression=False,
                    trend='n', seasonal_order=(1, 1, 1, 11)).fit()
    forecast = model.get_forecast(steps=PREDICTION_TIME * 4, dynamic=True)

    # Historical window for 2015; missing values treated as zero
    history = product_ts[(product_ts.index > "2015")
                         & (product_ts.index < "2016")].fillna(0)

    # Series were modeled in log space; exponentiate back for output
    return (np.exp(history), np.exp(forecast.predicted_mean),
            np.exp(forecast.conf_int()))
def __train_model(self, series, order, seasonal_order, exogenous=None,
                  max_iterations=50):
    """
    Trains the ARIMA family of model and returns the best model and fit

    :param series: time series to train model on
    :param order: (p, d, q)
    :param seasonal_order: (P, D, Q, m)
    :param exogenous: exogenous variables array
    :return: model, fit, score
    """
    model = fit = score = None
    # Include a constant ("c") trend only when a drift term is configured
    trend = "n" if self.__drift == 0 else "c"
    try:
        if self.__seasonal:
            model = SARIMAX(series, exog=exogenous, order=order,
                            seasonal_order=seasonal_order, trend=trend,
                            enforce_stationarity=False,
                            enforce_invertibility=False)
        else:
            model = SARIMAX(series, exog=exogenous, order=order,
                            trend=trend,
                            enforce_stationarity=False,
                            enforce_invertibility=False)
        fit = model.fit(maxiter=max_iterations, disp=0)
        score = fit.aic
        print(
            "Order : " + str(order),
            ", Seasonal Order : " + str(seasonal_order) +
            ", AIC Score : " + str(score))
    except (ValueError, LinAlgError) as error:
        # Fit failures are reported but not fatal; caller gets Nones
        model, fit, score = None, None, None
        print(error)
    return model, fit, score
def arima_best(fh, train, val, p_range, d_range, q_range, loss_metric="MSE",
               seasonal_order=(4, 1, 2, 8)):
    '''
    Grid-search (p, d, q) over the given ranges and return the model with
    the lowest validation loss.

    fh : int. Forecast horizon. While the validation set can be longer than
        the forecast horizon, only the first fh points are scored, to match
        the model's actual use (predicting only the selected horizon).
    p_range, d_range, q_range : tuples of 2, expanded via range(*...).
    loss_metric : name understood by loss_func (default "MSE").
    seasonal_order : (P, D, Q, m) — generalized from the previously
        hard-coded (4, 1, 2, 8); the default preserves old behavior.

    Returns (best_model, (best_p, best_d, best_q)).
    '''
    # True values to be scored against
    true = val[:fh]

    min_loss = float("inf")
    best_model = None
    best_p = best_d = best_q = None
    for p in range(*p_range):
        for d in range(*d_range):
            for q in range(*q_range):
                model = SARIMAX(train, order=(p, d, q),
                                seasonal_order=seasonal_order,
                                enforce_stationarity=False,
                                enforce_invertibility=False,
                                trend=None).fit(maxiter=100,
                                                method="powell")
                # Score the fh-step forecast against the validation slice
                predictions = model.forecast(fh)
                loss = loss_func(loss_metric, tensor=False)(true,
                                                            predictions)
                if loss < min_loss:
                    min_loss = loss
                    best_model = model
                    best_p, best_d, best_q = p, d, q
    return best_model, (best_p, best_d, best_q)
def build_model(series, p, d, q, S, exog_data, P=None, D=None, Q=None):
    """
    Function to build SARIMAX model

    inputs:
        series = name of the series in the dataframe; should be specified
            in the following df['series_name'], series = 'series_name'
        p, d, q for arima modeling
        S: seasonal lag
        P, D, Q for seasonal modeling
        p, P: autoregressive components
        d, D: differencing components
        q, Q: moving average of error term components
        exog_data = matrix of exogenous variables
        default mode sets seasonal P, D, Q = p, d, q

    Output: SARIMAX model results
    """
    # Seasonal orders default to the non-seasonal ones when omitted
    P = p if P is None else P
    D = d if D is None else D
    Q = q if Q is None else Q

    model = SARIMAX(series, order=(p, d, q), seasonal_order=(P, D, Q, S),
                    exog=exog_data, enforce_invertibility=True)
    return model.fit()
def _getNextObs(self): """ :return: ???? """ # print('> In _getNextObs ') # Isolate important features features = self.stacionaryDf[self.stacionaryDf.columns.difference(['index', 'Date'])] # selected what we're gonna use scaled = features[:self.iterator + 1].values # remove infinites scaled[abs(scaled) == inf] = 0 # Normalize scaled = self.scaler.fit_transform(scaled.astype('float32')) # to Df scaled = pd.DataFrame(scaled, columns=features.columns) # Predict next values pastDf = self.stacionaryDf['Close'][:self.iterator + 1] forecast_model = SARIMAX(pastDf.values, enforce_stationarity=False, simple_differencing=True) model_fit = forecast_model.fit(method='bfgs', disp=False) forecast = model_fit.get_forecast( steps=self.forecastLength, alpha=(1 - self.confidenceInterval)) # ??? ??? ??? obs = scaled.values[-1] # len 44 obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0) # Appends 10 obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0) # Appends 20 scaled_history = self.scaler.fit_transform( self.accountHistory.astype('float32')) obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0) obs = np.reshape(obs.astype('float16'), self.obsShape) obs[np.bitwise_not(np.isfinite(obs))] = 0 # print('> Finished getNextObs ') return obs
def _update(self, y, X=None):
    """
    Internal update of forecasts using new data via Kalman
    smoothing/filtering of forecasts obtained from previously fitted
    forecaster.

    Parameters
    ----------
    y : pandas.Series
        Updated time series which to use for updating the previously
        fitted forecaster.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided,
        these variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that
        if an ``ARIMA`` is fit on exogenous features, it must also be
        provided exogenous features for making predictions.

    Returns
    -------
    self : An instance of self
    """
    # TODO for updating see
    # https://github.com/statsmodels/statsmodels/issues/2788 and
    # https://github.com/statsmodels/statsmodels/issues/3318

    # Unnest the incoming series/frame into the shapes SARIMAX expects
    y = self._prepare_y(y)
    X = self._prepare_X(X)

    # Rebuild an estimator with the same specification as the fitted one
    estimator = SARIMAX(y,
                        exog=X,
                        order=self.order,
                        seasonal_order=self.seasonal_order,
                        trend=self.trend,
                        enforce_stationarity=self.enforce_stationarity,
                        enforce_invertibility=self.enforce_invertibility)

    # Seed the state-space recursions with the last predicted state (and
    # covariance) of the previously fitted estimator, then smooth under
    # the previously estimated parameters.
    estimator.initialize_known(
        self._fitted_estimator.predicted_state[:, -1],
        self._fitted_estimator.predicted_state_cov[:, :, -1])
    self._updated_estimator = estimator.smooth(
        self._fitted_estimator.params)
    return self
def Auto_Arima(df, dirloc, filename):
    """Grid-search SARIMA orders by AIC, fit the best model, forecast
    Jun-Dec 2020, and append the results to the output file.

    Returns the AIC-sorted results DataFrame.
    """
    import itertools

    from statsmodels.tsa.statespace.sarimax import SARIMAX

    p = d = q = range(0, 3)
    pdq = list(itertools.product(p, d, q))
    # Seasonal candidates reuse the grid with a 12-month period
    seas_decomp = [(x[0], x[1], x[2], 12) for x in pdq]

    print("Computating AIC of Different Sesonal ARIMA.....\n")
    arima_order = []
    seas_order = []
    aic_val = []
    for params in pdq:
        for seas_par in seas_decomp:
            mod = SARIMAX(df, order=params, seasonal_order=seas_par,
                          enforce_stationarity=False,
                          enforce_invertibility=False, freq="MS").fit()
            arima_order.append(params)
            seas_order.append(seas_par)
            aic_val.append(round(mod.aic, 2))
            print("SARIMA: {} X {} | AIC = {}".format(
                params, seas_par, round(mod.aic, 2)))

    results = pd.DataFrame({"ARIMA Order": arima_order,
                            "Seasonal Order": seas_order,
                            "AIC Value": aic_val})
    results_sorted = results.sort_values(by="AIC Value", ascending=True)
    results_sorted = results_sorted.reset_index(drop=True)
    print("Selected SARIMA Order:", results_sorted.head(2))

    # Bug fix: the keyword was misspelled ``seasona_order``, so the final
    # model was not actually given the selected seasonal order.
    final_model = SARIMAX(df,
                          order=results_sorted["ARIMA Order"][0],
                          seasonal_order=results_sorted["Seasonal Order"][0],
                          enforce_stationarity=False,
                          enforce_invertibility=False, freq="MS").fit()
    print("Final Model Result Summary {}".format(final_model.summary()))
    print(results_sorted["ARIMA Order"][0])
    print(results_sorted["Seasonal Order"][0])

    predictions = final_model.predict(
        start=dt.datetime.strptime("2020-06-01", "%Y-%m-%d"),
        end=dt.datetime.strptime("2020-12-01", "%Y-%m-%d"))
    print("Average Monthly WTI Crude Oil Spot Price from June to Dec 2020:")
    print(predictions)

    # NOTE(review): ``outputfile`` is a module-level name, not a parameter.
    with open(os.path.join(dirloc[:-5], outputfile), "a") as f:
        f.write("Simulation Result of SARIMA....\n")
        f.write(str(results_sorted))
        f.write("\n")
        f.write(str(predictions))
        # Fix: removed the redundant f.close() — ``with`` closes the file.
    return results_sorted
def graph_full_model_forecast(dataframe, target_column, exog_forecast,
                              df_ref, alpha=.05, days_to_forecast=30,
                              train_days=270, m_periods=1,
                              exogenous_column=None,
                              state_postal_code=None):
    '''
    summary function whose purpose is to graph a target_column's forecast

    Returns the forecast object produced by build_SARIMAX_forecast.

    Raises ValueError if exogenous_column is None — previously that case
    fell through and produced a confusing NameError on the return line.
    '''
    # Bug fix: the original only handled the exogenous case; fail fast
    # with a clear message instead of a NameError.
    if exogenous_column is None:
        raise ValueError("graph_full_model_forecast requires an "
                         "exogenous_column")

    # Auto-ARIMA fit plus a dataframe extended with the exogenous forecast
    stepwise_fit, df_forecast = get_exogenous_forecast_dataframe(
        dataframe=dataframe,
        original_dataframe=df_ref,
        exog_forecast=exog_forecast,
        target_column=target_column,
        exogenous_column=exogenous_column,
        days_to_forecast=days_to_forecast,
        m_periods=m_periods)

    # Refit a full SARIMAX on all data using the discovered orders
    full_exog_model = SARIMAX(dataframe[target_column],
                              dataframe[exogenous_column],
                              order=stepwise_fit.order,
                              seasonal_order=stepwise_fit.seasonal_order)
    model = full_exog_model.fit()

    exog_forecast, forecast_object = build_SARIMAX_forecast(
        model=model,
        dataframe=df_forecast,
        target_column=target_column,
        stepwise_fit=stepwise_fit,
        alpha=alpha,
        days_to_forecast=days_to_forecast,
        original_df=df_ref,
        exogenous_column=exogenous_column,
        state_postal_code=state_postal_code)
    return forecast_object
def arima_evaluate(model, test, fh=8, refit=None, metric=MAPE):
    '''
    Evaluate a fitted SARIMAX model on a test series.

    model : fitted SARIMAX results object.
    test : pd Time series. Test data set.
    fh : int. Forecast horizon.
    refit : pd Time series or None. New time series data to refit the
        model on, reusing the previously estimated parameters. (Bug fix:
        the default was a mutable ``pd.Series()`` instance created once at
        definition time and shared across calls; ``None`` now means "no
        refit", and an explicitly-passed empty Series is still accepted.)
    metric : callable(pred, true) -> loss (default MAPE).

    Returns (pred, true, loss).
    '''
    if refit is not None and not refit.empty:
        params = model.params  # store previous parameters
        p_d_q = (model.model.k_ar_params, model.model.k_diff,
                 model.model.k_ma_params)
        # Refit on the new series, starting from the old parameter vector
        model = SARIMAX(refit, order=p_d_q, enforce_stationarity=False,
                        enforce_invertibility=False,
                        trend=None).fit(params, maxiter=1000)

    pred = model.forecast(steps=fh)  # Forecast value
    true = test[:fh]  # true values
    loss = metric(pred.array, true.array)
    return pred, true, loss
def sarimax_predictor(train_user: list, train_match: list,
                      test_match: list) -> float:
    """
    second method: Sarimax
    sarimax is a statistic method which using previous input
    and learn its pattern to predict future data
    input : training data (total_user, with exog data = total_event) in
            list of float
    output : list of total user prediction in float
    >>> sarimax_predictor([4,2,6,8], [3,1,2,4], [2])
    6.6666671111109626
    """
    # Fixed hyper-parameters chosen for this dataset
    order = (1, 2, 1)
    seasonal_order = (1, 1, 0, 7)

    model = SARIMAX(train_user, exog=train_match, order=order,
                    seasonal_order=seasonal_order)
    model_fit = model.fit(disp=False, maxiter=600, method="nm")

    # Predict one step per test observation and return the first value
    result = model_fit.predict(1, len(test_match), exog=[test_match])
    return result[0]
def param_heatmap(ts, limit_p, limit_q, itr, s=0):
    """Compute AIC/AICc/BIC grids over (p, q) for a fixed differencing.

    If ``s == 0`` the non-seasonal order (p, itr, q) is varied; otherwise
    the seasonal order (p, itr, q, s) is varied instead.

    Returns {'aic': ..., 'aicc': ..., 'bic': ...}, each a
    (limit_p, limit_q) array.
    """
    aics = np.zeros((limit_p, limit_q))
    aiccs = np.zeros((limit_p, limit_q))
    bics = np.zeros((limit_p, limit_q))

    for i in range(limit_p):
        for j in range(limit_q):
            if s == 0:
                model = SARIMAX(ts, order=(i, itr, j),
                                initialization="approximate_diffuse")
            else:
                model = SARIMAX(ts, seasonal_order=(i, itr, j, s),
                                initialization="approximate_diffuse")
            fit = model.fit(disp=0)
            aics[i, j] = fit.aic
            aiccs[i, j] = fit.aicc
            bics[i, j] = fit.bic

    return {'aic': aics, 'aicc': aiccs, 'bic': bics}
def sarima_forecast(history, config):
    """
    This function forecast one step using SARIMAX model.

    From the statsmodels page:
    - order -> represented by the parametrs p, d, q for the model of the
      trend
    - seasonal_order -> represented by the parameters (P, D, Q)
    - trend -> to control the model deterministic trend (no trend 'n',
      'c' constant, 't' linear, 'ct' constant with linear trend)
    """
    order, sorder, trend = config

    model = SARIMAX(history, order=order, seasonal_order=sorder,
                    trend=trend, enforce_stationarity=False,
                    enforce_invertibility=False)
    model_fit = model.fit(disp=False)

    # One-step out-of-sample prediction at index len(history)
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]
class SARIMAModel(SMModel):
    """ARIMA model wrapper producing discrete price-direction forecasts."""

    type = [ModelType.CONTINUOUS_PRICE, ModelType.UNIVARIATE]
    name = 'statsmodels.arima'
    default_params = {'order': (1, 1, 1)}

    @with_params
    def fit(self, x, **kwargs):
        """Fit SARIMAX with the configured (p, d, q) order.

        Returns the fitted results object, or None on failure.
        """
        params = kwargs.get('params')
        try:
            self.model = SARIMAX(x, order=params['order']) \
                .fit(disp=params.get('disp', 0))
            return self.model
        # Bug fix: ``np.linalg.linalg`` is a private alias removed in
        # NumPy 2.0; the public spelling is ``np.linalg.LinAlgError``.
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                params['order'][0], params['order'][1],
                params['order'][2]))
            return None

    def predict(self, x, **kwargs):
        """Forecast x.shape[0] steps and discretize the result.

        Returns None if no model has been fit or the forecast fails.
        """
        if not self.model:
            return None
        try:
            forecast = self.model.forecast(steps=x.shape[0])
            return to_discrete_double(forecast, -0.01, 0.01)
        except (ValueError, np.linalg.LinAlgError):
            logger.error('ARIMA convergence error (order {} {} {})'.format(
                self.params['order'][0], self.params['order'][1],
                self.params['order'][2]))

    @with_x
    def get_grid_search_configs(self, **kwargs):
        """Enumerate (p, d, q) grid-search configurations.

        Differencing is skipped (d fixed to 0) when the ADF test says the
        training series is already stationary.
        """
        x_train = kwargs.get('x_train')
        x_test = kwargs.get('x_test')

        p_values = range(0, 6)
        d_values = range(0, 6)
        q_values = range(0, 6)

        # If series is stationary, don't apply differentiation
        adf = adfuller(x_train)  # 0 is score, 1 is pvalue
        if adf[1] < 0.05:
            # Null hp rejected: stationary, no differencing needed
            logger.info('Series is stationary, no need for differencing')
            d_values = [0]  # Set d = 0

        # Cartesian product of all candidate orders
        configs = []
        for p in p_values:
            for d in d_values:
                for q in q_values:
                    configs.append({
                        'params': {'order': (p, d, q)},
                        'x_train': x_train,
                        'x_test': x_test
                    })
        return configs
def train(self, features=None):
    """
    Train the model on a chosen set of features.

    If none are chosen, the default is to re run the model with the
    current best_features attribute. Note that the training is carried
    out on the training data, X_train, only.

    Args:
        features : list - train model with list of desired features

    Returns:
        None. The fitted model is stored internally (see get_model())
        along with the test-set error.
    """
    if not isinstance(self.get_data('X_train'), pd.DataFrame):
        raise TypeError("ERROR: The input training data was not in the form of a pd.DataFrame.")

    print(' ')
    print("Training - ARIMAX")
    print("=================")
    print(" ")
    print("Running ARIMAX model on feauture set:")
    print(" ")

    # Bug fix: identity comparison ``is None`` instead of ``== None``
    if features is None:
        features = self.get_best_features()
    pprint.pprint(features)
    print(" ")
    self._current_features = features

    # Pull out the train/validation/test splits
    X_train_data = self.get_data('X_train')
    X_val_data = self.get_data('X_val')
    X_test_data = self.get_data('X_test')
    Y_train_data = self.get_data('Y_train')
    Y_val_data = self.get_data('Y_val')
    Y_test_data = self.get_data('Y_test')

    # Restrict exogenous regressors to the selected feature columns
    X_train_data_temp = X_train_data[features]
    X_val_data_temp = X_val_data[features]
    X_test_data_temp = X_test_data[features]

    # Fit ARIMAX on train+validation with the exogenous features
    model = SARIMAX(endog=pd.concat([Y_train_data, Y_val_data]),
                    exog=pd.concat([X_train_data_temp, X_val_data_temp]),
                    order=(self.p, self.d, self.q))
    model_fit = model.fit(disp=0)
    self._model = model_fit

    # Score on the held-out test set
    Y_test_pred = model_fit.forecast(
        len(Y_test_data),
        exog=np.array(X_test_data_temp).reshape(
            len(Y_test_data), len(X_test_data_temp.columns)))
    final_rmse_test = _test_metric(Y_test_data, Y_test_pred, 'rmse')
    self._test_error = final_rmse_test

    print(' ')
    print('The RMSE on the test set was: ', final_rmse_test[0])
    print('The mean percentage error is: ', final_rmse_test[1], '%.')
    print('\nFinished training. To access the most recent classifier, call get_model()')
def process_data2():
    """Walk-forward SARIMAX forecast of styrene prices (modeled in log
    space) with lagged oil/gas exogenous variables; writes the collected
    predictions to Results.csv.
    """
    series = pd.read_excel('../../Data/Styrene-Net Industry Average 2010-2015.xlsx', header=0, index_col=0, parse_dates=True)
    series.index.freq = 'MS'
    data = series.copy()
    actuals = pd.read_excel('../../Data/Styrene-Net Industry Average 2015-2018 Actuals.xlsx', header=0, index_col=0, parse_dates=True)
    actuals.index.freq = 'MS'
    #Test ranges
    data = data['2010-01-01':]
    # Model the log price with lagged oil/gas as exogenous regressors
    model = SARIMAX(np.log(data['Styrene']), order=(1,1,1), enforce_invertibility = False, exog = data[['Oil_Lag', 'Gas_Lag']]).fit()
    preds = []
    for i in actuals.index:
        # Build a one-row frame of this month's actual exogenous values
        df = actuals.loc[i,:]
        df = pd.DataFrame(df).T
        fd = pd.DataFrame(data = [df['Oil_Lag'], df['Gas_Lag']])
        # NOTE(review): this assigns an ATTRIBUTE named ``set_index``,
        # shadowing the DataFrame method — it does NOT change the index.
        # Probably ``fd.index = ...`` (or a ``fd.set_index(...)`` call)
        # was intended; ``i+1`` on a Timestamp is also suspect. Confirm.
        fd.set_index = i+1
        fd = pd.DataFrame(fd).T
        df = pd.concat([df, fd])
        # Two-step-ahead forecast; keep only the second step
        yhat_log = model.forecast(steps = 2, exog = df[['Oil_Lag', 'Gas_Lag']])
        yhat_log = yhat_log[[1]]
        yhat = numpy.exp(yhat_log)
        preds.append(yhat)
        # Append the realized actuals and refit for the next iteration
        act = pd.Series(actuals.loc[i,:])
        act = pd.DataFrame(act).T
        data = pd.concat([data, act], axis = 0)
        model = SARIMAX(np.log(data['Styrene']), order=(1,1,1), enforce_invertibility = False, exog = data[['Oil_Lag', 'Gas_Lag']]).fit()
    # Collect predictions into a timestamp/value table and persist
    df = pd.DataFrame({'timestamp': [i.index for i in preds], 'value':[round(i[0],2) for i in preds]})
    df['timestamp'] = df.timestamp.apply(lambda x: str(x).split('[')[1].split(']')[0])
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.to_csv('../../Data/Results.csv', index = False)
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """The fast (Cython) innovations algorithm/filter must match the
    Kalman filter for exact ARMA likelihood evaluation."""
    ar = np.r_[1, -ar_params]
    ma = np.r_[1, ma_params]
    endog = np.random.normal(size=10)
    nobs = len(endog)

    # Innovations algorithm approach, via the fast Cython helpers
    arma_process_acovf = arma_acovf(ar, ma, nobs=nobs, sigma2=sigma2)
    acovf, acovf2 = np.array(_arma_innovations.darma_transformed_acovf_fast(
        ar, ma, arma_process_acovf / sigma2))
    theta, r = _arma_innovations.darma_innovations_algo_fast(
        nobs, ar_params, ma_params, acovf, acovf2)
    u = np.array(_arma_innovations.darma_innovations_filter(
        endog, ar_params, ma_params, theta))
    v = np.array(r) * sigma2
    llf_obs = -0.5 * u**2 / v - 0.5 * np.log(2 * np.pi * v)

    # Kalman filter approach
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[ar_params, ma_params, sigma2])

    # The two approaches must agree
    assert_allclose(u, res.forecasts_error[0])
    assert_allclose(llf_obs, res.llf_obs)

    # The one-shot fast loglikelihood must agree as well
    llf_obs2 = _arma_innovations.darma_loglikeobs_fast(
        endog, ar_params, ma_params, sigma2)
    assert_allclose(llf_obs2, res.llf_obs)
def test_innovations_algo_filter_kalman_filter(ar_params, ma_params, sigma2):
    """The arma_innovations loglikelihood and score helpers must agree
    with the Kalman-filter-based SARIMAX equivalents."""
    endog = np.random.normal(size=100)

    # Innovations algorithm approach
    llf = arma_innovations.arma_loglike(endog, ar_params, ma_params, sigma2)
    llf_obs = arma_innovations.arma_loglikeobs(endog, ar_params, ma_params,
                                               sigma2)
    score = arma_innovations.arma_score(endog, ar_params, ma_params, sigma2)
    score_obs = arma_innovations.arma_scoreobs(endog, ar_params, ma_params,
                                               sigma2)

    # Kalman filter approach
    mod = SARIMAX(endog, order=(len(ar_params), 0, len(ma_params)))
    params = np.r_[ar_params, ma_params, sigma2]

    # Test that the two approaches are the same
    assert_allclose(llf, mod.loglike(params))
    assert_allclose(llf_obs, mod.loglikeobs(params))
    # Note: the tolerance on the two gets worse as more nobs are added
    assert_allclose(score, mod.score(params), atol=1e-5)
    assert_allclose(score_obs, mod.score_obs(params), atol=1e-5)
def test_regression_with_arma_errors(ar_params, ma_params, sigma2):
    """Loglikelihood computation with regressors: the innovations
    algorithm on OLS-demeaned data must match the Kalman filter with the
    OLS coefficients imposed."""
    nobs = 100
    # Bug fix: was ``np.random.normal(nobs)``, which draws a SINGLE value
    # with mean 100 (``loc=nobs``); the intent is nobs iid standard-normal
    # errors, i.e. ``size=nobs``.
    eps = np.random.normal(size=nobs)
    exog = np.c_[np.ones(nobs), np.random.uniform(size=nobs)]
    beta = [5, -0.2]
    endog = np.dot(exog, beta) + eps

    # Innovations algorithm approach: demean by the OLS fit first
    beta_hat = np.squeeze(np.linalg.pinv(exog).dot(endog))
    demeaned = endog - np.dot(exog, beta_hat)
    llf_obs = arma_innovations.arma_loglikeobs(
        demeaned, ar_params, ma_params, sigma2)

    # Kalman filter approach
    # (this works since we impose here that the regression coefficients are
    # beta_hat - in practice, the MLE estimates will not necessarily match
    # the OLS estimates beta_hat)
    mod = SARIMAX(endog, exog=exog, order=(len(ar_params), 0, len(ma_params)))
    res = mod.filter(np.r_[beta_hat, ar_params, ma_params, sigma2])

    # Test that the two approaches are identical
    assert_allclose(llf_obs, res.llf_obs)