def arimax_by_month_fit_predict(y_train, y_test, month, param):
    """Fit the by-month ARIMAX eviction model and forecast three months ahead.

    The model regresses ``Eviction_Notice`` on an intercept and the exogenous
    variable ``CASANF0URN`` (unemployment rate in previous year), combined
    with the usual time-lagged AR and MA terms.

    Parameters:
        y_train -- training frame given to ``pf.ARIMAX``; the formula refers
            to its ``Eviction_Notice`` and ``CASANF0URN`` columns
        y_test -- out-of-sample frame used for the forecast; its
            ``Eviction_Notice`` column supplies the actual values. The result
            pairs those values with ``months_ahead`` [1, 2, 3], so three rows
            are expected.
        month -- month being predicted; the two successive months are derived
            from it with ``pd.offsets.MonthBegin``
        param -- the AR, I and MA parameters; only ``param[0]`` (AR) and
            ``param[2]`` (MA) are passed to the model, ``param[1]`` is unused

    Output:
        temp_df -- DataFrame with columns ``actual_evictions``,
            ``predicted_evictions``, ``months_ahead`` and ``month_year``
    """
    arimax = pf.ARIMAX(
        data=y_train,
        formula='Eviction_Notice~1+CASANF0URN',
        ar=param[0],
        ma=param[2],
    )
    arimax.fit()
    forecast = arimax.predict(h=3, oos_data=y_test)

    observed = y_test.Eviction_Notice.values.tolist()
    forecasted = forecast.Eviction_Notice.values.tolist()
    temp_df = pd.DataFrame({
        'actual_evictions': observed,
        'predicted_evictions': forecasted,
        'months_ahead': [1, 2, 3],
    })

    # Month labels for the 1-, 2- and 3-month-ahead rows.
    following_months = [month + pd.offsets.MonthBegin(step) for step in (1, 2)]
    temp_df['month_year'] = pd.Series([month] + following_months).values
    return temp_df
def test2_predict_is_length():
    """
    Checks that the in-sample prediction dataframe has one row per
    requested step h
    """
    steps = 5
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit()
    in_sample = model.predict_is(h=steps)
    assert in_sample.shape[0] == steps
def time_series(self, train_X, train_y, test_X, test_y, ar=1, ma=1,
                gridsearch=False, gridsearch_training_frac=0.7):
    """
    Fit an ARIMAX model on the training data and return predictions for
    both the training and the test set.

    The target (train_y) is regressed on an intercept plus every column of
    train_X, with `ar` autoregressive and `ma` moving-average terms. When
    `gridsearch` is set, `ar` and `ma` are replaced by the values returned
    from self.gridsearch_time_series (called with error='mse').

    Returns (pred_train, pred_test): single-column DataFrames indexed like
    train_y / test_y and named after the respective series. The first
    max(ar, ma) rows of pred_train are NaN.
    """
    if gridsearch:
        # The third value returned by the grid search is not used below.
        ar, ma, _ = self.gridsearch_time_series(
            train_X, train_y,
            gridsearch_training_frac=gridsearch_training_frac,
            error='mse')

    target = train_y.name
    regressors = "+".join(train_X.columns)
    formula = target + '~1+' + regressors

    # Work on copies so the caller's feature frames are left untouched.
    train_dataset = copy.deepcopy(train_X)
    train_dataset[target] = train_y
    test_dataset = copy.deepcopy(test_X)
    test_dataset[test_y.name] = test_y

    model = pf.ARIMAX(data=train_dataset, formula=formula, ar=ar, ma=ma)
    model.fit()

    # No prediction is produced for the first max(ar, ma) training rows;
    # they are padded with NaN so the result lines up with train_y.
    n_skipped = max(ar, ma)
    model_pred = model.predict(h=len(train_y.index) - n_skipped,
                               oos_data=train_dataset)
    values = np.full((len(model_pred) + n_skipped, 1), np.nan)
    values[n_skipped:] = model_pred.values
    pred_train = pd.DataFrame(values, index=train_y.index, columns=[target])
    pred_train.iloc[n_skipped:, :] = model_pred.values

    test_forecast = model.predict(h=len(test_y.index), oos_data=test_dataset)
    pred_test = pd.DataFrame(test_forecast.values, index=test_y.index,
                             columns=[test_y.name])
    return pred_train, pred_test
def test_bbvi_elbo():
    """
    Checks that the last recorded ELBO of a BBVI fit is higher than the
    first one
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    results = model.fit('BBVI', iterations=200, record_elbo=True)
    elbo_trace = results.elbo_records
    first_elbo, last_elbo = elbo_trace[0], elbo_trace[-1]
    assert last_elbo > first_elbo
def multivariate_arima():
    '''
    Reads the data, fits the ARIMAX model and prints the accuracy score
    of the predicted direction of change.

    The first 52 * 3 rows returned by preprocessing.main() are used for
    training and the remaining rows for testing. The model regresses
    `milk` on an intercept, `cheese`, `dry`, `corn` and `Value` with
    ar=9, ma=0, integ=1, estimated by MLE.

    Inputs: None
    Outputs: None (the fit summary and the accuracy score are printed)
    '''
    data = preprocessing.main()
    n_train_hours = 52 * 3
    train = data.iloc[:n_train_hours, :]
    test = data.iloc[n_train_hours:, :]
    model = pf.ARIMAX(data=train, formula='milk~1+cheese+dry+corn+Value',
                      ar=9, ma=0, integ=1)
    x = model.fit("MLE")
    x.summary()
    # model.plot_fit(figsize=(15,5))
    # model.plot_predict(h=38,past_values=20,figsize=(15,5), oos_data=test)
    yhat = model.predict(h=38, oos_data=test)
    # Presumably, with integ=1 the forecast is on the differenced scale, so
    # a positive value means a predicted increase -- TODO confirm.
    pred_chg = yhat > 0
    # NOTE(review): the first element of .diff() is NaN, so the first actual
    # change always evaluates to False; the slice also assumes the test set
    # holds 39 rows so that it matches the 38 forecast steps.
    actual_chg = test.iloc[:-1, 0].diff() > 0
    # print() with a single argument behaves the same on Python 2 and 3;
    # the former print statement was a syntax error on Python 3.
    print(accuracy_score(actual_chg, pred_chg))
def test2_predict_is_nans():
    """
    Tests that the predictions in-sample are not NaNs

    The in-sample prediction is computed once and then checked, so the
    NaN mask and the values it is applied to come from the same run.
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    x = model.fit()
    x.summary()
    predicted = model.predict_is(h=5).values
    assert not np.isnan(predicted).any()
def test2_ppc():
    """
    Checks that the PPC value of a BBVI-fitted model lies in [0, 1]
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    p_value = model.ppc()
    in_unit_interval = 0.0 <= p_value <= 1.0
    assert in_unit_interval
def test_predict_length():
    """
    Checks that the out-of-sample prediction dataframe has one row per
    requested step h
    """
    steps = 5
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2, family=pf.t())
    results = model.fit()
    results.summary()
    forecast = model.predict(h=steps, oos_data=data_oos)
    assert forecast.shape[0] == steps
def test_predict_nans():
    """
    Tests that the predictions are not NaNs

    The forecast is computed once and then checked, so the NaN mask and
    the values it is applied to come from the same run.
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2, family=pf.t())
    x = model.fit()
    x.summary()
    predicted = model.predict(h=5, oos_data=data_oos).values
    assert not np.isnan(predicted).any()
def test2_predict_is_nonconstant():
    """
    In-sample predictions should not all be equal to the first one.
    Constant output would point to the predict function not iterating
    forward.
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit()
    forecast = model.predict_is(h=10, intervals=False).values
    first_row = forecast[0]
    assert np.any(forecast != first_row)
def test2_sample_model():
    """
    Checks the shape of the array returned by the sampling function:
    nsims rows and len(data) - 2 columns
    """
    n_draws = 100
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    sample = model.sample(nsims=n_draws)
    n_rows, n_cols = sample.shape[0], sample.shape[1]
    assert n_rows == n_draws
    assert n_cols == len(data) - 2
def build_model(data, ar=4, ma=4, integ=0, target=None):
    """Build (without fitting) a pyflux ARIMAX model for ``target``.

    The regressors are the first ten columns of ``data`` other than
    ``target``, taken in column order and joined into the model formula.

    Parameters:
        data -- DataFrame holding the target and the regressor columns
        ar -- number of autoregressive lags (default 4)
        ma -- number of moving-average lags (default 4)
        integ -- order of differencing (default 0)
        target -- name of the dependent column; required

    Returns the unfitted ``pf.ARIMAX`` instance.

    Raises ValueError when ``target`` is None, since the formula would
    otherwise be built as 'None~...'.
    """
    if target is None:
        raise ValueError("target must name the dependent column of data")

    # Earlier hand-picked regressor set, kept for reference:
    # formula='{}~casos_est_min + casos_est_max+ casos+ p_rt1 + p_inc100k +nivel'.format(target),
    regressors = [col for col in data.columns if col != target][:10]
    formula = '{}~'.format(target) + '+'.join(regressors)

    # ar/ma/integ are forwarded; they used to be hard-coded to 4/4/0 here,
    # which silently ignored the caller's arguments.
    return pf.ARIMAX(data=data, formula=formula, ar=ar, ma=ma, integ=integ)
def test_no_terms():
    """
    For an ARIMAX model with no AR or MA terms, checks that the latent
    variable list has 3 entries and that none of their values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=0, ma=0)
    model.fit()
    latent = model.latent_variables.z_list
    assert len(latent) == 3
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test_laplace():
    """
    For an ARIMAX model estimated with the Laplace approximation, checks
    that the latent variable list has 6 entries and that none of their
    values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('Laplace')
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test_mh():
    """
    For an ARIMAX model estimated with Metropolis-Hastings, checks that
    the latent variable list has 6 entries and that none of their values
    is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('M-H', nsims=300)
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test2_pml():
    """
    For an ARIMAX model with two predictors estimated with PML, checks
    that the latent variable list has 7 entries and that none of their
    values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.t())
    model.fit('PML')
    latent = model.latent_variables.z_list
    assert len(latent) == 7
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test_bbvi_mini_batch():
    """
    For an ARIMAX model estimated with BBVI using mini-batches of 32,
    checks that the latent variable list has 6 entries and that none of
    the estimated values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('BBVI', iterations=100, mini_batch=32)
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test_couple_terms():
    """
    For an ARIMAX model with 1 AR and 1 MA term, checks that the latent
    variable list has 6 entries and that none of their values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit()
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test2_bbvi():
    """
    For an ARIMAX model with two predictors estimated with BBVI, checks
    that the latent variable list has 7 entries and that none of their
    values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.t())
    model.fit('BBVI', iterations=100)
    latent = model.latent_variables.z_list
    assert len(latent) == 7
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
def test2_predict_is_intervals_mh():
    """
    Checks that, after a Metropolis-Hastings fit, the in-sample
    prediction intervals are strictly ordered around the point
    prediction: 99% > 95% > prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('M-H', nsims=400)
    predictions = model.predict_is(h=10, intervals=True)

    # Columns from highest to lowest; each must exceed the one after it.
    descending = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(descending, descending[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def a_test_bbvi_elbo():
    """
    Checks that the last recorded ELBO of a BBVI fit (Exponential family,
    map_start disabled) is higher than the first one
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Exponential())
    results = model.fit('BBVI', iterations=300, record_elbo=True, map_start=False)
    elbo_trace = results.elbo_records
    first_elbo, last_elbo = elbo_trace[0], elbo_trace[-1]
    assert last_elbo > first_elbo
def arimax_by_zip_fit_predict(y_train, y_test, param):
    """Fit an ARIMAX model and forecast 1, 2 and 3 months ahead.

    ``Eviction_Notice`` is regressed on an intercept and the exogenous
    variable ``CASANF0URN``, underlying the ARIMA fit.

    Parameters:
        y_train -- training frame given to ``pf.ARIMAX``
        y_test -- out-of-sample frame used for the forecast; its
            ``Eviction_Notice`` column supplies the actual values
        param -- sequence of model orders; only ``param[0]`` (AR) and
            ``param[2]`` (MA) are passed to the model, ``param[1]`` is unused

    Output:
        (predicted, actual) -- two lists of eviction totals
    """
    arimax = pf.ARIMAX(
        data=y_train,
        formula='Eviction_Notice~1+CASANF0URN',
        ar=param[0],
        ma=param[2],
    )
    arimax.fit()
    forecast = arimax.predict(h=3, oos_data=y_test)
    predicted = forecast.Eviction_Notice.values.tolist()
    actual = y_test.Eviction_Notice.values.tolist()
    return predicted, actual
def test2_predict_intervals_bbvi():
    """
    Checks that, after a BBVI fit, the out-of-sample prediction intervals
    are strictly ordered around the point prediction:
    99% > 95% > prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    predictions = model.predict(h=10, oos_data=data_oos, intervals=True)

    # Columns from highest to lowest; each must exceed the one after it.
    descending = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(descending, descending[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def a_test_predict_is_nonconstant():
    """
    In-sample predictions (BBVI, Exponential family) should not all be
    equal to the first one. Constant output would point to the predict
    function not iterating forward.
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Exponential())
    model.fit('BBVI', iterations=200)
    forecast = model.predict_is(h=10, fit_method='BBVI', intervals=False).values
    first_row = forecast[0]
    assert np.any(forecast != first_row)
def test2_predict_nonconstant():
    """
    Out-of-sample predictions (BBVI, Poisson family) should not all be
    equal to the first one. Constant output would point to the predict
    function not iterating forward.
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.Poisson())
    model.fit('BBVI', iterations=200)
    predictions = model.predict(h=10, oos_data=data_oos, intervals=False)
    print(predictions)
    forecast = predictions.values
    first_row = forecast[0]
    assert np.any(forecast != first_row)
def test_predict_is_intervals():
    """
    Checks that the in-sample prediction intervals are strictly ordered
    around the point prediction: 99% > 95% > prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2)
    model.fit()
    predictions = model.predict_is(h=10, intervals=True)

    # Columns from highest to lowest; each must exceed the one after it.
    descending = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(descending, descending[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def test_bbvi_mini_batch_elbo():
    """
    Checks that the last recorded ELBO of a mini-batch BBVI fit (Poisson
    family, map_start disabled) is higher than the first one
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Poisson())
    results = model.fit('BBVI', iterations=300, mini_batch=32,
                        record_elbo=True, map_start=False)
    elbo_trace = results.elbo_records
    first_elbo, last_elbo = elbo_trace[0], elbo_trace[-1]
    assert last_elbo > first_elbo
def arimax_base_rmse_mode(train_input, train_target, test_input, test_target):
    """Fit an ARIMAX baseline on differenced data and score its first forecast.

    The training inputs and target are first-differenced and an
    ARIMAX(ar=2, ma=2, integ=0) model with formula
    "current_price~totaltx_5" is fitted by MLE. The first forecast
    difference is added to the last training target to obtain a predicted
    price, which is compared with the first test target.

    Parameters:
        train_input -- 2-D array (rows x features); columns are named
            price_<i> for even i and totaltx_<i> for odd i
        train_target -- 1-D sequence of training target prices
        test_input -- test features with the same columns as train_input
        test_target -- test target prices

    Returns the value of mean_squared_error for that single
    (actual, predicted) pair; no square root is taken here despite the
    function name. The value is also printed.
    """
    n_features = int(train_input.shape[1])
    train_columns_name = [
        ('price_' if i % 2 == 0 else 'totaltx_') + str(i)
        for i in range(n_features)
    ]
    columns_name = train_columns_name + ['current_price']

    # First differences of every input column, with the differenced target
    # appended as the last column.
    train_diff = np.column_stack(
        (np.diff(train_input, axis=0), np.diff(train_target)))
    train_frame = pd.DataFrame(train_diff, columns=columns_name)

    model = pf.ARIMAX(data=train_frame,
                      formula="current_price~totaltx_5",
                      ar=2,
                      ma=2,
                      integ=0)
    model_1 = model.fit("MLE")
    model_1.summary()

    test_input_pd = pd.DataFrame(test_input, columns=train_columns_name)
    test_target_pd = pd.DataFrame(test_target, columns=['current_price'])
    test_input_target = pd.concat([test_input_pd, test_target_pd], axis=1)
    pred = model.predict(
        h=test_input_target.shape[0],
        oos_data=test_input_target,
        intervals=True,
    )

    # The positions used to be hard-coded (label 99 for the last training
    # target and first forecast, column 6 for the test target), which only
    # holds for 100 training rows and 6 input columns. Select them
    # positionally / by name instead.
    # NOTE(review): assumes the first row of `pred` is the one-step-ahead
    # forecast -- confirm against pyflux's predict output.
    last_train_price = np.asarray(train_target)[-1]
    first_forecast_diff = pred.current_price.iloc[0]
    actual_price = test_input_target['current_price'].iloc[0]

    arimax_base_rmse = mean_squared_error(
        [actual_price], [last_train_price + first_forecast_diff])
    print("arimax_base_rmse:", arimax_base_rmse)
    return arimax_base_rmse
def test2_predict_is_intervals():
    """
    Checks that the in-sample prediction intervals (Poisson family) are
    ordered correctly: 99% >= 95% >= 5% >= 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.Poisson())
    model.fit()
    predictions = model.predict_is(h=10, intervals=True)

    # Interval columns from highest to lowest; ties are allowed.
    descending = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(descending, descending[1:]):
        assert np.all(predictions[upper].values >= predictions[lower].values)
def a_test2_predict_is_intervals_bbvi():
    """
    Checks that, after a BBVI fit (Exponential family), the in-sample
    prediction intervals are ordered correctly: 99% >= 95% >= 5% >= 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.Exponential())
    model.fit('BBVI', iterations=100)
    predictions = model.predict_is(h=10, intervals=True)

    # Interval columns from highest to lowest; ties are allowed.
    descending = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(descending, descending[1:]):
        assert np.all(predictions[upper].values >= predictions[lower].values)