def test_with_seasonality():
    """Fit a seasonal ARIMA on ``wineind`` and sanity-check it against R."""
    estimator = ARIMA(order=(1, 1, 1),
                      seasonal_order=(0, 1, 1, 12),
                      suppress_warnings=True)
    fit = estimator.fit(y=wineind)
    _try_get_attrs(fit)

    # Information criteria reported by the equivalent R fit. We only require
    # agreement to within 100 or so.
    r_aic, r_aicc, r_bic = 3004, 3005, 3017
    assert abs(fit.aic() - r_aic) < 100
    assert abs(fit.aicc() - r_aicc) < 100
    assert abs(fit.bic() - r_bic) < 100

    # In-sample prediction must work ...
    fit.predict_in_sample()

    # ... and so must forecasting with SARIMAX confidence intervals.
    fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
def test_oob_for_issue_29():
    """Fit/predict on sunspots for every combo of d, OOB size and exog."""
    dta = sm.datasets.sunspots.load_pandas().data
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    del dta["YEAR"]

    n_obs = dta.shape[0]
    xreg = np.random.RandomState(1).rand(n_obs, 3)

    # Exercise cv on/off, both differencing levels, with and without xreg.
    for d in (0, 1):
        for cv in (0, 3):
            for exog in (xreg, None):
                try:
                    estimator = ARIMA(order=(2, d, 0), out_of_sample_size=cv)
                    model = estimator.fit(dta, exogenous=exog)

                    # When exogenous data was used for the fit, predict needs
                    # n_periods rows of it; otherwise it stays None.
                    xr = None if exog is None else exog[:3, :]

                    _, _ = model.predict(n_periods=3,
                                         return_conf_int=True,
                                         exogenous=xr)

                except Exception as ex:
                    # Statsmodels can be fragile with ARMA coefficient
                    # computation ("The computed initial MA coefficients are
                    # not invertible. You should induce invertibility, ...").
                    # Tolerate only that failure; anything else is re-raised.
                    if "invertibility" not in pytest_error_str(ex):
                        raise
def test_new_serialization():
    """Round-trip an ARIMA through joblib with new- and legacy-style pickling.

    The test first shows that a freshly fit model does not use the legacy
    (separate ``tmp_pkl_`` file) mechanism, then forces the legacy layout,
    shows it still loads, and finally shows that loading fails with an
    ``OSError`` once the side-car file has been removed.
    """
    arima = ARIMA(order=(0, 0, 0), suppress_warnings=True).fit(y)

    pkl_file = "file.pkl"
    new_loc = "ts_wrapper.pkl"

    try:
        # Serialize it and show it does NOT use the old-style pickling
        joblib.dump(arima, pkl_file)
        assert not _uses_legacy_pickling(arima)

        loaded = joblib.load(pkl_file)
        assert not _uses_legacy_pickling(loaded)

        preds = loaded.predict()
        os.unlink(pkl_file)

        # Now save out the arima_res_ piece separately, and show we can load
        # it from the legacy method
        arima.summary()
        arima.arima_res_.save(fname=new_loc)
        arima.tmp_pkl_ = new_loc
        assert _uses_legacy_pickling(arima)

        # Save/load it and show it works
        joblib.dump(arima, pkl_file)
        loaded2 = joblib.load(pkl_file)
        assert_array_almost_equal(loaded2.predict(), preds)

        # De-cache
        arima._clear_cached_state()
        assert not os.path.exists(new_loc)

        # Show we get an OSError now
        with pytest.raises(OSError) as ose:
            joblib.load(pkl_file)

        # Inspect the raised exception itself: the string form of the
        # ExceptionInfo wrapper differs between pytest versions, and the
        # check has to run after the ``raises`` block to execute at all.
        assert "Does it still" in str(ose.value), ose

    finally:
        _unlink_if_exists(pkl_file)
        _unlink_if_exists(new_loc)
def run_auto_arimax(self):
    """Grid-search ARIMA orders and keep the fit with the lowest AIC.

    Every ``(p, d, q)`` with ``p`` in 0..3, ``d`` in 0..1 and ``q`` in 0..3
    is fit on ``self.endo_obs2`` with ``self.exo_obs`` as exogenous data.
    Orders whose fit (or AIC computation) fails are skipped.

    Side effects:
        * ``self.arima_model`` is set to the best (lowest-AIC) fitted model.
          If no order could be fit it is left untouched.
        * ``self.parameters`` is set to
          ``[best_pdq, self.lambda_boxcox[0], lowest_aic]`` and printed.

    :return: ``self.arima_model``
    """
    lower_aic = float(99999)
    best_pdq = [0, 0, 0]
    best_model = None

    for pdq in itertools.product(range(0, 4), range(0, 2), range(0, 4)):
        try:
            candidate = ARIMA(order=pdq, suppress_warnings=True).fit(
                y=self.endo_obs2, exogenous=self.exo_obs)
            candidate_aic = candidate.aic()
        except Exception:
            # This order could not be fit; try the next one. ``Exception``
            # (not a bare except) so Ctrl-C still interrupts the search.
            continue

        if candidate_aic < lower_aic:
            lower_aic = candidate_aic
            best_pdq = tuple(candidate.order)
            best_model = candidate

    # Keep the model that actually produced ``best_pdq``/``lower_aic`` rather
    # than whichever order happened to be fit last.
    if best_model is not None:
        self.arima_model = best_model

    # Compile parameters to list
    self.parameters = [best_pdq, self.lambda_boxcox[0], lower_aic]
    print(self.parameters)
    return self.arima_model
def test_basic_arma():
    """Check a constant-only ARMA(0, 0, 0) fit against reference values."""
    arma = ARIMA(order=(0, 0, 0), suppress_warnings=True)
    preds = arma.fit_predict(y)  # fit/predict for coverage

    # No out-of-sample size was requested, so there are no OOB predictions
    assert arma.oob_preds_ is None

    # Information criteria and the intercept (param 0); the AIC and
    # intercept values are the equivalents from R
    assert_almost_equal(arma.aic(), 11.201, decimal=3)
    assert_almost_equal(arma.params()[0], 0.441, decimal=3)
    assert_almost_equal(arma.aicc(), 11.74676, decimal=5)
    assert_almost_equal(arma.bic(), 13.639060053303311, decimal=5)

    # Ten identical point forecasts are expected ...
    n_forecasts = 10
    assert_array_almost_equal(preds, np.full(n_forecasts, 0.44079876))

    # ... each with the same confidence interval.
    expected_intervals = np.tile([-0.10692387, 0.98852139], (n_forecasts, 1))
    _, intervals = arma.predict(n_periods=10,
                                return_conf_int=True,
                                alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)
def _build(self, **config):
    """
    Create the (unfitted) ARIMA estimator from hyperparameters.

    Recognised keys and their defaults: ``p``=2, ``d``=0, ``q``=2,
    ``seasonality_mode``=True, ``P``=1, ``D``=0, ``Q``=1, ``m``=7 and
    ``metric`` (defaults to the current ``self.metric``).

    :param config: hyperparameters for the model
    """
    self.seasonal = config.get('seasonality_mode', True)
    self.metric = config.get('metric', self.metric)

    order = (config.get('p', 2), config.get('d', 0), config.get('q', 2))
    requested_seasonal = (config.get('P', 1),
                          config.get('D', 0),
                          config.get('Q', 1),
                          config.get('m', 7))

    # With seasonality switched off the seasonal part is zeroed out.
    seasonal_order = requested_seasonal if self.seasonal else (0, 0, 0, 0)

    self.model = ARIMA(order=order,
                       seasonal_order=seasonal_order,
                       suppress_warnings=True)
def run_ARIMA(data, param):
    """Rolling one-step-ahead ARIMA forecast over the tail of each series.

    ``param['order']`` is the ARIMA order and ``param['testsize']`` the
    fraction of the last axis of ``data`` used as the test span. For every
    test step a fresh model is fit per row on a sliding window and its
    1-period forecast is recorded.

    Returns a dict with ``acc`` and ``nrmse`` (from ``get_acc`` / ``nrmse``)
    and ``ave_time``, the mean wall-clock time per test step.
    """
    order = param['order']
    testsize = param['testsize']

    T = data.shape[-1]
    T_test = int((T * testsize) // 1)
    result_full = np.zeros([data.shape[0], T_test])

    total_time = 0
    n_round = 0
    for i in range(T_test):
        # Window slides forward by one step per round, keeping its length.
        window = data[..., i:T - T_test + i].copy()
        n_round += 1

        start = time.time()
        for j in range(window.shape[0]):
            model = ARIMA(order, suppress_warnings=True,
                          enforce_stationarity=True)
            forecast = model.fit_predict(window[j], n_periods=1)
            result_full[j, i] = forecast[..., -1]
        elapsed = time.time() - start

        total_time = total_time + elapsed

    true_value = data[..., -T_test:]
    stat = {
        'acc': get_acc(result_full, true_value),
        'nrmse': nrmse(result_full, true_value),
        'ave_time': total_time / n_round,
    }
    return stat
def test_the_r_src():
    """Reproduce the test shipped with the R code on the ``abc`` series."""
    fit = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True).fit(abc)

    # Information criteria reported by R: AIC ~135, AICc ~137, BIC ~145
    r_aic, r_aicc, r_bic = 135, 137, 145
    assert abs(r_aic - fit.aic()) < 1.0
    assert abs(r_aicc - fit.aicc()) < 1.0
    assert abs(r_bic - fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    # statsmodels puts the mean first rather than last, hence the reorder.
    expected_params = np.array([5.0370, -0.6515, -0.2449, 0.8012])
    assert_almost_equal(fit.params(), expected_params, decimal=2)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
    #             max.q=5, max.order=100, stepwise=F)
    auto_fit = auto_arima(abc,
                          max_p=5, max_d=5, max_q=5, max_order=100,
                          seasonal=False, trend='c',
                          suppress_warnings=True, error_action='ignore')

    # this differs from the R fit (135.28) with a slightly higher AIC...
    assert abs(137 - auto_fit.aic()) < 1.0
def test_the_r_src():
    """Reproduce the test shipped with the R code on the ``abc`` series."""
    fit = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True).fit(abc)

    # Information criteria reported by R: AIC 135.4, AICc ~137, BIC ~145
    r_aic, r_aicc, r_bic = 135.4, 137, 145
    assert abs(r_aic - fit.aic()) < 1.0
    assert abs(r_aicc - fit.aicc()) < 1.0
    assert abs(r_bic - fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    assert_almost_equal(fit.arparams(), [-0.6515, -0.2449], decimal=3)
    assert_almost_equal(fit.maparams(), [0.8012], decimal=3)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
    #             max.q=5, max.order=100, stepwise=F)
    auto_fit = auto_arima(abc,
                          max_p=5, max_d=5, max_q=5, max_order=100,
                          seasonal=False, trend='c',
                          suppress_warnings=True, error_action='ignore')

    # R's AIC for this search is 135.28
    assert abs(135.28 - auto_fit.aic()) < 1.0
exog = np.random.RandomState(1).rand(vec.shape[0], 2) auto_arima(vec, exogenous=exog, out_of_sample_size=1, seasonal=False, suppress_warnings=True) # This is a way to force it: ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, exogenous=exog) @pytest.mark.parametrize( # will be m - d 'model', [ ARIMA(order=(2, 0, 0)), # arma ARIMA(order=(2, 1, 0)), # arima ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax ]) def test_predict_in_sample_conf_int(model): model.fit(wineind) expected_m_dim = wineind.shape[0] preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05) assert preds.shape[0] == expected_m_dim assert confints.shape == (expected_m_dim, 2) @pytest.mark.parametrize( 'model', [ ARIMA(order=(2, 0, 0)), # arma
def test_cross_val_predict_error():
    """``cross_val_predict`` must raise ``ValueError`` for this CV setup."""
    splitter = SlidingWindowForecastCV(step=24, h=1)

    with pytest.raises(ValueError):
        cross_val_predict(
            ARIMA(order=(2, 1, 0), maxiter=3),
            y,
            cv=splitter,
        )
_check_scoring, cross_validate, cross_val_predict, _check_averaging from pmdarima.datasets import load_wineind import pytest import numpy as np from unittest import mock y = load_wineind() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=150, step=12, h=1), ]) @pytest.mark.parametrize('est', [ ARIMA(order=(2, 1, 1)), ARIMA( order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), suppress_warnings=True), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=3))]) ]) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('exog', [None, exogenous]) def test_cv_scores(cv, est, verbose, exog): scores = cross_val_score(est, y, exogenous=exog, scoring='mean_squared_error', cv=cv, verbose=verbose) assert isinstance(scores, np.ndarray)
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer

import pytest

# Series every estimator below is fit on
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(
                seasonal=False,
                stepwise=True,
                suppress_warnings=True,
                d=1,
                max_p=2,
                max_q=0,
                start_q=0,
                start_p=1,
                maxiter=3,
                error_action='ignore',
            )),
        ]),
    ]
)
def test_clonable(est):
    """A fitted estimator can be cloned into a distinct, same-typed object."""
    est.fit(y)
    duplicate = clone(est)

    assert duplicate is not est
    assert isinstance(duplicate, est.__class__)
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer

import pytest

# Series every estimator below is fit on
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(
                seasonal=False,
                stepwise=True,
                suppress_warnings=True,
                d=1,
                max_p=2,
                max_q=0,
                start_q=0,
                start_p=1,
                maxiter=3,
                error_action='ignore',
            )),
        ]),
    ]
)
def test_clonable(est):
    """A fitted estimator can be cloned into a distinct, same-typed object."""
    est.fit(y)
    duplicate = clone(est)

    assert duplicate is not est
    assert isinstance(duplicate, est.__class__)
# It may be a lost cause, but by all means prove me wrong.


def example_pmd():
    """Run ``pmd_exogenous`` over the hospital data and return its state dict.

    The shared dict ``s`` is passed to every call; the per-call return values
    are not used.
    """
    s = {}
    y, a = hospital_with_exog(k=3)
    for yj, aj in zip(y[:500], a):
        pmd_exogenous(y=yj, s=s, k=3, a=aj)
    return s


def arima_res_to_dict(arima_res):
    """Return the instance ``__dict__`` of ``arima_res`` (not a copy)."""
    return arima_res.__dict__


def pmd_to_dict(pmd):
    """Replace ``pmd['model']`` by its pickling state, in place.

    The nested ``arima_res_`` entry of that state is in turn replaced by its
    attribute dict. Returns the same ``pmd`` mapping.
    """
    model_state = pmd['model'].__getstate__()
    pmd['model'] = model_state
    model_state['arima_res_'] = arima_res_to_dict(model_state['arima_res_'])
    return pmd


def pmd_from_dict(pmd):
    """Blank out ``pmd['model']['arima_res_']``; returns nothing."""
    pmd['model']['arima_res_'] = ''


if __name__ == '__main__':
    pmd = example_pmd()
    model = pmd['model']
    model1 = ARIMA(**model.get_params())
    fitted_res = model.__dict__['arima_res_']
    prms = fitted_res.__dict__['_results'].params
def test_to_dict_raises_attribute_error_on_unfit_model():
    """``to_dict`` on a model that was never fit raises AttributeError."""
    unfit_model = ARIMA(order=(1, 1, 0))

    with pytest.raises(AttributeError):
        unfit_model.to_dict()
def test_oob_for_issue_28():
    """OOB scoring with exogenous data matches a manual hold-out fit."""
    # Continuation of above: can we do one with an exogenous array, too?
    n_oob = 10
    xreg = rs.rand(hr.shape[0], 4)

    with_oob = ARIMA(order=(2, 1, 2),
                     suppress_warnings=True,
                     out_of_sample_size=n_oob)
    arima = with_oob.fit(y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # The stored endog equals the original; the model's endog is the
    # differenced array, i.e. original length - d.
    res = arima.arima_res_
    assert np.allclose(res.data.endog, hr, rtol=1e-2)
    assert res.model.endog.shape[0] == hr.shape[0] - 1

    # Same for exog
    assert np.allclose(res.data.exog, xreg, rtol=1e-2)
    assert res.model.exog.shape[0] == xreg.shape[0] - 1

    # Fit an equivalent model on everything but the last 10 observations and
    # without OOB scoring: scoring its 10-step forecast by hand must
    # reproduce the OOB score, showing the OOB model was only fit on the
    # first (train - n_out_of_bag) samples.
    held_out_y = hr[-n_oob:]
    held_out_x = xreg[-n_oob:, :]
    arima_no_oob = ARIMA(order=(2, 1, 2),
                         suppress_warnings=True,
                         out_of_sample_size=0).fit(y=hr[:-n_oob],
                                                   exogenous=xreg[:-n_oob, :])

    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    preds = arima_no_oob.predict(n_periods=n_oob, exogenous=held_out_x)
    assert np.allclose(oob, scoring(held_out_y, preds), rtol=1e-2)

    # Model parameters agree between the two fits
    xreg_test = rs.rand(5, 4)
    assert np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # ... but the forecasts differ, because the OOB model has already seen
    # the last 10 observations.
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5, exogenous=xreg_test)
    assert_raises(AssertionError, assert_array_almost_equal,
                  with_oob_forecasts, no_oob_forecasts)

    # Adding observations must be rejected when the exogenous array is
    # missing, has a mismatched number of rows, or has a different number of
    # columns.
    for bad_exog in (None, xreg_test, xreg_test[:, :2]):
        assert_raises(ValueError, arima_no_oob.add_new_observations,
                      held_out_y, bad_exog)

    # After really adding the held-out data, both models forecast the same.
    arima_no_oob.add_new_observations(held_out_y, held_out_x)
    updated_forecasts = arima_no_oob.predict(n_periods=5,
                                             exogenous=xreg_test)
    assert np.allclose(with_oob_forecasts, updated_forecasts, rtol=1e-2)
with pytest.raises(ValueError) as ve: pipeline.predict(3, **kwargs) assert "'n_periods'" in pytest_error_str(ve) # Assert that we can update the model pipeline.update(test, maxiter=5) # And that the fourier transformer was updated properly... assert pipeline.steps_[0][1].n_ == wineind.shape[0] @pytest.mark.parametrize( 'pipeline', [ Pipeline([("arma", ARIMA(order=(2, 0, 0)))]), Pipeline([("arima", ARIMA(order=(2, 1, 0)))]), Pipeline([ ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12))) ]), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arma", ARIMA(order=(2, 0, 0)))]), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0)))]), # one with a boxcox transformer Pipeline([("boxcox", BoxCoxEndogTransformer()), ("fourier", FourierFeaturizer(m=12)), ("arima", AutoARIMA(seasonal=False, stepwise=True,
train, test = data[:train_len], data[train_len:] # KPSS test KPSSResults = namedtuple("KPSSResults", ["kpss_stat", "p_value", "lags", "critical_values"]) kpss_results = KPSSResults(*tsa.kpss(data, nlags='auto')) print("KPSS results:\n", kpss_results) auto_fit = False if auto_fit: arima = auto_arima(train, stepwise=True, trace=1, seasonal=False) print(arima.summary()) else: with warnings.catch_warnings(): warnings.simplefilter("ignore") arima = ARIMA(order=(4, 1, 4), seasonal_order=None) arima.fit(train) # Diagnostics plot arima.plot_diagnostics(lags=50) plt.gcf().suptitle('Diagnostics Plot', fontsize=14) # !! not necessary !! Everything already plotted # Plot Residuals and fitted values # plt.figure() # fitted_values = arima.predict_in_sample() # plt.plot(df.index[:train_len - 1], fitted_values, # color='C0', label="Fitted values") # plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data") # plt.plot(df.index[:train_len - 1], arima.resid(), # color='C2', label="Residuals")
# Show we get an OSError now with pytest.raises(OSError) as ose: joblib.load(pkl_file) assert "Does it still" in str(ose), ose finally: _unlink_if_exists(pkl_file) _unlink_if_exists(new_loc) @pytest.mark.parametrize( 'model', [ # ARMA ARIMA(order=(1, 0, 0)), # ARIMA ARIMA(order=(1, 1, 2)), # SARIMAX ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)) ]) def test_issue_104(model): # Issue 104 shows that observations were not being updated appropriately. # We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX) endog = wineind train, test = endog[:125], endog[125:] model.fit(train) preds1 = model.predict(n_periods=100)
def model_plot(days):
    """Build the New York consumption forecast figure.

    Reads 'data/new_york.csv' and 'data/weather/weatherNY.csv', fits an
    ARIMA with exogenous regressors (weekend flag, TMIN, Fourier terms) on
    all but the last 100 days of 2016-2019, then forecasts ``days`` steps,
    updates the model with the observed values and repeats (5 forecasts in
    total).

    :param days: forecast/update step size; coerced with ``int``.
    :return: the figure created with ``go.Figure``.
    """
    day = int(days)
    pd.plotting.register_matplotlib_converters()

    # Consumption, resampled to daily totals
    df = pd.read_csv('data/new_york.csv')
    df['Date'] = pd.to_datetime(df['Date'])
    df.index = df.Date
    df = df.drop('Date', axis=1)
    df = df.resample('D').sum()
    df = df.tz_localize(None)

    # Weather, indexed by date
    nyc_weather = pd.read_csv('data/weather/weatherNY.csv')
    nyc_weather['DATE'] = pd.to_datetime(nyc_weather['DATE'])
    nyc_weather = nyc_weather.set_index('DATE')
    nyc_weather.drop(['NAME', 'STATION'], axis=1, inplace=True)
    nyc_weather = nyc_weather['2015-07-01':'2020-08-10']
    df = df[:'2020-08-10']

    # Restrict both sources to 2016-2019
    df1 = df["2016":"2019"]
    nyc_weather = nyc_weather["2016":"2019"]
    y = df1.Consumption

    # Exogenous regressors: weekend flag, minimum temperature and Fourier
    # terms with periods 638 and 516 (two harmonics each)
    exog = pd.DataFrame({'date': y.index})
    exog = exog.set_index(pd.PeriodIndex(exog['date'], freq='D'))
    exog['is_weekend'] = np.where(exog.index.dayofweek < 5, 0, 1)
    exog['TMIN'] = nyc_weather['TMIN'].values
    term = 1
    for period in (638, 516):
        for multiplier in (2, 4):
            angle = multiplier * np.pi * exog.index.dayofyear / period
            exog['sin%d' % term] = np.sin(angle)
            exog['cos%d' % term] = np.cos(angle)
            term += 1
    exog = exog.drop(columns=['date'])

    # Everything before ``split`` is training data
    split = len(y) - 100
    y_to_train = y.iloc[:split]
    exog_to_train = exog.iloc[:split]

    true = []
    predict = []
    dates = []
    steps = []

    # Initial fit and first forecast
    arima_exog_model = ARIMA(order=(3, 0, 1),
                             seasonal_order=(2, 0, 0, 7),
                             exogenous=exog_to_train,
                             error_action='ignore',
                             initialization='approximate_diffuse',
                             suppress_warnings=True).fit(y=y_to_train)
    preds = arima_exog_model.predict_in_sample(exog_to_train)

    y_to_test = y.iloc[split:split + day]
    y_exog_to_test = exog.iloc[split:split + day]
    y_arima_exog_forecast = arima_exog_model.predict(
        n_periods=day, exogenous=y_exog_to_test)
    true.extend(y_to_test.values)
    predict.extend(y_arima_exog_forecast.tolist())
    dates.extend(y_to_test.index)
    steps.append(y_to_test.index[-1])

    # Four update-then-forecast rounds, each advancing by ``day`` steps
    num_to_update = 0
    for _ in range(4):
        start = split + num_to_update
        y_to_update = y.iloc[start:start + day]
        exog_to_update = exog.iloc[start:start + day]

        to_test = y.iloc[start + day:start + (day * 2)]
        exog_to_test = exog.iloc[start + day:start + (day * 2)]

        arima_exog_model.update(y_to_update, exogenous=exog_to_update)
        y_arima_exog_forecast = arima_exog_model.predict(
            n_periods=day, exogenous=exog_to_test)

        dates.extend(to_test.index)
        steps.append(to_test.index[-1])
        predict.extend(y_arima_exog_forecast.tolist())
        true.extend(to_test.values)
        num_to_update += day

    # For viz purposes only the last 200 training points are drawn
    y_to_train2 = y_to_train[-200:].to_frame()
    recent_preds = preds[-200:]

    fig = go.Figure()
    traces = (
        go.Scatter(x=y_to_train2.index, y=y_to_train2.Consumption,
                   name='True values',
                   line=dict(color='firebrick', width=4, dash='dot')),
        go.Scatter(x=y_to_train2.index, y=recent_preds,
                   name='In-sample Prediction',
                   line=dict(color='royalblue', width=4)),
        go.Scatter(x=dates, y=predict,
                   name='Prediction',
                   line=dict(color='green', width=4)),
        go.Scatter(x=dates, y=true,
                   name='True',
                   line=dict(color='firebrick', width=4, dash='dot')),
    )
    for trace in traces:
        fig.add_trace(trace)

    fig.update_layout(title='Electricity Consumption in New York',
                      xaxis_title='Date',
                      yaxis_title='Consumption',
                      xaxis_showgrid=True,
                      yaxis_showgrid=True,
                      paper_bgcolor=app_colors['background'],
                      plot_bgcolor=app_colors['background'])
    return fig
if args.fit or args.auto_fit: if args.auto_fit: arima = auto_arima( train, stepwise=True, trace=1, m=args.period, information_criterion="aicc", seasonal=args.seasonal, error_action="ignore", suppress_warnings=True, ) elif args.fit: with warnings.catch_warnings(): warnings.simplefilter("ignore") arima = ARIMA(order=args.order, seasonal_order=args.seasonal_order) arima.fit(train) print(arima.summary()) residuals = arima.resid() print("train lengths: data={} resid={}".format( train_len, residuals.shape[0])) len_delta = train_len - residuals.shape[0] # Diagnostics plot arima.plot_diagnostics(lags=50) box_ljung(residuals, nlags=20).format() plt.gcf().suptitle('Diagnostics Plot') plt.figure() plt.plot(df.value.index[len_delta:train_len],
_check_scoring, cross_validate from pmdarima.datasets import load_wineind import pytest import numpy as np from unittest import mock y = load_wineind() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=150, step=12, h=1), ]) @pytest.mark.parametrize('est', [ ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)), ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=3))]) ]) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('exog', [None, exogenous]) def test_cv_scores(cv, est, verbose, exog): scores = cross_val_score(est, y, exogenous=exog, scoring='mean_squared_error', cv=cv, verbose=verbose) assert isinstance(scores, np.ndarray)
} with pytest.raises(ValueError) as ve: pipeline.predict(3, **kwargs) assert "'n_periods'" in pytest_error_str(ve) # Assert that we can update the model pipeline.update(test, maxiter=5) # And that the fourier transformer was updated properly... assert pipeline.steps_[0][1].n_ == wineind.shape[0] @pytest.mark.parametrize('pipeline', [ Pipeline([ ("arma", ARIMA(order=(2, 0, 0))) ]), Pipeline([ ("arima", ARIMA(order=(2, 1, 0))) ]), Pipeline([ ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12))) ]), Pipeline([ ("fourier", FourierFeaturizer(m=12)), ("arma", ARIMA(order=(2, 0, 0))) ]),
def test_not_fitted_error():
    """``check_is_fitted`` on an unfit ARIMA raises with the right message."""
    with pytest.raises(sk.NotFittedError) as nfe:
        unfit = ARIMA((0, 1, 0))
        sk.check_is_fitted(unfit, "arima_res_")

    message = pytest_error_str(nfe)
    assert "Model has not been fit!" in message
def _report_previous_forecast_error(df):
    """Print how far the previously pickled forecast was from the new data.

    Best-effort only: any failure (no pickle yet, unexpected contents, ...)
    is reported as "RMSE To be computed" and never reaches the caller.
    """
    try:
        # NOTE(review): unpickling is only safe as long as "forecast.pickle"
        # is written exclusively by predict_arima itself.
        with open("forecast.pickle", "rb") as forecast_in:
            previous = pickle.load(forecast_in)

        # predict_arima stores its forecast under "prediction"; fall back to
        # that when no "memory_used" column is present.
        column = "memory_used" if "memory_used" in previous else "prediction"

        # Compare only the rows both frames have.
        n = min(len(df), len(previous))
        error = df["memory_used"].iloc[:n] - previous[column].iloc[:n]

        over = [x for x in error if x < 0]
        under = [x for x in error if x >= 0]
        overestimation = sum(over) / len(over) if over else 0.0
        underestimation = sum(under) / len(under) if under else 0.0

        print("UNDERESTIMATION ERROR: " + str(underestimation))
        print("OVERESTIMATION ERROR: " + str(overestimation))
        print("Mean Absolute Error in Last iteration " + str(error))
    except Exception as e:
        logging.debug("previous forecast error not available: %s", e)
        print("RMSE To be computed")


def predict_arima(df):
    """Fit a seasonal ARIMA on ``df`` and forecast the test-split horizon.

    Steps:
      1. report the error of the previously saved forecast (best-effort);
      2. save ACF/PACF plots to 'acf.png' / 'pacf.png' (best-effort);
      3. pickle ``df`` to 'arima.pickle';
      4. estimate the differencing terms ``d`` and ``D`` (both default to 1
         when the tests fail);
      5. derive ``p`` (capped at 4) and ``q`` (capped at 1) from the number
         of ACF / PACF values at or above ``getThreshold()``;
      6. fit ``ARIMA(order=(p, d, q), seasonal_order=(0, D, 0, 12))`` on the
         whole of ``df`` and forecast as many periods as the last 30% of
         ``df`` contains; the forecast is pickled to 'forecast.pickle' and
         plotted.

    :param df: observations; the column "memory_used" is read.
    :return: DataFrame with a "prediction" column indexed like the test
        split, or ``None`` if fitting/forecasting failed.
    """
    time_in = current_milli_time()

    _report_previous_forecast_error(df)

    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # Persist the latest observations. The original re-read the file first,
    # but never used what it loaded; the net effect was this single dump.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # --- differencing tests ---
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        # Result is not used; the call is kept so that a failure here still
        # falls back to the defaults below.
        adf_test.is_stationary(df["memory_used"])
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    # --- ARIMA model: find p, q dynamically ---
    # NOTE(review): p is taken from the ACF and q from the PACF exactly as
    # before; textbook order selection reads them the other way round.
    # Confirm this is intended.
    threshold_acf = [x for x in acf(df["memory_used"]) if x >= getThreshold()]
    p = min(len(threshold_acf), 4)
    threshold_pacf = [x for x in pacf(df["memory_used"])
                      if x >= getThreshold()]
    q = min(len(threshold_pacf), 1)
    d = nd

    _, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    title = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        stepwise_model.fit(df)

        # Vary the periods as per the forecasting window:
        #   n_periods=30 -> 5 mins, 60 -> 10 mins, 90 -> 15 mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast,
                                       index=test.index,
                                       columns=["prediction"])
        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can report its error
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        fig = go.Figure(data=[trace1, trace2],
                        layout=go.Layout(title=title))
        plot(fig, filename="prediction")

        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)

        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
from pmdarima.datasets import load_airpassengers import pytest import numpy as np from unittest import mock y = load_airpassengers() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=120, step=12, h=1), ]) @pytest.mark.parametrize( 'est', [ ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True), ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), maxiter=2, simple_differencing=True, suppress_warnings=True), Pipeline([ ("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=2, simple_differencing=True)) ]) ] ) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('X', [None, exogenous])
[-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139]]) _, intervals = arma.predict(n_periods=10, return_conf_int=True, alpha=0.05) assert_array_almost_equal(intervals, expected_intervals) @pytest.mark.parametrize( # will be m - d 'model, expected_m_dim', [ pytest.param(ARIMA(order=(2, 0, 0)), wineind.shape[0]), # arma pytest.param(ARIMA(order=(2, 1, 0)), wineind.shape[0] - 1), # arima pytest.param(ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), wineind.shape[0]), # sarimax ]) def test_predict_in_sample_conf_int(model, expected_m_dim): model.fit(wineind) preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05) assert preds.shape[0] == expected_m_dim assert confints.shape == (expected_m_dim, 2) @pytest.mark.parametrize( 'model', [ ARIMA(order=(2, 0, 0)), # arma
def set_ylim(self, mn, mx): self.mn = mn self.mx = mx def mock_qqplot(resid, line, ax): ax.qqplot_called = True def mock_acf_plot(resid, ax, lags): ax.acfplot_called = True @pytest.mark.parametrize('model_type,model', [ pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)), pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)), pytest.param( 'sarimax', ARIMA(order=(1, 1, 0), maxiter=50, seasonal_order=(1, 0, 0, 12))) ]) def test_mock_plot_diagnostics(model_type, model): model.fit(lynx) with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure),\ patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot),\ patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot): diag = model.plot_diagnostics(figsize=(10, 12)) # Asserting on mock attributes to show that we follow the expected
def run():
    """Poll prices for a ticker and print an ARIMA-based trend summary.

    Asks for a ticker symbol, then until ``minutes`` have elapsed it fetches
    the last 1440 candles, fits an ARIMA (order chosen by ``auto_arima``) on
    the log close price, forecasts ``n_periods_ahead`` steps and prints the
    summary. Errors in a round are printed and the loop continues after
    sleeping ``SLEEP`` seconds. Always returns 0.
    """
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)

    while now < timeFinish:
        try:
            now = dt.datetime.now()
            client = Client(environment=PRACTICE,
                            account_id="",
                            access_token=ACCESS_TOKEN)
            history = client.get_instrument_history(instrument=symbol,
                                                    granularity=timeframe,
                                                    candle_format="midpoint",
                                                    count=1440)

            # Close prices indexed by timestamp
            data = pd.DataFrame(history['candles'])
            data = data.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data
            lnprice = np.log(data)

            # Let auto_arima pick the order, then fit a plain ARIMA with it
            model_temp = auto_arima(lnprice.values,
                                    start_p=1, start_q=1,
                                    max_p=1, max_q=1,
                                    m=4, start_P=0,
                                    seasonal=False, d=1, D=1,
                                    trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True,
                                    stepwise=True)
            fit = ARIMA(order=model_temp.order).fit(lnprice.values)

            # Predict, and undo the log transform
            future_forecast = np.exp(fit.predict(n_periods=n_periods_ahead))

            # Calculations
            first, last = future_forecast[0], future_forecast[-1]
            lowest = min(first, last)
            highest = max(first, last)
            current = data[CLOSE].iloc[-1]
            x = ((first - last) / first) * 100
            slope = (first - last) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            trending = "Positivly / Call" if x > 0 else "Negativaly / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")
        except Exception as e:
            print(e)

        time.sleep(SLEEP)

    return 0