Пример #1
0
def test_with_seasonality():
    """Fit a seasonal ARIMA on ``wineind`` and compare its scores with R's."""
    estimator = ARIMA(order=(1, 1, 1),
                      seasonal_order=(0, 1, 1, 12),
                      suppress_warnings=True)
    fit = estimator.fit(y=wineind)
    _try_get_attrs(fit)

    # The R reference values are approximate (AIC ~3004, AICc ~3005,
    # BIC ~3017), so each criterion only has to land within 100 or so.
    tolerance = 100
    assert abs(fit.aic() - 3004) < tolerance
    assert abs(fit.aicc() - 3005) < tolerance
    assert abs(fit.bic() - 3017) < tolerance

    # In-sample prediction must work on the fitted model.
    fit.predict_in_sample()

    # Forecasting with SARIMAX confidence intervals must work as well.
    fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
Пример #2
0
def test_oob_for_issue_29():
    """Fit/predict on sunspots across d, out-of-sample size and exog combos.

    Fits that fail with statsmodels' "invertibility" error are tolerated;
    any other exception fails the test.
    """
    dta = sm.datasets.sunspots.load_pandas().data
    dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    del dta["YEAR"]

    xreg = np.random.RandomState(1).rand(dta.shape[0], 3)

    # Every combination of differencing order, out-of-sample size (cv off/on)
    # and exogenous array (present/absent), in the same order as nested loops.
    combos = [(d, cv, exog)
              for d in (0, 1)
              for cv in (0, 3)
              for exog in (xreg, None)]

    for d, cv, exog in combos:
        try:
            estimator = ARIMA(order=(2, d, 0), out_of_sample_size=cv)
            model = estimator.fit(dta, exogenous=exog)

            # With exogenous features we must hand predict() one row per
            # forecast period; without them it stays None.
            xr = None if exog is None else exog[:3, :]

            _, _ = model.predict(n_periods=3, return_conf_int=True,
                                 exogenous=xr)

        # Statsmodels can be fragile with ARMA coefficient computation, e.g.:
        #   ValueError: The computed initial MA coefficients are
        #       not invertible. You should induce invertibility,
        #       choose a different model order, or ...
        # Only that failure mode is tolerated.
        except Exception as ex:
            if "invertibility" not in pytest_error_str(ex):
                raise
Пример #3
0
def test_new_serialization():
    """Round-trip an ARIMA through joblib with new- and legacy-style pickling."""
    model = ARIMA(order=(0, 0, 0), suppress_warnings=True).fit(y)

    pkl_file = "file.pkl"
    new_loc = "ts_wrapper.pkl"
    try:
        # A freshly fitted model, and its reloaded copy, must NOT use the
        # old-style pickling.
        joblib.dump(model, pkl_file)
        assert not _uses_legacy_pickling(model)
        restored = joblib.load(pkl_file)
        assert not _uses_legacy_pickling(restored)
        preds = restored.predict()
        os.unlink(pkl_file)

        # Emulate the legacy layout: store the arima_res_ piece in its own
        # file and point the estimator at it.
        model.summary()
        model.arima_res_.save(fname=new_loc)
        model.tmp_pkl_ = new_loc
        assert _uses_legacy_pickling(model)

        # The legacy layout must still round-trip to identical predictions.
        joblib.dump(model, pkl_file)
        legacy_restored = joblib.load(pkl_file)
        assert_array_almost_equal(legacy_restored.predict(), preds)

        # Clearing the cached state removes the side file ...
        model._clear_cached_state()
        assert not os.path.exists(new_loc)

        # ... after which loading the pickle has to fail with an OSError.
        with pytest.raises(OSError) as exc_info:
            joblib.load(pkl_file)
        assert "Does it still" in str(exc_info), exc_info

    finally:
        for leftover in (pkl_file, new_loc):
            _unlink_if_exists(leftover)
Пример #4
0
 def run_auto_arimax(self):
     """Grid-search ARIMA orders and keep the fit with the lowest AIC.

     Every ``(p, d, q)`` with ``p`` and ``q`` in 0..3 and ``d`` in 0..1 is
     fitted on ``self.endo_obs2`` with ``self.exo_obs`` as exogenous input.
     Orders that fail to fit are skipped.

     Side effects:
         * ``self.arima_model`` is set to the best (lowest-AIC) fitted model.
           Previously it ended up holding the *last* model that fitted, which
           did not match the order reported in ``self.parameters``.
         * ``self.parameters`` is set to
           ``[best_pdq, self.lambda_boxcox[0], lower_aic]`` and printed.

     If no order can be fitted, ``self.parameters`` keeps the legacy
     placeholders (``[0, 0, 0]`` and ``99999.0``) and ``self.arima_model`` is
     left untouched.

     :return: ``self.arima_model``
     """
     # Start from +inf so that a legitimately large AIC can still win; the
     # old fixed 99999 sentinel silently rejected every such candidate.
     lower_aic = float("inf")
     best_pdq = [0, 0, 0]
     best_model = None

     for pdq in itertools.product(range(0, 4), range(0, 2), range(0, 4)):
         try:
             candidate = ARIMA(order=pdq,
                               suppress_warnings=True).fit(
                                   y=self.endo_obs2,
                                   exogenous=self.exo_obs)
             candidate_aic = candidate.aic()
         except Exception:
             # Best effort: an order that cannot be fitted is simply skipped.
             # (A bare ``except`` would also swallow KeyboardInterrupt.)
             continue

         if candidate_aic < lower_aic:
             lower_aic = candidate_aic
             best_pdq = tuple(candidate.order)
             best_model = candidate

     if best_model is None:
         # Keep the value callers saw before when nothing could be fitted.
         lower_aic = float(99999)
     else:
         self.arima_model = best_model

     # Compile parameters to list
     self.parameters = [best_pdq, self.lambda_boxcox[0], lower_aic]
     print(self.parameters)
     return (self.arima_model)
Пример #5
0
def test_basic_arma():
    """Check scores, intercept, forecasts and intervals of an ARMA(0, 0, 0)."""
    arma = ARIMA(order=(0, 0, 0), suppress_warnings=True)
    preds = arma.fit_predict(y)  # fit/predict for coverage

    # No out-of-sample size was requested, so there are no OOB predictions.
    assert arma.oob_preds_ is None

    # Scores and the intercept (parameter 0); AIC and the intercept match
    # the equivalent R fit.
    assert_almost_equal(arma.aic(), 11.201, decimal=3)
    intercept = arma.params()[0]
    assert_almost_equal(intercept, 0.441, decimal=3)
    assert_almost_equal(arma.aicc(), 11.74676, decimal=5)
    assert_almost_equal(arma.bic(), 13.639060053303311, decimal=5)

    # The ten forecasts returned by fit_predict are all the same constant.
    expected_preds = np.full(10, 0.44079876)
    assert_array_almost_equal(preds, expected_preds)

    # Likewise every forecast step shares one confidence interval.
    expected_intervals = np.tile([-0.10692387, 0.98852139], (10, 1))
    _, intervals = arma.predict(n_periods=10, return_conf_int=True, alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)
Пример #6
0
    def _build(self, **config):
        """
        Create the underlying ARIMA estimator from hyperparameters.

        Missing keys fall back to p=2, d=0, q=2, P=1, D=0, Q=1, m=7,
        seasonality_mode=True and the current ``self.metric``.

        :param config: hyperparameters for the model
        """
        self.seasonal = config.get('seasonality_mode', True)
        self.metric = config.get('metric', self.metric)

        order = (config.get('p', 2), config.get('d', 0), config.get('q', 2))

        # The seasonal component is zeroed out when seasonality is disabled.
        if self.seasonal:
            seasonal_order = (config.get('P', 1),
                              config.get('D', 0),
                              config.get('Q', 1),
                              config.get('m', 7))
        else:
            seasonal_order = (0, 0, 0, 0)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)
def run_ARIMA(data, param):
    """Walk-forward, one-step-ahead ARIMA evaluation over the tail of ``data``.

    The last ``T_test = int(T * testsize)`` positions along the final axis are
    forecast one at a time. For step ``i`` the window
    ``data[..., i:T - T_test + i]`` is used for training and a fresh model is
    fitted for every row of that window.

    :param data: array whose last axis is time; rows along the first axis are
        modelled independently.
    :param param: mapping with ``'order'`` (passed to ``ARIMA``) and
        ``'testsize'`` (fraction of the time axis to forecast).
    :return: dict with ``'acc'`` and ``'nrmse'`` (from ``get_acc`` / ``nrmse``
        on forecasts vs. true values) and ``'ave_time'``, the mean wall-clock
        seconds per forecast step.
    :raises ValueError: if ``testsize`` leaves no step to forecast.
    """
    order = param['order']
    testsize = param['testsize']

    T = data.shape[-1]
    T_test = int((T * testsize) // 1)
    if T_test < 1:
        # With zero steps ``data[..., -0:]`` would select the whole series
        # and the timing average would divide by zero, so fail clearly here.
        raise ValueError(
            "testsize=%r leaves no test steps for a series of length %d"
            % (testsize, T))

    result_full = np.zeros([data.shape[0], T_test])
    total_time = 0

    for i in range(T_test):
        # Sliding training window that ends right before the i-th test point.
        y = data[..., i:T - T_test + i].copy()
        start = time.time()

        for j, series in enumerate(y):
            model = ARIMA(order,
                          suppress_warnings=True,
                          enforce_stationarity=True)
            result = model.fit_predict(series, n_periods=1)
            result_full[j, i] = result[..., -1]

        total_time += time.time() - start

    true_value = data[..., -T_test:]

    stat = {
        'acc': get_acc(result_full, true_value),
        'nrmse': nrmse(result_full, true_value),
        # One timed round per forecast step.
        'ave_time': total_time / T_test,
    }
    return stat
Пример #8
0
def test_the_r_src():
    """Reproduce the example shipped with the R source and compare results."""
    # this is the test the R code provides
    estimator = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True)
    fit = estimator.fit(abc)

    # R reports AIC ~135, AICc ~137 and BIC ~145; each must agree within 1.
    assert abs(135 - fit.aic()) < 1.0
    assert abs(137 - fit.aicc()) < 1.0
    assert abs(145 - fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    # statsmodels puts the mean at the front rather than the end.
    r_coefficients = np.array([5.0370, -0.6515, -0.2449, 0.8012])
    assert_almost_equal(fit.params(), r_coefficients, decimal=2)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
    #             max.q=5, max.order=100, stepwise=F)
    search_kwargs = dict(max_p=5,
                         max_d=5,
                         max_q=5,
                         max_order=100,
                         seasonal=False,
                         trend='c',
                         suppress_warnings=True,
                         error_action='ignore')
    fit = auto_arima(abc, **search_kwargs)

    # this differs from the R fit with a slightly higher AIC...
    assert abs(137 - fit.aic()) < 1.0  # R's is 135.28
Пример #9
0
def test_the_r_src():
    """Reproduce the example shipped with the R source and compare results."""
    # this is the test the R code provides
    estimator = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True)
    fit = estimator.fit(abc)

    # R reports AIC 135.4, AICc ~137 and BIC ~145; each must agree within 1.
    assert abs(135.4 - fit.aic()) < 1.0
    assert abs(137 - fit.aicc()) < 1.0
    assert abs(145 - fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    assert_almost_equal(fit.arparams(), [-0.6515, -0.2449], decimal=3)
    assert_almost_equal(fit.maparams(), [0.8012], decimal=3)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5,
    #             max.q=5, max.order=100, stepwise=F)
    search_kwargs = dict(max_p=5,
                         max_d=5,
                         max_q=5,
                         max_order=100,
                         seasonal=False,
                         trend='c',
                         suppress_warnings=True,
                         error_action='ignore')
    fit = auto_arima(abc, **search_kwargs)

    assert abs(135.28 - fit.aic()) < 1.0  # R's is 135.28
Пример #10
0
    exog = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec,
               exogenous=exog,
               out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, exogenous=exog)


@pytest.mark.parametrize(
    # will be m - d
    'model',
    [
        ARIMA(order=(2, 0, 0)),  # arma
        ARIMA(order=(2, 1, 0)),  # arima
        ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),  # sarimax
    ])
def test_predict_in_sample_conf_int(model):
    """In-sample predictions and intervals have one row per observation."""
    model.fit(wineind)
    n_obs = wineind.shape[0]

    in_sample = model.predict_in_sample(return_conf_int=True, alpha=0.05)
    preds, confints = in_sample

    assert preds.shape[0] == n_obs
    assert confints.shape == (n_obs, 2)


@pytest.mark.parametrize(
    'model',
    [
        ARIMA(order=(2, 0, 0)),  # arma
Пример #11
0
def test_cross_val_predict_error():
    """``cross_val_predict`` rejects this sliding-window CV with ValueError."""
    window_cv = SlidingWindowForecastCV(step=24, h=1)
    with pytest.raises(ValueError):
        estimator = ARIMA(order=(2, 1, 0), maxiter=3)
        cross_val_predict(estimator, y, cv=window_cv)
Пример #12
0
    _check_scoring, cross_validate, cross_val_predict, _check_averaging
from pmdarima.datasets import load_wineind
import pytest
import numpy as np
from unittest import mock

# Module-level fixtures: the wineind series and a reproducible (seeded)
# random exogenous matrix with one row per observation and two columns.
y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1)),
    ARIMA(order=(1, 1, 2),
          seasonal_order=(0, 1, 1, 12),
          suppress_warnings=True),
    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", ARIMA(order=(2, 1, 0), maxiter=3)),
    ]),
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    """``cross_val_score`` returns an ndarray for every parameter combo."""
    score_kwargs = dict(exogenous=exog,
                        scoring='mean_squared_error',
                        cv=cv,
                        verbose=verbose)
    scores = cross_val_score(est, y, **score_kwargs)
    assert isinstance(scores, np.ndarray)
Пример #13
0
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

# Module-level fixture: the wineind series that test_clonable fits on.
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore'))
        ])
    ]
)
def test_clonable(est):
    """A fitted estimator clones into a distinct object of the same class."""
    est.fit(y)
    duplicate = clone(est)

    assert isinstance(duplicate, est.__class__)
    assert duplicate is not est
Пример #14
0
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

# Module-level fixture: the wineind series that test_clonable fits on.
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore'))
        ])
    ]
)
def test_clonable(est):
    """A fitted estimator clones into a distinct object of the same class."""
    est.fit(y)
    duplicate = clone(est)

    assert isinstance(duplicate, est.__class__)
    assert duplicate is not est
# It may be a lost cause, but by all means prove me wrong.


def example_pmd():
    """Run ``pmd_exogenous`` over the hospital example and return its state.

    Each (series, exogenous) pair from ``hospital_with_exog(k=3)`` is passed
    to ``pmd_exogenous`` together with the same dict ``s``; only the first
    500 series are used. The per-call results are collected but not returned.

    :return: the dict ``s`` that was handed to every ``pmd_exogenous`` call.
    """
    s = {}
    y, a = hospital_with_exog(k=3)

    x = []
    for yj, aj in zip(y[:500], a):
        x.append(pmd_exogenous(y=yj, s=s, k=3, a=aj))

    return s


def arima_res_to_dict(arima_res):
    """Return the instance ``__dict__`` of ``arima_res`` (not a copy)."""
    return vars(arima_res)


def pmd_to_dict(pmd):
    """Replace the model in ``pmd`` by plain state dicts, in place.

    ``pmd['model']`` becomes the model's ``__getstate__()`` result, and the
    ``'arima_res_'`` entry of that state is in turn replaced by the output of
    ``arima_res_to_dict``.

    :param pmd: mapping with a ``'model'`` entry; it is mutated.
    :return: the same ``pmd`` object.
    """
    model_state = pmd['model'].__getstate__()
    pmd['model'] = model_state
    pmd['model']['arima_res_'] = arima_res_to_dict(pmd['model']['arima_res_'])
    return pmd


def pmd_from_dict(pmd):
    """Blank out the ``'arima_res_'`` entry of ``pmd['model']`` in place.

    The entry is overwritten with an empty string; nothing is returned.
    """
    model_state = pmd['model']
    model_state['arima_res_'] = ''


if __name__ == '__main__':
    # Ad-hoc exploration: build the example, recreate an unfitted ARIMA with
    # the same hyperparameters, and pull the fitted parameter vector out of
    # the stored statsmodels results object.
    pmd = example_pmd()
    model = pmd['model']
    model1 = ARIMA(**model.get_params())
    prms = vars(vars(model)['arima_res_'])['_results'].params
Пример #16
0
def test_to_dict_raises_attribute_error_on_unfit_model():
    """``to_dict`` on a never-fitted ARIMA raises ``AttributeError``."""
    unfit_model = ARIMA(order=(1, 1, 0))
    with pytest.raises(AttributeError):
        unfit_model.to_dict()
Пример #17
0
def test_oob_for_issue_28():
    """Out-of-sample scoring with an exogenous array (issue 28).

    Fits one model on all of ``hr`` with ``out_of_sample_size=10`` and a
    second one on ``hr[:-10]`` without out-of-sample scoring, then checks that
    the OOB score, the parameters and (after ``add_new_observations``) the
    forecasts of the two agree to ``rtol=1e-2``.

    NOTE: ``rs`` is a shared random state, so the order of the ``rs.rand``
    calls below determines the data each step sees.
    """
    # Continuation of above: can we do one with an exogenous array, too?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2),
                  suppress_warnings=True,
                  out_of_sample_size=10).fit(y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # Assert that the endog shapes match. First is equal to the original,
    # and the second is the differenced array, with original shape - d.
    assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
    assert arima.arima_res_.model.endog.shape[0] == hr.shape[0] - 1

    # Now assert the same for exog
    assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
    assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0] - 1

    # Compare the OOB score to an equivalent fit on data - 10 obs, but
    # without any OOB scoring, and we'll show that the OOB scoring in the
    # first IS in fact only applied to the first (train - n_out_of_bag)
    # samples
    arima_no_oob = ARIMA(order=(2, 1, 2),
                         suppress_warnings=True,
                         out_of_sample_size=0).fit(y=hr[:-10],
                                                   exogenous=xreg[:-10, :])

    # Score the reduced model's 10-step forecast with its own scoring
    # function; this must reproduce the OOB score of the first model.
    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    preds = arima_no_oob.predict(n_periods=10, exogenous=xreg[-10:, :])
    assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)

    # Draw the exogenous rows for the 5-step forecasts below, then show that
    # the model parameters agree (to rtol=1e-2, not bit-for-bit).
    xreg_test = rs.rand(5, 4)
    assert np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # Now assert on the forecast differences.
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5, exogenous=xreg_test)

    # The forecasts must NOT be almost equal yet: the inner assertion is
    # expected to raise AssertionError.
    assert_raises(AssertionError, assert_array_almost_equal,
                  with_oob_forecasts, no_oob_forecasts)

    # But after we update the no_oob model with the latest data, we should
    # be producing the same forecasts (to rtol=1e-2)

    # First, show we'll fail if we try to add observations with no exogenous
    assert_raises(ValueError, arima_no_oob.add_new_observations, hr[-10:],
                  None)

    # Also show we'll fail if we try to add mis-matched shapes of data
    # (10 observations vs. 5 exogenous rows)
    assert_raises(ValueError, arima_no_oob.add_new_observations, hr[-10:],
                  xreg_test)

    # Show we fail if we try to add observations with a different dim exog
    assert_raises(ValueError, arima_no_oob.add_new_observations, hr[-10:],
                  xreg_test[:, :2])

    # Actually add them now, and compare the forecasts (should be the same)
    arima_no_oob.add_new_observations(hr[-10:], xreg[-10:, :])
    assert np.allclose(with_oob_forecasts,
                       arima_no_oob.predict(n_periods=5, exogenous=xreg_test),
                       rtol=1e-2)
Пример #18
0
    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]


@pytest.mark.parametrize(
    'pipeline',
    [
        Pipeline([("arma", ARIMA(order=(2, 0, 0)))]),
        Pipeline([("arima", ARIMA(order=(2, 1, 0)))]),
        Pipeline([
            ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)))
        ]),
        Pipeline([("fourier", FourierFeaturizer(m=12)),
                  ("arma", ARIMA(order=(2, 0, 0)))]),
        Pipeline([("fourier", FourierFeaturizer(m=12)),
                  ("arima", ARIMA(order=(2, 1, 0)))]),

        # one with a boxcox transformer
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("fourier", FourierFeaturizer(m=12)),
                  ("arima",
                   AutoARIMA(seasonal=False,
                             stepwise=True,
Пример #19
0
# Ordered split: the first train_len points are used for fitting, the
# remainder is held out.
train, test = data[:train_len], data[train_len:]

# KPSS test on the full series; the four values returned by tsa.kpss are
# unpacked into a named tuple so the printout is self-describing.
KPSSResults = namedtuple("KPSSResults",
                         ["kpss_stat", "p_value", "lags", "critical_values"])
kpss_results = KPSSResults(*tsa.kpss(data, nlags='auto'))
print("KPSS results:\n", kpss_results)

# Switch between an automatic non-seasonal order search and the fixed
# (4, 1, 4) order below.
auto_fit = False
if auto_fit:
    arima = auto_arima(train, stepwise=True, trace=1, seasonal=False)
    print(arima.summary())
else:
    # Warnings raised while constructing and fitting the model are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # NOTE(review): seasonal_order=None presumably means "no seasonal
        # component" -- confirm against the installed pmdarima version.
        arima = ARIMA(order=(4, 1, 4), seasonal_order=None)
        arima.fit(train)

# Diagnostics plot of the fitted model, titled on the current figure.
arima.plot_diagnostics(lags=50)
plt.gcf().suptitle('Diagnostics Plot', fontsize=14)

# !! not necessary !! Everything already plotted
# Plot Residuals and fitted values
# plt.figure()
# fitted_values = arima.predict_in_sample()
# plt.plot(df.index[:train_len - 1], fitted_values,
#          color='C0', label="Fitted values")
# plt.plot(pd.to_datetime(df.index), data, color='C1', label="Data")
# plt.plot(df.index[:train_len - 1], arima.resid(),
#          color='C2', label="Residuals")
Пример #20
0
        # Show we get an OSError now
        with pytest.raises(OSError) as ose:
            joblib.load(pkl_file)
        assert "Does it still" in str(ose), ose

    finally:
        _unlink_if_exists(pkl_file)
        _unlink_if_exists(new_loc)


@pytest.mark.parametrize(
    'model',
    [
        # ARMA
        ARIMA(order=(1, 0, 0)),

        # ARIMA
        ARIMA(order=(1, 1, 2)),

        # SARIMAX
        ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
    ])
def test_issue_104(model):
    # Issue 104 shows that observations were not being updated appropriately.
    # We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX)
    endog = wineind
    train, test = endog[:125], endog[125:]

    model.fit(train)
    preds1 = model.predict(n_periods=100)
Пример #21
0
def model_plot(days):
    """Build a Plotly figure of a walk-forward ARIMA forecast of consumption.

    Steps:
      1. Load ``data/new_york.csv``, resample it to daily sums and restrict it
         (together with ``data/weather/weatherNY.csv``) to 2016-2019.
      2. Build exogenous features: a weekend flag, the ``TMIN`` weather column
         and sine/cosine terms of the day of year with periods 638 and 516.
      3. Hold out the last 100 observations. Round 0 fits the model on the
         rest and forecasts ``days`` steps; each of the four later rounds
         updates the model with the block that was just forecast and
         forecasts the next ``days`` steps.
      4. Plot the last 200 training values with their in-sample predictions,
         followed by the forecasts against the true values.

    :param days: forecast horizon per round; converted with ``int()``.
    :return: the ``go.Figure`` holding the four traces.
    """
    days = int(days)
    pd.plotting.register_matplotlib_converters()

    df = pd.read_csv('data/new_york.csv')
    df['Date'] = pd.to_datetime(df['Date'])

    # Convert the data to daily usage.
    df.index = df.Date
    df = df.drop('Date', axis=1)
    # Resample the dataframe every 1 day (D) and sum over each day.
    df = df.resample('D').sum()
    df = df.tz_localize(None)

    nyc_weather = pd.read_csv('data/weather/weatherNY.csv')
    nyc_weather['DATE'] = pd.to_datetime(nyc_weather['DATE'])
    nyc_weather = nyc_weather.set_index('DATE')
    nyc_weather.drop(['NAME','STATION'],axis=1,inplace=True)
    nyc_weather = nyc_weather['2015-07-01':'2020-08-10']

    df = df[:'2020-08-10']

    # Trying 1 day increments with EXOG. MAYBE BEST CANDIDATE? With fourier
    # terms june to june as 638 and august to august 516.
    day = days
    real_values = []
    predictions = []

    df1 = df["2016":"2019"]
    nyc_weather = nyc_weather["2016":"2019"]

    y = df1.Consumption

    exog = pd.DataFrame({'date': y.index})
    exog = exog.set_index(pd.PeriodIndex(exog['date'], freq='D'))
    # 0 when dayofweek < 5, 1 otherwise.
    exog['is_weekend'] = np.where(exog.index.dayofweek < 5,0,1)

    # Add weather data.
    # NOTE(review): .values assigns positionally, so this assumes the weather
    # slice has exactly one row per row of y -- confirm.
    exog['TMIN'] = nyc_weather['TMIN'].values
    # First and second harmonics of the day of year for periods 638 and 516.
    exog['sin1'] = np.sin(2 * np.pi * exog.index.dayofyear / 638)
    exog['cos1'] = np.cos(2 * np.pi * exog.index.dayofyear / 638)
    exog['sin2'] = np.sin(4 * np.pi * exog.index.dayofyear /638)
    exog['cos2'] = np.cos(4 * np.pi * exog.index.dayofyear /638)
    exog['sin3'] = np.sin(2 * np.pi * exog.index.dayofyear / 516)
    exog['cos3'] = np.cos(2 * np.pi * exog.index.dayofyear / 516)
    exog['sin4'] = np.sin(4 * np.pi * exog.index.dayofyear /516)
    exog['cos4'] = np.cos(4 * np.pi * exog.index.dayofyear /516)



    exog = exog.drop(columns=['date'])

    # Everything except the last 100 observations is used for the first fit.
    num_to_update = 0
    y_to_train = y.iloc[:(len(y)-100)]    
    exog_to_train = exog.iloc[:(len(y)-100)]

    dates = []

    # NOTE(review): steps is filled below but never read afterwards.
    steps = []

    # NOTE(review): five rounds of `day` steps draw on the 100 held-out rows;
    # for day > 20 the later windows would run past the data -- confirm the
    # supported range of `days`.
    for i in range(5):

        # First iteration: train the model.
        if i == 0:
            # NOTE(review): the exogenous frame is passed to the ARIMA
            # constructor while fit() receives only y -- verify that the
            # model is really trained with these features.
            arima_exog_model = ARIMA(order=(3, 0, 1), seasonal_order=(2, 0, 0, 7),exogenous=exog_to_train, error_action='ignore',
                                    initialization='approximate_diffuse', suppress_warnings=True).fit(y=y_to_train)  

            preds = arima_exog_model.predict_in_sample(exog_to_train)            
            # First prediction: the `day` rows right after the training span.
            y_to_test = y.iloc[(len(y)-100):(len(y)-100+day)]
            y_exog_to_test = exog.iloc[(len(y)-100):(len(y)-100+day)]
            y_arima_exog_forecast = arima_exog_model.predict(n_periods=day, exogenous=y_exog_to_test)
            
            real_values.append(y_to_test.values)
            predictions.append(y_arima_exog_forecast.tolist())
            
            dates.append(y_to_test.index)
            steps.append(y_to_test.index[-1])
                                                    
            #y_arima_exog_forecast = arima_exog_model.predict(n_periods=2, exogenous=exog_to_test)
        else:
            # The block forecast in the previous round becomes new training
            # data ...
            y_to_update = y.iloc[(len(y)-100+num_to_update):(len(y)-100+num_to_update)+day]
            exog_to_update = exog.iloc[(len(y)-100+num_to_update):(len(y)-100+num_to_update)+day]

            # ... and the `day` rows after it are the new test window.
            to_test = y.iloc[(len(y)-100+num_to_update)+day:(len(y)-100+num_to_update)+(day*2)]
            exog_to_test = exog.iloc[(len(y)-100+num_to_update)+day:(len(y)-100+num_to_update)+(day*2)]
            # Update the model, then forecast the test window.

            arima_exog_model.update(y_to_update,exogenous=exog_to_update)
            y_arima_exog_forecast = arima_exog_model.predict(n_periods=day, exogenous=exog_to_test)

            dates.append(to_test.index)
            steps.append(to_test.index[-1])

            predictions.append(y_arima_exog_forecast.tolist())    
            real_values.append(to_test.values)
            
            num_to_update += day


    # Flatten the per-round lists into single sequences for plotting.
    predict =  [item for sublist in predictions for item in sublist]
    true = [item for sublist in real_values for item in sublist]
    dates = [item for sublist in dates for item in sublist]

    # For visualisation purposes only the last 200 training points are shown.
    y_to_train2 = y_to_train[-200:]
    preds = preds[-200:]
    y_to_train2 = y_to_train2.to_frame()
    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(x=y_to_train2.index, y=y_to_train2.Consumption, name='True values',
                            line=dict(color='firebrick', width=4,dash='dot')))

    fig.add_trace(go.Scatter(x=y_to_train2.index, y=preds[-200:], name='In-sample Prediction',
                            line=dict(color='royalblue', width=4)))

    fig.add_trace(go.Scatter(x=dates, y=predict, name='Prediction',
                            line=dict(color='green', width=4)))

    fig.add_trace(go.Scatter(x=dates, y=true, name='True',
                            line=dict(color='firebrick', width=4,dash='dot')))

    # app_colors is not defined in this function; it must exist in the
    # enclosing module.
    fig.update_layout(title='Electricity Consumption in New York',
                    xaxis_title='Date',
                    yaxis_title='Consumption',
                    xaxis_showgrid=True,
                    yaxis_showgrid=True,
                    #autosize=False,
                    #width=500,
                    #height=500,
                    paper_bgcolor=app_colors['background'], 
                    plot_bgcolor=app_colors['background'])


    return fig 
Пример #22
0
if args.fit or args.auto_fit:
    if args.auto_fit:
        arima = auto_arima(
            train,
            stepwise=True,
            trace=1,
            m=args.period,
            information_criterion="aicc",
            seasonal=args.seasonal,
            error_action="ignore",
            suppress_warnings=True,
        )
    elif args.fit:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            arima = ARIMA(order=args.order,
                          seasonal_order=args.seasonal_order)
            arima.fit(train)

    print(arima.summary())

    residuals = arima.resid()
    print("train lengths: data={} resid={}".format(
        train_len, residuals.shape[0]))
    len_delta = train_len - residuals.shape[0]

    # Diagnostics plot
    arima.plot_diagnostics(lags=50)
    box_ljung(residuals, nlags=20).format()
    plt.gcf().suptitle('Diagnostics Plot')
    plt.figure()
    plt.plot(df.value.index[len_delta:train_len],
Пример #23
0
    _check_scoring, cross_validate
from pmdarima.datasets import load_wineind
import pytest
import numpy as np
from unittest import mock

# Module-level fixtures: the wineind series and a reproducible (seeded)
# random exogenous matrix with one row per observation and two columns.
y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
    ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)),
    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", ARIMA(order=(2, 1, 0), maxiter=3)),
    ]),
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    """``cross_val_score`` returns an ndarray for every parameter combo."""
    score_kwargs = dict(exogenous=exog,
                        scoring='mean_squared_error',
                        cv=cv,
                        verbose=verbose)
    scores = cross_val_score(est, y, **score_kwargs)
    assert isinstance(scores, np.ndarray)
Пример #24
0
    }

    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]


@pytest.mark.parametrize('pipeline', [
    Pipeline([
        ("arma", ARIMA(order=(2, 0, 0)))
    ]),

    Pipeline([
        ("arima", ARIMA(order=(2, 1, 0)))
    ]),

    Pipeline([
        ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)))
    ]),

    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arma", ARIMA(order=(2, 0, 0)))
    ]),
Пример #25
0
def test_not_fitted_error():
    """``check_is_fitted`` on a fresh ARIMA raises ``NotFittedError``."""
    with pytest.raises(sk.NotFittedError) as exc_info:
        unfit_model = ARIMA((0, 1, 0))
        sk.check_is_fitted(unfit_model, "arima_res_")

    message = pytest_error_str(exc_info)
    assert "Model has not been fit!" in message
Пример #26
0
def predict_arima(df):
    """Fit a seasonal ARIMA on ``df`` and forecast the hold-out horizon.

    The function is a best-effort pipeline: every stage reports its own
    failure and the run continues with defaults.

    1. Score the forecast pickled by the previous call (``forecast.pickle``)
       against the new observations and print the mean over-/under-estimation
       and the mean absolute error.
    2. Save PACF/ACF plots to ``pacf.png`` / ``acf.png``.
    3. Persist ``df`` to ``arima.pickle``.
    4. Estimate the differencing terms, derive ``p`` / ``q`` from the number
       of ACF / PACF values at or above ``getThreshold()`` (capped at 4 and
       1), then fit the model on ``df``.
    5. Forecast ``len(test)`` periods, where ``test`` is the last 30% of
       ``df``, pickle the forecast to ``forecast.pickle`` and plot it.

    :param df: pandas DataFrame with a ``memory_used`` column.
    :return: DataFrame with a single ``prediction`` column indexed by the
        hold-out split's index, or ``None`` if fitting/forecasting failed.
    """
    time_in = current_milli_time()

    # --- 1. Error report for the previous forecast -------------------------
    # Any failure here (no previous forecast yet, incompatible pickle, ...)
    # is reported with the same placeholder message as before.
    try:
        # NOTE(review): pickle.load is only safe because this function is the
        # sole writer of forecast.pickle; never point it at untrusted files.
        with open("forecast.pickle", "rb") as forecast_in:
            previous_forecast = pickle.load(forecast_in)

        # Compare the overlapping prefix only. The stored forecast carries
        # its values in the "prediction" column (see the dump further down).
        overlap = min(len(df), len(previous_forecast))
        error = (df["memory_used"].iloc[:overlap]
                 - previous_forecast["prediction"].iloc[:overlap])

        # Negative errors are over-estimations, the rest under-estimations;
        # an empty group is reported as 0.0 instead of dividing by zero.
        overestimation = [x for x in error if x < 0]
        underestimation = [x for x in error if x >= 0]
        mean_over = (sum(overestimation) / len(overestimation)
                     if overestimation else 0.0)
        mean_under = (sum(underestimation) / len(underestimation)
                      if underestimation else 0.0)

        print("UNDERESTIMATION ERROR: " + str(mean_under))
        print("OVERESTIMATION ERROR: " + str(mean_over))
        print("Mean Absolute Error in Last iteration "
              + str(error.abs().mean()))
    except Exception:
        print("RMSE To be computed")

    # --- 2. Diagnostic plots ------------------------------------------------
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # --- 3. Persist the latest observations ---------------------------------
    # The file has always ended up holding just ``df``: the earlier
    # load-and-append of the old content was discarded before the write.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # --- 4. Differencing terms ----------------------------------------------
    nd = 1
    nsd = 1
    try:
        adf_test = ADFTest(alpha=0.05)
        # NOTE(review): the result is unused. If the installed pmdarima's
        # is_stationary returns a bare bool, this unpacking raises and the
        # handler below pins nd to 1 -- confirm against the installed version.
        p_val, should_diff = adf_test.is_stationary(df["memory_used"])

        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)

    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    # --- ARIMA model: find p, q dynamically ---------------------------------
    # NOTE(review): p is derived from the ACF and q from the PACF, as in the
    # original code; the textbook heuristic is the other way round -- confirm.
    threshold = getThreshold()

    acf_lags = acf(df["memory_used"])
    p = min(sum(1 for x in acf_lags if x >= threshold), 4)

    pacf_lags = pacf(df["memory_used"])
    q = min(sum(1 for x in pacf_lags if x >= threshold), 1)
    d = nd

    _, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    x = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        # NOTE(review): the model is fitted on all of df while the forecast
        # is labelled with the index of the in-sample hold-out -- confirm
        # that this is intended.
        stepwise_model.fit(df)

        # Vary the periods as per the forecasting window:
        #   n_periods = 30 -> 5 mins
        #   n_periods = 60 -> 10 mins
        #   n_periods = 90 -> 15 mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast,
                                       index=test.index,
                                       columns=["prediction"])

        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so that the next call can score it.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        data = [trace1, trace2]
        layout = go.Layout(
            title=x
        )
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        # NOTE(review): the label says RNN although an ARIMA is timed; the
        # text is left untouched in case the output is parsed downstream.
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
Пример #27
0
from pmdarima.datasets import load_airpassengers
import pytest
import numpy as np
from unittest import mock

# Module-level data built once at import time.
y = load_airpassengers()
# Seeded RandomState(1) makes the random two-column array reproducible; it
# has one row per entry along y's first axis. It is one of the values fed to
# the 'X' parametrization further down.
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
        ARIMA(order=(1, 1, 2),
              seasonal_order=(0, 1, 1, 12),
              maxiter=2,
              simple_differencing=True,
              suppress_warnings=True),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", ARIMA(order=(2, 1, 0),
                            maxiter=2,
                            simple_differencing=True))
        ])
    ]
)
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('X', [None, exogenous])
Пример #28
0
                                   [-0.10692387, 0.98852139],
                                   [-0.10692387, 0.98852139],
                                   [-0.10692387, 0.98852139],
                                   [-0.10692387, 0.98852139],
                                   [-0.10692387, 0.98852139],
                                   [-0.10692387, 0.98852139]])

    _, intervals = arma.predict(n_periods=10, return_conf_int=True, alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)


@pytest.mark.parametrize(
    # second value is the expected number of in-sample rows ("m - d")
    ('model', 'expected_m_dim'),
    [
        pytest.param(ARIMA(order=(2, 0, 0)), wineind.shape[0]),  # arma
        pytest.param(ARIMA(order=(2, 1, 0)), wineind.shape[0] - 1),  # arima
        pytest.param(ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),
                     wineind.shape[0]),  # sarimax
    ])
def test_predict_in_sample_conf_int(model, expected_m_dim):
    """Check in-sample predictions and their confidence-interval shapes.

    Fits ``model`` on ``wineind`` and asserts that ``predict_in_sample``
    called with ``return_conf_int=True`` yields ``expected_m_dim``
    predictions together with an ``(expected_m_dim, 2)`` interval array.
    """
    model.fit(wineind)

    in_sample = model.predict_in_sample(return_conf_int=True, alpha=0.05)
    point_preds, intervals = in_sample

    n_preds = point_preds.shape[0]
    assert n_preds == expected_m_dim
    assert intervals.shape == (expected_m_dim, 2)


@pytest.mark.parametrize(
    'model',
    [
        ARIMA(order=(2, 0, 0)),  # arma
Пример #29
0
    def set_ylim(self, mn, mx):
        """Store the given y-limits as ``self.mn`` and ``self.mx``."""
        self.mn, self.mx = mn, mx


def mock_qqplot(resid, line, ax):
    """Stand-in for a qqplot call: flag ``ax`` as having been plotted on.

    ``resid`` and ``line`` are accepted but not used; the only effect is
    setting ``ax.qqplot_called`` to ``True``.
    """
    setattr(ax, "qqplot_called", True)


def mock_acf_plot(resid, ax, lags):
    """Stand-in for an ACF plot call: flag ``ax`` as having been plotted on.

    ``resid`` and ``lags`` are accepted but not used; the only effect is
    setting ``ax.acfplot_called`` to ``True``.
    """
    setattr(ax, "acfplot_called", True)


@pytest.mark.parametrize('model_type,model', [
    pytest.param('arma', ARIMA(order=(1, 0, 0), maxiter=50)),
    pytest.param('arima', ARIMA(order=(1, 1, 0), maxiter=50)),
    pytest.param(
        'sarimax',
        ARIMA(order=(1, 1, 0), maxiter=50, seasonal_order=(1, 0, 0, 12)))
])
def test_mock_plot_diagnostics(model_type, model):
    model.fit(lynx)

    with patch('statsmodels.graphics.utils.create_mpl_fig', MockMPLFigure),\
            patch('statsmodels.graphics.gofplots.qqplot', mock_qqplot),\
            patch('statsmodels.graphics.tsaplots.plot_acf', mock_acf_plot):

        diag = model.plot_diagnostics(figsize=(10, 12))

        # Asserting on mock attributes to show that we follow the expected
Пример #30
0
def run():
    """Poll price history and print an ARIMA-based short-term trend report.

    Prompts for a ticker symbol, then loops until ``minutes`` minutes have
    passed since the start. Every iteration:

    1. downloads the latest 1440 midpoint candles for the symbol,
    2. fits an ARIMA model (order chosen by ``auto_arima``) on the log of
       the closing prices,
    3. forecasts ``n_periods_ahead`` periods and converts back from logs,
    4. prints the current price, the forecast high/low, the trend
       direction and the slope angle of the forecast.

    Any exception raised during an iteration is printed and the loop goes
    on after sleeping for ``SLEEP`` seconds.

    Returns:
        int: Always ``0``.
    """
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)

    while now < timeFinish:
        try:
            now = dt.datetime.now()

            # Built inside the try so that a client/connection failure is
            # reported and retried like any other per-iteration error.
            client = Client(environment=PRACTICE,
                            account_id="",
                            access_token=ACCESS_TOKEN)

            history = client.get_instrument_history(instrument=symbol,
                                                    granularity=timeframe,
                                                    candle_format="midpoint",
                                                    count=1440)
            df = pd.DataFrame(history['candles'])

            # Keep only the closing mid price, indexed by candle timestamp.
            # set_index returns a new frame, so no explicit copy is needed.
            data = df.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data: model log-prices, undo with exp() after predict
            lnprice = np.log(data)

            # Create and fit the model
            model_temp = auto_arima(lnprice.values,
                                    start_p=1,
                                    start_q=1,
                                    max_p=1,
                                    max_q=1,
                                    m=4,
                                    start_P=0,
                                    seasonal=False,
                                    d=1,
                                    D=1,
                                    trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True,
                                    stepwise=True)

            model = ARIMA(order=model_temp.order)
            fit = model.fit(lnprice.values)

            # Predict
            future_forecast = fit.predict(n_periods=n_periods_ahead)
            future_forecast = np.exp(future_forecast)

            # Calculations
            first = future_forecast[0]
            last = future_forecast[-1]
            # Extremes are taken over the whole horizon: a forecast is not
            # necessarily monotone, so the endpoints alone can miss them.
            lowest = np.min(future_forecast)
            highest = np.max(future_forecast)
            current = data[CLOSE].iloc[-1]
            # The forecast is in chronological order, so "last - first" is
            # positive when the price is predicted to rise.
            change_pct = ((last - first) / first) * 100
            slope = (last - first) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            if change_pct > 0:
                trending = "Positivly / Call"
            else:
                trending = "Negativaly / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")
        except Exception as e:
            # Best-effort polling loop: report the failure and try again
            # on the next tick instead of aborting the whole run.
            print(e)

        time.sleep(SLEEP)

    return 0