Exemplo n.º 1
0
def test_AutoARIMA_class():
    """Fit ``AutoARIMA`` on a head slice, then check ``update`` extends the stored endog."""
    split_at = 125
    head = wineind[:split_at]
    tail = wineind[split_at:]

    estimator = AutoARIMA(maxiter=5)
    estimator.fit(head)

    # After the initial fit, the wrapped results object holds the training slice.
    assert_array_almost_equal(head, estimator.model_.arima_res_.data.endog)

    # After updating with the held-out tail, it should hold the whole series.
    estimator.update(tail, maxiter=2)
    assert_array_almost_equal(wineind, estimator.model_.arima_res_.data.endog)
Exemplo n.º 2
0
def test_pipeline_behavior():
    """Exercise fit / predict / update on a three-stage pmdarima ``Pipeline``.

    Also checks that overriding the fourier stage's ``n_periods`` with a
    value that disagrees with the requested horizon raises ``ValueError``.
    """
    fourier = FourierFeaturizer(m=12)
    boxcox = BoxCoxEndogTransformer()
    arima = AutoARIMA(
        seasonal=False,
        stepwise=True,
        suppress_warnings=True,
        d=1,
        max_p=2,
        max_q=0,
        start_q=0,
        start_p=1,
        maxiter=3,
        error_action='ignore',
    )
    pipeline = Pipeline([
        ("fourier", fourier),
        ("boxcox", boxcox),
        ("arima", arima),
    ])

    # The pipeline reports one entry per stage.
    assert len(pipeline) == 3

    pipeline.fit(train)
    horizon = 5
    forecasts = pipeline.predict(horizon)
    assert forecasts.shape[0] == horizon

    assert pipeline._final_estimator.model_.fit_with_exog_

    # A manually supplied, inconsistent n_periods for the fourier stage
    # must be rejected with a ValueError that names the offending kwarg.
    with pytest.raises(ValueError) as exc_info:
        pipeline.predict(3, fourier__n_periods=10)
    assert "'n_periods'" in pytest_error_str(exc_info)

    # The pipeline can be updated with new observations...
    pipeline.update(test, maxiter=5)

    # ...and the fitted fourier stage then reflects the full sample count.
    fitted_fourier = pipeline.steps_[0][1]
    assert fitted_fourier.n_ == wineind.shape[0]
Exemplo n.º 3
0
class AutoArima:
    """Forecast every univariate series of a batched array with ``AutoARIMA``.

    Parameters
    ----------
    args : object
        Configuration object; ``args.seq_len_x`` and ``args.out_seq_len``
        are read and the object itself is kept on ``self.args``.
    """

    def __init__(self, args):
        self.model = AutoARIMA()
        self.seq_len_x = args.seq_len_x
        self.out_seq_len = args.out_seq_len
        self.args = args

    def predict(self, x):
        """Fit and forecast each series independently.

        Parameters
        ----------
        x : array-like of shape (batch, in_seq_len, n)
            ``n`` series per batch entry, with time running along axis 1.

        Returns
        -------
        numpy.ndarray of shape (batch, out_seq_len, n)
            ``out[b, :, k]`` is the forecast for the series ``x[b, :, k]``.
        """
        b, seq_x, n = x.shape

        # The time axis must be last before flattening; reshaping
        # (b, seq, n) directly to (-1, seq) would interleave values of
        # different series inside one row.
        series = np.transpose(np.asarray(x), (0, 2, 1)).reshape(-1, seq_x)

        forecasts = [
            np.asarray(self.model.fit_predict(s, n_periods=self.out_seq_len))
            for s in series
        ]
        if not forecasts:
            # Empty batch (or no series): nothing to stack.
            return np.empty((b, self.out_seq_len, n))

        # Rows are ordered (batch, series), so:
        # (b * n, out_len) -> (b, n, out_len) -> (b, out_len, n)
        xhat = np.stack(forecasts, axis=0).reshape(b, n, self.out_seq_len)
        return np.transpose(xhat, (0, 2, 1))  # (b, out_len, n)
def test_arima_setup(params, X):
    """Checks if parameters are passed to Auto-Arima correctly"""
    coverage = 0.99
    init_kwargs = dict(
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params)
    model = AutoArimaEstimator(**init_kwargs)

    # set_params must be able to replicate the init
    model2 = AutoArimaEstimator()
    model2.set_params(**init_kwargs)
    assert model2.__dict__ == model.__dict__

    model.fit(X)
    direct_model = AutoARIMA(**params)

    model_params = model.model.__dict__
    direct_model_params = direct_model.__dict__

    # Every attribute that the estimator forwards to ``AutoARIMA`` must match
    # the value held by an ``AutoARIMA`` built directly from ``params``.
    forwarded_names = (
        "start_p", "d", "start_q",
        "max_p", "max_d", "max_q",
        "start_P", "D", "start_Q",
        "max_P", "max_D", "max_Q",
        "max_order",
        "m",
        "seasonal",
        "stationary",
        "information_criterion",
        "alpha",
        "test",
        "seasonal_test",
        "stepwise",
        "n_jobs",
        "start_params",
        "trend",
        "method",
        "maxiter",
        "offset_test_args",
        "seasonal_test_args",
        "suppress_warnings",
        "error_action",
        "trace",
        "random",
        "random_state",
        "n_fits",
        "out_of_sample_size",
        "scoring",
        "scoring_args",
        "with_intercept",
        "kwargs",
    )
    for name in forwarded_names:
        assert model_params[name] == direct_model_params[name]
Exemplo n.º 5
0
        ]
    )
    def test_bad_last_stage(self, stages):
        """Building a ``Pipeline`` from ``stages`` must raise ``TypeError``."""
        expected_fragment = "Last step of Pipeline should be"

        # The final stage is not an estimator, so construction is rejected.
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert expected_fragment in message


@pytest.mark.parametrize(
    'pipe,kwargs,expected', [
        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {},
            {"boxcox": {}, "arima": {}}
        ),

        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {"boxcox__lmdba1": 0.001},
            {"boxcox": {"lmdba1": 0.001}, "arima": {}}
        ),
    ]
)
Exemplo n.º 6
0
            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ])
    def test_bad_last_stage(self, stages):
        """Building a ``Pipeline`` from ``stages`` must raise ``TypeError``."""
        expected_fragment = "Last step of Pipeline should be"

        # The final stage is not an estimator, so construction is rejected.
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert expected_fragment in message


@pytest.mark.parametrize('pipe,kwargs,expected', [
    # Case 1: no kwargs supplied -> an empty dict is expected for each stage.
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {}, {
                      "boxcox": {},
                      "arima": {}
                  }),
    # Case 2: a "<stage>__<name>" kwarg is expected to land under that
    # stage's entry, keyed by the part after the double underscore.
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, {
                      "boxcox": {
                          "lmdba1": 0.001
                      },
                      "arima": {}
                  }),
])
def test_get_kwargs(pipe, kwargs, expected):
    """Call ``pipe._get_kwargs`` with the parametrized ``kwargs``.

    NOTE(review): in this view the function ends right after ``kw`` is
    computed; ``kw`` is never compared against ``expected`` here. The
    comparison presumably follows in the full source - confirm.
    """
    # Test we get the kwargs we expect
    kw = pipe._get_kwargs(**kwargs)
Exemplo n.º 7
0
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

# Shared series returned by ``load_wineind``; ``test_clonable`` fits each
# parametrized estimator on it.
y = load_wineind()


@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
    AutoARIMA(seasonal=False, maxiter=3),
    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", AutoARIMA(
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            d=1,
            max_p=2,
            max_q=0,
            start_q=0,
            start_p=1,
            maxiter=3,
            error_action='ignore',
        )),
    ]),
])
def test_clonable(est):
    """A fitted estimator can be passed to ``sklearn.base.clone``.

    The clone must be of the same class as ``est`` and a distinct object.
    """
    # Fit first so that cloning is exercised on an already-fitted estimator.
    est.fit(y)
    duplicate = clone(est)

    assert isinstance(duplicate, est.__class__)
    assert est is not duplicate
Exemplo n.º 8
0
 def __init__(self, args):
     """Create the wrapped ``AutoARIMA`` and copy the sequence lengths from ``args``.

     ``args`` must expose ``seq_len_x`` and ``out_seq_len``; the object
     itself is also kept on ``self.args``.
     """
     self.args = args
     self.model = AutoARIMA()
     # Input and output window lengths, mirrored from the config object.
     self.seq_len_x = args.seq_len_x
     self.out_seq_len = args.out_seq_len
Exemplo n.º 9
0
class AutoArimaEstimator(BaseForecastEstimator):
    """Wrapper for ``pmdarima.arima.AutoARIMA``.
    It currently does not handle the regressor issue when there is
    gap between train and predict periods.

    Parameters
    ----------
    score_func : callable
        see ``BaseForecastEstimator``.
    coverage : float between [0.0, 1.0]
        see ``BaseForecastEstimator``.
        ``1 - coverage`` is also passed as ``alpha`` when predicting.
    null_model_params : dict with arguments to define DummyRegressor null model, optional, default=None
        see ``BaseForecastEstimator``.
    regressor_cols: `list` [`str`], optional, default None
        A list of regressor columns used during training and prediction.
        If None, no regressor columns are used.
    freq : `str`, optional, default None
        Time unit of the time column. It is passed to ``numpy.timedelta64``
        in ``predict`` to convert timestamps into integer positions.
        If None, it is inferred from the training data with
        ``pandas.infer_freq`` on the first call to ``predict`` and stored
        back on ``self.freq``.
    return_conf_int : `bool`, optional, default True
        Forwarded to ``AutoARIMA.predict_in_sample``. When False, no
        intervals are returned by the model and the lower/upper bound
        columns of the forecast are filled with NaN.
    dynamic : `bool`, optional, default False
        Forwarded to ``AutoARIMA.predict_in_sample``.

    See ``AutoArima`` documentation for rest of the parameter descriptions:

            * https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.AutoARIMA.html#pmdarima.arima.AutoARIMA

    Attributes
    ----------
    model : ``AutoArima`` object
        Auto arima model object
    fit_df : `pandas.DataFrame` or None
        The training data used to fit the model.
    forecast : `pandas.DataFrame`
        Output of the predict method of ``AutoArima``.
    """
    def __init__(
            self,
            # Null model parameters
            score_func: callable = mean_squared_error,
            coverage: float = 0.90,
            null_model_params: Optional[Dict] = None,
            # Additional parameters
            regressor_cols: Optional[List[str]] = None,
            freq: Optional[str] = None,
            # pmdarima fit parameters
            start_p: Optional[int] = 2,
            d: Optional[int] = None,
            start_q: Optional[int] = 2,
            max_p: Optional[int] = 5,
            max_d: Optional[int] = 2,
            max_q: Optional[int] = 5,
            start_P: Optional[int] = 1,
            D: Optional[int] = None,
            start_Q: Optional[int] = 1,
            max_P: Optional[int] = 2,
            max_D: Optional[int] = 1,
            max_Q: Optional[int] = 2,
            max_order: Optional[int] = 5,
            m: Optional[int] = 1,
            seasonal: Optional[bool] = True,
            stationary: Optional[bool] = False,
            information_criterion: Optional[str] = 'aic',
            alpha: Optional[float] = 0.05,
            test: Optional[str] = 'kpss',
            seasonal_test: Optional[str] = 'ocsb',
            stepwise: Optional[bool] = True,
            n_jobs: Optional[int] = 1,
            start_params: Optional[Dict] = None,
            trend: Optional[str] = None,
            method: Optional[str] = 'lbfgs',
            maxiter: Optional[int] = 50,
            offset_test_args: Optional[Dict] = None,
            seasonal_test_args: Optional[Dict] = None,
            suppress_warnings: Optional[bool] = True,
            error_action: Optional[str] = 'trace',
            trace: Optional[Union[int, bool]] = False,
            random: Optional[bool] = False,
            random_state: Optional[Union[int, callable]] = None,
            n_fits: Optional[int] = 10,
            out_of_sample_size: Optional[int] = 0,
            scoring: Optional[str] = 'mse',
            scoring_args: Optional[Dict] = None,
            with_intercept: Optional[Union[bool, str]] = "auto",
            # pmdarima predict parameters
            return_conf_int: Optional[bool] = True,
            dynamic: Optional[bool] = False):
        # Every subclass of BaseForecastEstimator must call super().__init__
        super().__init__(
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params)
        self.regressor_cols = regressor_cols
        self.freq = freq
        self.start_p = start_p
        self.d = d
        self.start_q = start_q
        self.max_p = max_p
        self.max_d = max_d
        self.max_q = max_q
        self.start_P = start_P
        self.D = D
        self.start_Q = start_Q
        self.max_P = max_P
        self.max_D = max_D
        self.max_Q = max_Q
        self.max_order = max_order
        self.m = m
        self.seasonal = seasonal
        self.stationary = stationary
        self.information_criterion = information_criterion
        self.alpha = alpha
        self.test = test
        self.seasonal_test = seasonal_test
        self.stepwise = stepwise
        self.n_jobs = n_jobs
        self.start_params = start_params
        self.trend = trend
        self.method = method
        self.maxiter = maxiter
        self.offset_test_args = offset_test_args
        self.seasonal_test_args = seasonal_test_args
        self.suppress_warnings = suppress_warnings
        self.error_action = error_action
        self.trace = trace
        self.random = random
        self.random_state = random_state
        self.n_fits = n_fits
        self.out_of_sample_size = out_of_sample_size
        self.scoring = scoring
        self.scoring_args = scoring_args
        self.with_intercept = with_intercept
        self.return_conf_int = return_conf_int
        self.coverage = coverage
        self.dynamic = dynamic

        # set by the fit method
        self.model = None
        self.fit_df = None
        # set by the predict method
        self.forecast = None

    def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
        """Fits ``ARIMA`` forecast model.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Input timeseries, with timestamp column,
            value column, and any additional regressors.
            The value column is the response, included in
            X to allow transformation by `sklearn.pipeline.Pipeline`
        y : ignored
            The original timeseries values, ignored.
            (The y for fitting is included in ``X``.)
        time_col : `str`
            Time column name in ``X``
        value_col : `str`
            Value column name in ``X``
        fit_params : `dict`
            additional parameters for null model
        Returns
        -------
        self : self
            Fitted model is stored in ``self.model``.
        """
        X = X.sort_values(by=time_col)
        # fits null model
        super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)

        self.fit_df = X
        # fits AutoArima model
        # NOTE: ``return_conf_int``, ``dynamic`` and ``regressor_cols`` are
        # passed to the ``AutoARIMA`` constructor as additional keyword arguments.
        self.model = AutoARIMA(
            start_p=self.start_p,
            d=self.d,
            start_q=self.start_q,
            max_p=self.max_p,
            max_d=self.max_d,
            max_q=self.max_q,
            start_P=self.start_P,
            D=self.D,
            start_Q=self.start_Q,
            max_P=self.max_P,
            max_D=self.max_D,
            max_Q=self.max_Q,
            max_order=self.max_order,
            m=self.m,
            seasonal=self.seasonal,
            stationary=self.stationary,
            information_criterion=self.information_criterion,
            alpha=self.alpha,
            test=self.test,
            seasonal_test=self.seasonal_test,
            stepwise=self.stepwise,
            n_jobs=self.n_jobs,
            start_params=self.start_params,
            trend=self.trend,
            method=self.method,
            maxiter=self.maxiter,
            offset_test_args=self.offset_test_args,
            seasonal_test_args=self.seasonal_test_args,
            suppress_warnings=self.suppress_warnings,
            error_action=self.error_action,
            trace=self.trace,
            random=self.random,
            random_state=self.random_state,
            n_fits=self.n_fits,
            out_of_sample_size=self.out_of_sample_size,
            scoring=self.scoring,
            scoring_args=self.scoring_args,
            with_intercept=self.with_intercept,
            return_conf_int=self.return_conf_int,
            dynamic=self.dynamic,
            regressor_cols=self.regressor_cols
        )

        # fits auto-arima
        if self.regressor_cols is None:
            reg_df = None
        else:
            reg_df = X[self.regressor_cols]
        self.model.fit(y=X[[value_col]], X=reg_df)

        return self

    def predict(self, X, y=None):
        """Creates forecast for the dates specified in ``X``.
        Currently does not support the regressor case where there is gap between
        train and predict periods.

        Parameters
        ----------
        X: `pandas.DataFrame`
            Input timeseries with timestamp column and any additional regressors.
            Timestamps are the dates for prediction.
            Value column, if provided in ``X``, is ignored.
        y: ignored.

        Returns
        -------
        predictions: `pandas.DataFrame`
            Forecasted values for the dates in ``X``. Columns:

                - ``TIME_COL``: dates
                - ``PREDICTED_COL``: predictions
                - ``PREDICTED_LOWER_COL``: lower bound of predictions
                - ``PREDICTED_UPPER_COL``: upper bound of predictions

            The bound columns are all NaN when ``return_conf_int`` is False.
            Leading rows that fall before the differencing order chosen by
            auto-arima are NaN in all three prediction columns.
        """
        X = X.sort_values(by=self.time_col_)
        # Returns the cached result if applicable
        cached_predictions = super().predict(X=X)
        if cached_predictions is not None:
            return cached_predictions

        # Currently does not support the regressor case where
        # there is gap between train and predict periods
        if self.regressor_cols is None:
            fut_reg_df = None
        else:
            fut_df = X[X[self.time_col_] > self.fit_df[self.time_col_].iloc[-1]]
            fut_reg_df = fut_df[self.regressor_cols]  # Auto-arima only accepts regressor values beyond `fit_df`

        # NOTE: the inferred / normalized frequency is written back to
        # ``self.freq``, so later calls reuse it.
        if self.freq is None:
            self.freq = pd.infer_freq(self.fit_df[self.time_col_])
        if self.freq == "H":
            self.freq = self.freq.lower()  # np.timedelta recognizes lower case letters
        chosen_d = self.model.model_.order[1]  # This is the value of the d chosen by auto-arima

        # Positions are counted in units of ``freq`` from the first training timestamp.
        train_start = self.fit_df[self.time_col_].iloc[0]
        time_step = np.timedelta64(1, self.freq)
        forecast_start = int((X[self.time_col_].iloc[0] - train_start)/time_step)
        if forecast_start < chosen_d:
            append_length = chosen_d - forecast_start  # Number of NaNs to append to `pred_df`
            forecast_start = chosen_d  # Auto-arima can not predict below the chosen d
        else:
            append_length = 0
        forecast_end = int((X[self.time_col_].iloc[-1] - train_start)/time_step)

        predictions = self.model.predict_in_sample(
            X=fut_reg_df,
            start=forecast_start,
            end=forecast_end,
            dynamic=self.dynamic,
            return_conf_int=self.return_conf_int,
            alpha=(1-self.coverage)
        )

        # ``predict_in_sample`` returns ``(forecast, intervals)`` only when
        # intervals were requested; otherwise it returns the forecast alone.
        if self.return_conf_int:
            point_forecast, conf_int = predictions
            lower_bound = conf_int[:, 0]
            upper_bound = conf_int[:, 1]
        else:
            point_forecast = predictions
            lower_bound = np.repeat(np.nan, len(point_forecast))
            upper_bound = lower_bound

        if append_length > 0:
            # Pad the head with NaN for the dates auto-arima could not predict.
            padding = np.repeat(np.nan, append_length)
            pred_df = pd.DataFrame({
                TIME_COL: X[self.time_col_],
                PREDICTED_COL: np.append(padding, point_forecast),
                PREDICTED_LOWER_COL: np.append(padding, lower_bound),
                PREDICTED_UPPER_COL: np.append(padding, upper_bound)
            })
        else:
            pred_df = pd.DataFrame({
                TIME_COL: X[self.time_col_],
                PREDICTED_COL: point_forecast,
                PREDICTED_LOWER_COL: lower_bound,
                PREDICTED_UPPER_COL: upper_bound
            })
        self.forecast = pred_df

        # Caches the predictions
        self.cached_predictions_ = pred_df

        return pred_df

    def summary(self):
        """Runs the base class summary, then returns ``self.model.summary()``."""
        BaseForecastEstimator.summary(self)
        # AutoArima summary
        return self.model.summary()
Exemplo n.º 10
0
    def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
        """Fit the null model and an ``AutoARIMA`` model on ``X``.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Input timeseries holding the timestamp column, the value
            column (the response) and any additional regressors.
            It is sorted by ``time_col`` before fitting.
        y : ignored
            Not used for the ARIMA fit; the response is read from ``X``.
            It is still passed through to the base class ``fit``.
        time_col : `str`
            Name of the time column in ``X``.
        value_col : `str`
            Name of the value column in ``X``.
        fit_params : `dict`
            Additional parameters for the null model.

        Returns
        -------
        self : self
            The fitted model is stored in ``self.model`` and the sorted
            training frame in ``self.fit_df``.
        """
        X = X.sort_values(by=time_col)
        # The base class fits the null model.
        super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)
        self.fit_df = X

        # Attributes handed to the ``AutoARIMA`` constructor under the same
        # keyword name, in this order.
        forwarded_attrs = (
            "start_p", "d", "start_q",
            "max_p", "max_d", "max_q",
            "start_P", "D", "start_Q",
            "max_P", "max_D", "max_Q",
            "max_order",
            "m",
            "seasonal",
            "stationary",
            "information_criterion",
            "alpha",
            "test",
            "seasonal_test",
            "stepwise",
            "n_jobs",
            "start_params",
            "trend",
            "method",
            "maxiter",
            "offset_test_args",
            "seasonal_test_args",
            "suppress_warnings",
            "error_action",
            "trace",
            "random",
            "random_state",
            "n_fits",
            "out_of_sample_size",
            "scoring",
            "scoring_args",
            "with_intercept",
            "return_conf_int",
            "dynamic",
            "regressor_cols",
        )
        self.model = AutoARIMA(
            **{attr: getattr(self, attr) for attr in forwarded_attrs})

        # Regressors are optional; without them auto-arima gets ``X=None``.
        regressors = None if self.regressor_cols is None else X[self.regressor_cols]
        self.model.fit(y=X[[value_col]], X=regressors)

        return self