Пример #1
0
def test_pipeline_behavior():
    """Exercise fit, predict and update on a Fourier + AutoARIMA pipeline."""
    series = load_wineind()
    train = series[:125]
    test = series[125:]

    arima = AutoARIMA(seasonal=False, stepwise=True,
                      suppress_warnings=True,
                      maxiter=3, error_action='ignore')
    pipeline = Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", arima),
    ])

    # The pipeline reports one entry per stage
    assert len(pipeline) == 2

    pipeline.fit(train)
    forecast = pipeline.predict(5)
    assert forecast.shape[0] == 5

    # The fitted final model must flag that it was fit with exogenous data
    assert pipeline._final_estimator.model_.fit_with_exog_

    # Manually (and incorrectly) overriding n_periods for the fourier stage
    # must surface as a ValueError that mentions the offending kwarg
    with pytest.raises(ValueError) as exc_info:
        pipeline.predict(3, **{"fourier__n_periods": 10})
    assert "'n_periods'" in pytest_error_str(exc_info)

    # Updating with the held-out tail must also advance the fourier
    # transformer's sample counter to the length of the full series
    pipeline.update(test, maxiter=5)
    assert pipeline.steps_[0][1].n_ == series.shape[0]
Пример #2
0
    def fit(self, X, y):
        """Fit the wrapped `pmdarima.arima.ARIMA` model on ``X`` and ``y``.

        If ``X`` has any column whose name contains ``_holiday_``, it is first
        passed through ``self._adjust_holidays``. The data is then converted
        by ``self._transform_data_to_tsmodel_input_format``. When either
        ``init_with_autoarima`` or ``always_search_model`` is truthy, an
        `AutoARIMA` search (configured by ``autoarima_dict``) is run and the
        parameters of the model it finds are applied via ``set_params``.

        Parameters
        ----------
        X : pandas.DataFrame
            Input features.

        y : array_like, (1d)
            Target vector.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no search is requested and ``order`` is None.
        """
        holiday_columns = X.filter(like="_holiday_")
        if holiday_columns.shape[1] > 0:
            X = self._adjust_holidays(X)
        endog, exog = self._transform_data_to_tsmodel_input_format(X, y)

        run_search = self.init_with_autoarima or self.always_search_model
        if not run_search and self.order is None:
            raise ValueError("Parameter `order` must be set if `init_with_autoarima` is set to False!")

        if run_search:
            search_kwargs = self.autoarima_dict or {}
            searched = AutoARIMA(**search_kwargs).fit(y=endog, exog=exog)
            self.set_params(**searched.model_.get_params())
            # After the first search, keep searching only if explicitly asked to
            self.init_with_autoarima = self.always_search_model

        self.model = self._init_tsmodel(ARIMA)
        self.model.fit(y=endog, exog=exog)
        self.fitted = True
        return self
Пример #3
0
def forecast(us_counties: pd.DataFrame,
             log_metrics: bool,
             hp: dict,
             metric_threshold: int = 5):
    """Fit one AutoARIMA per location and rank locations by forecast growth.

    For every unique ``location`` the ``cases`` series is fitted and
    forecast ``hp['horizon']`` periods ahead. Growth is the last forecast
    value divided by the latest observed value, damped for small counts.

    Parameters
    ----------
    us_counties : pandas.DataFrame
        Must provide the ``location`` and ``cases`` columns.
    log_metrics : bool
        When True, only a subsample of locations is evaluated
        (``metric_threshold`` locations are skipped before each evaluated
        one), the last ``horizon`` observations are held out, and a
        symmetric relative error is recorded per location. The
        ``outbreak_risk`` column is not written in this mode.
    hp : dict
        Passed as-is to ``AutoARIMA(**hp)``; must contain ``'horizon'``.
    metric_threshold : int
        Number of locations skipped between evaluated ones when
        ``log_metrics`` is True.

    Returns
    -------
    tuple
        ``(us_counties, final_list, metrics)`` where ``final_list`` holds the
        locations ordered by descending growth and ``metrics`` maps location
        to its error (empty unless ``log_metrics``).
    """
    metrics = {}
    growth_rates = {}
    horizon = hp['horizon']
    metric_skip = 0

    for location in tqdm(us_counties['location'].unique(), unit=' counties'):
        if log_metrics:
            if metric_skip == metric_threshold:
                metric_skip = 0
            else:
                metric_skip += 1
                continue
        y = us_counties[us_counties.location ==
                        location].reset_index()['cases']
        if len(y) < horizon:
            continue
        model = AutoARIMA(**hp)
        with warnings.catch_warnings():
            # When there is no cases, it will throw a warning
            warnings.filterwarnings("ignore")
            try:
                if log_metrics:
                    y, yv = train_test_split(y, test_size=horizon)
                model.fit(y)
                predictions = model.predict(n_periods=horizon)
            # Value error very rarely with weird/broken time series data
            except (ValueError, IndexError):
                continue
            if log_metrics:
                metrics[location] = np.mean(
                    np.abs(yv - predictions) /
                    (np.abs(yv) + np.abs(predictions)))
            # Index positionally so this works whether the values come back
            # as a numpy array or as a pandas Series with a non-zero-based
            # index.
            last_forecast = np.asarray(predictions)[-1]
            todays_cases = np.asarray(y)[-1]
            # Places with very small amount of cases are hard to predict
            case_handicap = min(1.0, 0.5 + (todays_cases / 120))
            growth = (last_forecast / todays_cases) * case_handicap
            # A latest count of zero makes the ratio inf/nan; such a value
            # would rank first and break the rounding in rank_risk, so the
            # location is treated as having no usable forecast.
            if not np.isfinite(growth):
                continue
            growth_rates[location] = growth

    # Locations ordered from highest to lowest growth
    final_list = sorted(growth_rates, key=growth_rates.get, reverse=True)

    def rank_risk(row) -> int:
        """Map a row's location growth to a non-negative integer risk."""
        case_growth = growth_rates.get(row.location)
        # Missing (or zero) growth falls back to the baseline risk of 1
        if not case_growth:
            return 1
        return round(max(0, (case_growth - 1) * 100))

    if not log_metrics:
        us_counties['outbreak_risk'] = us_counties.apply(rank_risk, axis=1)

    return us_counties, final_list, metrics
Пример #4
0
    def test_non_transformer_in_steps(self):
        """A pipeline whose first stage is a plain callable is rejected."""
        # A bare lambda is not a transformer, so construction must fail
        not_a_transformer = (lambda *args, **kwargs: None)

        with pytest.raises(TypeError) as exc_info:
            Pipeline([
                ("stage1", not_a_transformer),
                ("stage2", AutoARIMA()),
            ])

        message = pytest_error_str(exc_info)
        assert "instances of BaseTransformer" in message
Пример #5
0
def test_order_does_not_matter_with_date_transformer():
    """Swapping the fourier and date stages only reorders the features."""
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
        train_test_split(y_dates, X_dates, test_size=15)

    def make_stage(name):
        # Build a fresh featurizer for the requested stage name
        if name == 'fourier':
            return (name, FourierFeaturizer(m=3, prefix="FOURIER"))
        return (name, DateFeaturizer(column_name="date", prefix="DATE"))

    def run(stage_order):
        # Fit a pipeline with the featurizers in the given order, then
        # transform and forecast on the held-out exogenous data
        stages = [make_stage(name) for name in stage_order]
        stages.append(
            ("arima",
             AutoARIMA(seasonal=False,
                       stepwise=True,
                       suppress_warnings=True,
                       maxiter=3,
                       error_action='ignore')))
        fitted = Pipeline(stages).fit(train_y_dates, train_X_dates)
        transformed = fitted.transform(exogenous=test_X_dates)
        predicted = fitted.predict(exogenous=test_X_dates)
        return fitted, transformed, predicted

    pipeline_a, Xt_a, pred_a = run(('fourier', 'dates'))
    pipeline_b, Xt_b, pred_b = run(('dates', 'fourier'))

    # Feature order follows stage order: A is fourier-first, B is dates-first
    feats_a = pipeline_a.x_feats_
    feats_b = pipeline_b.x_feats_
    assert feats_a[0].startswith("FOURIER")
    assert feats_a[-1].startswith("DATE")
    assert feats_b[0].startswith("DATE")
    assert feats_b[-1].startswith("FOURIER")

    # columns should be identical once ordered appropriately
    assert Xt_a.equals(Xt_b[feats_a])

    # forecasts should be identical
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
Пример #6
0
def test_AutoARIMA_class():
    """The fitted model's endog tracks the data seen by fit and update."""
    head = wineind[:125]
    tail = wineind[125:]

    mod = AutoARIMA(maxiter=5)
    mod.fit(head)

    # After the initial fit, the stored endog is exactly the training slice
    assert_array_almost_equal(head, mod.model_.arima_res_.data.endog)

    # After updating with the remainder, it is the complete series
    mod.update(tail, maxiter=2)
    assert_array_almost_equal(wineind, mod.model_.arima_res_.data.endog)
Пример #7
0
def test_issue_30():
    """Fit a very short series with ``out_of_sample_size=1`` without errors.

    Covers the class API, the function API with exogenous data, and a
    forced (0, 1, 0) order both with and without exogenous data.
    """
    # From the issue:
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])
    # Two random exogenous columns, one row per observation
    exog = np.random.RandomState(1).rand(vec.shape[0], 2)

    auto_model = AutoARIMA(out_of_sample_size=1, seasonal=False,
                           suppress_warnings=True)
    auto_model.fit(vec)

    # Forcing the order, no exogenous data
    forced = ARIMA(order=(0, 1, 0), out_of_sample_size=1)
    forced.fit(vec)

    # Same search, this time with exogenous arrays
    auto_arima(vec, exogenous=exog, out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # Forcing the order, with exogenous data
    forced_exog = ARIMA(order=(0, 1, 0), out_of_sample_size=1)
    forced_exog.fit(vec, exogenous=exog)
def test_arima_setup(params, X):
    """Check that the estimator forwards its parameters to AutoARIMA.

    Also checks that ``set_params`` on a default instance reproduces the
    state produced by passing the same values to the constructor.
    """
    coverage = 0.99
    init_kwargs = dict(score_func=mean_squared_error,
                       coverage=coverage,
                       null_model_params=None,
                       **params)
    model = AutoArimaEstimator(**init_kwargs)

    # set_params must be able to replicate the init
    model2 = AutoArimaEstimator()
    model2.set_params(**init_kwargs)
    assert model2.__dict__ == model.__dict__

    model.fit(X)
    direct_model = AutoARIMA(**params)

    model_params = model.model.__dict__
    direct_model_params = direct_model.__dict__

    # Every attribute below must match between the wrapped model and an
    # AutoARIMA built directly from the same params
    compared_attributes = (
        "start_p", "d", "start_q",
        "max_p", "max_d", "max_q",
        "start_P", "D", "start_Q",
        "max_P", "max_D", "max_Q",
        "max_order", "m", "seasonal", "stationary",
        "information_criterion", "alpha", "test", "seasonal_test",
        "stepwise", "n_jobs", "start_params", "trend", "method", "maxiter",
        "offset_test_args", "seasonal_test_args",
        "suppress_warnings", "error_action", "trace",
        "random", "random_state", "n_fits",
        "out_of_sample_size", "scoring", "scoring_args",
        "with_intercept", "kwargs",
    )
    for attribute in compared_attributes:
        assert model_params[attribute] == direct_model_params[attribute]
Пример #9
0
        ]
    )
    def test_bad_last_stage(self, stages):
        """Building a pipeline from ``stages`` must raise a TypeError."""
        # The final stage is not an estimator, so construction is rejected
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert "Last step of Pipeline should be" in message


@pytest.mark.parametrize(
    'pipe,kwargs,expected', [
        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {},
            {"boxcox": {}, "arima": {}}
        ),

        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {"boxcox__lmdba1": 0.001},
            {"boxcox": {"lmdba1": 0.001}, "arima": {}}
        ),
    ]
)
Пример #10
0
            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ])
    def test_bad_last_stage(self, stages):
        """Building a pipeline from ``stages`` must raise a TypeError."""
        # The final stage is not an estimator, so construction is rejected
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert "Last step of Pipeline should be" in message


# Each case supplies a pipeline, the keyword arguments handed to
# ``_get_kwargs``, and the per-stage dictionary listed as ``expected``.
@pytest.mark.parametrize('pipe,kwargs,expected', [
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {}, {
                      "boxcox": {},
                      "arima": {}
                  }),
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, {
                      "boxcox": {
                          "lmdba1": 0.001
                      },
                      "arima": {}
                  }),
])
def test_get_kwargs(pipe, kwargs, expected):
    """Call ``pipe._get_kwargs`` with the parametrized keyword arguments."""
    # Test we get the kwargs we expect
    # NOTE(review): `kw` is never compared against `expected` in the visible
    # body -- the final assertion appears to be missing; confirm.
    kw = pipe._get_kwargs(**kwargs)
Пример #11
0
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

# Series used by the tests below; loaded once when the module is imported.
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore')),
        ]),
    ]
)
def test_clonable(est):
    """A fitted estimator can be cloned into a distinct same-class object."""
    est.fit(y)
    duplicate = clone(est)

    # The clone is of the same class as the source estimator...
    assert isinstance(duplicate, est.__class__)
    # ...but is never the very same object
    assert duplicate is not est
Пример #12
0
 def __init__(self, args):
     """Create a default AutoARIMA model and record settings from ``args``.

     ``args`` must expose ``seq_len_x`` and ``out_seq_len``; the whole
     object is also kept on the instance as ``self.args``.
     """
     self.model = AutoARIMA()
     # Copy the two sequence lengths onto the instance
     self.seq_len_x, self.out_seq_len = args.seq_len_x, args.out_seq_len
     self.args = args
Пример #13
0
    def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
        """Fit the null model and then an ``AutoARIMA`` model on ``X``.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Input timeseries, with timestamp column,
            value column, and any additional regressors.
            The value column is the response, included in
            X to allow transformation by `sklearn.pipeline.Pipeline`
        y : ignored
            The original timeseries values, ignored.
            (The y for fitting is included in ``X``.)
        time_col : `str`
            Time column name in ``X``
        value_col : `str`
            Value column name in ``X``
        fit_params : `dict`
            additional parameters for null model

        Returns
        -------
        self : self
            Fitted model is stored in ``self.model``; the time-sorted input
            is stored in ``self.fit_df``.
        """
        X = X.sort_values(by=time_col)
        # The parent class fits the null model
        super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)
        self.fit_df = X

        # Attributes of this estimator forwarded to AutoARIMA under the same
        # keyword names
        forwarded_attributes = (
            "start_p", "d", "start_q",
            "max_p", "max_d", "max_q",
            "start_P", "D", "start_Q",
            "max_P", "max_D", "max_Q",
            "max_order", "m", "seasonal", "stationary",
            "information_criterion", "alpha", "test", "seasonal_test",
            "stepwise", "n_jobs", "start_params", "trend", "method",
            "maxiter", "offset_test_args", "seasonal_test_args",
            "suppress_warnings", "error_action", "trace",
            "random", "random_state", "n_fits",
            "out_of_sample_size", "scoring", "scoring_args",
            "with_intercept", "return_conf_int", "dynamic",
            "regressor_cols",
        )
        arima_kwargs = {name: getattr(self, name) for name in forwarded_attributes}
        self.model = AutoARIMA(**arima_kwargs)

        # Regressors are optional; without them only the value column is used
        regressors = None if self.regressor_cols is None else X[self.regressor_cols]
        self.model.fit(y=X[[value_col]], X=regressors)

        return self