def test_pipeline_behavior():
    """Exercise fit, predict, kwarg validation and update on a two-stage pipeline."""
    wineind = load_wineind()
    split_at = 125
    train = wineind[:split_at]
    test = wineind[split_at:]

    steps = [
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", AutoARIMA(seasonal=False,
                            stepwise=True,
                            suppress_warnings=True,
                            maxiter=3,
                            error_action='ignore')),
    ]
    pipeline = Pipeline(steps)

    # The pipeline reports one entry per stage
    assert len(pipeline) == 2

    pipeline.fit(train)
    forecast = pipeline.predict(5)
    assert forecast.shape[0] == 5
    assert pipeline._final_estimator.model_.fit_with_exog_

    # Manually passing a conflicting n_periods to the fourier stage must be
    # rejected with a ValueError that names the offending argument
    with pytest.raises(ValueError) as exc_info:
        pipeline.predict(3, fourier__n_periods=10)
    assert "'n_periods'" in pytest_error_str(exc_info)

    # Updating with the held-out observations should succeed...
    pipeline.update(test, maxiter=5)

    # ...and the fourier stage should now have seen the whole series
    n_total = wineind.shape[0]
    assert pipeline.steps_[0][1].n_ == n_total
def fit(self, X, y):
    """Convert the inputs to the `pmdarima.arima.ARIMA` format and fit the model.

    When `init_with_autoarima` or `always_search_model` is set, an
    `AutoARIMA` search is run first and the parameters of the model it
    found are applied to this wrapper via `set_params`.

    Parameters
    ----------
    X : pandas.DataFrame
        Input features.

    y : array_like, (1d)
        Target vector.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If no search is requested and `order` is None.
    """
    # Holiday columns (names containing "_holiday_") need adjusting first
    holiday_columns = X.filter(like="_holiday_")
    if holiday_columns.shape[1] > 0:
        X = self._adjust_holidays(X)

    endog, exog = self._transform_data_to_tsmodel_input_format(X, y)

    search_requested = self.init_with_autoarima or self.always_search_model
    if search_requested:
        search_kwargs = self.autoarima_dict or {}
        searcher = AutoARIMA(**search_kwargs)
        searcher.fit(y=endog, exog=exog)
        self.set_params(**searcher.model_.get_params())
        # A search is repeated on later fits only if `always_search_model` is set
        self.init_with_autoarima = self.always_search_model
    elif self.order is None:
        raise ValueError("Parameter `order` must be set if `init_with_autoarima` is set to False!")

    self.model = self._init_tsmodel(ARIMA)
    self.model.fit(y=endog, exog=exog)
    self.fitted = True
    return self
def forecast(us_counties: pd.DataFrame, log_metrics: bool, hp: dict, metric_threshold: int = 5):
    """Fit one ``AutoARIMA`` model per location and rank locations by forecast growth.

    Parameters
    ----------
    us_counties : pd.DataFrame
        Frame with at least a ``location`` and a ``cases`` column.
    log_metrics : bool
        When True, only every ``metric_threshold + 1``-th location is
        processed, the last ``horizon`` observations are held out, and an
        error metric is recorded per location. When False, an
        ``outbreak_risk`` column is written to ``us_counties``.
    hp : dict
        Keyword arguments forwarded to ``AutoARIMA``; must contain
        ``'horizon'``, the number of periods to forecast.
    metric_threshold : int
        Number of locations skipped between evaluated ones when
        ``log_metrics`` is True.

    Returns
    -------
    tuple
        ``(us_counties, final_list, metrics)`` where ``final_list`` holds the
        locations sorted by descending growth and ``metrics`` maps location
        to its error metric (empty unless ``log_metrics``).
    """
    horizon = hp['horizon']
    metrics = {}
    growth_rates = {}
    metric_skip = 0

    for location in tqdm(us_counties['location'].unique(), unit=' counties'):
        if log_metrics:
            # Sub-sample: evaluate one location, then skip `metric_threshold`
            if metric_skip != metric_threshold:
                metric_skip += 1
                continue
            metric_skip = 0

        location_rows = us_counties[us_counties.location == location]
        y = location_rows.reset_index()['cases']
        if len(y) < horizon:
            continue

        model = AutoARIMA(**hp)
        with warnings.catch_warnings():
            # When there is no cases, it will throw a warning
            warnings.filterwarnings("ignore")
            try:
                if log_metrics:
                    y, yv = train_test_split(y, test_size=horizon)
                model.fit(y)
                predictions = model.predict(n_periods=horizon)
            except (ValueError, IndexError):
                # Value error very rarely with weird/broken time series data
                continue

        if log_metrics:
            abs_error = np.abs(yv - predictions)
            scale = np.abs(yv) + np.abs(predictions)
            metrics[location] = np.mean(abs_error / scale)

        last_forecast = predictions[len(predictions) - 1]
        todays_cases = y[len(y) - 1]
        # Places with very small amount of cases are hard to predict, so
        # their growth is damped (factor between 0.5 and 1.0)
        case_handicap = min(1.0, 0.5 + (todays_cases / 120))
        growth_rates[location] = (last_forecast / todays_cases) * case_handicap

    ranked = sorted(growth_rates.items(), key=lambda item: item[1], reverse=True)
    final_list = [name for name, _ in ranked]

    def rank_risk(row) -> int:
        # Locations without a (truthy) growth rate get the default risk of 1
        case_growth = growth_rates.get(row.location)
        if not case_growth:
            return 1
        percent_growth = (case_growth - 1) * 100
        return round(max(0, percent_growth))

    if not log_metrics:
        us_counties['outbreak_risk'] = us_counties.apply(rank_risk, axis=1)

    return us_counties, final_list, metrics
def test_non_transformer_in_steps(self):
    """A pipeline whose first stage is a plain callable must raise TypeError."""
    with pytest.raises(TypeError) as exc_info:
        bad_stages = [
            # Not a transformer, so construction is expected to fail here
            ("stage1", (lambda *args, **kwargs: None)),
            ("stage2", AutoARIMA()),
        ]
        Pipeline(bad_stages)

    message = pytest_error_str(exc_info)
    assert "instances of BaseTransformer" in message
def test_order_does_not_matter_with_date_transformer():
    """Swapping the fourier and date stages must not change features or forecasts."""
    splits = train_test_split(y_dates, X_dates, test_size=15)
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = splits

    def fit_pipeline(feature_order):
        # Fresh featurizer instances are created for every pipeline
        factories = {
            'fourier': lambda: FourierFeaturizer(m=3, prefix="FOURIER"),
            'dates': lambda: DateFeaturizer(column_name="date", prefix="DATE"),
        }
        steps = [(name, factories[name]()) for name in feature_order]
        steps.append(
            ("arima", AutoARIMA(seasonal=False,
                                stepwise=True,
                                suppress_warnings=True,
                                maxiter=3,
                                error_action='ignore'))
        )
        return Pipeline(steps).fit(train_y_dates, train_X_dates)

    pipeline_a = fit_pipeline(('fourier', 'dates'))
    Xt_a = pipeline_a.transform(exogenous=test_X_dates)
    pred_a = pipeline_a.predict(exogenous=test_X_dates)

    pipeline_b = fit_pipeline(('dates', 'fourier'))
    Xt_b = pipeline_b.transform(exogenous=test_X_dates)
    pred_b = pipeline_b.predict(exogenous=test_X_dates)

    # The feature order should mirror the stage order of each pipeline
    feats_a = pipeline_a.x_feats_
    feats_b = pipeline_b.x_feats_
    assert feats_a[0].startswith("FOURIER")
    assert feats_a[-1].startswith("DATE")
    assert feats_b[0].startswith("DATE")
    assert feats_b[-1].startswith("FOURIER")

    # columns should be identical once ordered appropriately
    reordered_b = Xt_b[feats_a]
    assert Xt_a.equals(reordered_b)

    # forecasts should be identical
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
def test_AutoARIMA_class():
    """The stored endog must match the training data, then the full series after update."""
    split_at = 125
    train = wineind[:split_at]
    test = wineind[split_at:]

    estimator = AutoARIMA(maxiter=5)
    estimator.fit(train)

    # After fitting, the underlying results hold exactly the training data
    fitted_endog = estimator.model_.arima_res_.data.endog
    assert_array_almost_equal(train, fitted_endog)

    # After updating with the held-out part, they hold the whole series
    estimator.update(test, maxiter=2)
    updated_endog = estimator.model_.arima_res_.data.endog
    assert_array_almost_equal(wineind, updated_endog)
def test_issue_30():
    """Regression test: a tiny series with out_of_sample_size=1 must fit without error."""
    # Series taken from the issue
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])
    search_kwargs = dict(out_of_sample_size=1,
                         seasonal=False,
                         suppress_warnings=True)
    forced_order = (0, 1, 0)

    estimator = AutoARIMA(**search_kwargs)
    estimator.fit(vec)

    # This is a way to force it:
    ARIMA(order=forced_order, out_of_sample_size=1).fit(vec)

    # Want to make sure it works with exog arrays as well
    rng = np.random.RandomState(1)
    exog = rng.rand(vec.shape[0], 2)
    auto_arima(vec, exogenous=exog, **search_kwargs)

    # This is a way to force it:
    ARIMA(order=forced_order, out_of_sample_size=1).fit(vec, exogenous=exog)
def test_arima_setup(params, X):
    """Checks if parameters are passed to Auto-Arima correctly"""
    coverage = 0.99
    init_kwargs = dict(
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params)
    model = AutoArimaEstimator(**init_kwargs)

    # set_params must be able to replicate the init
    model2 = AutoArimaEstimator()
    model2.set_params(**init_kwargs)
    assert model2.__dict__ == model.__dict__

    model.fit(X)
    direct_model = AutoARIMA(**params)
    model_params = model.model.__dict__
    direct_model_params = direct_model.__dict__

    # Every attribute below must match between the estimator's fitted
    # model and an AutoARIMA built directly from the same params
    compared_attributes = (
        "start_p", "d", "start_q",
        "max_p", "max_d", "max_q",
        "start_P", "D", "start_Q",
        "max_P", "max_D", "max_Q",
        "max_order", "m", "seasonal", "stationary",
        "information_criterion", "alpha", "test", "seasonal_test",
        "stepwise", "n_jobs", "start_params", "trend", "method",
        "maxiter", "offset_test_args", "seasonal_test_args",
        "suppress_warnings", "error_action", "trace",
        "random", "random_state", "n_fits", "out_of_sample_size",
        "scoring", "scoring_args", "with_intercept", "kwargs",
    )
    for attribute in compared_attributes:
        assert model_params[attribute] == direct_model_params[attribute], attribute
] ) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize( 'pipe,kwargs,expected', [ pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {}, {"boxcox": {}, "arima": {}} ), pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {"boxcox__lmdba1": 0.001}, {"boxcox": {"lmdba1": 0.001}, "arima": {}} ), ] )
# Two transformers [("stage1", BoxCoxEndogTransformer()), ("stage2", FourierFeaturizer(m=12))] ]) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize('pipe,kwargs,expected', [ pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {}, { "boxcox": {}, "arima": {} }), pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, { "boxcox": { "lmdba1": 0.001 }, "arima": {} }), ]) def test_get_kwargs(pipe, kwargs, expected): # Test we get the kwargs we expect kw = pipe._get_kwargs(**kwargs)
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer

import pytest

# Series shared by every parametrized case below
y = load_wineind()

_ESTIMATORS = [
    ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
    AutoARIMA(seasonal=False, maxiter=3),
    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", AutoARIMA(seasonal=False,
                            stepwise=True,
                            suppress_warnings=True,
                            d=1,
                            max_p=2,
                            max_q=0,
                            start_q=0,
                            start_p=1,
                            maxiter=3,
                            error_action='ignore')),
    ]),
]


@pytest.mark.parametrize('est', _ESTIMATORS)
def test_clonable(est):
    """A fitted estimator can be cloned into a distinct object of the same class."""
    # fit it, then clone it
    est.fit(y)
    duplicate = clone(est)

    assert isinstance(duplicate, est.__class__)
    assert duplicate is not est
def __init__(self, args):
    """Create an unfitted ``AutoARIMA`` model and keep the run configuration.

    Parameters
    ----------
    args : object
        Configuration object; its ``seq_len_x`` and ``out_seq_len``
        attributes are copied onto the instance.
    """
    self.model = AutoARIMA()
    # Keep the full configuration as well as the two values read from it
    self.args = args
    self.seq_len_x = args.seq_len_x
    self.out_seq_len = args.out_seq_len
def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
    """Fits ``ARIMA`` forecast model.

    The null model is fitted first through the parent class, then an
    ``AutoARIMA`` model is built from this estimator's attributes and
    fitted on the value column.

    Parameters
    ----------
    X : `pandas.DataFrame`
        Input timeseries, with timestamp column, value column, and any
        additional regressors. The value column is the response, included
        in X to allow transformation by `sklearn.pipeline.Pipeline`
    y : ignored
        The original timeseries values, ignored.
        (The y for fitting is included in ``X``.)
    time_col : `str`
        Time column name in ``X``
    value_col : `str`
        Value column name in ``X``
    fit_params : `dict`
        additional parameters for null model

    Returns
    -------
    self : self
        Fitted model is stored in ``self.model``.
    """
    # Order the rows by time before anything is fitted
    X = X.sort_values(by=time_col)

    # fits null model
    super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)
    self.fit_df = X

    # Attributes forwarded one-to-one as AutoARIMA keyword arguments
    forwarded = (
        "start_p", "d", "start_q",
        "max_p", "max_d", "max_q",
        "start_P", "D", "start_Q",
        "max_P", "max_D", "max_Q",
        "max_order", "m", "seasonal", "stationary",
        "information_criterion", "alpha", "test", "seasonal_test",
        "stepwise", "n_jobs", "start_params", "trend", "method",
        "maxiter", "offset_test_args", "seasonal_test_args",
        "suppress_warnings", "error_action", "trace",
        "random", "random_state", "n_fits", "out_of_sample_size",
        "scoring", "scoring_args", "with_intercept",
        "return_conf_int", "dynamic", "regressor_cols",
    )
    self.model = AutoARIMA(**{name: getattr(self, name) for name in forwarded})

    # fits auto-arima; regressors are passed only when columns are configured
    reg_df = None if self.regressor_cols is None else X[self.regressor_cols]
    self.model.fit(y=X[[value_col]], X=reg_df)
    return self