def test_airline_allow_multiplicative_trend():
    """Check automatic ETS selection when a multiplicative trend is allowed.

    Reference result from R::

        fit <- ets(AirPassengers, model = "ZZZ",
                   allow.multiplicative.trend = TRUE)
        components: "M" "M" "M" "TRUE"

    The only discrepancy lies in the damped flag (True in R but False in
    statsmodels), which is why only error, trend and seasonal are compared.

    Test failed on linux environment, fixed by fixing pandas==1.1.5 in #581
    """
    reference_components = ["mul", "mul", "mul"]

    auto_ets = AutoETS(
        auto=True,
        sp=12,
        n_jobs=-1,
        allow_multiplicative_trend=True,
    )
    auto_ets.fit(y)
    selected_model = auto_ets._fitted_forecaster

    # collect the selected components in (error, trend, seasonal) order
    selected_components = [
        getattr(selected_model, component)
        for component in ("error", "trend", "seasonal")
    ]
    assert_array_equal(reference_components, selected_components)
def test_inf_ic_false():
    """Check AIC, BIC and AICc are all infinite when ``ignore_inf_ic=False``.

    Fits ``AutoETS`` in auto mode (``sp=52``) on the module-level series
    ``inf_ic_ts`` and inspects the information criteria of the selected model.
    """
    auto_ets = AutoETS(auto=True, sp=52, n_jobs=-1, ignore_inf_ic=False)
    auto_ets.fit(inf_ic_ts)
    selected_model = auto_ets._fitted_forecaster

    # every information criterion of the selected model must be infinite
    assert all(
        np.isinf(getattr(selected_model, criterion))
        for criterion in ("aic", "bic", "aicc")
    )
def test_multiplex_or_dunder():
    """Test that the MultiplexForecaster "|" dunder method behaves as expected.

    A MultiplexForecaster can be created by applying "|" to forecaster or
    MultiplexForecaster objects. The test covers:

    * forecaster | forecaster
    * MultiplexForecaster | MultiplexForecaster
    * a chain of three forecasters of the same class (names must stay unique)
    * "|" with an unsupported operand, which must raise ``TypeError``
    """
    # simplest case: two plain forecasters
    from_two_forecasters = AutoETS() | NaiveForecaster()
    assert isinstance(from_two_forecasters, MultiplexForecaster)
    assert len(from_two_forecasters.forecasters) == 2

    # combining two MultiplexForecasters concatenates their forecasters
    multiplex_one = MultiplexForecaster(
        [("arima", AutoARIMA()), ("ets", AutoETS())]
    )
    multiplex_two = MultiplexForecaster(
        [("theta", ThetaForecaster()), ("naive", NaiveForecaster())]
    )
    from_two_multiplexers = multiplex_one | multiplex_two
    assert isinstance(from_two_multiplexers, MultiplexForecaster)
    assert len(from_two_multiplexers.forecasters) == 4

    # three forecasters of the same class: exercises
    # MultiplexForecaster | forecaster and the generation of unique names
    first_pair = NaiveForecaster(strategy="last") | NaiveForecaster(strategy="mean")
    same_class_triple = first_pair | NaiveForecaster(strategy="drift")
    assert isinstance(same_class_triple, MultiplexForecaster)
    assert len(same_class_triple.forecasters) == 3
    estimator_names = same_class_triple._get_estimator_names(
        same_class_triple.forecasters
    )
    assert len(set(estimator_names)) == 3

    # anything that is not a forecaster must be rejected with a TypeError
    with pytest.raises(TypeError):
        multiplex_one | "this shouldn't work"
def test_inf_ic_true():
    """Ignore infinite IC models when ignore_inf_ic is `True`.

    Fits ``AutoETS`` in auto mode (``sp=52``) on the module-level series
    ``inf_ic_ts`` and checks that AIC, BIC and AICc of the selected model are
    all finite.
    """
    auto_ets = AutoETS(auto=True, sp=52, n_jobs=-1, ignore_inf_ic=True)
    auto_ets.fit(inf_ic_ts)
    selected_model = auto_ets._fitted_forecaster

    # none of the information criteria may be infinite (or NaN)
    assert all(
        np.isfinite(getattr(selected_model, criterion))
        for criterion in ("aic", "bic", "aicc")
    )
def test_estimator_fh(freqstr):
    """Test model fitting with anchored frequency.

    Builds a random series of 104 observations on a date range with frequency
    ``freqstr``, fits ``AutoETS`` and checks that the index of a 26-step
    forecast equals the absolute forecasting horizon anchored at the last
    training timestamp.
    """
    n_obs = 104
    n_steps = 26

    values = np.random.uniform(low=2000, high=7000, size=(n_obs,))
    time_index = pd.date_range("2019-01-02", freq=freqstr, periods=n_obs)
    train = pd.Series(values, index=time_index)

    auto_ets = AutoETS(auto=True, sp=52, n_jobs=-1, restrict=True)
    auto_ets.fit(train)
    pred = auto_ets.predict(np.arange(1, n_steps + 1))

    # the relative horizon 1..26 converted to timestamps after the cutoff
    relative_fh = ForecastingHorizon(np.arange(1, n_steps + 1))
    expected_fh = relative_fh.to_absolute(train.index[-1])
    assert_array_equal(pred.index.to_numpy(), expected_fh.to_numpy())
def test_airline_default():
    """Check the components selected by AutoETS with default auto settings.

    Fits ``AutoETS(auto=True, sp=12)`` on the module-level series ``y`` and
    expects multiplicative error, additive trend and multiplicative
    seasonality.
    """
    # reference selection (presumably taken from R's ets -- confirm)
    reference_components = ["mul", "add", "mul"]

    auto_ets = AutoETS(auto=True, sp=12, n_jobs=-1)
    auto_ets.fit(y)
    selected_model = auto_ets._fitted_forecaster

    # collect the selected components in (error, trend, seasonal) order
    selected_components = [
        getattr(selected_model, component)
        for component in ("error", "trend", "seasonal")
    ]
    assert_array_equal(reference_components, selected_components)
def test_airline_allow_multiplicative_trend():
    """Check the components selected by AutoETS with multiplicative trend allowed.

    Fits ``AutoETS(auto=True, sp=12, allow_multiplicative_trend=True)`` on the
    module-level series ``y`` and expects multiplicative error, trend and
    seasonality.
    """
    # reference selection (presumably taken from R's ets -- confirm)
    reference_components = ["mul", "mul", "mul"]

    auto_ets = AutoETS(
        auto=True,
        sp=12,
        n_jobs=-1,
        allow_multiplicative_trend=True,
    )
    auto_ets.fit(y)
    selected_model = auto_ets._fitted_forecaster

    # collect the selected components in (error, trend, seasonal) order
    selected_components = [
        getattr(selected_model, component)
        for component in ("error", "trend", "seasonal")
    ]
    assert_array_equal(reference_components, selected_components)
def test_multiplex_with_grid_search():
    """Test MultiplexForecaster performs as expected with ForecastingGridSearchCV.

    The typical use case of MultiplexForecaster is tuning its
    "selected_forecaster" parameter with ForecastingGridSearchCV. This test
    checks that the forecaster chosen by the grid search is the same one
    found by evaluating every candidate forecaster independently with
    ``_score_forecasters``.
    """
    y = load_shampoo_sales()

    candidates = [
        ("ets", AutoETS()),
        ("naive", NaiveForecaster()),
    ]
    candidate_names = [pair[0] for pair in candidates]
    multiplexer = MultiplexForecaster(forecasters=candidates)

    splitter = ExpandingWindowSplitter(start_with_window=True, step_length=12)
    grid_search = ForecastingGridSearchCV(
        cv=splitter,
        param_grid={"selected_forecaster": candidate_names},
        forecaster=multiplexer,
    )
    grid_search.fit(y)
    name_from_grid_search = grid_search.best_forecaster_.selected_forecaster

    # independent evaluation of each candidate with the same splitter and data
    name_from_direct_scoring = _score_forecasters(candidates, splitter, y)

    assert name_from_grid_search == name_from_direct_scoring
def test_auto_ets():
    """Fix bug in 1435.

    https://github.com/alan-turing-institute/sktime/issues/1435#issue-1000175469

    Fits ``AutoETS`` on a 30-minute series with a period index and checks that
    a three-step forecast is indexed by the three periods following the
    training data.
    """
    freq = "30T"
    n_obs = 50
    horizon = [1, 2, 3]

    # trend + noise + slow sine wave
    steps = np.arange(n_obs)
    values = steps + np.random.rand(n_obs) + np.sin(steps / 4) * 10
    timestamps = pd.date_range("2021-09-19", periods=n_obs, freq=freq)
    y = pd.Series(values, index=timestamps.to_period(freq=freq))

    auto_ets = AutoETS(sp=12, auto=True, n_jobs=-1)
    auto_ets.fit(y)
    y_pred = auto_ets.predict(fh=horizon)

    # the last three periods of a range extended by three steps
    extended = pd.date_range("2021-09-19", periods=n_obs + 3, freq=freq)
    expected_index = extended[-3:].to_period(freq=freq)
    pd.testing.assert_index_equal(y_pred.index, expected_index)
def test_airline_default():
    """Check automatic ETS selection under the default condition.

    Reference result from R::

        fit <- ets(AirPassengers, model = "ZZZ")
        components: "M" "A" "M" "TRUE"   (error, trend, season, damped)

    The only discrepancy lies in the damped flag (True in R but False in
    statsmodels), which is why only error, trend and seasonal are compared.
    """
    reference_components = ["mul", "add", "mul"]

    auto_ets = AutoETS(auto=True, sp=12, n_jobs=-1)
    auto_ets.fit(y)
    selected_model = auto_ets._fitted_forecaster

    # collect the selected components in (error, trend, seasonal) order
    selected_components = [
        getattr(selected_model, component)
        for component in ("error", "trend", "seasonal")
    ]
    assert_array_equal(reference_components, selected_components)
def test_nesting_pipelines():
    """Test that nesting of pipelines works.

    Builds a ForecastingPipeline whose final step is itself a
    TransformedTargetForecaster and runs "fit" then "predict" on it through
    the ForecasterFitPredictUnivariateWithX scenario.
    """
    from sktime.forecasting.ets import AutoETS
    from sktime.transformations.series.boxcox import LogTransformer
    from sktime.transformations.series.compose import OptionalPassthrough
    from sktime.transformations.series.detrend import Detrender
    from sktime.utils._testing.scenarios_forecasting import (
        ForecasterFitPredictUnivariateWithX,
    )

    # transformers applied by the outer pipeline
    log_x = OptionalPassthrough(LogTransformer())
    detrend_x = OptionalPassthrough(Detrender(forecaster=AutoETS()))

    # inner pipeline: log-transform the target, then forecast with AutoETS
    inner_forecaster = TransformedTargetForecaster(
        steps=[
            ("log", OptionalPassthrough(LogTransformer())),
            ("autoETS", AutoETS()),
        ]
    )

    pipe = ForecastingPipeline(
        steps=[
            ("logX", log_x),
            ("detrenderX", detrend_x),
            ("etsforecaster", inner_forecaster),
        ]
    )

    scenario = ForecasterFitPredictUnivariateWithX()
    scenario.run(pipe, method_sequence=["fit", "predict"])
# Exploratory forecasting script: fits an exponential smoothing model, plots and
# scores its forecast, then instantiates AutoETS / ARIMA forecasters.
# Relies on y_train, y_test and fh being defined earlier (not visible here).

# Exponential smoothing with additive trend and additive seasonality, sp=12.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)
ets_frcstr.fit(y_train)
y_pred = ets_frcstr.predict(fh)
# Plot train, test and forecast; the labels are runtime strings shown on the plot.
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])
# NOTE(review): the results of the next three expressions are discarded unless
# this runs as the last statement of a notebook cell -- presumably notebook code.
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()
smape_loss(y_test, y_pred)
auto_ets_frr = AutoETS()
# NOTE(review): AutoETS is fitted on y_pred (the forecast above), not on y_train
# -- confirm this is intended.
auto_ets_frr.fit(y_pred)
auto_ets_frr.summary()
# NOTE(review): the AutoARIMA instance is replaced by ARIMA() on the next line
# without being used.
arima_frr = AutoARIMA()
arima_frr = ARIMA()
# ARIMA with order (1, 1, 0) and seasonal order (0, 1, 0, 12); created but not
# fitted within this snippet.
forecaster = ARIMA(
    order=(1, 1, 0), seasonal_order=(0, 1, 0, 12), suppress_warnings=True
)
def _build_forecaster(model_type):
    """Return an unfitted forecaster for ``model_type``.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported model names.
    """
    if model_type == 'NaiveForecaster':
        return NaiveForecaster(strategy="last", sp=12)
    if model_type == 'PolynomialTrendForecaster':
        return PolynomialTrendForecaster(degree=2)
    if model_type == 'ThetaForecaster':
        return ThetaForecaster(sp=6)
    if model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        return ReducedRegressionForecaster(regressor=regressor,
                                           window_length=12,
                                           strategy="recursive")
    if model_type == 'ExponentialSmoothing':
        return ExponentialSmoothing(trend="add",
                                    seasonal="multiplicative",
                                    sp=12)
    if model_type == 'AutoETS':
        return AutoETS(auto=True, sp=12, n_jobs=-1)
    if model_type == 'AutoARIMA':
        return AutoARIMA(sp=12, suppress_warnings=True)
    if model_type == 'TBATS':
        return TBATS(sp=12, use_trend=True, use_box_cox=False)
    if model_type == 'BATS':
        return BATS(sp=12, use_trend=True, use_box_cox=False)
    if model_type == 'EnsembleForecaster':
        return EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected one of: "
        "'NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', "
        "'KNeighborsRegressor', 'ExponentialSmoothing', 'AutoETS', "
        "'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster'"
    )


def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """Build, save and plot a forecasting model for one customer.

    The series returned by ``prepare_data`` for the customer and time interval
    is split into train and test parts, the selected model is fitted on the
    train part, dumped to disk, scored with sMAPE on the test part and plotted.

    Parameters
    ----------
    data : pandas DataFrame
        Main dataset with customer_id, product_id and Timestamp.
    customer_id : int
        Customer to build the forecast for.
    start : str
        Start year and month in '2020-01' format.
    end : str
        End year and month in '2020-01' format. This month is not included.
    model_type : str
        Type of model to use in forecasting, one of
        ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster',
        'KNeighborsRegressor', 'ExponentialSmoothing', 'AutoETS', 'AutoARIMA',
        'TBATS', 'BATS', 'EnsembleForecaster'].
    test_size_month : int
        Number of months excluded from the end of the interval and used as
        the test dataset.
    model_storage_path : str
        Folder in which the fitted model is stored. An empty string stores
        the model relative to the current working directory.

    Returns
    -------
    plot
        The value returned by ``plot_series`` for the train, test and
        predicted series. The sMAPE loss is printed, not returned.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported model names.
    """
    import os

    # fail fast on an unsupported model name, before any data preparation
    forecaster = _build_forecaster(model_type)

    y_train, y_test = temporal_train_test_split(
        prepare_data(data, customer_id, start=start, end=end),
        test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on the series shifted by one, presumably to avoid
        # zeros that multiplicative models cannot handle -- confirm.
        # NOTE(review): forecasts from this fallback are on the shifted scale;
        # they are not shifted back before scoring and plotting.
        forecaster.fit(y_train + 1)
    y_pred = forecaster.predict(fh)

    model_file = (
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    )
    # os.path.join keeps the default model_storage_path='' relative to the
    # working directory instead of pointing at the filesystem root
    dump(forecaster, os.path.join(model_storage_path, model_file))

    # conventional (y_test, y_pred) argument order; sMAPE is symmetric in its
    # arguments, so the printed value is unchanged
    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot