def test_pred_errors_against_y_test(fh):
    """Check ThetaForecaster prediction intervals on the airline dataset.

    A default ``ThetaForecaster`` is fitted on the training split, prediction
    intervals are requested with ``coverage=[0.1]`` and the held-out
    observations at the horizon ``fh`` are compared with every returned
    interval column.

    Parameters
    ----------
    fh : ForecastingHorizon
        Horizon at which the intervals are requested and checked.

    Raises
    ------
    AssertionError
        If any held-out value violates the bound comparison below.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh=fh)
    intervals = forecaster.predict_interval(fh=fh, coverage=[0.1])

    # Keep only the hold-out observations that match the horizon steps
    # (horizon steps are 1-based, positional indexing is 0-based).
    y_test = y_test.iloc[check_fh(fh) - 1]

    # Performance should be good enough for every comparison to hold.
    # NOTE(review): presumably the second entry of each column label is the
    # coverage level (0.1 here), which would make the ``else`` branch
    # unreachable and compare y_test with ``>`` against both bounds --
    # confirm against the column layout returned by predict_interval.
    for column in intervals:
        bound = intervals[column].values
        if column[1] < 0.5:
            assert np.all(y_test > bound)
        else:
            assert np.all(y_test <= bound)
def forecast(self):
    """Fit a Theta model on ``self.pd_past5days`` and store its forecast.

    The predictions are written to ``self.pd_predictions``, a list version
    (via ``series_to_list``) to ``self.predictions``, and control is then
    handed to ``self.plot`` through ``self.next``.
    """
    # Imports stay local to this method, in the original order.
    from openweatherdata import series_to_list
    from sktime.forecasting.theta import ThetaForecaster
    import numpy

    model = ThetaForecaster(sp=48)
    model.fit(self.pd_past5days)

    # NOTE(review): arange(1, 48) yields 47 steps while sp=48 -- confirm
    # whether a full seasonal cycle (48 steps) was intended.
    horizon = numpy.arange(1, 48)
    self.pd_predictions = model.predict(horizon)
    self.predictions = series_to_list(self.pd_predictions)

    self.next(self.plot)
def test_forecaster_with_initial_level():
    """Check ThetaForecaster with a fixed initial level on the airline data.

    The model is fitted on the log1p-transformed training split with
    ``initial_level=0.1`` and ``sp=12``; its forecasts over the whole test
    split must match the held-out values within a relative tolerance of 5%.

    Raises
    ------
    AssertionError
        If the forecasts are not close enough to the test data.
    """
    log_series = np.log1p(load_airline())
    y_train, y_test = temporal_train_test_split(log_series)

    # One horizon step per held-out observation: 1, 2, ..., len(y_test).
    steps_ahead = np.arange(1, len(y_test) + 1)

    model = ThetaForecaster(initial_level=0.1, sp=12)
    model.fit(y_train)
    forecast_values = model.predict(fh=steps_ahead)

    np.testing.assert_allclose(forecast_values, y_test, rtol=0.05)
def construct_M4_forecasters(sp, fh):
    """Build the named forecasters used for the M4 comparison.

    Parameters
    ----------
    sp : int
        Seasonal periodicity. Only when ``sp > 1`` are the seasonal model and
        periodicity forwarded to the deseasonalising wrappers.
    fh : object
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    dict
        Mapping from method name to an unfitted forecaster.
    """
    # Seasonal settings are only meaningful for seasonal data.
    if sp > 1:
        seasonal_kwargs = {"model": SEASONAL_MODEL, "sp": sp}
    else:
        seasonal_kwargs = {}

    # Theta on Box-Cox transformed, conditionally deseasonalised data.
    theta_bc = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_R,
                                  **seasonal_kwargs),
        BoxCoxTransformer(bounds=(0, 1)),
        ThetaForecaster(deseasonalise=False),
    )

    # Disabled MLP / RNN pipelines, kept as an inert string literal.
    """
    MLP = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveRegressionForecaster(
            regressor=MLPRegressor(hidden_layer_sizes=6, activation="identity",
                                   solver="adam", max_iter=100,
                                   learning_rate="adaptive",
                                   learning_rate_init=0.001),
            window_length=3)
    )
    RNN = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveTimeSeriesRegressionForecaster(
            regressor=SimpleRNNRegressor(nb_epochs=100),
            window_length=3)
    )
    """

    return {
        "Naive": NaiveForecaster(strategy="last"),
        "sNaive": NaiveForecaster(strategy="seasonal_last", sp=sp),
        "Naive2": deseasonalise(NaiveForecaster(strategy="last"),
                                **seasonal_kwargs),
        "SES": deseasonalise(ses, **seasonal_kwargs),
        "Holt": deseasonalise(holt, **seasonal_kwargs),
        "Damped": deseasonalise(damped, **seasonal_kwargs),
        "Theta": deseasonalise(ThetaForecaster(deseasonalise=False),
                               **seasonal_kwargs),
        "ARIMA": AutoARIMA(suppress_warnings=True, error_action="ignore",
                           sp=sp),
        "Com": deseasonalise(
            EnsembleForecaster(
                [("ses", ses), ("holt", holt), ("damped", damped)]),
            **seasonal_kwargs),
        # "MLP": MLP,
        # "RNN": RNN,
        "260": theta_bc,
    }
def test_predictive_performance_on_airline():
    """Check point-forecast accuracy of ThetaForecaster on the airline data.

    A ``ThetaForecaster(sp=12)`` is fitted on the log1p-transformed training
    split; forecasts for the whole test split must lie within 5% (relative)
    of the held-out values.

    Raises
    ------
    AssertionError
        If the forecasts are not close enough to the test data.
    """
    log_series = np.log1p(load_airline())
    y_train, y_test = temporal_train_test_split(log_series)

    # Horizon steps 1..len(y_test), one per held-out observation.
    steps_ahead = np.arange(1, len(y_test) + 1)

    model = ThetaForecaster(sp=12)
    model.fit(y_train)
    forecast_values = model.predict(fh=steps_ahead)

    # Performance on this particular dataset should be reasonably good.
    np.testing.assert_allclose(forecast_values, y_test, rtol=0.05)
def test_multiplex_or_dunder():
    """Test that the MultiplexForecaster "|" dunder method behaves as expected.

    A MultiplexForecaster can be created by applying "|" to forecasters or to
    MultiplexForecaster objects. All supported combinations are exercised
    here, plus the error raised for an unsupported operand.
    """
    # Case 1: "|" between two plain forecasters.
    pair = AutoETS() | NaiveForecaster()
    assert isinstance(pair, MultiplexForecaster)
    assert len(pair.forecasters) == 2

    # Case 2: "|" between two MultiplexForecasters merges their members.
    multiplex_one = MultiplexForecaster(
        [("arima", AutoARIMA()), ("ets", AutoETS())]
    )
    multiplex_two = MultiplexForecaster(
        [("theta", ThetaForecaster()), ("naive", NaiveForecaster())]
    )
    merged = multiplex_one | multiplex_two
    assert isinstance(merged, MultiplexForecaster)
    assert len(merged.forecasters) == 4

    # Case 3: three forecasters of the same class. This covers both
    # "MultiplexForecaster | forecaster" and the de-duplication of names.
    same_class = (
        NaiveForecaster(strategy="last")
        | NaiveForecaster(strategy="mean")
        | NaiveForecaster(strategy="drift")
    )
    assert isinstance(same_class, MultiplexForecaster)
    assert len(same_class.forecasters) == 3
    member_names = same_class._get_estimator_names(same_class.forecasters)
    assert len(set(member_names)) == 3

    # Case 4: "|" with anything that is not a forecaster raises a TypeError.
    with pytest.raises(TypeError):
        multiplex_one | "this shouldn't work"
def test_multiplex_forecaster_alone():
    """Test that MultiplexForecaster reproduces its selected forecaster.

    MultiplexForecaster wraps an underlying forecaster, so once
    ``selected_forecaster`` is set its predictions must equal those of the
    same forecaster fitted on its own.
    """
    from numpy.testing import assert_array_equal

    y = load_shampoo_sales()

    # Both candidates are deterministic, so exact equality can be asserted.
    forecaster_tuples = [
        ("naive", NaiveForecaster()),
        ("theta", ThetaForecaster()),
    ]
    multiplex_forecaster = MultiplexForecaster(forecasters=forecaster_tuples)
    fh_test = [1, 2, 3]

    # For every candidate, compare wrapped against unwrapped predictions.
    for name, candidate in forecaster_tuples:
        # Clone so the standalone model is a separate object.
        standalone = clone(candidate)
        standalone.fit(y)

        # The original note says MultiplexForecaster copies the selected
        # forecaster before fitting.
        multiplex_forecaster.selected_forecaster = name
        multiplex_forecaster.fit(y)

        expected = standalone.predict(fh=fh_test)
        actual = multiplex_forecaster.predict(fh=fh_test)
        assert_array_equal(expected, actual)
def get_test_params(cls, parameter_set="default"):
    """Return testing parameter settings for the estimator.

    Parameters
    ----------
    parameter_set : str, default="default"
        Name of the set of test parameters to return, for use in tests.
        This implementation returns the same settings for every value.

    Returns
    -------
    params : list of dict
        Each dict holds the parameters of one "interesting" test instance,
        i.e. ``MyClass(**params[i])`` creates a valid test instance.
        ``create_test_instance`` uses the first dictionary in ``params``.
    """
    # Local imports, as in the original.
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.theta import ThetaForecaster

    # One parameter set per wrapped forecaster class, in this order.
    wrapped_classes = (NaiveForecaster, ThetaForecaster)
    return [{"forecasters": klass()} for klass in wrapped_classes]
def test_forecaster_with_initial_level():
    """Check prediction performance on the airline dataset.

    Uses a ``ThetaForecaster`` with ``initial_level=0.1`` and ``sp=12`` on
    log1p-transformed data; performance should be reasonably good.

    Raises
    ------
    AssertionError
        If the point forecasts are not within 5% (relative) of the test data.
    """
    transformed = np.log1p(load_airline())
    train_part, test_part = temporal_train_test_split(transformed)

    # Forecast every step of the held-out period: 1, 2, ..., len(test_part).
    horizon = np.arange(1, len(test_part) + 1)

    theta = ThetaForecaster(initial_level=0.1, sp=12)
    theta.fit(train_part)
    predicted = theta.predict(fh=horizon)

    np.testing.assert_allclose(predicted, test_part, rtol=0.05)
def run_sktimes(dept_id, store_id):
    """Forecast 28 steps for one department/store pair with an ensemble.

    Parameters
    ----------
    dept_id, store_id
        Identifiers passed to ``CreateTimeSeries`` and echoed in the result.

    Returns
    -------
    numpy.ndarray
        ``[dept_id, store_id]`` followed by the 28 ensemble forecasts.
    """
    # Build the series for this department/store pair.
    ts = CreateTimeSeries(dept_id, store_id)

    # Ensemble members: naive, theta and exponential-smoothing variants.
    members = [
        ('naive_ses', NaiveForecaster(sp=28, strategy="seasonal_last")),
        ('naive', NaiveForecaster(strategy="last")),
        ('theta_ses', ThetaForecaster(sp=28)),
        ('theta', ThetaForecaster()),
        ("exp_ses", ExponentialSmoothing(seasonal="additive", sp=28)),
        ("exp_damped", ExponentialSmoothing(trend='additive', damped=True,
                                            seasonal="additive", sp=28)),
    ]
    ensemble = EnsembleForecaster(members)

    # The series is shifted by +1 for fitting and the forecasts are shifted
    # back by -1 below.
    # NOTE(review): presumably this keeps the inputs strictly positive --
    # confirm.
    ensemble.fit(ts.y + 1)
    horizon = np.arange(1, 29)
    shifted_forecast = ensemble.predict(horizon)

    identifiers = np.array([dept_id, store_id])
    return np.append(identifiers, shifted_forecast - 1)
def select_regressor(selection):
    """Return a freshly constructed estimator for the given short name.

    Parameters
    ----------
    selection : str
        One of ``'LR'``, ``'KNN'``, ``'RF'``, ``'GB'``, ``'XGBoost'``,
        ``'SVM'``, ``'Extra Trees'``, ``'Naive'``, ``'Theta'``,
        ``'Exp_Smoothing'`` or ``'TBATS'``.

    Returns
    -------
    object
        A new, unfitted estimator instance.

    Raises
    ------
    KeyError
        If ``selection`` is not one of the names above.
    """
    # Map names to zero-argument factories so that only the requested
    # estimator is instantiated, instead of building all eleven on each call.
    factories = {
        'LR': LinearRegression,
        'KNN': KNeighborsRegressor,
        'RF': RandomForestRegressor,
        'GB': GradientBoostingRegressor,
        'XGBoost': lambda: XGBRegressor(verbosity=0),
        'SVM': LinearSVR,
        'Extra Trees': ExtraTreesRegressor,
        'Naive': lambda: NaiveForecaster(strategy="last", sp=12),
        'Theta': lambda: ThetaForecaster(sp=12),
        'Exp_Smoothing': lambda: ExponentialSmoothing(
            trend="add", seasonal="multiplicative", sp=12),
        'TBATS': lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
    }
    return factories[selection]()
def test_pred_errors_against_y_test(fh):
    """Check that held-out airline values fall inside the error bands.

    A default ``ThetaForecaster`` is fitted with horizon ``fh``; the bands
    are the point forecasts plus/minus the errors returned by
    ``_compute_pred_errors(alpha=0.1)``.

    Parameters
    ----------
    fh : forecasting horizon
        Horizon steps to forecast and check.

    Raises
    ------
    AssertionError
        If a held-out value lies on or outside a band.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    point_forecast = forecaster.predict(return_pred_int=False)

    # A single Series is wrapped in a list so the loop below works either way.
    raw_errors = forecaster._compute_pred_errors(alpha=0.1)
    error_bands = [raw_errors] if isinstance(raw_errors, pd.Series) else raw_errors

    # Select the hold-out observations at the horizon steps (1-based -> 0-based).
    y_test = y_test.iloc[check_fh(fh) - 1]

    for band in error_bands:
        assert np.all(y_test > point_forecast - band)
        assert np.all(y_test < point_forecast + band)
def test_pred_errors_against_y_test(fh):
    """Check that held-out airline values fall inside the prediction intervals.

    A default ``ThetaForecaster`` is fitted with horizon ``fh`` and intervals
    are computed from its point forecasts via ``compute_pred_int`` with
    ``[0.1]`` as the second argument.

    Parameters
    ----------
    fh : forecasting horizon
        Horizon steps to forecast and check.

    Raises
    ------
    AssertionError
        If a held-out value lies on or outside an interval.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    point_forecast = forecaster.predict(return_pred_int=False)
    intervals = forecaster.compute_pred_int(point_forecast, [0.1])

    # Select the hold-out observations at the horizon steps (1-based -> 0-based).
    y_test = y_test.iloc[check_fh(fh) - 1]

    # Performance should be good enough that all held-out values lie within
    # the prediction intervals.
    for interval in intervals:
        lower, upper = interval["lower"], interval["upper"]
        assert np.all(y_test > lower)
        assert np.all(y_test < upper)
# pip install sktime
import numpy as np
from sktime.datasets import load_airline
from sktime.forecasting.theta import ThetaForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss

# Load the airline series and split it into training and hold-out parts.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)

# Forecasting horizon: one step per hold-out observation (1..len(y_test)).
fh = np.arange(len(y_test)) + 1

# Theta model with monthly seasonal periodicity.
forecaster = ThetaForecaster(sp=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)

# sMAPE between hold-out values and forecasts. The value is not stored, so
# it is only visible where bare expressions are echoed (e.g. a notebook).
smape_loss(y_test, y_pred)
return df, df2 if uploaded_file is not None: df, df2 = load_data(uploaded_file) # prepare models models = [] models.append(('LR', LinearRegression())) models.append(('KNN', KNeighborsRegressor())) models.append(('RF', RandomForestRegressor())) models.append(('GB', GradientBoostingRegressor())) models.append(('XGBoost', XGBRegressor(verbosity = 0))) models.append(('SVM', LinearSVR())) models.append(('Extra Trees', ExtraTreesRegressor())) models.append(('Naive', NaiveForecaster(strategy="last", sp=12))) models.append(('Theta', ThetaForecaster(sp=12))) models.append(('Exp_Smoothing', ExponentialSmoothing(trend="add", seasonal="additive", sp=12))) models.append(('TBATS', TBATS(sp=12, use_trend=True, use_box_cox=False))) forecast_horizon = st.sidebar.slider(label = 'Forecast Length (months)',min_value = 3, max_value = 36, value = 12) window_length = st.sidebar.slider(label = 'Sliding Window Length ',min_value = 1, value = 12) # evaluate each model in turn results1 = [] names = [] dn_forecast = [] dn_test =[] for name, model in models: if name == 'LR' or name == 'KNN' or name == 'RF' or name == 'GB' or name == 'XGBoost' or name == 'SVM' or name == 'Extra Trees': forecaster = ReducedRegressionForecaster(regressor=model, window_length=window_length,strategy='recursive')
# Baseline: naive "last value" forecaster, updated over the test period with
# a sliding window and a one-step-ahead horizon.
forecaster = NaiveForecaster(strategy="last")
forecaster.fit(y_train)
cv = SlidingWindowSplitter(fh=1)
y_pred = forecaster.update_predict(y_test, cv)
plot_ys(y_train, y_test, y_pred)
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

# User-facing explanation (runtime text, left untouched). It says that only
# point forecasts were covered so far, that prediction intervals are not yet
# implemented for every algorithm, and that the Theta forecaster is used here.
st.write(''' * Prediction intervals 到目前为止,我们仅关注点预测。 在许多情况下,我们也对预测间隔感兴趣。 sktime的界面支持预测间隔,但我们尚未针对所有算法实现它们。在这里,我们使用Theta预测算法 ''')

from sktime.forecasting.theta import ThetaForecaster

forecaster = ThetaForecaster(sp=12)
forecaster.fit(y_train)
alpha = 0.05  # 95% prediction intervals
y_pred, pred_ints = forecaster.predict(fh, return_pred_int=True, alpha=alpha)
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

fig, ax = plot_ys(y_train, y_test, y_pred,
                  labels=["y_train", "y_test", "y_pred"])
# The label formats the coverage as a percentage ("95%"); the previous
# f"{1 - alpha}%" rendered the misleading text "0.95%".
ax.fill_between(y_pred.index,
                pred_ints["lower"],
                pred_ints["upper"],
                alpha=0.2,
                color="green",
                label=f"{1 - alpha:.0%} prediction intervals")
def main():
    """Compare several sktime forecasters on the airline dataset.

    Fits a seasonal naive model, AutoARIMA, exponential smoothing, Theta and
    an ensemble of the four on all but the last 36 months, plots the
    forecasts against the hold-out data and prints each model's sMAPE as a
    percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records).
    df = datasets.load_airline()
    # The last 36 months are held out for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)

    # Forecast horizon: one step per hold-out observation.
    fh = np.arange(1, len(y_test) + 1)

    # Strategy options: last, mean, seasonal_last; sp=12 is a yearly season.
    forecaster = NaiveForecaster(strategy='seasonal_last', sp=12)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    forecaster2 = AutoARIMA(sp=12, suppress_warnings=True, trace=1)
    forecaster2.fit(y_train)
    y_pred2 = forecaster2.predict(fh)

    # ``damped`` is a boolean flag. The string 'True' used before only worked
    # because any non-empty string (even 'False') is truthy.
    forecaster3 = ExponentialSmoothing(trend='add',
                                       damped=True,
                                       seasonal='multiplicative',
                                       sp=12)
    forecaster3.fit(y_train)
    y_pred3 = forecaster3.predict(fh)

    forecaster4 = ThetaForecaster(sp=12)
    forecaster4.fit(y_train)
    y_pred4 = forecaster4.predict(fh)

    forecaster5 = EnsembleForecaster([
        ('NaiveForecaster', NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
        ('Exp Smoothing',
         ExponentialSmoothing(trend='add',
                              damped=True,
                              seasonal='multiplicative',
                              sp=12)),
        ('Theta', ThetaForecaster(sp=12)),
    ])
    forecaster5.fit(y_train)
    y_pred5 = forecaster5.predict(fh)

    plot_ys(y_train,
            y_test,
            y_pred,
            y_pred2,
            y_pred3,
            y_pred4,
            y_pred5,
            labels=[
                'Train', 'Test', 'Naive Forecaster', 'AutoARIMA',
                'Exp Smoothing', 'Theta', 'Ensemble'
            ])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    # Scale to percent before rounding: ``100 * round(x, 3)`` can print
    # floating-point artefacts such as 14.499999999999998.
    results = [
        ('NaiveForecaster', y_pred),
        ('AutoARIMA', y_pred2),
        ('Exp Smoothing', y_pred3),
        ('Theta', y_pred4),
        ('Ensemble', y_pred5),
    ]
    for model_name, prediction in results:
        print(f'SMAPE Error for {model_name} is:',
              round(100 * smape_loss(y_test, prediction), 1), '%')
"ShapeletTransformClassifier": ["check_fit_idempotent"], "ContractedShapeletTransform": ["check_fit_idempotent"], } TRANSFORMER = StandardScaler() TRANSFORMERS = [ ("t1", RowTransformer(TRANSFORMER)), ("t2", RowTransformer(TRANSFORMER)), ] REGRESSOR = LinearRegression() TIME_SERIES_CLASSIFIER = TimeSeriesForest(random_state=1) TIME_SERIES_CLASSIFIERS = [("tsf1", TIME_SERIES_CLASSIFIER), ("tsf2", TIME_SERIES_CLASSIFIER)] FORECASTER = ExponentialSmoothing() FORECASTERS = [("ses1", FORECASTER), ("ses2", FORECASTER)] STEPS = [("t", Detrender(ThetaForecaster())), ("f", NaiveForecaster())] ESTIMATOR_TEST_PARAMS = { DirectRegressionForecaster: { "regressor": REGRESSOR }, RecursiveRegressionForecaster: { "regressor": REGRESSOR }, DirectTimeSeriesRegressionForecaster: { "regressor": make_pipeline(Tabularizer(), REGRESSOR) }, RecursiveTimeSeriesRegressionForecaster: { "regressor": make_pipeline(Tabularizer(), REGRESSOR) }, TransformedTargetForecaster: { "steps": STEPS
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """Build, evaluate, save and plot a forecasting model for one customer.

    Parameters
    ----------
    data : pandas DataFrame
        Main dataset with customer_id, product_id and Timestamp.
    customer_id : int
        Customer whose series is forecast.
    start : str
        Start year and month in '2020-01' format.
    end : str
        End year and month in '2020-01' format; this month is not included.
    model_type : str
        One of 'NaiveForecaster', 'PolynomialTrendForecaster',
        'ThetaForecaster', 'KNeighborsRegressor', 'ExponentialSmoothing',
        'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster'.
    test_size_month : int
        Number of months held out from the end of the interval for testing.
    model_storage_path : str
        Folder in which the fitted model is stored. The empty default stores
        the file relative to the current working directory.

    Returns
    -------
    plot
        The value returned by ``plot_series`` for the train, test and
        predicted series. The sMAPE loss is printed, not returned.

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    import os  # local import: only needed to build the model file path

    series = prepare_data(data, customer_id, start=start, end=end)
    y_train, y_test = temporal_train_test_split(series,
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    if model_type == 'NaiveForecaster':
        forecaster = NaiveForecaster(strategy="last", sp=12)
    elif model_type == 'PolynomialTrendForecaster':
        forecaster = PolynomialTrendForecaster(degree=2)
    elif model_type == 'ThetaForecaster':
        forecaster = ThetaForecaster(sp=6)
    elif model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        forecaster = ReducedRegressionForecaster(regressor=regressor,
                                                 window_length=12,
                                                 strategy="recursive")
    elif model_type == 'ExponentialSmoothing':
        forecaster = ExponentialSmoothing(trend="add",
                                          seasonal="multiplicative",
                                          sp=12)
    elif model_type == 'AutoETS':
        forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
    elif model_type == 'AutoARIMA':
        forecaster = AutoARIMA(sp=12, suppress_warnings=True)
    elif model_type == 'TBATS':
        forecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'BATS':
        forecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'EnsembleForecaster':
        forecaster = EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])
    else:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(f"Unknown model_type: {model_type!r}")

    # Best-effort fallback: if fitting on the raw series fails, retry on the
    # series shifted by +1.
    # NOTE(review): presumably this guards against zeros rejected by the
    # multiplicative models -- confirm which exception is actually expected
    # and narrow the handler accordingly.
    offset = 0
    try:
        forecaster.fit(y_train)
    except Exception:
        offset = 1
        forecaster.fit(y_train + offset)

    y_pred = forecaster.predict(fh)
    if offset:
        # Undo the shift so forecasts are on the scale of y_test. The saved
        # model itself remains fitted on the shifted series.
        y_pred = y_pred - offset

    file_name = (f'{customer_id}_{model_type}_{start}_{end}_'
                 f'{test_size_month}.model')
    # os.path.join keeps the default '' from resolving to the filesystem root.
    dump(forecaster, os.path.join(model_storage_path, file_name))

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
( "transformer2", SeriesToSeriesRowTransformer(SERIES_TO_SERIES_TRANSFORMER, check_transformer=False), ), ] REGRESSOR = LinearRegression() TIME_SERIES_CLASSIFIER = TimeSeriesForest(n_estimators=3) TIME_SERIES_CLASSIFIERS = [ ("tsf1", TIME_SERIES_CLASSIFIER), ("tsf2", TIME_SERIES_CLASSIFIER), ] FORECASTER = ExponentialSmoothing() FORECASTERS = [("ses1", FORECASTER), ("ses2", FORECASTER)] STEPS = [ ("transformer", Detrender(ThetaForecaster())), ("forecaster", NaiveForecaster()), ] ESTIMATOR_TEST_PARAMS = { OnlineEnsembleForecaster: { "forecasters": FORECASTERS }, FeatureUnion: { "transformer_list": TRANSFORMERS }, DirectRegressionForecaster: { "regressor": REGRESSOR }, MultioutputRegressionForecaster: { "regressor": REGRESSOR },
import numpy as np import pandas as pd import pytest from sktime.datasets import load_airline from sktime.forecasting.model_selection import temporal_train_test_split from sktime.forecasting.theta import ThetaForecaster from sktime.performance_metrics.forecasting.probabilistic import PinballLoss list_of_metrics = [PinballLoss] # test data y = np.log1p(load_airline()) y_train, y_test = temporal_train_test_split(y) fh = np.arange(len(y_test)) + 1 f = ThetaForecaster(sp=12) f.fit(y_train) QUANTILE_PRED = f.predict_quantiles(fh=fh, alpha=[0.5]) INTERVAL_PRED = f.predict_interval(fh=fh, coverage=0.9) @pytest.mark.parametrize("score_average", [True, False]) @pytest.mark.parametrize("Metric", list_of_metrics) def test_output(Metric, score_average): """Test output is correct class.""" y_true = y_test loss = Metric.create_test_instance() loss.set_params(score_average=score_average) eval_loss = loss.evaluate(y_true, y_pred=QUANTILE_PRED) index_loss = loss.evaluate_by_index(y_true, y_pred=QUANTILE_PRED)