def test_sliding_window_splitter_start_with_empty_window(
    y, fh, window_length, step_length
):
    """Check SlidingWindowSplitter with `start_with_window=False`.

    For supported input types, the stacked test windows must have one row per
    split and one column per horizon step, and the training windows left after
    dropping the incomplete ones must all have `window_length` elements.
    For unsupported input types, construction must raise a TypeError.
    """
    common_kwargs = dict(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    if not _inputs_are_supported([fh, window_length, step_length]):
        # Unsupported type combinations are expected to fail at construction.
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(initial_window=None, **common_kwargs)
        return

    splitter = SlidingWindowSplitter(**common_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    # Only the windows after the incomplete ones can be stacked into a
    # rectangular array.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]
    stacked_train = np.vstack(complete_windows)
    assert stacked_train.shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
def test_sliding_window_splitter_with_initial_window(
    y, fh, window_length, step_length, initial_window
):
    """Check SlidingWindowSplitter when an `initial_window` is given.

    For supported input types, the first training window must have
    `initial_window` elements, all later training windows `window_length`
    elements, and the test windows one column per horizon step.
    For unsupported input types, construction must raise a TypeError.
    """
    splitter_kwargs = dict(
        fh=fh,
        initial_window=initial_window,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )

    if not _inputs_are_supported([fh, initial_window, window_length, step_length]):
        # Unsupported type combinations are expected to fail at construction.
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(**splitter_kwargs)
        return

    splitter = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    first_window_size = train_windows[0].shape[0]
    assert first_window_size == _coerce_duration_to_int(
        duration=initial_window, freq="D"
    )

    later_windows = np.vstack(train_windows[1:])
    assert later_windows.shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
def test_sliding_window_splitter_initial_window_smaller_than_window_raise_error():
    """Check that `initial_window` < `window_length` raises when splitting.

    The error is expected on the first split, not at construction.
    """
    series = _make_series()
    splitter = SlidingWindowSplitter(fh=1, window_length=10, initial_window=5)

    expected_error = "`initial_window` must greater than `window_length`"
    with pytest.raises(ValueError, match=expected_error):
        next(splitter.split(series))
def test_sliding_window_splitter_initial_window_start_with_empty_window_raises_error():
    """Check that `initial_window` with `start_with_window=False` raises.

    The error is expected on the first split, not at construction.
    """
    series = _make_series()
    splitter = SlidingWindowSplitter(
        fh=1, initial_window=15, start_with_window=False
    )

    expected_error = "`start_with_window` must be True if `initial_window` is given"
    with pytest.raises(ValueError, match=expected_error):
        next(splitter.split(series))
def test_update_predict_predicted_index(
    self,
    estimator_instance,
    n_columns,
    fh_int_oos,
    window_length,
    step_length,
    update_params,
):
    """Check that `update_predict` returns predictions on the expected index.

    The estimator is fitted on the training part of a generated datetime-indexed
    series and then updated over the test part with a sliding-window splitter
    that starts with an empty window.
    """
    series = _make_series(
        n_columns=n_columns, all_positive=True, index_type="datetime"
    )
    y_train, y_test = temporal_train_test_split(series)

    splitter = SlidingWindowSplitter(
        fh_int_oos,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    estimator_instance.fit(y_train, fh=fh_int_oos)
    predictions = estimator_instance.update_predict(
        y_test, cv=splitter, update_params=update_params
    )
    assert isinstance(predictions, (pd.Series, pd.DataFrame))

    expected_index = _get_expected_index_for_update_predict(
        y_test, fh_int_oos, step_length
    )
    np.testing.assert_array_equal(predictions.index, expected_index)
def transform(self, Z, X=None):
    """Run `_hampel_filter` over sliding windows of a univariate series.

    Parameters
    ----------
    Z : series-like
        Input series; validated with `check_series(..., enforce_univariate=True)`.
    X : optional (default=None)
        Not used by this method.

    Returns
    -------
    z : filtered series, or, if `self.return_bool` is set, a series that is
        True wherever the filtered value is NaN and False elsewhere.
    """
    self.check_is_fitted()
    z = check_series(Z, enforce_univariate=True)

    # NaNs already present in the input are easy to confuse with the NaNs the
    # filter may add for outliers, so the user is warned up front.
    input_has_nan = z.isnull().values.any()
    if input_has_nan:
        warnings.warn(
            """Series contains nan values, more nan might be added if there are outliers"""
        )

    half_window_length = int(self.window_length / 2)
    splitter = SlidingWindowSplitter(
        window_length=self.window_length,
        step_length=1,
        start_with_window=True,
    )

    z = _hampel_filter(
        z=z,
        cv=splitter,
        n_sigma=self.n_sigma,
        half_window_length=half_window_length,
        k=self.k,
    )

    # Post-processing: optionally report a boolean NaN mask instead of values.
    if not self.return_bool:
        return z
    return z.apply(lambda value: bool(np.isnan(value)))
def test_sliding_window_split_start_with_window(y, fh, window_length, step_length):
    """Check training and test windows when splitting starts with a full window."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )

    # Generate and keep all splits.
    training_windows, test_windows, n_splits, cutoffs = generate_and_check_windows(
        y, splitter
    )

    # Starting with a full window means no incomplete training windows are
    # expected.
    expected_incomplete = 0
    check_windows_dimensions(training_windows, expected_incomplete, window_length)

    stacked = np.vstack(training_windows)

    # The last element of every training window is its cutoff.
    np.testing.assert_array_equal(cutoffs, stacked[:, -1])

    # The first window covers 0 .. window_length - 1.
    np.testing.assert_array_equal(stacked[0, :], np.arange(window_length))

    # Window starts advance by step_length from one split to the next.
    np.testing.assert_array_equal(
        stacked[:, 0] // step_length, np.arange(n_splits)
    )

    check_test_windows(test_windows, fh, cutoffs)
def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
        Validated with `check_cv` when given. When omitted, a
        `SlidingWindowSplitter` is built from the forecaster's relative
        forecasting horizon and `window_length_`, starting with an empty window.
    X : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=True)

    Returns
    -------
    y_pred : pd.Series or pd.DataFrame
    """
    if cv is None:
        relative_fh = self.fh.to_relative(self.cutoff)
        cv = SlidingWindowSplitter(
            relative_fh,
            window_length=self.window_length_,
            start_with_window=False,
        )
    else:
        cv = check_cv(cv)

    return self._predict_moving_cutoff(y, cv, X, update_params=update_params)
def _transform_series(self, Z):
    """Run `_hampel_filter` over sliding windows of a single series.

    Parameters
    ----------
    Z : pd.Series

    Returns
    -------
    pd.Series
        The filtered series, or, if `self.return_bool` is set, a series that
        is True wherever the filtered value is NaN and False elsewhere.
    """
    # NaNs already present in the input are easy to confuse with the NaNs the
    # filter may add for outliers, so the user is warned up front.
    input_has_nan = Z.isnull().values.any()
    if input_has_nan:
        warnings.warn(
            """Series contains nan values, more nan might be added if there are outliers"""
        )

    half_window_length = int(self.window_length / 2)
    splitter = SlidingWindowSplitter(
        window_length=self.window_length,
        step_length=1,
        start_with_window=True,
    )

    filtered = _hampel_filter(
        Z=Z,
        cv=splitter,
        n_sigma=self.n_sigma,
        half_window_length=half_window_length,
        k=self.k,
    )

    # Post-processing: optionally report a boolean NaN mask instead of values.
    if not self.return_bool:
        return filtered
    return filtered.apply(lambda value: bool(np.isnan(value)))
def test_sliding_window_split_start_with_fh(y, fh, window_length, step_length):
    """Check training and test windows when splitting starts with an empty window.

    The first training window must be empty, the second must have
    `min(step_length, window_length)` elements, and the window dimensions are
    checked against the expected number of incomplete windows.
    """
    cv = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    # Generate and keep all splits; the number of splits is not needed here.
    training_windows, test_windows, _, cutoffs = generate_and_check_windows(y, cv)

    # Check the first windows.
    assert len(training_windows[0]) == 0
    assert len(training_windows[1]) == min(step_length, window_length)

    # Infer the expected number of incomplete windows. The builtin `int` is
    # used because the `np.int` alias was deprecated in NumPy 1.20 and removed
    # in 1.24.
    n_incomplete_windows = int(np.ceil(window_length / step_length))
    check_windows_dimensions(training_windows, n_incomplete_windows, window_length)

    # Check the test windows.
    check_test_windows(test_windows, fh, cutoffs)
def update_predict(
    self,
    y_test,
    cv=None,
    X_test=None,
    update_params=False,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y_test : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
        Validated with `check_cv` when given. When omitted, a
        `SlidingWindowSplitter` is built from `self.fh` and
        `self.window_length_`.
    X_test : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=False)
    return_pred_int : bool, optional (default=False)
    alpha : optional (default=DEFAULT_ALPHA)
        Forwarded unchanged to `_predict_moving_cutoff`.

    Returns
    -------
    y_pred : pd.Series or pd.DataFrame
    """
    if cv is not None:
        cv = check_cv(cv)
    else:
        cv = SlidingWindowSplitter(self.fh, window_length=self.window_length_)

    return self._predict_moving_cutoff(
        y_test,
        cv,
        X=X_test,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )
def test_evaluate_initial_window():
    """Check `evaluate` with a splitter that uses an `initial_window`.

    The first row of the output must report the initial window as the training
    length, and its score must equal the score obtained by fitting a fresh
    clone on the first split by hand.
    """
    initial_window = 20
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    fh = 1
    cv = SlidingWindowSplitter(fh=fh, initial_window=initial_window)
    scoring = sMAPE()

    results = evaluate(
        forecaster=forecaster, y=y, cv=cv, strategy="update", scoring=scoring
    )
    _check_evaluate_output(results, cv, y, scoring)
    assert results.loc[0, "len_train_window"] == initial_window

    # Recompute the first score manually and compare.
    reported_score = results.loc[0, f"test_{scoring.name}"]
    train_idx, test_idx = next(cv.split(y))
    fresh_forecaster = clone(forecaster)
    fresh_forecaster.fit(y.iloc[train_idx], fh=fh)
    manual_score = scoring(y.iloc[test_idx], fresh_forecaster.predict())
    np.testing.assert_equal(reported_score, manual_score)
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Check the shapes of the train and test windows of SlidingWindowSplitter."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # Every training window has `window_length` elements.
    stacked_train = np.vstack(train_windows)
    assert stacked_train.shape == (n_splits, window_length)

    # Every test window has one element per forecasting-horizon step.
    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
def test_evaluate_no_exog_against_with_exog():
    """Check that adding exogenous data produces different results."""
    y, X = load_longley()
    forecaster = ARIMA(suppress_warnings=True)
    cv = SlidingWindowSplitter()
    scoring = sMAPE()

    with_exog = evaluate(forecaster, cv, y, X=X, scoring=scoring)
    without_exog = evaluate(forecaster, cv, y, X=None, scoring=scoring)

    # Scores must differ on every split.
    score_column = f"test_{scoring.name}"
    scores_differ = with_exog[score_column] != without_exog[score_column]
    assert np.all(scores_differ)
def test_sliding_window_transform_against_cv(n_timepoints, window_length, fh, scitype):
    """Compare `_sliding_window_transform` with windows taken from the splitter."""
    fh = check_fh(fh)
    series = pd.Series(_make_y(0, n_timepoints))
    splitter = SlidingWindowSplitter(fh=fh, window_length=window_length)

    # Reference windows come from the splitter; note the transform returns its
    # outputs in (y, X) order.
    x_from_cv, y_from_cv = _get_windows(splitter, series)
    y_transformed, x_transformed = _sliding_window_transform(
        series, window_length, fh, scitype=scitype
    )

    np.testing.assert_array_equal(y_from_cv, y_transformed)

    if scitype == "time-series-regressor":
        # Drop axis 1 so the array is comparable with the cv windows.
        x_transformed = x_transformed.squeeze(axis=1)
    np.testing.assert_array_equal(x_from_cv, x_transformed)
def test_evaluate_no_exog_against_with_exog():
    """Verify that evaluation scores change when exogenous data is supplied."""
    y, X = load_longley()
    forecaster = ARIMA(suppress_warnings=True)
    cv = SlidingWindowSplitter()
    scoring = MeanAbsolutePercentageError(symmetric=True)

    with_exog = evaluate(forecaster, cv, y, X=X, scoring=scoring)
    without_exog = evaluate(forecaster, cv, y, X=None, scoring=scoring)

    # Scores must differ on every split.
    score_column = f"test_{scoring.name}"
    scores_differ = with_exog[score_column] != without_exog[score_column]
    assert np.all(scores_differ)
def test_update_predict_predicted_indices(Forecaster, fh, window_length, step_length, y):
    """Check the predictions returned by `update_predict` for a forecaster.

    A NotImplementedError raised inside the guarded section is tolerated and
    makes the test pass without checking anything.
    """
    y_train, y_test = temporal_train_test_split(y)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        pass
def test_update_predict_predicted_indices(Forecaster, fh, window_length, step_length):
    """Check the predictions returned by `update_predict` for a forecaster.

    Uses a generated, all-positive, datetime-indexed series. A
    NotImplementedError raised inside the guarded section is tolerated and
    makes the test pass without checking anything.
    """
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        _check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        pass
def fit(self, y, X=None, fh=None):
    """Fit one regressor per step of the forecasting horizon.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables; not supported, passing any value raises.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    NotImplementedError
        If `X` is given, or if the horizon contains in-sample steps.
    """
    self._set_y_X(y, X)
    if X is not None:
        raise NotImplementedError("Exogenous variables `X` are not yet supported.")

    self._set_fh(fh)
    in_sample_steps = self.fh.to_in_sample(self.cutoff)
    if len(in_sample_steps) > 0:
        raise NotImplementedError("In-sample predictions are not implemented")

    self.step_length_ = check_step_length(self.step_length)
    self.window_length_ = check_window_length(self.window_length)

    # Direct reduction strategy: a separate regressor is fitted for each step
    # ahead of the forecasting horizon.
    relative_fh = self.fh.to_relative(self.cutoff)
    self._cv = SlidingWindowSplitter(
        fh=relative_fh,
        window_length=self.window_length_,
        step_length=self.step_length_,
        start_with_window=True,
    )

    # Turn the series into tabular form using the rolling-window split.
    X_tab, Y_tab = self._transform(y, X)

    # One target column, and hence one regressor, per horizon step.
    self.regressors_ = []
    for step in range(len(self.fh)):
        step_target = Y_tab[:, step]
        step_regressor = clone(self.regressor)
        step_regressor.fit(X_tab, step_target)
        self.regressors_.append(step_regressor)

    self._is_fitted = True
    return self
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Check the shapes of the train and test windows of SlidingWindowSplitter."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # Every training window has `window_length` elements, with the duration
    # coerced to an integer count.
    stacked_train = np.vstack(train_windows)
    assert stacked_train.shape == (
        n_splits,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    # Every test window has one element per forecasting-horizon step.
    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
def test_raises_not_fitted_error(Forecaster):
    """Check that post-fit methods raise NotFittedError before `fit` is called.

    Covers `update`, `update_predict` and `get_fitted_params`, using the
    module-level `y_test`.
    """
    f = _construct_instance(Forecaster)

    with pytest.raises(NotFittedError):
        f.update(y_test, update_params=False)

    # The splitter is built outside the `pytest.raises` block so that only the
    # call to `update_predict` can satisfy the expectation.
    cv = SlidingWindowSplitter(fh=1, window_length=1)
    with pytest.raises(NotFittedError):
        f.update_predict(y_test, cv=cv)

    # A NotImplementedError from `get_fitted_params` is tolerated.
    try:
        with pytest.raises(NotFittedError):
            f.get_fitted_params()
    except NotImplementedError:
        pass
def test_sliding_window_splitter_with_initial_window(
    y, fh, window_length, step_length, initial_window
):
    """Check window shapes of SlidingWindowSplitter when `initial_window` is set."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # The first training window has `initial_window` elements ...
    first_window_size = train_windows[0].shape[0]
    assert first_window_size == initial_window

    # ... and all later ones have `window_length` elements.
    later_windows = np.vstack(train_windows[1:])
    assert later_windows.shape == (n_splits - 1, window_length)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
def fit(self, y_train, fh=None, X_train=None):
    """Fit one regressor per step of the forecasting horizon.

    Parameters
    ----------
    y_train : pd.Series
        Target time series to which to fit the forecaster.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.
    X_train : pd.DataFrame, optional (default=None)
        Exogenous variables; not supported, passing any value raises.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    NotImplementedError
        If `X_train` is given, or if any horizon step is <= 0.
    """
    # Input checks.
    if X_train is not None:
        raise NotImplementedError()

    self._set_oh(y_train)
    self._set_fh(fh)

    has_in_sample_step = np.any(self.fh <= 0)
    if has_in_sample_step:
        raise NotImplementedError("in-sample predictions are not implemented")

    self.step_length_ = check_step_length(self.step_length)
    self.window_length_ = check_window_length(self.window_length)

    # Direct reduction strategy: a separate regressor is fitted for each step
    # ahead of the forecasting horizon.
    self._cv = SlidingWindowSplitter(
        fh=self.fh,
        window_length=self.window_length_,
        step_length=self.step_length_,
        start_with_window=True,
    )

    # Turn the series into tabular form using the rolling-window split.
    X_tab, Y_tab = self._transform(y_train, X_train)

    # One target column, and hence one regressor, per horizon step.
    self.regressors_ = []
    for step in range(len(self.fh)):
        step_target = Y_tab[:, step]
        step_regressor = clone(self.regressor)
        step_regressor.fit(X_tab, step_target)
        self.regressors_.append(step_regressor)

    self._is_fitted = True
    return self
def test_sliding_window_splitter_start_with_empty_window(
    y, fh, window_length, step_length
):
    """Check window shapes of SlidingWindowSplitter with `start_with_window=False`."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    # Only the windows after the incomplete ones can be stacked into a
    # rectangular array.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]
    stacked_train = np.vstack(complete_windows)
    assert stacked_train.shape == (n_splits - n_incomplete, window_length)
def fit(self, y_train, fh=None, X_train=None):
    """Fit a single one-step-ahead regressor on tabularised training data.

    Parameters
    ----------
    y_train : two-dimensional, pandas-like
        Target time series to which to fit the forecaster. Must be
        two-dimensional, because `y_train.shape[1]` is read.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.
    X_train : pd.DataFrame, optional (default=None)
        Exogenous variables; not supported, passing any value raises.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    NotImplementedError
        If `X_train` is given.
    """
    # Input checks.
    if X_train is not None:
        raise NotImplementedError()

    # Store the data and the forecasting horizon.
    self._set_y_X(y_train, X_train)
    self._set_fh(fh)

    # Record the number of columns in the target data.
    self._nbr_dependent = y_train.shape[1]

    self.step_length_ = check_step_length(self.step_length)
    self.window_length_ = check_window_length(self.window_length)

    # Recursive strategy: a single estimator is fit for a one-step-ahead
    # horizon and later called iteratively to predict multiple steps ahead.
    self._cv = SlidingWindowSplitter(
        fh=1,
        window_length=self.window_length_,
        step_length=self.step_length_,
        start_with_window=True,
    )

    # Transform the data into tabular form.
    X_tab, y_tab = self._transform(y_train, X_train)

    # Fit a fresh clone of the base regressor.
    fitted_regressor = clone(self.regressor)
    fitted_regressor.fit(X_tab, y_tab)
    self.regressor_ = fitted_regressor

    self._is_fitted = True
    return self
def fit(self, y, X=None, fh=None):
    """Fit a single multi-output regressor for all horizon steps at once.

    Parameters
    ----------
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables; not supported, passing any value raises.
    fh : int, list or np.array, optional (default=None)
        The forecasting horizon with the steps ahead to predict.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    NotImplementedError
        If `X` is given, or if the horizon contains in-sample steps.
    """
    self._set_y_X(y, X)
    if X is not None:
        raise NotImplementedError("Exogenous variables `X` are not yet supported.")

    self._set_fh(fh)
    in_sample_steps = self.fh.to_in_sample(self.cutoff)
    if len(in_sample_steps) > 0:
        raise NotImplementedError("In-sample predictions are not implemented")

    self.step_length_ = check_step_length(self.step_length)
    self.window_length_ = check_window_length(self.window_length)

    # Multioutput reduction strategy: one regressor that can handle
    # multi-dimensional outputs is fitted simultaneously to all future steps
    # of the forecasting horizon.
    relative_fh = self.fh.to_relative(self.cutoff)
    self._cv = SlidingWindowSplitter(
        fh=relative_fh,
        window_length=self.window_length_,
        step_length=self.step_length_,
        start_with_window=True,
    )

    # Turn the series into tabular form using the rolling-window split.
    X_tab, Y_tab = self._transform(y, X)

    # Fit a fresh clone of the base regressor on the full target matrix.
    fitted_regressor = clone(self.regressor)
    fitted_regressor.fit(X_tab, Y_tab)
    self.regressor_ = fitted_regressor

    self._is_fitted = True
    return self
def test_raises_not_fitted_error(self, estimator_instance):
    """Test that calling post-fit methods before fit raises error.

    Covers the extra methods of the forecaster API, `update` and
    `update_predict`, plus `get_fitted_params`, using the module-level `y_test`.
    """
    with pytest.raises(NotFittedError):
        estimator_instance.update(y_test, update_params=False)

    # The splitter is built outside the `pytest.raises` block so that only the
    # call to `update_predict` can satisfy the expectation.
    cv = SlidingWindowSplitter(fh=1, window_length=1, start_with_window=False)
    with pytest.raises(NotFittedError):
        estimator_instance.update_predict(y_test, cv=cv)

    # A NotImplementedError from `get_fitted_params` is tolerated.
    try:
        with pytest.raises(NotFittedError):
            estimator_instance.get_fitted_params()
    except NotImplementedError:
        pass
def test_sliding_window_splitter_with_incompatible_initial_window_and_window_length(
    y, fh, window_length, step_length, initial_window
):
    """Check that incompatible `initial_window`/`window_length` types raise.

    Parameter combinations that are compatible are skipped here.
    """
    windows_compatible = not _windows_are_incompatible(initial_window, window_length)
    if windows_compatible:
        pytest.skip(
            "Compatible initial_window and window_length are tested elsewhere."
        )

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )

    expected_error = "The `initial_window` and `window_length` types are incompatible"
    with pytest.raises(ValueError, match=expected_error):
        _check_cv(splitter, y)
def test_raises_not_fitted_error(Forecaster):
    """Check that post-fit methods raise NotFittedError before `fit` is called.

    Covers the extra methods of the forecaster API, `update` and
    `update_predict`, plus `get_fitted_params`, using the module-level `y_test`.
    """
    f = _construct_instance(Forecaster)

    # `predict` is checked in the test suite for all estimators.
    with pytest.raises(NotFittedError):
        f.update(y_test, update_params=False)

    # The splitter is built outside the `pytest.raises` block so that only the
    # call to `update_predict` can satisfy the expectation.
    cv = SlidingWindowSplitter(fh=1, window_length=1)
    with pytest.raises(NotFittedError):
        f.update_predict(y_test, cv=cv)

    # A NotImplementedError from `get_fitted_params` is tolerated.
    try:
        with pytest.raises(NotFittedError):
            f.get_fitted_params()
    except NotImplementedError:
        pass
def _check_update_predict_predicted_index(
    Forecaster, fh, window_length, step_length, update_params
):
    """Check that `update_predict` returns predictions on the expected index.

    The forecaster is fitted on the training part of a generated, all-positive,
    datetime-indexed series and then updated over the test part with a
    sliding-window splitter that starts with an empty window.
    """
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)

    splitter = SlidingWindowSplitter(
        fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)
    predictions = forecaster.update_predict(
        y_test, cv=splitter, update_params=update_params
    )
    assert isinstance(predictions, (pd.Series, pd.DataFrame))

    expected_index = _get_expected_index_for_update_predict(y_test, fh, step_length)
    np.testing.assert_array_equal(predictions.index, expected_index)