Пример #1
0
def test_sliding_window_splitter_start_with_empty_window(
        y, fh, window_length, step_length):
    """Check SlidingWindowSplitter when splitting starts without a full window.

    For supported argument combinations the stacked test windows must have
    shape ``(n_splits, len(fh))`` and, after dropping the leading incomplete
    training windows, the remaining training windows must all have the
    (integer-coerced) window length. Unsupported combinations must raise a
    ``TypeError`` when the splitter is constructed.
    """
    if not _inputs_are_supported([fh, window_length, step_length]):
        with pytest.raises(
                TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(
                fh=fh,
                initial_window=None,
                window_length=window_length,
                step_length=step_length,
                start_with_window=False,
            )
        return

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    # skip the leading windows that are shorter than the full window length
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]

    stacked_train = np.vstack(complete_windows)
    assert stacked_train.shape == (
        n_splits - n_incomplete,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )
Пример #2
0
def test_sliding_window_splitter_with_initial_window(y, fh, window_length,
                                                     step_length,
                                                     initial_window):
    """Check SlidingWindowSplitter when an initial window is supplied.

    For supported argument combinations the first training window must have
    the (integer-coerced) initial window length, every later training window
    the (integer-coerced) window length, and the stacked test windows shape
    ``(n_splits, len(fh))``. Unsupported combinations must raise a
    ``TypeError`` when the splitter is constructed.
    """
    supported = _inputs_are_supported(
        [fh, initial_window, window_length, step_length])
    if not supported:
        with pytest.raises(
                TypeError, match="Unsupported combination of types"):
            SlidingWindowSplitter(
                fh=fh,
                initial_window=initial_window,
                window_length=window_length,
                step_length=step_length,
                start_with_window=True,
            )
        return

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # the very first training window uses the initial window length
    first_window_length = train_windows[0].shape[0]
    assert first_window_length == _coerce_duration_to_int(
        duration=initial_window, freq="D")

    # all subsequent training windows use the regular window length
    stacked_later_windows = np.vstack(train_windows[1:])
    assert stacked_later_windows.shape == (
        n_splits - 1,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Пример #3
0
def test_sliding_window_splitter_initial_window_smaller_than_window_raise_error():
    """Check that splitting raises when `initial_window` is below `window_length`."""
    series = _make_series()
    splitter = SlidingWindowSplitter(fh=1, window_length=10, initial_window=5)
    # the error is expected when the first split is requested
    with pytest.raises(
        ValueError,
        match="`initial_window` must greater than `window_length`",
    ):
        next(splitter.split(series))
Пример #4
0
def test_sliding_window_splitter_initial_window_start_with_empty_window_raises_error():
    """Check that `initial_window` with `start_with_window=False` raises on split."""
    series = _make_series()
    splitter = SlidingWindowSplitter(
        fh=1, initial_window=15, start_with_window=False
    )
    # the error is expected when the first split is requested
    with pytest.raises(
        ValueError,
        match="`start_with_window` must be True if `initial_window` is given",
    ):
        next(splitter.split(series))
Пример #5
0
 def test_update_predict_predicted_index(
     self,
     estimator_instance,
     n_columns,
     fh_int_oos,
     window_length,
     step_length,
     update_params,
 ):
     """Check the index of the predictions returned by update_predict."""
     series = _make_series(
         n_columns=n_columns, all_positive=True, index_type="datetime"
     )
     y_train, y_test = temporal_train_test_split(series)
     splitter = SlidingWindowSplitter(
         fh_int_oos,
         window_length=window_length,
         step_length=step_length,
         start_with_window=False,
     )

     estimator_instance.fit(y_train, fh=fh_int_oos)
     y_pred = estimator_instance.update_predict(
         y_test, cv=splitter, update_params=update_params
     )
     assert isinstance(y_pred, (pd.Series, pd.DataFrame))

     # compare the predicted index against the one derived from the test data
     expected_index = _get_expected_index_for_update_predict(
         y_test, fh_int_oos, step_length
     )
     np.testing.assert_array_equal(y_pred.index, expected_index)
Пример #6
0
    def transform(self, Z, X=None):
        """Apply the Hampel filter to a univariate series.

        Parameters
        ----------
        Z : series-like
            Input, validated with ``check_series(Z, enforce_univariate=True)``.
        X : optional (default=None)
            Not used by this method.

        Returns
        -------
        z : result of ``_hampel_filter``; if ``self.return_bool`` is set,
            a boolean series that is True where the filtered value is nan.
        """
        self.check_is_fitted()
        z = check_series(Z, enforce_univariate=True)

        # warn if nan values in Series, as user might mix them
        # up with outliers otherwise
        if z.isnull().values.any():
            # keep the message on one line: a triple-quoted literal would
            # embed a newline and the source indentation in the warning text
            warnings.warn(
                "Series contains nan values, more nan might be "
                "added if there are outliers"
            )

        cv = SlidingWindowSplitter(window_length=self.window_length,
                                   step_length=1,
                                   start_with_window=True)
        half_window_length = int(self.window_length / 2)

        z = _hampel_filter(
            z=z,
            cv=cv,
            n_sigma=self.n_sigma,
            half_window_length=half_window_length,
            k=self.k,
        )

        # data post-processing: map each value to whether it is nan
        if self.return_bool:
            z = z.apply(lambda x: bool(np.isnan(x)))

        return z
Пример #7
0
def test_sliding_window_split_start_with_window(y, fh, window_length,
                                                step_length):
    """Check sliding window splits when the first split has a full window."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )

    # run the splitter and collect windows, number of splits and cutoffs
    windows = generate_and_check_windows(y, splitter)
    training_windows, test_windows, n_splits, cutoffs = windows

    # no incomplete training windows are expected in this configuration
    check_windows_dimensions(training_windows, 0, window_length)

    stacked = np.vstack(training_windows)

    # the last entry of every training window must equal its cutoff
    np.testing.assert_array_equal(cutoffs, stacked[:, -1])

    # the first window must hold the first `window_length` positions
    np.testing.assert_array_equal(stacked[0, :], np.arange(window_length))

    # window starts, divided by the step length, must count up per split
    np.testing.assert_array_equal(
        stacked[:, 0] // step_length, np.arange(n_splits)
    )

    check_test_windows(test_windows, fh, cutoffs)
Пример #8
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
    ):
        """Iteratively update and predict over the given data.

        Parameters
        ----------
        y : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
            Validated with ``check_cv`` when given; otherwise a
            ``SlidingWindowSplitter`` is built from the relative forecasting
            horizon and ``self.window_length_``.
        X : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=True)

        Returns
        -------
        y_pred : pd.Series or pd.DataFrame
        """
        if cv is None:
            # fall back to a sliding window splitter over the relative horizon
            relative_fh = self.fh.to_relative(self.cutoff)
            cv = SlidingWindowSplitter(
                relative_fh,
                window_length=self.window_length_,
                start_with_window=False,
            )
        else:
            cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y, cv, X, update_params=update_params
        )
Пример #9
0
    def _transform_series(self, Z):
        """Apply the Hampel filter to a single series.

        Parameters
        ----------
        Z : pd.Series

        Returns
        -------
        pd.Series
            Result of ``_hampel_filter``; if ``self.return_bool`` is set,
            a boolean series that is True where the filtered value is nan.
        """
        # warn if nan values in Series, as user might mix them
        # up with outliers otherwise
        if Z.isnull().values.any():
            # keep the message on one line: a triple-quoted literal would
            # embed a newline and the source indentation in the warning text
            warnings.warn(
                "Series contains nan values, more nan might be "
                "added if there are outliers"
            )

        cv = SlidingWindowSplitter(window_length=self.window_length,
                                   step_length=1,
                                   start_with_window=True)
        half_window_length = int(self.window_length / 2)

        Z = _hampel_filter(
            Z=Z,
            cv=cv,
            n_sigma=self.n_sigma,
            half_window_length=half_window_length,
            k=self.k,
        )

        # data post-processing: map each value to whether it is nan
        if self.return_bool:
            Z = Z.apply(lambda x: bool(np.isnan(x)))

        return Z
Пример #10
0
def test_sliding_window_split_start_with_fh(y, fh, window_length, step_length):
    """Check sliding window splits when the first training window is empty."""
    # initiate rolling window cv iterator
    cv = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    # generate and keep splits
    training_windows, test_windows, n_splits, cutoffs = generate_and_check_windows(
        y, cv)

    # check first windows
    assert len(training_windows[0]) == 0
    assert len(training_windows[1]) == min(step_length, window_length)

    # infer expected number of incomplete windows; the builtin `int` is used
    # because the `np.int` alias no longer exists in current NumPy releases
    n_incomplete_windows = int(np.ceil(window_length / step_length))
    check_windows_dimensions(training_windows, n_incomplete_windows,
                             window_length)

    # check test windows
    check_test_windows(test_windows, fh, cutoffs)
Пример #11
0
    def update_predict(self,
                       y_test,
                       cv=None,
                       X_test=None,
                       update_params=False,
                       return_pred_int=False,
                       alpha=DEFAULT_ALPHA):
        """Iteratively update and predict over the test set.

        Parameters
        ----------
        y_test : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
            Validated with ``check_cv`` when given; otherwise a
            ``SlidingWindowSplitter`` is built from ``self.fh`` and
            ``self.window_length_``.
        X_test : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=False)
        return_pred_int : bool, optional (default=False)
        alpha : optional (default=DEFAULT_ALPHA)
            Passed through to ``_predict_moving_cutoff``.

        Returns
        -------
        y_pred : pd.Series or pd.DataFrame
        """
        if cv is None:
            cv = SlidingWindowSplitter(
                self.fh, window_length=self.window_length_)
        else:
            cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y_test,
            cv,
            X=X_test,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
Пример #12
0
def test_evaluate_initial_window():
    """Check `evaluate` with a splitter that uses an initial window.

    The first row of the output must report the initial window as training
    length, and its score must match a forecaster fitted manually on the
    first split.
    """
    initial_window = 20
    fh = 1
    y = make_forecasting_problem(n_timepoints=30, index_type="int")
    forecaster = NaiveForecaster()
    splitter = SlidingWindowSplitter(fh=fh, initial_window=initial_window)
    metric = sMAPE()

    results = evaluate(
        forecaster=forecaster,
        y=y,
        cv=splitter,
        strategy="update",
        scoring=metric,
    )
    _check_evaluate_output(results, splitter, y, metric)
    assert results.loc[0, "len_train_window"] == initial_window

    # recompute the first score by hand and compare with the reported one
    reported_score = results.loc[0, f"test_{metric.name}"]
    train_idx, test_idx = next(splitter.split(y))
    manual_forecaster = clone(forecaster)
    manual_forecaster.fit(y.iloc[train_idx], fh=fh)
    manual_score = metric(y.iloc[test_idx], manual_forecaster.predict())
    np.testing.assert_equal(reported_score, manual_score)
Пример #13
0
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Check the shapes of train and test windows of SlidingWindowSplitter."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_train = np.vstack(train_windows)
    assert stacked_train.shape == (n_splits, window_length)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Пример #14
0
def test_evaluate_no_exog_against_with_exog():
    """Check that adding exogenous data produces different results."""
    y, X = load_longley()
    arima = ARIMA(suppress_warnings=True)
    splitter = SlidingWindowSplitter()
    metric = sMAPE()

    results_with_exog = evaluate(arima, splitter, y, X=X, scoring=metric)
    results_without_exog = evaluate(arima, splitter, y, X=None, scoring=metric)

    # every score must differ between the two runs
    score_column = f"test_{metric.name}"
    scores_differ = (
        results_with_exog[score_column] != results_without_exog[score_column]
    )
    assert np.all(scores_differ)
Пример #15
0
def test_sliding_window_transform_against_cv(n_timepoints, window_length, fh, scitype):
    """Compare `_sliding_window_transform` output with windows from the splitter."""
    fh = check_fh(fh)
    y = pd.Series(_make_y(0, n_timepoints))

    # reference windows obtained through the splitter
    splitter = SlidingWindowSplitter(fh=fh, window_length=window_length)
    x_from_cv, y_from_cv = _get_windows(splitter, y)

    # windows produced by the transform under test
    y_transformed, x_transformed = _sliding_window_transform(
        y, window_length, fh, scitype=scitype
    )
    np.testing.assert_array_equal(y_from_cv, y_transformed)

    # for this scitype, axis 1 is squeezed out before comparing
    if scitype == "time-series-regressor":
        x_transformed = x_transformed.squeeze(axis=1)

    np.testing.assert_array_equal(x_from_cv, x_transformed)
Пример #16
0
def test_evaluate_no_exog_against_with_exog():
    """Verify that evaluation scores change once exogenous data is supplied."""
    y, X = load_longley()
    arima = ARIMA(suppress_warnings=True)
    splitter = SlidingWindowSplitter()
    metric = MeanAbsolutePercentageError(symmetric=True)

    results_with_exog = evaluate(arima, splitter, y, X=X, scoring=metric)
    results_without_exog = evaluate(arima, splitter, y, X=None, scoring=metric)

    # every score must differ between the two runs
    score_column = f"test_{metric.name}"
    scores_differ = (
        results_with_exog[score_column] != results_without_exog[score_column]
    )
    assert np.all(scores_differ)
Пример #17
0
def test_update_predict_predicted_indices(Forecaster, fh, window_length,
                                          step_length, y):
    """Check the predictions of update_predict against the test data.

    A ``NotImplementedError`` raised while predicting or checking is
    tolerated and ends the test silently.
    """
    y_train, y_test = temporal_train_test_split(y)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )
    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        pass
Пример #18
0
def test_update_predict_predicted_indices(Forecaster, fh, window_length,
                                          step_length):
    """Check the predictions of update_predict on a datetime-indexed series.

    A ``NotImplementedError`` raised while predicting or checking is
    tolerated and ends the test silently.
    """
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )
    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        _check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        pass
Пример #19
0
    def fit(self, y, X=None, fh=None):
        """Fit one regressor per step of the forecasting horizon.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables; not supported, must be None.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given, or if the horizon contains in-sample steps.
        """
        self._set_y_X(y, X)
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")

        self._set_fh(fh)
        in_sample_fh = self.fh.to_in_sample(self.cutoff)
        if len(in_sample_fh) > 0:
            raise NotImplementedError(
                "In-sample predictions are not implemented")

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # direct reduction: the splitter spans the whole relative horizon so
        # that a separate regressor can be fitted for each step ahead
        self._cv = SlidingWindowSplitter(
            fh=self.fh.to_relative(self.cutoff),
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )

        # tabularise the series via the rolling window split
        X, Y_train = self._transform(y, X)

        # fit a fresh clone of the regressor on each target column
        self.regressors_ = []
        for step in range(len(self.fh)):
            step_target = Y_train[:, step]
            step_regressor = clone(self.regressor)
            step_regressor.fit(X, step_target)
            self.regressors_.append(step_regressor)

        self._is_fitted = True
        return self
Пример #20
0
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Check the shapes of train and test windows of SlidingWindowSplitter."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # every training window has the (integer-coerced) window length
    stacked_train = np.vstack(train_windows)
    assert stacked_train.shape == (
        n_splits,
        _coerce_duration_to_int(duration=window_length, freq="D"),
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Пример #21
0
def test_raises_not_fitted_error(Forecaster):
    """Check that post-fit methods raise NotFittedError on an unfitted forecaster."""
    forecaster = _construct_instance(Forecaster)

    with pytest.raises(NotFittedError):
        forecaster.update(y_test, update_params=False)

    with pytest.raises(NotFittedError):
        splitter = SlidingWindowSplitter(fh=1, window_length=1)
        forecaster.update_predict(y_test, cv=splitter)

    # a NotImplementedError from `get_fitted_params` is tolerated
    try:
        with pytest.raises(NotFittedError):
            forecaster.get_fitted_params()
    except NotImplementedError:
        pass
Пример #22
0
def test_sliding_window_splitter_with_initial_window(
    y, fh, window_length, step_length, initial_window
):
    """Check window shapes of SlidingWindowSplitter with an initial window."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    # the first training window uses the initial window length
    first_window_length = train_windows[0].shape[0]
    assert first_window_length == initial_window

    # all later training windows use the regular window length
    stacked_later_windows = np.vstack(train_windows[1:])
    assert stacked_later_windows.shape == (n_splits - 1, window_length)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Пример #23
0
    def fit(self, y_train, fh=None, X_train=None):
        """Fit one regressor per step of the forecasting horizon.

        Parameters
        ----------
        y_train : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X_train : pd.DataFrame, optional (default=None)
            Exogenous variables; not supported, must be None.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X_train` is given, or if any horizon step is <= 0.
        """
        # exogenous data is rejected up front
        if X_train is not None:
            raise NotImplementedError()

        self._set_oh(y_train)
        self._set_fh(fh)
        has_in_sample_steps = np.any(self.fh <= 0)
        if has_in_sample_steps:
            raise NotImplementedError(
                "in-sample predictions are not implemented")

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # direct reduction: the splitter spans the whole horizon so that a
        # separate regressor can be fitted for each step ahead
        self._cv = SlidingWindowSplitter(
            fh=self.fh,
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )

        # tabularise the series via the rolling window split
        X_train, Y_train = self._transform(y_train, X_train)

        # fit a fresh clone of the regressor on each target column
        self.regressors_ = []
        for step in range(len(self.fh)):
            step_target = Y_train[:, step]
            step_regressor = clone(self.regressor)
            step_regressor.fit(X_train, step_target)
            self.regressors_.append(step_regressor)

        self._is_fitted = True
        return self
Пример #24
0
def test_sliding_window_splitter_start_with_empty_window(
    y, fh, window_length, step_length
):
    """Check window shapes when splitting starts without a full window."""
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True
    )

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    # skip the leading windows that are shorter than the full window length
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    complete_windows = train_windows[n_incomplete:]
    stacked_train = np.vstack(complete_windows)
    assert stacked_train.shape == (n_splits - n_incomplete, window_length)
Пример #25
0
    def fit(self, y_train, fh=None, X_train=None):
        """Fit a single regressor on a one-step-ahead tabularisation.

        Parameters
        ----------
        y_train : array-like with a two-dimensional ``shape``
            Target data to which to fit the forecaster; ``shape[1]`` is
            stored as ``self._nbr_dependent``.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.
        X_train : pd.DataFrame, optional (default=None)
            Exogenous variables; not supported, must be None.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X_train` is given.
        """
        # exogenous data is rejected up front
        if X_train is not None:
            raise NotImplementedError()

        # store data and horizon on the instance
        self._set_y_X(y_train, X_train)
        self._set_fh(fh)
        # remember the size of the second axis of the training data
        self._nbr_dependent = y_train.shape[1]

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # recursive strategy: one estimator is fitted for a one-step-ahead
        # horizon and later applied iteratively for multi-step predictions
        self._cv = SlidingWindowSplitter(
            fh=1,
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )

        # tabularise the data and fit a fresh clone of the base regressor
        X_tabular, y_tabular = self._transform(y_train, X_train)
        fitted_regressor = clone(self.regressor)
        fitted_regressor.fit(X_tabular, y_tabular)
        self.regressor_ = fitted_regressor

        self._is_fitted = True
        return self
Пример #26
0
    def fit(self, y, X=None, fh=None):
        """Fit a single multi-output regressor for the whole horizon.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables; not supported, must be None.
        fh : int, list or np.array, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        NotImplementedError
            If `X` is given, or if the horizon contains in-sample steps.
        """
        self._set_y_X(y, X)
        if X is not None:
            raise NotImplementedError(
                "Exogenous variables `X` are not yet supported.")

        self._set_fh(fh)
        in_sample_fh = self.fh.to_in_sample(self.cutoff)
        if len(in_sample_fh) > 0:
            raise NotImplementedError(
                "In-sample predictions are not implemented")

        self.step_length_ = check_step_length(self.step_length)
        self.window_length_ = check_window_length(self.window_length)

        # multioutput reduction: one regressor is fitted to all future steps
        # of the horizon at once, so it must handle multi-dimensional targets
        self._cv = SlidingWindowSplitter(
            fh=self.fh.to_relative(self.cutoff),
            window_length=self.window_length_,
            step_length=self.step_length_,
            start_with_window=True,
        )

        # tabularise the series via the rolling window split
        X, Y_train = self._transform(y, X)

        # fit a fresh clone of the regressor on the full target matrix
        fitted_regressor = clone(self.regressor)
        fitted_regressor.fit(X, Y_train)
        self.regressor_ = fitted_regressor

        self._is_fitted = True
        return self
Пример #27
0
    def test_raises_not_fitted_error(self, estimator_instance):
        """Check that post-fit methods raise NotFittedError before fitting."""
        # Covers the forecaster-specific methods update and update_predict,
        # plus get_fitted_params.
        with pytest.raises(NotFittedError):
            estimator_instance.update(y_test, update_params=False)

        with pytest.raises(NotFittedError):
            splitter = SlidingWindowSplitter(
                fh=1, window_length=1, start_with_window=False
            )
            estimator_instance.update_predict(y_test, cv=splitter)

        # a NotImplementedError from `get_fitted_params` is tolerated
        try:
            with pytest.raises(NotFittedError):
                estimator_instance.get_fitted_params()
        except NotImplementedError:
            pass
Пример #28
0
def test_sliding_window_splitter_with_incompatible_initial_window_and_window_length(
        y, fh, window_length, step_length, initial_window):
    """Check that incompatible `initial_window`/`window_length` raise ValueError."""
    incompatible = _windows_are_incompatible(initial_window, window_length)
    if not incompatible:
        pytest.skip(
            "Compatible initial_window and window_length are tested elsewhere."
        )

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        initial_window=initial_window,
        start_with_window=True,
    )
    # the error is expected while the splits are being checked
    with pytest.raises(
        ValueError,
        match="The `initial_window` and `window_length` types are incompatible",
    ):
        _check_cv(splitter, y)
Пример #29
0
def test_raises_not_fitted_error(Forecaster):
    """Check that post-fit methods raise NotFittedError on an unfitted forecaster."""
    # Covers the forecaster-specific methods update and update_predict,
    # plus get_fitted_params.
    forecaster = _construct_instance(Forecaster)

    with pytest.raises(NotFittedError):
        forecaster.update(y_test, update_params=False)

    with pytest.raises(NotFittedError):
        splitter = SlidingWindowSplitter(fh=1, window_length=1)
        forecaster.update_predict(y_test, cv=splitter)

    # a NotImplementedError from `get_fitted_params` is tolerated
    try:
        with pytest.raises(NotFittedError):
            forecaster.get_fitted_params()
    except NotImplementedError:
        pass
Пример #30
0
def _check_update_predict_predicted_index(Forecaster, fh, window_length,
                                          step_length, update_params):
    """Check the index of the predictions returned by update_predict."""
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)
    splitter = SlidingWindowSplitter(
        fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.update_predict(
        y_test, cv=splitter, update_params=update_params
    )
    assert isinstance(y_pred, (pd.Series, pd.DataFrame))

    # compare the predicted index against the one derived from the test data
    expected_index = _get_expected_index_for_update_predict(
        y_test, fh, step_length
    )
    np.testing.assert_array_equal(y_pred.index, expected_index)