Exemplo n.º 1
0
def test_single_window_split(y, fh, window_length):
    """Check the single split produced by SingleWindowSplitter.

    The splitter must report exactly one split, the training window must
    hold `window_length` entries and end at the first cutoff, and the test
    window must equal the last training index shifted by each step in `fh`.
    """
    splitter = SingleWindowSplitter(fh=fh, window_length=window_length)
    training_windows, test_windows, n_splits, cutoffs = (
        generate_and_check_windows(y, splitter))

    train = training_windows[0]
    test = test_windows[0]
    last_train_idx = train[-1]

    assert n_splits == 1
    assert train.shape[0] == window_length
    assert last_train_idx == cutoffs[0]

    steps = check_fh(fh)
    assert test.shape[0] == len(steps)
    np.testing.assert_array_equal(test, last_train_idx + steps)
Exemplo n.º 2
0
def test_sliding_window_splitter_start_with_empty_window(
        y, fh, window_length, step_length):
    """Test SlidingWindowSplitter with ``start_with_window=False``.

    When the argument types are supported, the stacked test windows must
    have one row per split and one column per step in `fh`, and the
    training windows left after skipping the incomplete ones must all have
    the full window length. Otherwise, constructing the splitter must raise
    a TypeError.
    """
    if not _inputs_are_supported([fh, window_length, step_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SlidingWindowSplitter(
                fh=fh,
                initial_window=None,
                window_length=window_length,
                step_length=step_length,
                start_with_window=False,
            )
        return

    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )
    train_windows, test_windows, _, n_splits = _check_cv(
        splitter, y, allow_empty_window=True)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    # Skip the leading windows that the helper reports as incomplete.
    n_incomplete = _get_n_incomplete_windows(window_length, step_length)
    stacked_train = np.vstack(train_windows[n_incomplete:])
    full_width = _coerce_duration_to_int(duration=window_length, freq="D")
    assert stacked_train.shape == (n_splits - n_incomplete, full_width)
Exemplo n.º 3
0
def test_sliding_window_splitter_with_initial_window(y, fh, window_length,
                                                     step_length,
                                                     initial_window):
    """Test SlidingWindowSplitter when an `initial_window` is given.

    For supported argument types, the first training window must have the
    length of `initial_window`, every later training window the length of
    `window_length`, and the test windows one column per step in `fh`.
    For unsupported types, constructing the splitter must raise TypeError.
    """
    splitter_kwargs = dict(
        fh=fh,
        initial_window=initial_window,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )

    if not _inputs_are_supported(
            [fh, initial_window, window_length, step_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            SlidingWindowSplitter(**splitter_kwargs)
        return

    splitter = SlidingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    first_len = _coerce_duration_to_int(duration=initial_window, freq="D")
    assert train_windows[0].shape[0] == first_len

    later_windows = np.vstack(train_windows[1:])
    later_len = _coerce_duration_to_int(duration=window_length, freq="D")
    assert later_windows.shape == (n_splits - 1, later_len)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Exemplo n.º 4
0
def test_single_window_splitter_default_window_length(y, fh):
    """Test SingleWindowSplitter when no `window_length` is passed.

    Checks the number of splits, the size of the test window, the size of
    the training window (all of `y` for a purely in-sample horizon,
    otherwise `y` shortened by the largest step), and the exact positions
    in the test window.
    """
    splitter = SingleWindowSplitter(fh=fh)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(splitter, y)
    train, test = train_windows[0], test_windows[0]

    assert n_splits == 1
    steps = check_fh(fh)
    assert test.shape[0] == len(steps)

    if splitter.get_fh().is_all_in_sample():
        expected_train_len = len(y)
    elif array_is_int(steps):
        expected_train_len = len(y) - steps.max()
    else:
        # Non-integer steps: count the time points up to the latest index
        # minus the largest step.
        expected_train_len = len(y[y.index <= y.index.max() - steps.max()])
    assert train.shape[0] == expected_train_len

    last_train_pos = train[-1]
    if array_is_int(steps):
        expected_test = last_train_pos + steps
    else:
        anchor = y.index[last_train_pos]
        expected_test = np.array(
            [y.index.get_loc(anchor + step) for step in steps])
    np.testing.assert_array_equal(test, expected_test)
Exemplo n.º 5
0
def test_expanding_window_splitter_start_with_empty_window(
        y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter, skipping incomplete training windows.

    For supported argument types, the stacked test windows must have one
    row per split and one column per step in `fh`, and the remaining
    training windows must pass `_check_expanding_windows`. For unsupported
    types, constructing the splitter must raise TypeError.
    """
    # NOTE(review): the test name mentions an empty starting window, but the
    # splitter is built with start_with_window=True - confirm this is
    # intended.
    splitter_kwargs = dict(
        fh=fh,
        initial_window=initial_window,
        step_length=step_length,
        start_with_window=True,
    )

    if not _inputs_are_supported([fh, initial_window, step_length]):
        with pytest.raises(TypeError,
                           match="Unsupported combination of types"):
            ExpandingWindowSplitter(**splitter_kwargs)
        return

    splitter = ExpandingWindowSplitter(**splitter_kwargs)
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    n_incomplete = _get_n_incomplete_windows(initial_window, step_length)
    _check_expanding_windows(train_windows[n_incomplete:])
Exemplo n.º 6
0
    def _set_fh(self, fh):
        """Validate and store the forecasting horizon.

        Parameters
        ----------
        fh : None, int, list or np.ndarray
            If given, it is validated with `check_fh` and replaces any
            previously stored horizon. If None, the stored horizon (if any)
            is kept.

        Raises
        ------
        ValueError
            If `fh` is None, the estimator is fitted and no horizon has been
            stored before.
        """
        if fh is not None:
            # A passed horizon always overwrites the stored one. Warning
            # only when a horizon given in `fit` is replaced would require
            # tracking whether `fit` received one, so no warning is issued.
            self._fh = check_fh(fh)
            return

        # No horizon passed: that is only an error once the estimator is
        # fitted and there is still nothing stored to fall back on.
        if self.is_fitted and self._fh is None:
            raise ValueError(
                "The forecasting horizon `fh` must be passed either to "
                "`fit` or `predict`, but was found in neither.")
Exemplo n.º 7
0
def _get_expected_index_for_update_predict(y, fh, step_length):
    """Compute the expected time index of predictions from `update_predict`.

    Parameters
    ----------
    y : object with a `pd.DatetimeIndex` index that has a set `freq`
    fh : forecasting horizon, validated with `check_fh`; must be relative
    step_length : int
        Stride used to sub-sample the cutoff points.

    Returns
    -------
    pd.DatetimeIndex
        Unique, sorted time points at which predictions are expected.

    Raises
    ------
    AssertionError
        If the index is not a `pd.DatetimeIndex` with a frequency, or if
        `fh` is not relative.
    """
    # time points at which to make predictions
    fh = check_fh(fh)
    index = y.index

    # only works with date-time index
    assert isinstance(index, pd.DatetimeIndex)
    assert hasattr(index, "freq") and index.freq is not None
    assert fh.is_relative

    freq = index.freq
    start = index[0] - 1 * freq  # initial cutoff
    end = index[-1]  # last point to predict

    # Generate the candidate cutoffs on the same grid as the index. Without
    # an explicit `freq`, `pd.date_range` falls back to daily steps, which
    # yields cutoffs off the grid for any non-daily index.
    cutoffs = pd.date_range(start, end, freq=freq)

    # only predict at time points if all steps in fh can be predicted before
    # the end of y_test
    cutoffs = cutoffs[cutoffs + max(fh) * freq <= max(index)]

    # apply step length and recast to ignore inferred freq value
    cutoffs = cutoffs[::step_length]
    cutoffs = pd.DatetimeIndex(cutoffs, freq=None)

    # generate all predicted time points, including duplicates from
    # overlapping fh steps
    pred_index = pd.DatetimeIndex([])
    for step in fh:
        pred_index = pred_index.append(cutoffs + step * freq)

    # return unique and sorted index
    return pred_index.unique().sort_values()
Exemplo n.º 8
0
def test_single_window_splitter(y, fh, window_length):
    """Test SingleWindowSplitter with an explicit `window_length`.

    For supported argument types, exactly one split is expected, the
    training window must have the length of `window_length`, and the test
    window must sit at the last training position shifted by each step in
    `fh`. For unsupported types, the constructor must raise TypeError.
    """
    if not _inputs_are_supported([fh, window_length]):
        match = "Unsupported combination of types"
        with pytest.raises(TypeError, match=match):
            SingleWindowSplitter(fh=fh, window_length=window_length)
        return

    splitter = SingleWindowSplitter(fh=fh, window_length=window_length)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(splitter, y)
    train, test = train_windows[0], test_windows[0]

    assert n_splits == 1
    expected_len = _coerce_duration_to_int(duration=window_length, freq="D")
    assert train.shape[0] == expected_len

    steps = check_fh(fh)
    assert test.shape[0] == len(steps)

    last_train_pos = train[-1]
    if array_is_int(steps):
        expected_test = last_train_pos + steps
    else:
        anchor = y.index[last_train_pos]
        expected_test = np.array(
            [y.index.get_loc(anchor + step) for step in steps])
    np.testing.assert_array_equal(test, expected_test)
Exemplo n.º 9
0
def test_pred_errors_against_y_test(fh):
    """Check ThetaForecaster prediction intervals on the airline dataset.

    The held-out values at the steps in `fh` are compared against every
    column returned by `predict_interval` with ``coverage=[0.1]``.

    Arguments
    ---------
    fh: ForecastingHorizon, fh at which to test prediction

    Raises
    ------
    AssertionError - if the held-out values do not lie within the
        prediction intervals
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh=fh)
    intervals = forecaster.predict_interval(fh=fh, coverage=[0.1])

    # Select the held-out observations at the (1-based) steps in fh.
    y_test = y_test.iloc[check_fh(fh) - 1]

    # NOTE(review): the second level of each column key is treated as a
    # fraction, with values below 0.5 marking a lower bound - confirm this
    # matches the column layout returned by `predict_interval`.
    for column in intervals:
        bound = intervals[column].values
        if column[1] < 0.5:
            assert np.all(y_test > bound)
        else:
            assert np.all(y_test <= bound)
Exemplo n.º 10
0
def test_sliding_window_tranform_tabular(n_timepoints, window_length,
                                         n_variables, fh):
    """Check `_sliding_window_transform` output for a tabular regressor.

    Verifies the shape of the transformed target, the target values for the
    first step in `fh`, and the shape and value ordering of the transformed
    feature array.
    """
    y, X = _make_y_X(n_timepoints=n_timepoints, n_variables=n_variables)
    fh = check_fh(fh, enforce_relative=True)
    first_step, last_step = fh[0], fh[-1]
    n_dropped = window_length + last_step - 1

    yt, Xt = _sliding_window_transform(
        y,
        window_length=window_length,
        fh=fh,
        X=X,
        scitype="tabular-regressor",
    )
    n_rows = n_timepoints - n_dropped
    assert yt.shape == (n_rows, len(fh))

    # Check y values for first step in fh.
    first_pos = window_length + first_step - 1
    stop_pos = first_pos + n_timepoints - window_length - last_step + 1
    np.testing.assert_array_equal(yt[:, 0], y[np.arange(first_pos, stop_pos)])

    # Xt holds `window_length` lagged values per variable. An earlier
    # version of this check additionally expected n_variables - 1
    # contemporaneous columns, i.e. a width of
    # (window_length * n_variables) + n_variables - 1.
    assert Xt.shape == (yt.shape[0], window_length * n_variables)
    assert np.all(Xt < yt[:, [0]])
Exemplo n.º 11
0
def test_dummy_regressor_mean_prediction_endogenous_only(
    fh, window_length, strategy, scitype
):
    """Test reduction with a mean-predicting DummyRegressor, without X.

    A DummyRegressor with ``strategy="mean"`` is wrapped by
    `make_reduction`, fitted on the training series and asked to predict.
    The forecast must equal the mean of the training values that remain
    after the leading window has been cut off.
    """
    y = make_forecasting_problem()
    fh = check_fh(fh)
    y_train, y_test = temporal_train_test_split(y, fh=fh)

    forecaster = make_reduction(
        DummyRegressor(strategy="mean"),
        scitype=scitype,
        window_length=window_length,
        strategy=strategy,
    )
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    # The recursive strategy always regresses on the first step ahead, so
    # only `window_length` leading values are excluded from the target. The
    # other strategies also account for the furthest step to be predicted.
    n_leading = (
        window_length
        if strategy == "recursive"
        else window_length + max(fh) - 1
    )

    # The leading values only ever appear as features, never as targets, so
    # the expected forecast is the mean of everything after them.
    expected = np.mean(y_train[n_leading:])
    np.testing.assert_array_almost_equal(actual, expected)
Exemplo n.º 12
0
def _split_by_fh(y, fh, X=None):
    """Split a time series (and optionally X) using a forecasting horizon.

    Handles both relative and absolute horizons.

    Parameters
    ----------
    y : series indexed by time
    fh : forecasting horizon, validated with `check_fh`
    X : optional exogenous data; its time index is checked against `y`

    Returns
    -------
    (y_train, y_test) if `X` is None, else (y_train, y_test, X_train, X_test)

    Raises
    ------
    ValueError
        If a relative `fh` is not entirely out-of-sample.
    """
    if X is not None:
        check_equal_time_index(y, X)
    fh = check_fh(fh)
    horizon = fh.to_pandas()
    time_index = y.index

    if not fh.is_relative:
        # Absolute horizon: train on everything before its first value,
        # test on the index range spanned by the horizon.
        first, last = horizon.min(), horizon.max()
        train = time_index[time_index < first]
        test = time_index[(time_index <= last) & (first <= time_index)]
        y_test = y.loc[horizon]
    elif fh.is_all_out_of_sample():
        # Relative horizon: hold out as many trailing points as the largest
        # step, then pick the requested steps from that hold-out.
        n_holdout = horizon.max()
        positions = fh.to_indexer()
        train = time_index[:-n_holdout]
        test = time_index[-n_holdout:]
        y_test = y.loc[test[positions]]
    else:
        raise ValueError("`fh` must only contain out-of-sample values")

    y_train = y.loc[train]
    if X is None:
        return y_train, y_test
    return y_train, y_test, X.loc[train], X.loc[test]
Exemplo n.º 13
0
def test_sliding_window_tranform_panel(n_timepoints, window_length,
                                       n_variables, fh):
    """Check `_sliding_window_transform` output for a time series regressor.

    Verifies the shape of the transformed target, the target values for the
    first step in `fh`, and the 3-dimensional shape and value ordering of
    the transformed feature array.
    """
    y, X = _make_y_X(n_timepoints=n_timepoints, n_variables=n_variables)
    fh = check_fh(fh, enforce_relative=True)
    first_step, last_step = fh[0], fh[-1]
    n_dropped = window_length + last_step - 1

    yt, Xt = _sliding_window_transform(
        y,
        window_length=window_length,
        X=X,
        fh=fh,
        scitype="time-series-regressor",
    )
    n_rows = n_timepoints - n_dropped
    assert yt.shape == (n_rows, len(fh))

    # Check y values.
    first_pos = window_length + first_step - 1
    stop_pos = first_pos + n_timepoints - window_length - last_step + 1
    np.testing.assert_array_equal(yt[:, 0], y[np.arange(first_pos, stop_pos)])

    # Every value in the transformed Xt array is expected to be smaller
    # than the first-step target in the same row.
    assert Xt.shape == (yt.shape[0], n_variables, window_length)
    assert np.all(Xt < yt[:, np.newaxis, [0]])
Exemplo n.º 14
0
def test_strategy_mean_seasonal(fh, sp, window_length):
    """Check NaiveForecaster with the seasonal "mean" strategy.

    The case is only exercised when `window_length` is None or larger than
    `sp`. The expected forecast is the per-season mean of the last window,
    repeated to cover the largest step in `fh`.
    """
    if window_length is not None and not window_length > sp:
        return

    forecaster = NaiveForecaster(
        strategy="mean", sp=sp, window_length=window_length)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    # check predicted index
    _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)

    if window_length is None:
        window_length = len(y_train)

    # check values
    fh = check_fh(fh)  # get well formatted fh
    n_tiles = int(np.ceil(max(fh) / sp))

    # Left-pad the last window with NaN so that it splits into rows of
    # length `sp`; `nanmean` ignores the padding.
    window = y_train.iloc[-window_length:].to_numpy().astype(float)
    n_pad = sp - len(window) % sp
    window = np.pad(window, (n_pad, 0), "constant", constant_values=np.nan)
    n_rows = int(np.ceil(len(window) / sp))
    window = window.reshape(n_rows, sp)

    seasonal_means = np.nanmean(window, axis=0)
    expected = np.tile(seasonal_means, reps=n_tiles)[fh - 1]
    np.testing.assert_array_equal(y_pred, expected)
Exemplo n.º 15
0
def test_single_window_splitter_default_window_length(y, fh):
    """Test SingleWindowSplitter when no `window_length` is passed.

    Exactly one split is expected. The training window must cover all of
    `y` for a purely in-sample horizon and `len(y)` minus the largest step
    otherwise; the test window must equal the last training position
    shifted by each step in `fh`.
    """
    splitter = SingleWindowSplitter(fh=fh)
    train_windows, test_windows, _cutoffs, n_splits = _check_cv(splitter, y)
    train, test = train_windows[0], test_windows[0]

    assert n_splits == 1
    assert test.shape[0] == len(check_fh(fh))

    horizon = splitter.get_fh()
    if horizon.is_all_in_sample():
        expected_train_len = len(y)
    else:
        expected_train_len = len(y) - horizon.max()
    assert train.shape[0] == expected_train_len

    expected_test = train[-1] + check_fh(fh)
    np.testing.assert_array_equal(test, expected_test)
Exemplo n.º 16
0
    def get_fh(self) -> ForecastingHorizon:
        """Return the forecasting horizon.

        Returns
        -------
        fh : ForecastingHorizon
            The result of passing the stored `fh` attribute through
            `check_fh`.
        """
        checked_fh = check_fh(self.fh)
        return checked_fh
Exemplo n.º 17
0
    def _split_windows(self, y):
        """Yield the single (training window, test window) pair for `y`.

        The training window ends just before the cutoff position. It starts
        at 0 when no window length is set and `window_length` positions
        before the cutoff otherwise. The test window is the cutoff shifted
        by each relative step in `fh`, minus one.
        """
        window_length = check_window_length(self.window_length)
        fh = check_fh(self.fh)
        check_fh_is_relative(fh)

        cutoff = self._get_end(y) - 1
        if window_length is None:
            first = 0
        else:
            first = cutoff - window_length

        yield np.arange(first, cutoff), cutoff + fh - 1
Exemplo n.º 18
0
def test_expanding_window_splitter(y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter started with a full initial window.

    The test windows must stack to one row per split and one column per
    step in `fh`, the first training window must have `initial_window`
    entries, and the training windows must pass
    `_check_expanding_windows`.
    """
    splitter = ExpandingWindowSplitter(
        fh=fh,
        initial_window=initial_window,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    first_train = train_windows[0]
    assert first_train.shape[0] == initial_window
    _check_expanding_windows(train_windows)
Exemplo n.º 19
0
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Test SlidingWindowSplitter started with a full window.

    All training windows must have `window_length` entries and all test
    windows one entry per step in `fh`.
    """
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_train = np.vstack(train_windows)
    assert stacked_train.shape == (n_splits, window_length)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Exemplo n.º 20
0
def test_sliding_window_transform_against_cv(n_timepoints, window_length, fh, scitype):
    """Compare `_sliding_window_transform` with SlidingWindowSplitter windows.

    The feature and target arrays built from the splitter's windows must
    equal the arrays returned by the transform. For the time series
    regressor scitype, axis 1 of the transformed features is squeezed out
    before comparing.
    """
    fh = check_fh(fh)
    y = pd.Series(_make_y(0, n_timepoints))

    splitter = SlidingWindowSplitter(fh=fh, window_length=window_length)
    X_from_cv, y_from_cv = _get_windows(splitter, y)
    y_transformed, X_transformed = _sliding_window_transform(
        y, window_length, fh, scitype=scitype)

    np.testing.assert_array_equal(y_from_cv, y_transformed)

    if scitype == "time-series-regressor":
        X_transformed = X_transformed.squeeze(axis=1)
    np.testing.assert_array_equal(X_from_cv, X_transformed)
Exemplo n.º 21
0
def test_expanding_window_splitter(y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter started with a full initial window.

    The test windows must stack to one row per split and one column per
    step in `fh`, the first training window must have the length of
    `initial_window`, and the training windows must pass
    `_check_expanding_windows`.
    """
    splitter = ExpandingWindowSplitter(
        fh=fh,
        initial_window=initial_window,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    first_train_len = train_windows[0].shape[0]
    expected_len = _coerce_duration_to_int(duration=initial_window, freq="D")
    assert first_train_len == expected_len
    _check_expanding_windows(train_windows)
Exemplo n.º 22
0
def _check_cutoffs_against_test_windows(cutoffs, windows, fh, y):
    """Assert that `cutoffs` agree with the last entry of each test window.

    The expected cutoff of a window is its last entry moved back by the
    last step of `fh`: as an integer position for integer steps, or as a
    datetime64 taken from `y.index` for timedelta / date offset steps.

    Raises
    ------
    ValueError
        If the steps of `fh` are of neither kind.
    """
    # Only the last value is used: some windows may be incomplete and lack
    # a first value, whereas the last value is always there.
    fh = check_fh(fh)
    last_step = fh[-1]

    if is_int(last_step):
        expected = np.array([window[-1] - last_step for window in windows])
    elif array_is_timedelta_or_date_offset(fh):
        expected = np.array([
            (y.index[window[-1]] - last_step).to_datetime64()
            for window in windows
        ])
    else:
        raise ValueError(
            f"Provided `fh` type is not supported: {type(last_step)}")

    np.testing.assert_array_equal(cutoffs, expected)
Exemplo n.º 23
0
    def _split_windows(self, y):
        """Yield (training window, test window) pairs for sliding splits.

        Split points run from `self._get_start()` up to, but excluding,
        `self._get_end(y)` in strides of the step length. Each training
        window holds the `window_length` positions before the split point;
        each test window is the split point shifted by the relative steps
        in `fh`, minus one.
        """
        step_length = check_step_length(self.step_length)
        window_length = check_window_length(self.window_length)
        fh = check_fh(self.fh)
        check_fh_is_relative(fh)

        stop = self._get_end(y)
        first_split = self._get_start()
        yield from (
            (np.arange(point - window_length, point), point + fh - 1)
            for point in range(first_split, stop, step_length)
        )
Exemplo n.º 24
0
def test_pred_errors_against_y_test(fh):
    """Check ThetaForecaster prediction errors on the airline dataset.

    The held-out values at the steps in `fh` must lie strictly between the
    point forecast minus and plus each error returned by
    `_compute_pred_errors(alpha=0.1)`.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    y_pred = forecaster.predict(return_pred_int=False)

    errors = forecaster._compute_pred_errors(alpha=0.1)
    if isinstance(errors, pd.Series):
        errors = [errors]  # make iterable

    # Select the held-out observations at the (1-based) steps in fh.
    y_test = y_test.iloc[check_fh(fh) - 1]
    for error in errors:
        lower, upper = y_pred - error, y_pred + error
        assert np.all(y_test > lower)
        assert np.all(y_test < upper)
Exemplo n.º 25
0
def test_strategy_last_seasonal(fh, sp):
    """Check NaiveForecaster with the seasonal "last" strategy.

    The expected forecast repeats the last `sp` training values often
    enough to cover the largest step in `fh`.
    """
    forecaster = NaiveForecaster(strategy="last", sp=sp)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    # check predicted index
    _assert_correct_pred_time_index(y_pred.index, y_train.index[-1], fh)

    # check values
    fh = check_fh(fh)  # get well formatted fh
    n_tiles = int(np.ceil(max(fh) / sp))
    last_season = y_train.iloc[-sp:]
    expected = np.tile(last_season, reps=n_tiles)[fh - 1]
    np.testing.assert_array_equal(y_pred, expected)
Exemplo n.º 26
0
def test_score(Forecaster, fh):
    """Check that `score` matches `smape_loss` computed from `predict`.

    Two fresh instances of the forecaster are fitted on the same data: one
    supplies the predictions for the reference loss, the other is scored
    directly.
    """
    # Reference value: loss between the predictions and the matching
    # held-out observations.
    reference = _construct_instance(Forecaster)
    reference.fit(y_train, fh=fh)
    y_pred = reference.predict()

    fh_idx = check_fh(fh).to_indexer()  # get zero based index
    y_true = y_test.iloc[fh_idx]
    expected = smape_loss(y_pred, y_true)

    # compare with actual score
    scored = _construct_instance(Forecaster)
    scored.fit(y_train, fh=fh)
    actual = scored.score(y_test.iloc[fh_idx], fh=fh)
    assert actual == expected
Exemplo n.º 27
0
    def _set_fh(self, fh):
        """Check, set and update the forecasting horizon.

        The horizon must be given before fitting and cannot be changed
        afterwards.

        Parameters
        ----------
        fh : None, int, list, np.ndarray

        Raises
        ------
        AttributeError
            If the object has no `is_fitted` attribute.
        ValueError
            If `fh` is None before fitting, or if a `fh` passed after
            fitting differs from the stored one.
        """
        if not hasattr(self, "is_fitted"):
            raise AttributeError("No `is_fitted` attribute found")
        fitted = self.is_fitted

        # Shared suffix that explains both errors below.
        reason = (
            f"This is because fitting of the `{self.__class__.__name__}` "
            f"depends on `fh`. "
        )

        if fh is None:
            if not fitted:
                # fh must be passed when forecaster is not fitted yet
                raise ValueError(
                    "The forecasting horizon `fh` must be passed to `fit`, "
                    "but none was found. " + reason
                )
            # intended workflow: nothing to do once fitted
            return

        fh = check_fh(fh)
        if not fitted:
            # intended workflow: remember the horizon passed before fitting
            self._fh = fh
            return

        # Already fitted: a passed horizon is only accepted if it equals the
        # stored one, in which case it is ignored.
        if not np.array_equal(fh, self._fh):
            raise ValueError(
                "A different forecasting horizon `fh` has been provided "
                "from the one seen in `fit`. If you want to change the "
                "forecasting horizon, please re-fit the forecaster. "
                + reason
            )
Exemplo n.º 28
0
def test_expanding_window_splitter_start_with_empty_window(
        y, fh, initial_window, step_length):
    """Test ExpandingWindowSplitter, skipping incomplete training windows.

    The test windows must stack to one row per split and one column per
    step in `fh`; the training windows left after dropping the incomplete
    ones must pass `_check_expanding_windows`.
    """
    # NOTE(review): the test name mentions an empty starting window, but the
    # splitter is built with start_with_window=True - confirm this is
    # intended.
    splitter = ExpandingWindowSplitter(
        fh=fh,
        initial_window=initial_window,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))

    n_incomplete = _get_n_incomplete_windows(initial_window, step_length)
    _check_expanding_windows(train_windows[n_incomplete:])
Exemplo n.º 29
0
def test_sliding_window_splitter(y, fh, window_length, step_length):
    """Test SlidingWindowSplitter started with a full window.

    All training windows must have the length of `window_length` and all
    test windows one entry per step in `fh`.
    """
    splitter = SlidingWindowSplitter(
        fh=fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=True,
    )
    train_windows, test_windows, _, n_splits = _check_cv(splitter, y)

    stacked_train = np.vstack(train_windows)
    expected_width = _coerce_duration_to_int(
        duration=window_length, freq="D")
    assert stacked_train.shape == (n_splits, expected_width)

    stacked_test = np.vstack(test_windows)
    assert stacked_test.shape == (n_splits, len(check_fh(fh)))
Exemplo n.º 30
0
def test_strategy_drift_unit_slope(fh, window_length):
    """Check NaiveForecaster with the "drift" strategy.

    The expected forecast is the last training value plus the step itself,
    i.e. a continuation with slope 1. A window length of 1 is skipped.
    """
    if window_length == 1:
        return

    forecaster = NaiveForecaster(
        strategy="drift", window_length=window_length)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    # Kept from the original flow; the value is not read again below.
    if window_length is None:
        window_length = len(y_train)

    # get well formatted fh values
    fh = check_fh(fh)

    # Indexing 0..max(fh) with fh yields the step values themselves.
    offsets = np.arange(0, max(fh) + 1)[fh]
    expected = y_train.iloc[-1] + offsets
    np.testing.assert_array_equal(y_pred, expected)