Exemplo n.º 1
0
def test_predict_residuals(Forecaster, index_type, fh_type, is_relative,
                           steps):
    """Check the time index of the output of ``predict_residuals``.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag, a fresh test instance is fitted and the
    index of the residuals is compared against the forecasting horizon.
    A ``NotImplementedError`` raised along the way is tolerated.
    """
    scitype = Forecaster.create_test_instance().get_tag("scitype:y")

    for n_columns in _get_n_columns(scitype):
        forecaster = Forecaster.create_test_instance()
        series_kwargs = {"n_columns": n_columns, "index_type": index_type}
        y_train = _make_series(n_timepoints=50, **series_kwargs)
        cutoff = y_train.index[-1]
        fh = _make_fh(cutoff, steps, fh_type, is_relative)
        try:
            forecaster.fit(y_train, fh=fh)
            y_pred = forecaster.predict()

            # series of the same length as the forecast, placed on its index
            y_test = _make_series(n_timepoints=len(y_pred), **series_kwargs)
            y_test.index = y_pred.index
            residuals = forecaster.predict_residuals(y_test)
            _assert_correct_pred_time_index(residuals.index, cutoff, fh=fh)
        except NotImplementedError:
            continue
Exemplo n.º 2
0
def make_forecasting_problem(
    n_timepoints=50,
    all_positive=True,
    index_type=None,
    make_X=False,
    n_columns=2,
    random_state=None,
):
    """Return test data for forecasting tests.

    Parameters
    ----------
    n_timepoints : int, optional
        Forwarded to ``_make_series`` as ``n_timepoints``, by default 50
    all_positive : bool, optional
        Forwarded to ``_make_series`` as ``all_positive``, by default True
    index_type : optional
        Forwarded to ``_make_series`` as ``index_type``, by default None
    make_X : bool, optional
        Whether X is returned in addition to y, by default False
    n_columns : int, optional
        Number of columns requested for X; y is always requested with a
        single column, by default 2
    random_state : optional
        Forwarded to ``_make_series`` as ``random_state``, by default None

    Returns
    -------
    y
        if not make_X
    y, X
        if make_X; X is given the index of y
    """
    shared = {
        "n_timepoints": n_timepoints,
        "all_positive": all_positive,
        "index_type": index_type,
        "random_state": random_state,
    }
    y = _make_series(n_columns=1, **shared)

    if make_X:
        X = _make_series(n_columns=n_columns, **shared)
        X.index = y.index
        return y, X

    return y
Exemplo n.º 3
0
    def test_predict_residuals(self, estimator_instance, n_columns,
                               index_fh_comb, fh_int):
        """Check the time index of the output of ``predict_residuals``.

        ``index_fh_comb`` is unpacked into ``(index_type, fh_type,
        is_relative)``. Combinations with ``fh_type == "timedelta"`` are not
        exercised, and a ``NotImplementedError`` raised by the estimator is
        tolerated.
        """
        index_type, fh_type, is_relative = index_fh_comb
        if fh_type == "timedelta":
            # workaround to ensure check_estimator without breaking e.g.
            # debugging: return instead of skipping.
            # todo: ensure check_estimator works with pytest.skip, with a
            # message such as "ForecastingHorizon with timedelta values is
            # currently experimental and not supported everywhere"
            return None

        series_kwargs = {"n_columns": n_columns, "index_type": index_type}
        y_train = _make_series(n_timepoints=50, **series_kwargs)
        cutoff = y_train.index[-1]
        fh = _make_fh(cutoff, fh_int, fh_type, is_relative)
        try:
            estimator_instance.fit(y_train, fh=fh)
            y_pred = estimator_instance.predict()

            # series of the same length as the forecast, placed on its index
            y_test = _make_series(n_timepoints=len(y_pred), **series_kwargs)
            y_test.index = y_pred.index
            residuals = estimator_instance.predict_residuals(y_test)
            _assert_correct_pred_time_index(residuals.index, cutoff, fh=fh)
        except NotImplementedError:
            pass
Exemplo n.º 4
0
def test_metric_output(metric, multioutput, n_columns):
    """Test that the metric output has the type implied by ``multioutput``.

    ``"uniform_average"`` must give a float; ``"raw_values"`` must give a
    one-dimensional ``np.ndarray`` with one entry per column of ``y_true``.
    """
    # wrap in DataFrame since _make_series does not return a consistent
    # output type
    y_pred = pd.DataFrame(
        _make_series(n_columns=n_columns, n_timepoints=20, random_state=21))
    y_true = pd.DataFrame(
        _make_series(n_columns=n_columns, n_timepoints=20, random_state=42))

    metric_kwargs = {
        "y_true": y_true,
        "y_pred": y_pred,
        "multioutput": multioutput,
        "y_pred_benchmark": y_pred,
        "y_train": y_true,
    }
    res = metric(**metric_kwargs)

    if multioutput == "uniform_average":
        assert isinstance(res, float)
    elif multioutput == "raw_values":
        assert isinstance(res, np.ndarray)
        assert res.ndim == 1
        assert len(res) == len(y_true.columns)
Exemplo n.º 5
0
def test_get_alignment_loc(Aligner):
    """Test that ``get_alignment_loc`` returns a valid ``alignment_loc``.

    The aligner is fitted on two series with two columns each and the
    result is validated with ``check_raise``.
    """
    aligner = Aligner.create_test_instance()

    X = [_make_series(n_columns=2) for _ in range(2)]
    fitted = aligner.fit(X)

    check_raise(
        fitted.get_alignment_loc(),
        mtype="alignment_loc",
        scitype="Alignment",
    )
Exemplo n.º 6
0
def make_forecasting_problem(
    n_timepoints=50,
    all_positive=True,
    index_type=None,
    make_X=False,
    n_columns=1,
    random_state=None,
):
    """Return test data for forecasting tests.

    Parameters
    ----------
    n_timepoints : int, optional
        Length of data, by default 50
    all_positive : bool, optional
        Only positive values or not, by default True
    index_type : e.g. pd.PeriodIndex, optional
        pandas Index type, by default None
    make_X : bool, optional
        Should X data also be returned, by default False
    n_columns : int, optional
        Number of columns of y, by default 1
    random_state : int, str, float, optional
        Set seed of random state, by default None

    Returns
    -------
    pd.Series, pd.DataFrame
        y, if not make_X
        y, X if make_X; X is requested with two columns and is given the
        index of y
    """
    shared = {
        "n_timepoints": n_timepoints,
        "all_positive": all_positive,
        "index_type": index_type,
        "random_state": random_state,
    }
    y = _make_series(n_columns=n_columns, **shared)

    if make_X:
        X = _make_series(n_columns=2, **shared)
        X.index = y.index
        return y, X

    return y
Exemplo n.º 7
0
    def test_y_multivariate_raises_error(self, estimator_instance):
        """Test that a y of unsupported scitype makes ``fit`` raise.

        An estimator tagged ``"univariate"`` is given two columns, one tagged
        ``"multivariate"`` is given one column; in both cases a ``ValueError``
        is expected. For any other tag value (e.g. ``"both"``) nothing is
        checked.
        """
        scitype = estimator_instance.get_tag("scitype:y")

        if scitype == "univariate":
            n_cols, pattern = 2, r"univariate"
        elif scitype == "multivariate":
            n_cols, pattern = 1, r"two or more variables"
        else:
            return

        y = _make_series(n_columns=n_cols)
        with pytest.raises(ValueError, match=pattern):
            estimator_instance.fit(y, fh=FH0)
Exemplo n.º 8
0
def test_y_multivariate_raises_error(Forecaster):
    """Test that a y of unsupported scitype makes ``fit`` raise.

    A forecaster tagged ``"univariate"`` is given two columns, one tagged
    ``"multivariate"`` is given one column; in both cases a ``ValueError`` is
    expected. For any other tag value nothing is checked.
    """
    forecaster = _construct_instance(Forecaster)
    scitype = forecaster.get_tag("scitype:y")

    if scitype == "univariate":
        n_cols, pattern = 2, r"univariate"
    elif scitype == "multivariate":
        n_cols, pattern = 1, r"2 or more variables"
    else:
        return

    y = _make_series(n_columns=n_cols)
    with pytest.raises(ValueError, match=pattern):
        forecaster.fit(y, fh=FH0)
Exemplo n.º 9
0
def test_X_invalid_type_raises_error(Forecaster, X):
    """Test that invalid X input types raise error.

    ``fit`` is expected to raise a ``TypeError`` whose message matches
    ``"type"``. A ``NotImplementedError`` is accepted instead, provided its
    message mentions "exogenous".
    """
    forecaster = _construct_instance(Forecaster)
    scitype = forecaster.get_tag("scitype:y")

    # pick a column count the forecaster accepts for y
    if scitype in ("univariate", "both"):
        y_train = _make_series(n_columns=1)
    elif scitype == "multivariate":
        y_train = _make_series(n_columns=2)

    try:
        with pytest.raises(TypeError, match=r"type"):
            forecaster.fit(y_train, X, fh=FH0)
    except NotImplementedError as err:
        assert "exogenous" in str(err).lower()
Exemplo n.º 10
0
def test_predict_quantiles(Forecaster, fh, alpha):
    """Check the quantile forecasts returned by ``predict_quantiles``.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag, a fresh test instance is fitted and its
    quantile forecasts are handed to ``_check_predict_quantiles``. A
    ``NotImplementedError`` raised while predicting or checking is tolerated.

    Parameters
    ----------
    Forecaster : BaseEstimator class descendant
        forecaster to test
    fh : ForecastingHorizon
        fh at which to test prediction
    alpha : float
        alpha passed to ``predict_quantiles``
    """
    scitype = Forecaster.create_test_instance().get_tag("scitype:y")

    for n_columns in _get_n_columns(scitype):
        forecaster = Forecaster.create_test_instance()
        y_train = _make_series(n_columns=n_columns)
        forecaster.fit(y_train, fh=fh)
        try:
            quantiles = forecaster.predict_quantiles(fh=fh, alpha=alpha)
            _check_predict_quantiles(quantiles, y_train, fh, alpha)
        except NotImplementedError:
            continue
Exemplo n.º 11
0
    def test__y_and_cutoff(self, estimator_instance, n_columns):
        """Check that ``_y`` and ``cutoff`` follow the data passed in.

        Both must be ``None`` before fitting, reflect the training series
        after ``fit``, and reflect training plus test series after
        ``update``.
        """
        forecaster = estimator_instance

        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series, train_size=0.75)

        # freshly constructed estimator: _y and cutoff are still empty
        assert forecaster._y is None
        assert forecaster.cutoff is None

        # fit must store the training data and set the cutoff to its end
        forecaster.fit(y_train, fh=FH0)
        # todo: also assert the type of _y. A plain
        # isinstance(_y, pd.Series) check fails for multivariate data,
        # which are DataFrames; a general solution for Series and
        # DataFrames is needed.
        assert len(forecaster._y) > 0
        assert forecaster.cutoff == y_train.index[-1]

        # check data pointers
        np.testing.assert_array_equal(forecaster._y.index, y_train.index)

        # update must append the new data and advance the cutoff
        forecaster.update(y_test, update_params=False)
        combined_index = np.append(y_train.index, y_test.index)
        np.testing.assert_array_equal(forecaster._y.index, combined_index)
        assert forecaster.cutoff == y_test.index[-1]
Exemplo n.º 12
0
 def test_update_predict_predicted_index(
     self,
     estimator_instance,
     n_columns,
     fh_int_oos,
     window_length,
     step_length,
     update_params,
 ):
     """Check the index of the forecasts returned by ``update_predict``.

     The estimator is fitted on the training part of a datetime-indexed
     series and then updated over the test part with a
     ``SlidingWindowSplitter``. The result must be a pandas Series or
     DataFrame whose index equals the one computed by
     ``_get_expected_index_for_update_predict``.
     """
     series = _make_series(
         n_columns=n_columns, all_positive=True, index_type="datetime")
     y_train, y_test = temporal_train_test_split(series)

     splitter_kwargs = {
         "window_length": window_length,
         "step_length": step_length,
         "start_with_window": False,
     }
     cv = SlidingWindowSplitter(fh_int_oos, **splitter_kwargs)

     estimator_instance.fit(y_train, fh=fh_int_oos)
     y_pred = estimator_instance.update_predict(
         y_test, cv=cv, update_params=update_params)

     assert isinstance(y_pred, (pd.Series, pd.DataFrame))
     expected_index = _get_expected_index_for_update_predict(
         y_test, fh_int_oos, step_length)
     np.testing.assert_array_equal(y_pred.index, expected_index)
Exemplo n.º 13
0
    def test_predict_quantiles(self, estimator_instance, n_columns, fh_int_oos,
                               alpha):
        """Check the quantile forecasts returned by ``predict_quantiles``.

        The estimator is fitted and its quantile forecasts are handed to
        ``self._check_predict_quantiles``. A ``NotImplementedError`` raised
        while predicting or checking is tolerated.

        Parameters
        ----------
        estimator_instance : forecaster instance to test
        n_columns : int
            number of columns of the generated training series
        fh_int_oos : forecasting horizon passed to ``fit`` and
            ``predict_quantiles``
        alpha : float
            alpha passed to ``predict_quantiles``
        """
        forecaster = estimator_instance
        y_train = _make_series(n_columns=n_columns)
        forecaster.fit(y_train, fh=fh_int_oos)
        try:
            quantiles = forecaster.predict_quantiles(
                fh=fh_int_oos, alpha=alpha)
            self._check_predict_quantiles(
                quantiles, y_train, fh_int_oos, alpha)
        except NotImplementedError:
            pass
Exemplo n.º 14
0
    def test_predict_interval(self, estimator_instance, n_columns, fh_int_oos,
                              alpha):
        """Check prediction intervals returned by ``predict_interval``.

        Parameters
        ----------
        estimator_instance : forecaster instance to test
        n_columns : int
            number of columns of the generated training series
        fh_int_oos : forecasting horizon at which to test prediction
        alpha : float
            coverage at which to make prediction intervals

        Raises
        ------
        AssertionError
            if the estimator has the "capability:pred_int" tag and the
            returned intervals do not pass ``check_is_mtype`` for mtype
            "pred_interval"
        Failure from ``pytest.raises``
            if the estimator does not have the "capability:pred_int" tag and
            no ``NotImplementedError`` matching "prediction intervals" is
            raised
        """
        y_train = _make_series(n_columns=n_columns)
        estimator_instance.fit(y_train, fh=fh_int_oos)

        if not estimator_instance.get_tag("capability:pred_int"):
            with pytest.raises(NotImplementedError,
                               match="prediction intervals"):
                estimator_instance.predict_interval(fh_int_oos, coverage=alpha)
            return

        pred_ints = estimator_instance.predict_interval(
            fh_int_oos, coverage=alpha)
        is_valid = check_is_mtype(
            pred_ints, mtype="pred_interval", scitype="Proba")
        assert is_valid
Exemplo n.º 15
0
    def test_predict_time_index_with_X(self, estimator_instance, n_columns,
                                       index_fh_comb, fh_int_oos):
        """Check that predicted time index matches forecasting horizon.

        Same idea as the check without X, but exogenous data from
        ``make_forecasting_problem`` is passed to ``fit`` and ``predict``.
        Combinations with ``fh_type == "timedelta"`` are not exercised.
        """
        index_type, fh_type, is_relative = index_fh_comb
        if fh_type == "timedelta":
            # todo: ensure check_estimator works with pytest.skip, with a
            # message such as "ForecastingHorizon with timedelta values is
            # currently experimental and not supported everywhere"
            return None

        # only the exogenous part of the generated problem is used
        _, X = make_forecasting_problem(index_type=index_type, make_X=True)

        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = len(y) // 2
        fh = _make_fh(y.index[midpoint], fh_int_oos, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # hence we need to catch NotImplementedErrors.
        try:
            estimator_instance.fit(y_train, X_train, fh=fh)
            y_pred = estimator_instance.predict(X=X_test)
            train_end = y_train.index[-1]
            _assert_correct_pred_time_index(y_pred.index, train_end, fh)
        except NotImplementedError:
            pass
Exemplo n.º 16
0
    def test_predict_time_index(self, estimator_instance, n_columns,
                                index_fh_comb, fh_int):
        """Check that predicted time index matches forecasting horizon.

        ``index_fh_comb`` is unpacked into ``(index_type, fh_type,
        is_relative)``. Combinations with ``fh_type == "timedelta"`` are not
        exercised, and a ``NotImplementedError`` raised by the estimator is
        tolerated.
        """
        index_type, fh_type, is_relative = index_fh_comb
        if fh_type == "timedelta":
            # todo: ensure check_estimator works with pytest.skip, with a
            # message such as "ForecastingHorizon with timedelta values is
            # currently experimental and not supported everywhere"
            return None

        y_train = _make_series(
            n_columns=n_columns, index_type=index_type, n_timepoints=50)
        train_end = y_train.index[-1]
        fh = _make_fh(train_end, fh_int, fh_type, is_relative)

        try:
            estimator_instance.fit(y_train, fh=fh)
            y_pred = estimator_instance.predict()
            # NOTE(review): the assertion receives the raw fh_int steps, not
            # the fh object built above - confirm this is intended
            _assert_correct_pred_time_index(
                y_pred.index, train_end, fh=fh_int)
        except NotImplementedError:
            pass
Exemplo n.º 17
0
def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Test ``temporal_train_test_split`` when splitting by ``fh``.

    A series of 20 time points is split using a forecasting horizon built
    around the index value at position 10; the split is validated by
    ``_check_train_test_split_y``.
    """
    series = _make_series(20, index_type=index_type)
    fh = _make_fh(series.index[10], values, fh_type, is_relative)
    _check_train_test_split_y(fh, temporal_train_test_split(series, fh=fh))
Exemplo n.º 18
0
def test_predict_pred_interval(Forecaster, fh, alpha):
    """Check prediction intervals returned by ``predict``.

    Parameters
    ----------
    Forecaster : BaseEstimator class descendant
        forecaster to test
    fh : ForecastingHorizon
        fh at which to test prediction
    alpha : float
        alpha at which to make prediction intervals

    Raises
    ------
    AssertionError
        presumably from ``_check_pred_ints``, if the forecaster has the
        "capability:pred_int" tag and the intervals are not returned
        correctly when asking predict for them
    Failure from ``pytest.raises``
        if the forecaster does not have the "capability:pred_int" tag and no
        ``NotImplementedError`` matching "prediction intervals" is raised
    """
    scitype = _construct_instance(Forecaster).get_tag("scitype:y")

    for n_columns in _get_n_columns(scitype):
        forecaster = _construct_instance(Forecaster)
        y_train = _make_series(n_columns=n_columns)
        forecaster.fit(y_train, fh=fh)

        if not forecaster.get_tag("capability:pred_int"):
            with pytest.raises(NotImplementedError, match="prediction intervals"):
                forecaster.predict(return_pred_int=True, alpha=alpha)
            continue

        y_pred, pred_ints = forecaster.predict(return_pred_int=True, alpha=alpha)
        _check_pred_ints(pred_ints, y_train, y_pred, fh)
def test_oh_setting(Forecaster):
    """Check that ``_y`` and ``cutoff`` follow the data passed in.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag: both attributes must be ``None`` on a
    fresh instance, reflect the training series after ``fit``, and reflect
    training plus test series after ``update``.
    """
    scitype = Forecaster.create_test_instance().get_tag("scitype:y")

    for n_columns in _get_n_columns(scitype):
        forecaster = Forecaster.create_test_instance()
        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series, train_size=0.75)

        # check _y and cutoff is None after construction
        assert forecaster._y is None
        assert forecaster.cutoff is None

        # fit must store the training data and set the cutoff to its end
        forecaster.fit(y_train, fh=FH0)
        # todo: also assert the type of _y. A plain
        # isinstance(_y, pd.Series) check fails for multivariate data,
        # which are DataFrames; a general solution for Series and
        # DataFrames is needed.
        assert len(forecaster._y) > 0
        assert forecaster.cutoff == y_train.index[-1]

        # check data pointers
        np.testing.assert_array_equal(forecaster._y.index, y_train.index)

        # update must append the new data and advance the cutoff
        forecaster.update(y_test, update_params=False)
        combined_index = np.append(y_train.index, y_test.index)
        np.testing.assert_array_equal(forecaster._y.index, combined_index)
        assert forecaster.cutoff == y_test.index[-1]
Exemplo n.º 20
0
def test_predict_time_index_with_X(Forecaster, index_type, fh_type,
                                   is_relative, steps):
    """Check that predicted time index matches forecasting horizon.

    Exogenous data from ``make_forecasting_problem`` is passed to ``fit``
    and ``predict``. The check runs for every column count that
    ``_get_n_columns`` returns for the forecaster's ``"scitype:y"`` tag.
    """
    scitype = _construct_instance(Forecaster).get_tag("scitype:y")
    n_columns_list = _get_n_columns(scitype)

    # only the exogenous part of the generated problem is used
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    for n_columns in n_columns_list:
        forecaster = _construct_instance(Forecaster)
        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = len(y) // 2
        fh = _make_fh(y.index[midpoint], steps, fh_type, is_relative)

        y_train, _, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # hence we need to catch NotImplementedErrors.
        try:
            forecaster.fit(y_train, X_train, fh=fh)
            y_pred = forecaster.predict(X=X_test)
            train_end = y_train.index[-1]
            _assert_correct_pred_time_index(y_pred.index, train_end, fh)
        except NotImplementedError:
            continue
Exemplo n.º 21
0
def test_wrong_power_type_raises_error(power_transformer, _power):
    """Test that constructing and fitting with an invalid ``power`` raises.

    A ``ValueError`` whose message names the offending type of ``_power``
    is expected from building the transformer and fitting it.
    """
    series = _make_series(n_timepoints=75)

    # Test input types
    expected_msg = f"Expected `power` to be int or float, but found {type(_power)}."
    with pytest.raises(ValueError, match=expected_msg):
        power_transformer(power=_power).fit(series)
Exemplo n.º 22
0
 def test__y_when_refitting(self, estimator_instance, n_columns):
     """Test that ``_y`` is replaced when the forecaster is fitted again.

     The estimator is fitted on a full series and then on the same series
     without its first three entries; ``_y`` must equal the shortened one.
     """
     tail = slice(3, None)
     y_train = _make_series(n_columns=n_columns)

     estimator_instance.fit(y_train, fh=FH0)
     estimator_instance.fit(y_train[tail], fh=FH0)

     # np.squeeze makes the comparison flexible to shape differences like
     # (50,) and (50, 1)
     stored = np.squeeze(estimator_instance._y)
     expected = np.squeeze(y_train[tail])
     assert np.all(stored == expected)
Exemplo n.º 23
0
def test_get_aligned(Aligner):
    """Test that ``get_aligned`` returns a list of series with same columns.

    The result must be a list of ``pd.DataFrame`` with as many entries as
    the list passed to ``fit``, and each entry must have the same set of
    columns as the corresponding input series.
    """
    aligner = Aligner.create_test_instance()

    X = [_make_series(n_columns=2) for _ in range(2)]
    X_aligned = aligner.fit(X).get_aligned()

    msg = (f"{Aligner.__name__}.get_aligned must return list of pd.DataFrame"
           ", same length as X in fit")
    col_msg = f"{Aligner.__name__}.get_aligned series must have same columns as in X"
    assert isinstance(X_aligned, list), msg
    assert len(X_aligned) == len(X), msg

    # lengths are equal at this point, so pairing covers every entry
    for aligned, original in zip(X_aligned, X):
        assert isinstance(aligned, pd.DataFrame), msg
        assert set(aligned.columns) == set(original.columns), col_msg
Exemplo n.º 24
0
def test_sliding_window_splitter_initial_window_start_with_empty_window_raises_error():
    """Test that ``initial_window`` with ``start_with_window=False`` raises.

    The ``ValueError`` is expected when the first split is requested.
    """
    series = _make_series()
    splitter_kwargs = {
        "fh": 1,
        "initial_window": 15,
        "start_with_window": False,
    }
    splitter = SlidingWindowSplitter(**splitter_kwargs)
    expected_msg = "`start_with_window` must be True if `initial_window` is given"
    with pytest.raises(ValueError, match=expected_msg):
        next(splitter.split(series))
Exemplo n.º 25
0
def test_metric_output_direct(metric, multioutput, n_columns):
    """Test output is of correct type, dependent on multioutput.

    The metric is instantiated with ``multioutput`` and called through its
    ``__call__`` dunder twice with identical arguments. Both results must
    have the expected type and be numerically close to each other. The
    results are kept in a dict keyed by option so that further ways of
    calling the metric can be added for comparison.
    """
    # wrap in DataFrame since _make_series does not return a consistent
    # output type
    y_pred = pd.DataFrame(
        _make_series(n_columns=n_columns, n_timepoints=20, random_state=21))
    y_true = pd.DataFrame(
        _make_series(n_columns=n_columns, n_timepoints=20, random_state=42))

    call_kwargs = {
        "y_true": y_true,
        "y_pred": y_pred,
        "y_pred_benchmark": y_pred,
        "y_train": y_true,
    }
    res = {
        option: metric(multioutput=multioutput)(**call_kwargs)
        for option in (1, 2)
    }
    outputs = list(res.values())

    if multioutput == "uniform_average":
        assert all(isinstance(x, float) for x in outputs)
    elif multioutput == "raw_values":
        assert all(isinstance(x, np.ndarray) for x in outputs)
        assert all(x.ndim == 1 for x in outputs)
        assert all(len(x) == len(y_true.columns) for x in outputs)

    # assert results from all options are equal
    assert np.allclose(res[1], res[2])
Exemplo n.º 26
0
 def test_X_invalid_type_raises_error(self, estimator_instance, n_columns,
                                      X):
     """Test that invalid X input types raise error.

     ``fit`` is expected to raise a ``TypeError`` whose message matches
     ``"type"``. A ``NotImplementedError`` is accepted instead, provided
     its message mentions "exogenous".
     """
     series = _make_series(n_columns=n_columns)
     try:
         with pytest.raises(TypeError, match=r"type"):
             estimator_instance.fit(series, X, fh=FH0)
     except NotImplementedError as err:
         assert "exogenous" in str(err).lower()
Exemplo n.º 27
0
 def test_update_predict_single(self, estimator_instance, n_columns,
                                fh_int_oos, update_params):
     """Check correct time index of ``update_predict_single``.

     After fitting on the training part and updating with the test part,
     the forecast index is checked against the horizon, using the last
     index value of the test part as cutoff.
     """
     series = _make_series(n_columns=n_columns)
     y_train, y_test = temporal_train_test_split(series)

     estimator_instance.fit(y_train, fh=fh_int_oos)
     y_pred = estimator_instance.update_predict_single(
         y_test, update_params=update_params)

     new_cutoff = y_test.index[-1]
     _assert_correct_pred_time_index(y_pred.index, new_cutoff, fh_int_oos)
Exemplo n.º 28
0
class ForecasterFitPredictUnivariateNoXLateFh(ForecasterTestScenario):
    """Fit/predict only, univariate y, no X, fh passed in predict, not in fit."""

    # scenario tags: y is univariate and fit receives no fh
    _tags = {"univariate_y": True, "fh_passed_in_fit": False}

    # keyword arguments per method name; the series is generated once, when
    # the class body is executed
    args = {
        "fit": {"y": _make_series(n_timepoints=20, random_state=RAND_SEED)},
        "predict": {"fh": 1},
    }
    # methods listed in call order: fit first, then predict
    default_method_sequence = ["fit", "predict"]
Exemplo n.º 29
0
def test_sliding_window_splitter_initial_window_smaller_than_window_raise_error():
    """Test that ``initial_window`` smaller than ``window_length`` raises.

    The ``ValueError`` is expected when the first split is requested.
    """
    series = _make_series()
    splitter_kwargs = {
        "fh": 1,
        "window_length": 10,
        "initial_window": 5,
    }
    splitter = SlidingWindowSplitter(**splitter_kwargs)
    expected_msg = "`initial_window` must greater than `window_length`"
    with pytest.raises(ValueError, match=expected_msg):
        next(splitter.split(series))
def test_fh_in_predict_opt(Forecaster):
    """Check that ``fh`` may be passed in ``predict`` instead of ``fit``.

    For every column count that ``_get_n_columns`` returns for the
    forecaster's ``"scitype:y"`` tag, a fresh test instance is fitted
    without ``fh``, asked to predict with ``FH0``, and its ``fh`` attribute
    is then compared with ``FH0``.
    """
    scitype = Forecaster.create_test_instance().get_tag("scitype:y")

    for n_columns in _get_n_columns(scitype):
        forecaster = Forecaster.create_test_instance()
        forecaster.fit(_make_series(n_columns=n_columns))
        forecaster.predict(FH0)
        np.testing.assert_array_equal(forecaster.fh, FH0)