示例#1
0
def test_pipeline():
    """Compare TransformedTargetForecaster output with a hand-built pipeline.

    The pipeline chains an ExponentTransformer, a MinMaxScaler wrapped in a
    TabularToSeriesAdaptor and a NaiveForecaster. The reference prediction is
    produced by applying the same steps manually and undoing the
    transformations in reverse order.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)
    fh = np.arange(len(y_test)) + 1

    steps = [
        ("t1", ExponentTransformer()),
        ("t2", TabularToSeriesAdaptor(MinMaxScaler())),
        ("forecaster", NaiveForecaster()),
    ]
    pipeline = TransformedTargetForecaster(steps)
    pipeline.fit(y_train, fh=fh)
    actual = pipeline.predict()

    def manual_prediction(series, horizon):
        # forward pass: transform the training data step by step
        exponent = ExponentTransformer()
        scaler = TabularToSeriesAdaptor(MinMaxScaler())
        transformed = exponent.fit_transform(series.copy())
        transformed = scaler.fit_transform(transformed)

        naive = NaiveForecaster()
        naive.fit(transformed, fh=horizon)

        # backward pass: undo the transformations, last one first
        prediction = naive.predict()
        prediction = scaler.inverse_transform(prediction)
        return exponent.inverse_transform(prediction)

    expected = manual_prediction(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
示例#2
0
def test_predict_time_index_with_X(Forecaster, index_type, fh_type,
                                   is_relative, steps):
    """Check the predicted time index against the horizon when X is passed."""
    # A throwaway instance is only needed to read the "scitype:y" tag.
    probe = _construct_instance(Forecaster)
    n_columns_list = _get_n_columns(probe.get_tag("scitype:y"))

    # Only the exogenous data of the generated problem is used below.
    _, X = make_forecasting_problem(index_type=index_type, make_X=True)

    for n_columns in n_columns_list:
        forecaster = _construct_instance(Forecaster)
        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = y.index[len(y) // 2]
        fh = _make_fh(midpoint, steps, fh_type, is_relative)

        y_train, _y_test, X_train, X_test = temporal_train_test_split(
            y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # so a NotImplementedError is tolerated here.
        try:
            forecaster.fit(y_train, X_train, fh=fh)
            y_pred = forecaster.predict(X=X_test)
            _assert_correct_pred_time_index(
                y_pred.index, y_train.index[-1], fh)
        except NotImplementedError:
            continue
 def test_update_predict_predicted_index(
     self,
     estimator_instance,
     n_columns,
     fh_int_oos,
     window_length,
     step_length,
     update_params,
 ):
     """Verify the index of the predictions returned by update_predict."""
     series = _make_series(
         n_columns=n_columns, all_positive=True, index_type="datetime"
     )
     y_train, y_test = temporal_train_test_split(series)

     splitter = SlidingWindowSplitter(
         fh_int_oos,
         window_length=window_length,
         step_length=step_length,
         start_with_window=False,
     )

     estimator_instance.fit(y_train, fh=fh_int_oos)
     predictions = estimator_instance.update_predict(
         y_test, cv=splitter, update_params=update_params
     )

     # the result must be a pandas container before its index is inspected
     assert isinstance(predictions, (pd.Series, pd.DataFrame))

     expected = _get_expected_index_for_update_predict(
         y_test, fh_int_oos, step_length
     )
     np.testing.assert_array_equal(predictions.index, expected)
示例#4
0
def test_pipeline():
    """Compare TransformedTargetForecaster output with a hand-built pipeline.

    The pipeline chains a multiplicative Deseasonalizer (sp=12), a Detrender
    based on a degree-1 PolynomialTrendForecaster and a NaiveForecaster. The
    reference prediction applies the same steps manually and undoes the
    transformations in reverse order.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)
    fh = np.arange(len(y_test)) + 1

    steps = [
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ]
    pipeline = TransformedTargetForecaster(steps)
    pipeline.fit(y_train, fh)
    actual = pipeline.predict()

    def manual_prediction(series, horizon):
        # forward pass: transform the training data step by step
        deseasonalizer = Deseasonalizer(sp=12, model="multiplicative")
        detrender = Detrender(PolynomialTrendForecaster(degree=1))
        transformed = deseasonalizer.fit_transform(series.copy())
        transformed = detrender.fit_transform(transformed)

        naive = NaiveForecaster()
        naive.fit(transformed, horizon)

        # backward pass: undo the transformations, last one first
        prediction = naive.predict()
        prediction = detrender.inverse_transform(prediction)
        return deseasonalizer.inverse_transform(prediction)

    expected = manual_prediction(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
示例#5
0
def test_weights_for_airline_averaging():
    """Check that the ensemble weights are uniform after fitting.

    Three ExponentialSmoothing variants are combined in an
    OnlineEnsembleForecaster and fitted on the airline training split; each
    of the three weights is expected to equal 1/3.
    """
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    # settings shared by all three ensemble members
    seasonal_kwargs = {"seasonal": "multiplicative", "sp": 12}
    members = [
        ("ses", ExponentialSmoothing(**seasonal_kwargs)),
        (
            "holt",
            ExponentialSmoothing(trend="add", damped=False, **seasonal_kwargs),
        ),
        (
            "damped",
            ExponentialSmoothing(trend="add", damped=True, **seasonal_kwargs),
        ),
    ]
    ensemble = OnlineEnsembleForecaster(members)
    ensemble.fit(y_train)

    uniform = np.full(3, 1 / 3)
    np.testing.assert_allclose(ensemble.weights, uniform, rtol=1e-8)
    def test__y_and_cutoff(self, estimator_instance, n_columns):
        """Verify that ``_y`` and ``cutoff`` follow the data seen so far."""
        forecaster = estimator_instance

        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series, train_size=0.75)

        # before fitting, nothing is stored on the estimator
        assert forecaster._y is None
        assert forecaster.cutoff is None

        # fit stores the training data and sets the cutoff to its last index
        forecaster.fit(y_train, fh=FH0)
        # TODO: re-enable `assert isinstance(f._y, pd.Series)` once the check
        # is generalised; it fails for multivariate data, where _y is a
        # DataFrame.
        assert len(forecaster._y) > 0
        assert forecaster.cutoff == y_train.index[-1]
        np.testing.assert_array_equal(forecaster._y.index, y_train.index)

        # update appends the new data and moves the cutoff forward
        forecaster.update(y_test, update_params=False)
        combined_index = np.append(y_train.index, y_test.index)
        np.testing.assert_array_equal(forecaster._y.index, combined_index)
        assert forecaster.cutoff == y_test.index[-1]
    def test_predict_time_index_with_X(self, estimator_instance, n_columns,
                                       index_fh_comb, fh_int_oos):
        """Check the predicted time index against the horizon when X is passed."""
        index_type, fh_type, is_relative = index_fh_comb

        # ForecastingHorizon with timedelta values is currently experimental
        # and not supported everywhere, so that combination is not exercised.
        # todo: ensure check_estimator works with pytest.skip instead of a
        # plain return
        if fh_type == "timedelta":
            return

        # Only the exogenous data of the generated problem is used below.
        _, X = make_forecasting_problem(index_type=index_type, make_X=True)

        y = _make_series(n_columns=n_columns, index_type=index_type)
        midpoint = y.index[len(y) // 2]
        fh = _make_fh(midpoint, fh_int_oos, fh_type, is_relative)

        y_train, _y_test, X_train, X_test = temporal_train_test_split(
            y, X, fh=fh)

        # Some estimators may not support all time index types and fh types,
        # so a NotImplementedError is tolerated here.
        try:
            estimator_instance.fit(y_train, X_train, fh=fh)
            y_pred = estimator_instance.predict(X=X_test)
            _assert_correct_pred_time_index(
                y_pred.index, y_train.index[-1], fh)
        except NotImplementedError:
            return
示例#8
0
def test_dummy_regressor_mean_prediction_endogenous_only(
        fh, window_length, strategy, scitype):
    """Check reduction forecasts made with a mean-predicting DummyRegressor.

    The DummyRegressor ignores the feature data X, so it can be used to test
    reduction from forecasting to both tabular and time series regression.
    It also supports the 'multioutput' strategy.
    """
    y = make_forecasting_problem()
    fh = check_fh(fh)
    y_train, y_test = temporal_train_test_split(y, fh=fh)

    forecaster = make_reduction(
        DummyRegressor(strategy="mean"),
        scitype=scitype,
        window_length=window_length,
        strategy=strategy,
    )
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    # The recursive strategy always trains on the first-step-ahead target,
    # whatever the horizon is. The other strategies split the data taking
    # the requested steps ahead into account.
    effective_window_length = (
        window_length
        if strategy == "recursive"
        else window_length + max(fh) - 1
    )

    # In the sliding-window transformation the first values of the target
    # series form the first window and never appear in the transformed
    # target vector, so the expected forecast is the mean of the rest.
    expected = np.mean(y_train[effective_window_length:])
    np.testing.assert_array_almost_equal(actual, expected)
def test_oh_setting(Forecaster):
    """Check that ``_y`` and ``cutoff`` follow the data seen so far."""
    # A throwaway instance is only needed to read the "scitype:y" tag.
    probe = _construct_instance(Forecaster)
    n_columns_list = _get_n_columns(probe.get_tag("scitype:y"))

    for n_columns in n_columns_list:
        forecaster = _construct_instance(Forecaster)
        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series, train_size=0.75)

        # before fitting, nothing is stored on the estimator
        assert forecaster._y is None
        assert forecaster.cutoff is None

        # fit stores the training data and sets the cutoff to its last index
        forecaster.fit(y_train, fh=FH0)
        # TODO: re-enable `assert isinstance(f._y, pd.Series)` once the check
        # is generalised; it fails for multivariate data, where _y is a
        # DataFrame.
        assert len(forecaster._y) > 0
        assert forecaster.cutoff == y_train.index[-1]
        np.testing.assert_array_equal(forecaster._y.index, y_train.index)

        # update appends the new data and moves the cutoff forward
        forecaster.update(y_test, update_params=False)
        combined_index = np.append(y_train.index, y_test.index)
        np.testing.assert_array_equal(forecaster._y.index, combined_index)
        assert forecaster.cutoff == y_test.index[-1]
示例#10
0
def test_y_test_index_input():
    """Check that the index of y_test can serve as an absolute horizon."""
    series = make_forecasting_problem()
    y_train, y_test = temporal_train_test_split(series, train_size=0.75)

    horizon = FH(y_test.index, relative=False)
    last_train_point = y_train.index[-1]

    # relative to the end of the training data the steps are 1..len(y_test)
    expected_steps = np.arange(len(y_test)) + 1
    np.testing.assert_array_equal(
        horizon.relative(last_train_point), expected_steps
    )
示例#11
0
def test_forecaster_with_initial_level():
    """Check ThetaForecaster with an explicit initial level on airline data.

    Forecasts of the log1p-transformed series must lie within 5% relative
    tolerance of the held-out values.
    """
    log_series = np.log1p(load_airline())
    y_train, y_test = temporal_train_test_split(log_series)
    horizon = np.arange(len(y_test)) + 1

    forecaster = ThetaForecaster(initial_level=0.1, sp=12)
    forecaster.fit(y_train)
    forecast = forecaster.predict(fh=horizon)

    np.testing.assert_allclose(forecast, y_test, rtol=0.05)
示例#12
0
 def test_update_predict_single(self, estimator_instance, n_columns,
                                fh_int_oos, update_params):
     """Verify the time index returned by update_predict_single."""
     series = _make_series(n_columns=n_columns)
     y_train, y_test = temporal_train_test_split(series)

     estimator_instance.fit(y_train, fh=fh_int_oos)
     predictions = estimator_instance.update_predict_single(
         y_test, update_params=update_params
     )

     # predictions are checked relative to the last point of y_test
     last_seen = y_test.index[-1]
     _assert_correct_pred_time_index(predictions.index, last_seen, fh_int_oos)
示例#13
0
def test_predictive_performance_on_airline():
    """Check ThetaForecaster accuracy on the log1p-transformed airline data."""
    log_series = np.log1p(load_airline())
    y_train, y_test = temporal_train_test_split(log_series)
    horizon = np.arange(len(y_test)) + 1

    forecaster = ThetaForecaster(sp=12)
    forecaster.fit(y_train)
    forecast = forecaster.predict(fh=horizon)

    # Performance on this particular dataset should be reasonably good:
    # every forecast must be within 5% relative tolerance of the truth.
    np.testing.assert_allclose(forecast, y_test, rtol=0.05)
示例#14
0
def test_update_predict_single(Forecaster, fh, update_params):
    """Verify the time index returned by update_predict_single."""
    # A throwaway instance is only needed to read the "scitype:y" tag.
    probe = _construct_instance(Forecaster)
    n_columns_list = _get_n_columns(probe.get_tag("scitype:y"))

    for n_columns in n_columns_list:
        forecaster = _construct_instance(Forecaster)
        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series)

        forecaster.fit(y_train, fh=fh)
        predictions = forecaster.update_predict_single(
            y_test, update_params=update_params
        )

        # predictions are checked relative to the last point of y_test
        last_seen = y_test.index[-1]
        _assert_correct_pred_time_index(predictions.index, last_seen, fh)
示例#15
0
def test_fh(index_type, fh_type, is_relative, steps):
    """Check ForecastingHorizon conversions for one index/fh type combination.

    A horizon is built from ``steps`` with the last training index as the
    cutoff. Its absolute, relative, indexer, in-sample and out-of-sample
    representations are then compared against values derived directly from
    the index of the generated series.
    """
    # generate data
    y = make_forecasting_problem(index_type=index_type)
    assert isinstance(y.index, INDEX_TYPE_LOOKUP.get(index_type))

    # split data
    y_train, y_test = temporal_train_test_split(y, test_size=10)

    # choose cutoff point
    cutoff = y_train.index[-1]

    # generate fh
    fh = _make_fh(cutoff, steps, fh_type, is_relative)
    assert isinstance(fh.to_pandas(), INDEX_TYPE_LOOKUP.get(fh_type))

    # get expected outputs
    if isinstance(steps, int):
        steps = np.array([steps])
    # pd.Int64Index was removed in pandas 2.0; an explicitly typed pd.Index
    # yields an int64 index on old and new pandas versions alike.
    fh_relative = pd.Index(steps, dtype="int64").sort_values()
    fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
    fh_indexer = fh_relative - 1
    fh_oos = fh.to_pandas()[fh_relative > 0]
    is_oos = len(fh_oos) == len(fh)
    fh_ins = fh.to_pandas()[fh_relative <= 0]
    is_ins = len(fh_ins) == len(fh)

    # check outputs
    # check absolute representation
    _assert_index_equal(fh_absolute, fh.to_absolute(cutoff).to_pandas())
    assert not fh.to_absolute(cutoff).is_relative

    # check relative representation
    _assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
    assert fh.to_relative(cutoff).is_relative

    # check index-like representation
    _assert_index_equal(fh_indexer, fh.to_indexer(cutoff))

    # check in-sample representation
    # we only compare the numpy array here because the expected solution is
    # formatted in a slightly different way than the generated solution
    np.testing.assert_array_equal(
        fh_ins.to_numpy(), fh.to_in_sample(cutoff).to_pandas()
    )
    assert fh.to_in_sample(cutoff).is_relative == is_relative
    assert fh.is_all_in_sample(cutoff) == is_ins

    # check out-of-sample representation
    np.testing.assert_array_equal(
        fh_oos.to_numpy(), fh.to_out_of_sample(cutoff).to_pandas()
    )
    assert fh.to_out_of_sample(cutoff).is_relative == is_relative
    assert fh.is_all_out_of_sample(cutoff) == is_oos
示例#16
0
def test_update_predict_predicted_indices(Forecaster, fh, window_length,
                                          step_length, y):
    """Check the predictions returned by update_predict on a sliding window.

    Forecasters that raise NotImplementedError are tolerated.
    """
    y_train, y_test = temporal_train_test_split(y)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        return
示例#17
0
def test_pred_errors_against_y_test(fh):
    """Check that y_test lies strictly inside y_pred +/- the computed errors."""
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = ThetaForecaster()
    forecaster.fit(y_train, fh)
    y_pred = forecaster.predict(return_pred_int=False)

    errors = forecaster._compute_pred_errors(alpha=0.1)
    if isinstance(errors, pd.Series):
        # a single Series is wrapped so the loop below works in both cases
        errors = [errors]

    # keep only the test points that the horizon refers to
    y_test = y_test.iloc[check_fh(fh) - 1]

    for margin in errors:
        lower = y_pred - margin
        upper = y_pred + margin
        assert np.all(y_test > lower)
        assert np.all(y_test < upper)
示例#18
0
def test_reductions_airline_data(forecaster, expected):
    """Test reduction forecasters by predicting on the airline dataset.

    Linear estimators are used. Predictions are compared with values
    calculated by Lovkush Agarwal on their local machine in Mar 2021.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    fitted = forecaster.fit(y_train, fh=horizon)
    actual = fitted.predict(horizon)

    np.testing.assert_almost_equal(actual, expected)
示例#19
0
def calculate_smape(df_, regressor, forecast_horizon, window_length):
    """Return the sMAPE of a recursive reduction forecaster on a hold-out set.

    The last column of ``df_`` is forward-filled, re-indexed from zero and
    split so that the final 12 observations form the test set. A
    ReducedRegressionForecaster with the 'recursive' strategy is fitted on
    the remainder and scored with ``smape_loss``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Input data; only the last column is used. It is not modified.
    regressor
        Passed to ``select_regressor`` to obtain the estimator to wrap.
    forecast_horizon
        Currently unused; the hold-out size is fixed at 12 observations.
        NOTE(review): confirm whether this was meant to set ``test_size``.
    window_length
        Window length handed to the reduction forecaster.

    Returns
    -------
    The value returned by ``smape_loss(y_pred, y_test)``.
    """
    # DataFrame.ffill returns a new frame, so the caller's data is left
    # untouched without an explicit copy. It replaces
    # fillna(method='ffill'), whose `method` keyword is deprecated in pandas.
    df = df_.ffill()
    y = df.iloc[:, -1].reset_index(drop=True)
    y_train, y_test = temporal_train_test_split(y, test_size=12)
    fh = np.arange(y_test.shape[0]) + 1
    regressor = select_regressor(regressor)
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=window_length,
                                             strategy='recursive')
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.predict(fh)

    return smape_loss(y_pred, y_test)
示例#20
0
def test_factory_method_direct():
    """Check that ReducedForecaster matches DirectRegressionForecaster.

    Both forecasters wrap the same LinearRegression instance and are fitted
    on the airline data with the last 24 points held out.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    regressor = LinearRegression()
    via_factory = ReducedForecaster(
        regressor, scitype="regressor", strategy="direct"
    )
    via_class = DirectRegressionForecaster(regressor)

    fitted_factory = via_factory.fit(y_train, fh=horizon)
    actual = fitted_factory.predict(horizon)
    fitted_class = via_class.fit(y_train, fh=horizon)
    expected = fitted_class.predict(horizon)

    np.testing.assert_array_equal(actual, expected)
示例#21
0
def test_split_by_fh(index_type, fh_type, is_relative, values):
    """Test temporal_train_test_split when the split is given by a horizon."""
    # ForecastingHorizon with timedelta values is currently experimental and
    # not supported everywhere, so that combination is not exercised.
    # todo: ensure check_estimator works with pytest.skip instead of a plain
    # return
    if fh_type == "timedelta":
        return

    series = _make_series(20, index_type=index_type)
    reference_point = series.index[10]
    horizon = _make_fh(reference_point, values, fh_type, is_relative)

    result = temporal_train_test_split(series, fh=horizon)
    _check_train_test_split_y(horizon, result)
示例#22
0
def test_update_predict_predicted_indices(Forecaster, fh, window_length,
                                          step_length):
    """Check the predictions returned by update_predict on a sliding window.

    The data is an all-positive series with a datetime index. Forecasters
    that raise NotImplementedError are tolerated.
    """
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)
    splitter = SlidingWindowSplitter(
        fh, window_length=window_length, step_length=step_length
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)

    try:
        predictions = forecaster.update_predict(y_test, cv=splitter)
        _check_update_predict_y_pred(predictions, y_test, fh, step_length)
    except NotImplementedError:
        return
示例#23
0
    def test_score(self, estimator_instance, n_columns, fh_int_oos):
        """Verify that score() equals the symmetric MAPE computed by hand."""
        series = _make_series(n_columns=n_columns)
        y_train, y_test = temporal_train_test_split(series)

        estimator_instance.fit(y_train, fh=fh_int_oos)
        predictions = estimator_instance.predict()

        # zero-based positions of the horizon steps within y_test
        positions = check_fh(fh_int_oos).to_indexer()
        expected = mean_absolute_percentage_error(
            predictions, y_test.iloc[positions], symmetric=True
        )

        # compare expected score with actual score
        scored_against = y_test.iloc[positions]
        actual = estimator_instance.score(scored_against, fh=fh_int_oos)
        assert actual == expected
示例#24
0
def graph_model_exp_smoothing():
    """Write the model graph for the exponential smoothing model.

    Depending on the module-level ``from_excel`` flag, the train/test/
    prediction series are either loaded from Excel or produced by training
    the model on a slice of the Zurich time series. The module-level ``fh``
    is used as the test size and as the number of forecast steps.
    """
    if from_excel:
        y_train, y_test, y_pred = get_data_from_excel("ExpSmoothing.PM10")
    else:
        raw: pd.DataFrame = get_time_series(
            get_engine(), "zurich", "Zch_Stampfenbachstrasse"
        )[-1100:-900]
        raw.drop(columns=["date", "Zch_Stampfenbachstrasse.PM2.5"], inplace=True)

        # preprocessing: impute, smooth, then split into target and features
        imputed = impute_simple_imputer(raw, False)
        smoothed = moving_average(imputed, False)
        y, x = expsmoothing.transform_data(smoothed, False)

        y_train, y_test, x_train, x_test = temporal_train_test_split(
            y, x, test_size=fh
        )
        model = expsmoothing.train_model_expSmooting(y_train, x_train, False)
        y_pred = model.predict(X=x_test, fh=np.linspace(1, fh, fh))

    # both branches end by drawing the same graph
    write_model_graph(y_train, y_test, y_pred, "Exponential Smoothing")
示例#25
0
def test_multioutput_direct_equivalence_tabular_linear_regression(fh):
    """Check that direct and multioutput reductions agree for linear models.

    With a linear regression regressor, the multioutput and direct
    strategies should produce the same predictions.
    """
    y, X = make_forecasting_problem(make_X=True)
    y_train, y_test, X_train, X_test = temporal_train_test_split(y, X, fh=fh)

    estimator = LinearRegression()
    direct = make_reduction(estimator, strategy="direct")
    multioutput = make_reduction(estimator, strategy="multioutput")

    fitted_direct = direct.fit(y_train, X_train, fh=fh)
    y_pred_direct = fitted_direct.predict(fh, X_test)

    fitted_multioutput = multioutput.fit(y_train, X_train, fh=fh)
    y_pred_multioutput = fitted_multioutput.predict(fh, X_test)

    np.testing.assert_array_almost_equal(
        y_pred_direct.to_numpy(), y_pred_multioutput.to_numpy()
    )
示例#26
0
def test_factory_method_ts_direct():
    """Check ReducedForecaster against DirectTimeSeriesRegressionForecaster.

    Both forecasters wrap the same Tabularizer + LinearRegression pipeline
    and are fitted on the airline data with the last 24 points held out.
    """
    series = load_airline()
    y_train, y_test = temporal_train_test_split(series, test_size=24)
    horizon = ForecastingHorizon(y_test.index, is_relative=False)

    ts_regressor = Pipeline([
        ("tabularize", Tabularizer()),
        ("model", LinearRegression()),
    ])
    via_factory = ReducedForecaster(
        ts_regressor, scitype="ts_regressor", strategy="direct"
    )
    via_class = DirectTimeSeriesRegressionForecaster(ts_regressor)

    fitted_factory = via_factory.fit(y_train, fh=horizon)
    actual = fitted_factory.predict(horizon)
    fitted_class = via_class.fit(y_train, fh=horizon)
    expected = fitted_class.predict(horizon)

    np.testing.assert_array_equal(actual, expected)
示例#27
0
def _check_update_predict_predicted_index(Forecaster, fh, window_length,
                                          step_length, update_params):
    """Verify the index of the predictions returned by update_predict.

    The data is an all-positive series with a datetime index, and the
    sliding-window splitter is created with ``start_with_window=False``.
    """
    series = make_forecasting_problem(all_positive=True, index_type="datetime")
    y_train, y_test = temporal_train_test_split(series)

    splitter = SlidingWindowSplitter(
        fh,
        window_length=window_length,
        step_length=step_length,
        start_with_window=False,
    )

    forecaster = _construct_instance(Forecaster)
    forecaster.fit(y_train, fh=fh)
    predictions = forecaster.update_predict(
        y_test, cv=splitter, update_params=update_params
    )

    # the result must be a pandas container before its index is inspected
    assert isinstance(predictions, (pd.Series, pd.DataFrame))

    expected = _get_expected_index_for_update_predict(y_test, fh, step_length)
    np.testing.assert_array_equal(predictions.index, expected)
示例#28
0
def test_VAR_against_statsmodels():
    """Compare sktime's VAR forecasts with those of statsmodels' VAR."""
    train, test = temporal_train_test_split(df)

    # sktime side: forecast a non-contiguous set of steps
    horizon = ForecastingHorizon([1, 3, 4, 5, 7, 9])
    sktime_var = VAR()
    sktime_var.fit(train)
    y_pred = sktime_var.predict(fh=horizon)

    # statsmodels side: forecast every step up to the last requested one
    fitted_stats = _VAR(train).fit()
    relative_steps = horizon.to_relative(train.index[-1])
    n_lags = fitted_stats.k_ar
    all_steps = fitted_stats.forecast(
        train.values[-n_lags:], steps=relative_steps[-1]
    )

    # pick the rows matching the requested (1-based) steps
    selected = [all_steps[step - 1] for step in relative_steps]
    assert_allclose(y_pred, selected)
示例#29
0
    def prepare_model(self,
                      timeseries: pd.DataFrame,
                      output: bool = True) -> AutoARIMA:
        """Train an AutoARIMA model on ``timeseries`` and store it on ``self``.

        The data is transformed into target and features, 10% is held out
        for evaluation, and the model is trained on the rest. The score is
        only logged, not returned.

        Parameters
        ----------
        timeseries : pd.DataFrame
            Raw data handed to ``transform_data``.
        output : bool
            When true, progress and the score are logged; the flag is also
            forwarded to the helper functions.

        Returns
        -------
        AutoARIMA
            The trained model, also available as ``self.model``.
        """
        if output:
            logger.info("Running script...")

        y, x = transform_data(timeseries, output)
        y_train, y_holdout, x_train, x_holdout = temporal_train_test_split(
            y, x, test_size=0.1)
        self.model = train_model_autoarima(y_train, x_train, output)

        # Drop the first target value and the last feature row before
        # scoring. NOTE(review): presumably this aligns features with the
        # next target value - confirm against transform_data.
        y_eval = pd.Series(data=np.delete(y_holdout, 0))
        x_eval = pd.DataFrame(data=x_holdout[:-1])
        score = eval_model_mape(self.model, y_eval, x_eval, output)

        if output:
            logger.info(f"Score of model: {score:.04f}")
            # NOTE(review): timer_script is not defined in this method; it is
            # expected to exist at module level.
            logger.info(f"Completed script in {timer_script}")
        return self.model
示例#30
0
def test_skip_inverse_transform():
    """Test transformers with skip-inverse-transform tag in pipeline."""
    y = load_airline()
    # inject a missing value and an outlier into the series
    y.iloc[3] = np.nan
    inflated = y.iloc[4] * 20
    y.iloc[4] = inflated

    y_train, y_test = temporal_train_test_split(y)
    horizon = np.arange(len(y_test)) + 1

    steps = [
        ("t1", HampelFilter(window_length=12)),
        ("t2", Imputer(method="mean")),
        ("forecaster", NaiveForecaster()),
    ]
    pipeline = TransformedTargetForecaster(steps)
    pipeline.fit(y_train, fh=horizon)

    predictions = pipeline.predict()
    assert isinstance(predictions, pd.Series)