Exemplo n.º 1
0
def _make_fh(cutoff, steps, fh_type, is_relative):
    """Build a ForecastingHorizon of the requested index type for tests.

    Parameters
    ----------
    cutoff : scalar index value
        Reference point the absolute horizon is offset from; its ``freq``
        attribute is read for the "datetime" and "period" index types.
    steps : int, np.integer, pd.Timedelta or array-like
        Steps ahead; scalar values are wrapped into a one-element container.
    fh_type : str
        Key into ``INDEX_TYPE_LOOKUP`` selecting the index class.
    is_relative : bool
        Whether a relative or an absolute horizon is returned.

    Returns
    -------
    ForecastingHorizon
    """
    from sktime.forecasting.tests._config import INDEX_TYPE_LOOKUP

    index_cls = INDEX_TYPE_LOOKUP[fh_type]

    # Wrap scalar steps so that the index class always receives a container.
    if isinstance(steps, (int, np.integer)):
        steps = np.array([steps], dtype=int)
    elif isinstance(steps, pd.Timedelta):
        steps = [steps]

    if is_relative:
        return ForecastingHorizon(index_cls(steps), is_relative=is_relative)

    # Absolute horizon: offset the cutoff by the steps.
    if fh_type == "datetime":
        steps *= cutoff.freq

    index_kwargs = {"freq": cutoff.freq} if fh_type == "period" else {}
    absolute_values = cutoff + steps
    return ForecastingHorizon(
        index_cls(absolute_values, **index_kwargs), is_relative
    )
Exemplo n.º 2
0
    def _inverse_transform(self, X, y=None):
        """Reverse the transformation on `X`; core logic of `inverse_transform`.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be inverse transformed.
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation.

        Returns
        -------
        Xt : pd.Series or pd.DataFrame, same type as X
            Inverse transformed version of X.
        """
        frame_input = isinstance(X, pd.DataFrame)
        in_fitted_range, needs_padding = self._check_inverse_transform_index(
            X)

        if in_fitted_range:
            # All requested index values are present in the series stored
            # during `fit`, so the stored values are returned directly.
            if frame_input:
                restored = self._Z.loc[X.index, :]
            else:
                restored = self._Z.loc[X.index]
        else:
            restored = X.copy()
            lags_desc = self._lags[::-1]
            offsets_desc = self._prior_cum_lags[::-1]
            for pos, (lag, prior_cum_lag) in enumerate(
                    zip(lags_desc, offsets_desc)):
                # Stored series transformed with the lags that follow the
                # current one in reversed order.
                partially_diffed = _diff_transform(self._Z,
                                                   lags_desc[pos + 1:])

                # Locate the `lag` time points immediately before the cutoff;
                # their values seed the reversal for this lag.
                anchor = 0 if needs_padding else prior_cum_lag + lag
                cutoff = restored.index[anchor]
                back_steps = ForecastingHorizon(
                    np.arange(-1, -(lag + 1), -1))
                seed_index = back_steps.to_absolute(cutoff).to_pandas()

                if frame_input:
                    seed_values = partially_diffed.loc[seed_index, :]
                else:
                    seed_values = partially_diffed.loc[seed_index]

                if needs_padding:
                    # Prepend the seed values in front of the data.
                    restored = pd.concat([seed_values, restored])
                else:
                    # Overwrite the leading entries in place.
                    restored.update(seed_values)

                restored = _inverse_diff(restored, lag)

        if needs_padding:
            # Drop the padded rows again, keeping only the requested index.
            if frame_input:
                restored = restored.loc[X.index, :]
            else:
                restored = restored.loc[X.index]

        return restored
Exemplo n.º 3
0
def test_to_absolute_freq(freqstr):
    """Check the absolute horizon keeps the (anchored) frequency string."""
    train_index = pd.date_range("2021-10-06", freq=freqstr, periods=3)
    train = pd.Series(1, index=train_index)

    relative_fh = ForecastingHorizon([1, 2, 3])
    cutoff = train.index[-1]
    absolute_fh = relative_fh.to_absolute(cutoff)

    assert absolute_fh._values.freqstr == freqstr
Exemplo n.º 4
0
def test_to_absolute_int(idx: int, freq: str):
    """Check conversion of a relative horizon to absolute integer positions."""
    # With `start` at the first training point and the cutoff at position
    # `idx`, the integer horizon is expected to be the relative one plus `idx`.
    train_index = pd.date_range("2021-10-06", freq=freq, periods=5)
    train = pd.Series(1, index=train_index)

    fh = ForecastingHorizon([1, 2, 3])
    start, cutoff = train.index[0], train.index[idx]
    absolute_int = fh.to_absolute_int(start=start, cutoff=cutoff)

    assert_array_equal(fh + idx, absolute_int)
Exemplo n.º 5
0
    def _inverse_transform(self, Z, X=None):
        """Reverse the transformation on `Z`; core logic of `inverse_transform`.

        Parameters
        ----------
        Z : pd.Series or pd.DataFrame
            A time series to reverse the transformation on.
        X : optional (default=None)
            Not used by this method.

        Returns
        -------
        Z_inv : pd.Series or pd.DataFrame
            The reconstructed timeseries after the transformation has been
            reversed.
        """
        # Row selector that works for both Series and DataFrame inputs.
        if isinstance(Z, pd.DataFrame):
            def take_rows(obj, idx):
                return obj.loc[idx, :]
        else:
            def take_rows(obj, idx):
                return obj.loc[idx]

        contained, pad = self._check_inverse_transform_index(Z)

        if contained:
            # `Z` lies entirely inside the series stored in `fit`, so the
            # stored values can be returned as a shortcut.
            Z_inv = take_rows(self._Z, Z.index)
        else:
            Z_inv = Z.copy()
            reversed_lags = self._lags[::-1]
            reversed_prior = self._prior_cum_lags[::-1]
            for step, lag in enumerate(reversed_lags):
                if step >= len(reversed_prior):
                    # Mirrors `zip`, which stops at the shorter sequence.
                    break
                prior_cum_lag = reversed_prior[step]
                later_lags = reversed_lags[step + 1:]
                reference = _diff_transform(self._Z, later_lags)

                # Index values of the initial values needed to reverse the
                # differencing for this lag: the `lag` points before `cutoff`.
                if pad:
                    cutoff = Z_inv.index[0]
                else:
                    cutoff = Z_inv.index[prior_cum_lag + lag]
                lookback = ForecastingHorizon(np.arange(-1, -(lag + 1), -1))
                initial_index = lookback.to_absolute(cutoff).to_pandas()
                initial_values = take_rows(reference, initial_index)

                if pad:
                    Z_inv = pd.concat([initial_values, Z_inv])
                else:
                    Z_inv.update(initial_values)

                Z_inv = _inverse_diff(Z_inv, lag)

        if pad:
            # Keep only the originally requested index values.
            Z_inv = take_rows(Z_inv, Z.index)

        return Z_inv
Exemplo n.º 6
0
def test_to_relative(freq: str):
    """Check absolute-to-relative conversion for a multi-step frequency.

    Regression test for
    https://github.com/alan-turing-institute/sktime/issues/1935#issue-1114814142
    """
    # NOTE(review): the `freq` argument is overwritten here, so whatever value
    # the caller supplies is ignored and only "2H" is exercised.
    freq = "2H"
    timestamps = pd.date_range(start="2021-01-01", freq=freq, periods=5)

    absolute_fh = ForecastingHorizon(timestamps, is_relative=False)
    relative_fh = absolute_fh.to_relative(cutoff=timestamps.min())

    # Relative to the first timestamp the horizon must be 0, 1, ..., 4.
    assert_array_equal(relative_fh, np.arange(5))
Exemplo n.º 7
0
def test_relative_to_relative(freqstr):
    """Check a relative horizon survives a relative -> absolute -> relative trip."""
    train_index = pd.date_range("2021-10-06", freq=freqstr, periods=3)
    train = pd.Series(1, index=train_index)
    fh = ForecastingHorizon([1, 2, 3])

    # Go to absolute and come back, using the last training point as cutoff
    # in both directions.
    absolute_fh = fh.to_absolute(train.index[-1])
    round_tripped = absolute_fh.to_relative(train.index[-1])

    assert_array_equal(fh, round_tripped)
Exemplo n.º 8
0
def test_estimator_fh(freqstr):
    """Check predictions of a fitted model carry the expected absolute index."""
    n_obs = 104
    values = np.random.uniform(low=2000, high=7000, size=(n_obs, ))
    train_index = pd.date_range("2019-01-02", freq=freqstr, periods=n_obs)
    train = pd.Series(values, index=train_index)

    forecaster = AutoETS(auto=True, sp=52, n_jobs=-1, restrict=True)
    forecaster.fit(train)
    pred = forecaster.predict(np.arange(1, 27))

    # The prediction index must equal the relative horizon made absolute at
    # the last training time point.
    cutoff = train.index[-1]
    expected_fh = ForecastingHorizon(np.arange(1, 27)).to_absolute(cutoff)
    assert_array_equal(pred.index.to_numpy(), expected_fh.to_numpy())
Exemplo n.º 9
0
def test_absolute_to_absolute_with_integer_horizon(freqstr):
    """Check absolute -> relative -> absolute conversion with integer steps."""
    train_index = pd.date_range("2021-10-06", freq=freqstr, periods=3)
    train = pd.Series(1, index=train_index)
    cutoff = train.index[-1]

    absolute_fh = ForecastingHorizon([1, 2, 3]).to_absolute(cutoff)

    # Convert to relative and straight back to absolute with the same cutoff.
    relative_fh = absolute_fh.to_relative(train.index[-1])
    round_tripped = relative_fh.to_absolute(train.index[-1])

    assert_array_equal(absolute_fh, round_tripped)
    assert round_tripped._values.freqstr == freqstr
Exemplo n.º 10
0
def test_relative_to_relative_with_timedelta_horizon(freqstr):
    """Check relative -> absolute -> relative conversion for timedelta steps."""
    train_index = pd.date_range("2021-10-06", freq=freqstr, periods=3)
    train = pd.Series(1, index=train_index)

    # Build a three-step timedelta horizon whose first step is one interval
    # of the given frequency.
    count, unit = _get_intervals_count_and_unit(freq=freqstr)
    first_step = pd.to_timedelta(count, unit=unit)
    timedelta_steps = pd.timedelta_range(first_step, freq=freqstr, periods=3)
    fh = ForecastingHorizon(timedelta_steps)

    absolute_fh = fh.to_absolute(train.index[-1])
    relative_fh = absolute_fh.to_relative(train.index[-1])

    assert_array_equal(relative_fh, np.arange(1, 4))
Exemplo n.º 11
0
def _get_end(y: ACCEPTED_Y_TYPES, fh: ForecastingHorizon) -> int:
    """Compute the end of the last training window for a forecasting horizon.

    Parameters
    ----------
    y : pd.Series, pd.DataFrame, np.ndarray, or pd.Index
        coerced and checked version of input y
    fh : ForecastingHorizon
        ordered forecasting horizon; only its last element is used

    Returns
    -------
    end : int
        end of the training window
    """
    # Both `fh` (via `_check_fh`) and `window_length` (via
    # `check_window_length`) are assumed to be validated by the caller.
    n_timepoints = y.shape[0]

    # A purely in-sample horizon never reaches past the data, so the last
    # split point is the end of the training data.
    if fh.is_all_in_sample():
        return n_timepoints + 1

    # Otherwise leave enough room so that the furthest step is still covered.
    return n_timepoints - fh[-1] + 1
Exemplo n.º 12
0
def _get_end(y_index: pd.Index, fh: ForecastingHorizon) -> int:
    """Compute the end of the last training window for a forecasting horizon.

    For a time series index `y_index`, `y_index[end]` will give
    the index of the training window.
    Correspondingly, for a time series `y` with index `y_index`,
    `y.iloc[end]` or `y.loc[y_index[end]]`
    will provide the last index of the training window.

    Parameters
    ----------
    y_index : pd.Index
        Index of time series
    fh : ForecastingHorizon
        Ordered forecasting horizon holding ints or timedeltas.

    Returns
    -------
    end : int
        0-indexed integer end of the training window
    """
    # `fh` is assumed to be ordered and checked by `_check_fh` and
    # `window_length` by `check_window_length`.
    n_timepoints = y_index.shape[0]
    assert isinstance(y_index, pd.Index)

    # A purely in-sample horizon needs no room after the window, so the offset
    # is zero; otherwise the furthest step must still fall inside the data.
    if array_is_int(fh):
        int_offset = 0 if fh.is_all_in_sample() else fh[-1]
        return n_timepoints - int_offset - 1

    # Timedelta horizon: step back from the last index value and look up the
    # integer position of the resulting time point.
    time_offset = pd.Timedelta(0) if fh.is_all_in_sample() else fh[-1]
    return y_index.get_loc(y_index[-1] - time_offset)
Exemplo n.º 13
0
def _impute_with_forecaster(forecaster, Z):
    """Use a given forecaster for imputation by in-sample predictions.

    Each series (the Series itself, or every column of a DataFrame) that has
    missing values is imputed independently: the forecaster is fitted on a
    forward/backward-filled copy and then asked to predict exactly the time
    points that were missing.

    Parameters
    ----------
    forecaster: Forecaster
        Forecaster to use for imputation. It is re-fitted for every series
        that contains missing values.
    Z : pd.Series or pd.DataFrame
        Series to impute. Modified in place.

    Returns
    -------
    zt : pd.Series or pd.DataFrame
        The same object as `Z`, with imputed values.

    Raises
    ------
    TypeError
        If `Z` is neither a pd.Series nor a pd.DataFrame.
    """
    is_series = isinstance(Z, pd.Series)
    if is_series:
        targets = [(None, Z)]
    elif isinstance(Z, pd.DataFrame):
        targets = [(column, Z[column]) for column in Z]
    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise TypeError(
            "`Z` must be a pd.Series or pd.DataFrame, "
            f"but found: {type(Z).__name__}"
        )

    for column, z in targets:
        if not _has_missing_values(z):
            continue

        # define fh based on index of missing values
        na_index = z.index[z.isna()]
        fh = ForecastingHorizon(values=na_index, is_relative=False)

        # fill NaN before fitting with ffill and backfill (heuristic);
        # `ffill`/`bfill` replace the deprecated `fillna(method=...)`.
        forecaster.fit(y=z.ffill().bfill(), fh=fh)
        y_pred = forecaster.predict()

        # Write the predictions into `Z` itself. Assigning into the column
        # Series taken from a DataFrame is not guaranteed to propagate back
        # to the frame (it does not under pandas Copy-on-Write).
        if is_series:
            Z[na_index] = y_pred
        else:
            Z.loc[na_index, column] = y_pred
    return Z
Exemplo n.º 14
0
    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        Private method holding the core logic; called from `transform`.

        Parameters
        ----------
        X : pd.Series or pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        theta_lines: pd.Series or pd.DataFrame
            Transformed series
            pd.Series, with single Theta-line, if self.theta is float
            pd.DataFrame of shape: [len(X), len(self.theta)], if self.theta is tuple
        """
        coefficients = _check_theta(self.theta)

        # Fit a trend on the series and predict it over the series' own index.
        trend_model = PolynomialTrendForecaster()
        trend_model.fit(X)
        in_sample_fh = ForecastingHorizon(X.index, is_relative=False)
        trend = trend_model.predict(in_sample_fh)

        # One column per theta coefficient.
        theta_lines = np.zeros((X.shape[0], len(coefficients)))
        for col, coefficient in enumerate(coefficients):
            theta_lines[:, col] = _theta_transform(X, trend, coefficient)

        if not isinstance(self.theta, (float, int)):
            return pd.DataFrame(theta_lines, columns=self.theta, index=X.index)
        return pd.Series(theta_lines.flatten(), index=X.index)
Exemplo n.º 15
0
    def transform(self, Z, X=None):
        """Transform data.

        Parameters
        ----------
        Z : pd.Series
            Series to transform.
        X : pd.DataFrame, optional (default=None)
            Exogenous data; not used by this method.

        Returns
        -------
        theta_lines: pd.Series or pd.DataFrame
            Transformed series: a pd.Series holding a single Theta-line when
            self.theta is a float or int, otherwise a pd.DataFrame of
            shape: len(Z)*len(self.theta).
        """
        self.check_is_fitted()
        series = check_series(Z, enforce_univariate=True)
        theta_values = _check_theta(self.theta)

        # Trend fitted on the series and evaluated on the series' own index.
        trend_forecaster = PolynomialTrendForecaster()
        trend_forecaster.fit(series)
        own_index_fh = ForecastingHorizon(series.index, is_relative=False)
        trend = trend_forecaster.predict(own_index_fh)

        n_rows, n_lines = series.shape[0], len(theta_values)
        theta_lines = np.zeros((n_rows, n_lines))
        for position, theta_value in enumerate(theta_values):
            theta_lines[:, position] = _theta_transform(
                series, trend, theta_value)

        single_line = isinstance(self.theta, (float, int))
        if single_line:
            return pd.Series(theta_lines.flatten(), index=series.index)
        return pd.DataFrame(theta_lines,
                            columns=self.theta,
                            index=series.index)
Exemplo n.º 16
0
def check_fh(fh, enforce_relative=False):
    """Validate forecasting horizon.

    Parameters
    ----------
    fh : int, list, np.array, pd.Index or ForecastingHorizon
        Forecasting horizon specifying the time points to predict.
    enforce_relative : bool, optional (default=False)
        If True, checks if fh is relative.

    Returns
    -------
    fh : ForecastingHorizon
        Validated forecasting horizon.

    Raises
    ------
    ValueError
        If the horizon is empty, or if `enforce_relative` is True and the
        horizon is absolute.
    """
    from sktime.forecasting.base import ForecastingHorizon

    # Anything that is not already a ForecastingHorizon is wrapped as a
    # relative one.
    if isinstance(fh, ForecastingHorizon):
        horizon = fh
    else:
        horizon = ForecastingHorizon(fh, is_relative=True)

    # Emptiness is rejected here rather than in the ForecastingHorizon
    # constructor: an empty horizon object can legitimately exist in some
    # cases, but users should not create one.
    if len(horizon) == 0:
        raise ValueError(f"`fh` must not be empty, but found: {horizon}")

    if enforce_relative:
        if not horizon.is_relative:
            raise ValueError("`fh` must be relative, but found absolute `fh`")

    return horizon
Exemplo n.º 17
0
def test_VAR_against_statsmodels():
    """Compare sktime's VAR forecasts with those of statsmodels' VAR."""
    train, test = temporal_train_test_split(df)
    fh = ForecastingHorizon([1, 3, 4, 5, 7, 9])

    # Forecast with the sktime wrapper.
    sktime_model = VAR()
    sktime_model.fit(train)
    y_pred = sktime_model.predict(fh=fh)

    # Forecast with statsmodels directly, up to the furthest requested step.
    stats_fit = _VAR(train).fit()
    fh_int = fh.to_relative(train.index[-1])
    lagged = stats_fit.k_ar
    y_pred_stats = stats_fit.forecast(train.values[-lagged:], steps=fh_int[-1])

    # Steps are 1-based, rows of the statsmodels forecast are 0-based.
    selected_rows = [y_pred_stats[step - 1] for step in fh_int]
    assert_allclose(y_pred, selected_rows)
Exemplo n.º 18
0
def test_window_splitter_in_sample_fh_greater_than_window_length(CV):
    """Test a window splitter whose in-sample fh reaches beyond the window."""
    y = np.arange(10)
    cv = CV(ForecastingHorizon([-5, -3]), 3)

    train_windows, test_windows, _, _ = _check_cv(cv, y)

    first_train, first_test = train_windows[0], test_windows[0]
    np.testing.assert_array_equal(first_test, np.array([0, 2]))
    np.testing.assert_array_equal(first_train, np.array([3, 4, 5]))
Exemplo n.º 19
0
 def _transform(self, y, X=None):
     """Apply the sliding-window transform with a one-step-ahead horizon."""
     # The recursive strategy always reduces to a one-step-ahead problem, so
     # the horizon used here is fixed and independent of the horizon that is
     # requested at prediction time.
     one_step_ahead = ForecastingHorizon([1])
     window_length = self.window_length_
     scitype = self._estimator_scitype
     return _sliding_window_transform(
         y, window_length, one_step_ahead, X, scitype=scitype
     )
Exemplo n.º 20
0
def test_window_splitter_in_sample_fh_smaller_than_window_length(CV):
    """Test a window splitter whose in-sample fh stays inside the window."""
    y = np.arange(10)
    cv = CV(ForecastingHorizon([-2, 0]), 3)

    train_windows, test_windows, _, _ = _check_cv(cv, y)

    first_train, first_test = train_windows[0], test_windows[0]
    np.testing.assert_array_equal(first_test, np.array([0, 2]))
    np.testing.assert_array_equal(first_train, np.array([0, 1, 2]))
Exemplo n.º 21
0
def test_strategy_mean_seasonal_additional_combinations(n, window_length, sp):
    """Check the seasonal "mean" strategy on perfectly cyclic hourly data.

    A series of ``n * window_length`` points is split so that the first
    ``window_length`` points are training data and the rest is test data, for
    different combinations of window length and seasonal periodicity ``sp``.
    """
    # given <window_length> hours of data with a seasonal periodicity of <sp> hours
    n_total = n * window_length
    # Keyword form avoids the deprecated upper-case "H" unit alias.
    freq = pd.Timedelta(hours=1)
    cycle = [float(i) for i in range(1, sp + 1)]
    data = pd.Series(
        # `closed="left"` was dropped: without an `end` it never removed a
        # point, and the argument no longer exists in pandas >= 2.0.
        index=pd.date_range("2021-06-01 00:00", periods=n_total, freq=freq),
        data=(cycle * n_total)[:n_total],
    )

    # Split into train and test data (positional split; the test part is
    # copied because it is modified below).
    train_data = data.iloc[:window_length]
    test_data = data.iloc[window_length:].copy()

    # Forecast data does not retain the original frequency
    test_data.index.freq = None

    # For example, for n=2, periods=4 and sp=3:

    # print(train_data)
    # 2021-06-01 00:00:00    1.0
    # 2021-06-01 01:00:00    2.0
    # 2021-06-01 02:00:00    3.0
    # 2021-06-01 03:00:00    1.0
    # Freq: H, dtype: float64

    # print(test_data)
    # 2021-06-01 04:00:00    2.0  # (value of 3 hours earlier)
    # 2021-06-01 05:00:00    3.0  # (value of 3 hours earlier)
    # 2021-06-01 06:00:00    1.0  # (mean value of 3 and 6 hours earlier)
    # 2021-06-01 07:00:00    2.0  # (value of 6 hours earlier)
    # dtype: float64

    # let's forecast the next <2 x period> hours with a periodicity of <sp> hours
    fh = ForecastingHorizon(test_data.index, is_relative=False)
    model = NaiveForecaster(strategy="mean", sp=sp)
    model.fit(train_data)
    forecast_data = model.predict(fh)

    if sp >= window_length:
        # The training window does not cover a full season, so the first
        # `sp - window_length` positions of every season are expected to be
        # NaN in the forecast.
        for i in range(1 + len(test_data) // sp):
            season_start = i * sp
            test_data.iloc[season_start:season_start + sp - window_length] = np.nan
    # Otherwise we expect a perfect forecast given our perfectly cyclic data.
    pd.testing.assert_series_equal(forecast_data, test_data)
Exemplo n.º 22
0
def test_auto_arima():
    """Test bug in 805.

    https://github.com/alan-turing-institute/sktime/issues/805#issuecomment-891848228.
    """
    # (frequency of the time index, expected first prediction timestamp)
    scenarios = [
        ("1D", "January 6, 2021"),
        ("2D", "January 11, 2021"),
    ]

    for freq, expected_start in scenarios:
        time_index = pd.date_range("January 1, 2021", periods=8, freq=freq)
        X = pd.DataFrame(
            np.random.randint(0, 4, 24).reshape(8, 3),
            columns=["First", "Second", "Third"],
            index=time_index,
        )
        y = pd.Series([1, 3, 2, 4, 5, 2, 3, 1], index=time_index)

        # Train on the first five points, predict the remaining three.
        horizon = ForecastingHorizon(X.index[5:], is_relative=False)

        forecaster = AutoARIMA(start_p=2, start_q=2, max_p=5, max_q=5)
        fitted = forecaster.fit(X=X[:5], y=y[:5])
        y_pred_sk = fitted.predict(fh=horizon, X=X[5:])

        expected_index = pd.date_range(expected_start, periods=3, freq=freq)
        pd.testing.assert_index_equal(y_pred_sk.index, expected_index)
0
    def _check_inverse_transform_index(self, Z):
        """Check fitted series contains indices needed in inverse_transform.

        Parameters
        ----------
        Z : pd.Series or pd.DataFrame
            Data passed to `inverse_transform`.

        Returns
        -------
        is_contained_by_fitted_z : bool
            True if every index value of `Z` is in the series stored in `fit`.
        pad_z_inv : bool
            `self.drop_na` if that is truthy, otherwise whether `Z` starts
            after the last fitted index value.

        Raises
        ------
        ValueError
            If `Z` starts before the fitted series, or if index values needed
            to seed the reconstruction were not stored in `fit`.
        """
        fitted_index = self._Z.index
        first_idx = Z.index.min()
        orig_first_idx = fitted_index.min()
        orig_last_idx = fitted_index.max()

        if first_idx < orig_first_idx:
            parts = [
                "Some indices of `Z` are prior to timeseries used in `fit`.",
                "Reconstruction via `inverse_transform` is not possible.",
            ]
            raise ValueError(" ".join(parts))

        is_contained_by_fitted_z = Z.index.difference(
            fitted_index).shape[0] == 0
        # Only considered "future" when not already fully contained.
        is_future = (not is_contained_by_fitted_z
                     and first_idx > orig_last_idx)

        pad_z_inv = self.drop_na or is_future

        # Index values immediately preceding the cutoff, one per cumulative
        # lag step; these must be available from `fit`.
        total_lag = self._cumulative_lags[-1]
        cutoff = Z.index[0] if pad_z_inv else Z.index[total_lag]
        lookback = ForecastingHorizon(np.arange(-1, -(total_lag + 1), -1))
        index = lookback.to_absolute(cutoff).to_pandas()
        index_diff = index.difference(self._Z.index)

        missing_seed_values = index_diff.shape[0] != 0
        if missing_seed_values and not is_contained_by_fitted_z:
            parts = [
                f"Inverse transform requires indices {index}",
                "to have been stored in `fit()`,",
                f"but the indices {index_diff} were not found.",
            ]
            raise ValueError(" ".join(parts))

        return is_contained_by_fitted_z, pad_z_inv
Exemplo n.º 24
0
def test_cutoff_window_splitter(y, cutoffs, fh, window_length):
    """Test CutoffSplitter for supported and unsupported type combinations."""
    cv = CutoffSplitter(cutoffs, fh=fh, window_length=window_length)
    supported = _cutoffs_fh_window_length_types_are_supported(
        cutoffs=cutoffs,
        fh=ForecastingHorizon(fh),
        window_length=window_length,
    )

    if not supported:
        # Unsupported combinations must be rejected with a TypeError.
        with pytest.raises(TypeError, match="Unsupported combination of types"):
            _check_cv(cv, y)
        return

    # Cutoffs derived while splitting must agree with `get_cutoffs`.
    _, _, split_cutoffs, _ = _check_cv(cv, y)
    np.testing.assert_array_equal(split_cutoffs, cv.get_cutoffs(y))
Exemplo n.º 25
0
def test_reductions_airline_data(forecaster, expected):
    """Test reduction forecasters against reference airline predictions.

    Predictions on the airline dataset, made with linear estimators, are
    compared with values calculated by Lovkush Agarwal on their local machine
    in Mar 2021.
    """
    y_train, y_test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    fitted = forecaster.fit(y_train, fh=fh)
    actual = fitted.predict(fh)

    np.testing.assert_almost_equal(actual, expected)
Exemplo n.º 26
0
def predict(Xpred=None, data_pars=None, compute_pars=None, out_pars=None, **kw):
    """Predict with the module-level `model` at the time points of `Xpred`.

    Parameters
    ----------
    Xpred : object with an ``index`` attribute, optional
        Data whose index defines the absolute forecasting horizon. When None,
        it is loaded via ``get_dataset(data_pars, task_type="predict")``.
    data_pars : dict, optional
        Dataset parameters. Only used when `Xpred` is None, in which case
        ``data_pars['train']`` is set to False on the passed dict.
    compute_pars : dict, optional
        Not used by this function.
    out_pars : dict, optional
        Not used by this function.
    **kw
        Ignored.

    Returns
    -------
    ypred
        Result of ``model.model.predict`` for the horizon.
    ypred_proba : None
        Always None; no probabilities are produced.
    """
    global model, session

    # Defaults are None instead of `{}`: the dict is written to below, and a
    # shared mutable default would carry that write over into later calls.
    if data_pars is None:
        data_pars = {}

    if Xpred is None:
        data_pars['train'] = False
        Xpred = get_dataset(data_pars, task_type="predict")

    Xpred_fh = ForecastingHorizon(Xpred.index, is_relative=False)

    ypred = model.model.predict(Xpred_fh)

    ypred_proba = None  ### No proba
    return ypred, ypred_proba
Exemplo n.º 27
0
def test_factory_method_direct():
    """Check ReducedForecaster matches DirectRegressionForecaster."""
    y_train, y_test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    regressor = LinearRegression()
    via_factory = ReducedForecaster(
        regressor, scitype="regressor", strategy="direct"
    )
    via_class = DirectRegressionForecaster(regressor)

    # Both construction routes must give identical predictions.
    actual = via_factory.fit(y_train, fh=fh).predict(fh)
    expected = via_class.fit(y_train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)
Exemplo n.º 28
0
    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional, default=None
            Exogenous data; split alongside `y` and passed on to the
            forecasters when given.
        fh : int, list or np.array, optional, default=None
            The forecasters horizon with the steps ahead to predict.

        Returns
        -------
        self : returns an instance of self.
        """
        _, forecasters = self._check_forecasters()
        self.regressor_ = check_regressor(
            regressor=self.regressor, random_state=self.random_state
        )

        # Hold out the tail of the data as training target for the meta-model.
        if X is None:
            y_train, y_test = temporal_train_test_split(
                y, test_size=self.test_size
            )
            X_train = X_test = None
        else:
            y_train, y_test, X_train, X_test = temporal_train_test_split(
                y, X, test_size=self.test_size
            )

        # Fit the ensemble members on the first part and predict the hold-out.
        holdout_fh = ForecastingHorizon(y_test.index, is_relative=False)
        self._fit_forecasters(forecasters, y_train, X_train, holdout_fh)
        member_predictions = self._predict_forecasters(holdout_fh, X_test)
        X_meta = pd.concat(member_predictions, axis=1)

        # The meta-model (regressor) learns from the members' predictions,
        # with the held-out values as endog/target.
        self.regressor_.fit(X=X_meta, y=y_test)

        # For an sklearn Pipeline the weights come from its last step.
        weight_source = self.regressor_
        if isinstance(weight_source, Pipeline):
            weight_source = weight_source.steps[-1][1]
        self.weights_ = _get_weights(weight_source)

        # Finally refit the ensemble members on all of the data.
        self._fit_forecasters(forecasters, y, X, fh)

        return self
Exemplo n.º 29
0
def test_factory_method_ts_direct():
    """Check ReducedForecaster matches DirectTimeSeriesRegressionForecaster."""
    y_train, y_test = temporal_train_test_split(load_airline(), test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    pipeline_steps = [("tabularize", Tabularizer()),
                      ("model", LinearRegression())]
    ts_regressor = Pipeline(pipeline_steps)

    via_factory = ReducedForecaster(
        ts_regressor, scitype="ts_regressor", strategy="direct"
    )
    via_class = DirectTimeSeriesRegressionForecaster(ts_regressor)

    # Both construction routes must give identical predictions.
    actual = via_factory.fit(y_train, fh=fh).predict(fh)
    expected = via_class.fit(y_train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)
Exemplo n.º 30
0
def _get_end(y: ACCEPTED_Y_TYPES, fh: ForecastingHorizon) -> int:
    """Compute the end of the last training window for a forecasting horizon.

    `fh` is assumed to be ordered and already checked by `_check_fh`, and
    `window_length` by `check_window_length`.
    """
    n_timepoints = y.shape[0]

    # A purely in-sample horizon needs no room after the training data, so
    # nothing is subtracted; otherwise the furthest step (the last element of
    # the ordered horizon) must still lie within the data.
    furthest_step = 0 if fh.is_all_in_sample() else fh[-1]

    return n_timepoints - furthest_step + 1