示例#1
0
    def fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored
        Returns
        -------
        self : returns an instance of self.
        """
        y, _ = check_y_X(y, X)
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            warn("`sp` is ignored when `deseasonalise`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp,
                                                  model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self).fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)
        self._is_fitted = True
        return self
def test_deseasonalised_values(sp):
    transformer = Deseasonalizer(sp=sp)
    transformer.fit(y_train)
    actual = transformer.transform(y_train)

    r = seasonal_decompose(y_train, period=sp)
    expected = y_train - r.seasonal
    np.testing.assert_array_equal(actual, expected)
示例#3
0
    def compute_expected_y_pred(y_train, fh):
        # fitting
        yt = y_train.copy()
        t1 = Deseasonalizer(sp=12, model="multiplicative")
        yt = t1.fit_transform(yt)
        t2 = Detrender(PolynomialTrendForecaster(degree=1))
        yt = t2.fit_transform(yt)
        forecaster = NaiveForecaster()
        forecaster.fit(yt, fh=fh)

        # predicting
        y_pred = forecaster.predict()
        y_pred = t2.inverse_transform(y_pred)
        y_pred = t1.inverse_transform(y_pred)
        return y_pred
示例#4
0
def test_pipeline():
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y)

    forecaster = TransformedTargetForecaster([
        ("t1", Deseasonalizer(sp=12, model="multiplicative")),
        ("t2", Detrender(PolynomialTrendForecaster(degree=1))),
        ("forecaster", NaiveForecaster()),
    ])
    fh = np.arange(len(y_test)) + 1
    forecaster.fit(y_train, fh=fh)
    actual = forecaster.predict()

    def compute_expected_y_pred(y_train, fh):
        # fitting
        yt = y_train.copy()
        t1 = Deseasonalizer(sp=12, model="multiplicative")
        yt = t1.fit_transform(yt)
        t2 = Detrender(PolynomialTrendForecaster(degree=1))
        yt = t2.fit_transform(yt)
        forecaster = NaiveForecaster()
        forecaster.fit(yt, fh=fh)

        # predicting
        y_pred = forecaster.predict()
        y_pred = t2.inverse_transform(y_pred)
        y_pred = t1.inverse_transform(y_pred)
        return y_pred

    expected = compute_expected_y_pred(y_train, fh)
    np.testing.assert_array_equal(actual, expected)
示例#5
0
class ThetaForecaster(ExponentialSmoothing):
    """Theta method for forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing (SES) with drift (as demonstrated in [2]_).

    The series is tested for seasonality using the test outlined in A&N. If
    deemed seasonal, the series is seasonally adjusted using a classical
    multiplicative decomposition before applying the theta method. The
    resulting forecasts are then reseasonalised.

    In cases where SES results in a constant forecast, the theta forecaster
    will revert to predicting the SES constant plus a linear trend derived
    from the training data.

    Prediction intervals are computed using the underlying state space model.

    Parameters
    ----------
    initial_level : float, optional
        The alpha value of the simple exponential smoothing, if the value is
        set then
        this will be used, otherwise it will be estimated from the data.
    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.
    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for a
        multiplicative deseasonaliser, which is used if seasonality is
        detected in the
        training data. Ignored if a deseasonaliser transformer is provided.
        Default is
        1 (no seasonality).

    Attributes
    ----------
    initial_level_ : float
        The estimated alpha value of the SES fit.
    drift_ : float
        The estimated drift of the fitted model.
    se_ : float
        The standard error of the predictions. Used to calculate prediction
        intervals.

    References
    ----------
    .. [1] Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
       decomposition approach to forecasting. International Journal of
       Forecasting 16, 521-530, 2000.
       https://www.sciencedirect.com/science/article/pii/S0169207000000662

    .. [2] `Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
       International J. Forecasting, 19, 287-290, 2003.
       https://www.sciencedirect.com/science/article/pii/S0169207001001431

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.theta import ThetaForecaster
    >>> y = load_airline()
    >>> forecaster = ThetaForecaster(sp=12)
    >>> forecaster.fit(y)
    ThetaForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    """

    _fitted_param_names = ("initial_level", "smoothing_level")
    _tags = {
        "scitype:y": "univariate",
        "ignores-exogeneous-X": True,
        "capability:pred_int": True,
        "requires-fh-in-fit": False,
        "handles-missing-data": False,
    }

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):

        self.sp = sp
        self.deseasonalize = deseasonalize
        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level, sp=sp)

    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored

        Returns
        -------
        self : returns an instance of self.
        """
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            warn("`sp` is ignored when `deseasonalise`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp, model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        self.initialization_method = "known" if self.initial_level else "estimated"
        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self)._fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute and store historical residual standard error
        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (len(y) - 1))

        # compute trend
        self.trend_ = self._compute_trend(y)

        return self

    def _predict(self, fh, X=None):
        """Make forecasts.

        Parameters
        ----------
        fh : array-like
            The forecasters horizon with the steps ahead to to predict.
            Default is
            one-step ahead forecast, i.e. np.array([1]).
        X : pd.DataFrame, optional (default=None)
            Exogenous time series

        Returns
        -------
        y_pred : pandas.Series
            Returns series of predicted values.
        """
        y_pred = super(ThetaForecaster, self)._predict(fh, X)

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        return y_pred

    @staticmethod
    def _compute_trend(y):
        # Trend calculated through least squares regression.
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (
                fh
                + (1 - (1 - self.initial_level_) ** n_timepoints) / self.initial_level_
            )

        return drift

    def _predict_quantiles(self, fh, X=None, alpha=None):
        """Compute/return prediction quantiles for a forecast.

        private _predict_quantiles containing the core logic,
            called from predict_quantiles and predict_interval

        Parameters
        ----------
        fh : int, list, np.array or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, optional (default=None)
            Exogenous time series
        alpha : list of float, optional (default=[0.5])
            A list of probabilities at which quantile forecasts are computed.

        Returns
        -------
        quantiles : pd.DataFrame
            Column has multi-index: first level is variable name from y in fit,
                second level being the values of alpha passed to the function.
            Row index is fh. Entries are quantile forecasts, for var in col index,
                at quantile probability in second col index, for the row index.
        """
        # prepare return data frame
        index = pd.MultiIndex.from_product([["Quantiles"], alpha])
        pred_quantiles = pd.DataFrame(columns=index)

        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1
        )

        y_pred = self._predict(fh, X)

        # we assume normal additive noise with sem variance
        for a in alpha:
            pred_quantiles[("Quantiles", a)] = y_pred + norm.ppf(a) * sem
        # todo: should this not increase with the horizon?
        # i.e., sth like norm.ppf(a) * sem * fh.to_absolute(cutoff) ?
        # I've just refactored this so will leave it for now

        return pred_quantiles

    def _update(self, y, X=None, update_params=True):
        super(ThetaForecaster, self)._update(
            y, X, update_params=False
        )  # use custom update_params routine
        if update_params:
            if self.deseasonalize:
                y = self.deseasonalizer_.transform(self._y)  # use updated y
            self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]
            self.trend_ = self._compute_trend(y)
        return self
示例#6
0
class ThetaForecaster(ExponentialSmoothing):
    """
    Theta method of forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing
    (SES) with drift. This is demonstrated in [2]_.

    The series is tested for seasonality using the test outlined in A&N. If
    deemed
    seasonal, the series is seasonally adjusted using a classical
    multiplicative
    decomposition before applying the theta method. The resulting forecasts
    are then
    reseasonalised.

    In cases where SES results in a constant forecast, the theta forecaster
    will revert
    to predicting the SES constant plus a linear trend derived from the
    training data.

    Prediction intervals are computed using the underlying state space model.

    Parameters
    ----------

    initial_level : float, optional
        The alpha value of the simple exponential smoothing, if the value is
        set then
        this will be used, otherwise it will be estimated from the data.

    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.

    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for a
        multiplicative deseasonaliser, which is used if seasonality is
        detected in the
        training data. Ignored if a deseasonaliser transformer is provided.
        Default is
        1 (no seasonality).

    Attributes
    ----------

    initial_level_ : float
        The estimated alpha value of the SES fit.

    drift_ : float
        The estimated drift of the fitted model.

    se_ : float
        The standard error of the predictions. Used to calculate prediction
        intervals.

    References
    ----------

    .. [1] `Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
    decomposition
           approach to forecasting. International Journal of Forecasting 16,
           521-530,
           2000.
           <https://www.sciencedirect.com/science/article/pii
           /S0169207000000662>`_

    .. [2] `Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
           International J. Forecasting, 19, 287-290, 2003.
           <https://www.sciencedirect.com/science/article/pii
           /S0169207001001431>`_
    """

    _fitted_param_names = ("initial_level", "smoothing_level")

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):

        self.sp = sp
        self.deseasonalize = deseasonalize

        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level,
                                              sp=sp)

    def fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored
        Returns
        -------
        self : returns an instance of self.
        """
        y, _ = check_y_X(y, X)
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            warn("`sp` is ignored when `deseasonalise`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp,
                                                  model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self).fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)
        self._is_fitted = True
        return self

    def _predict(self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA):
        """
        Make forecasts.

        Parameters
        ----------

        fh : array-like
            The forecasters horizon with the steps ahead to to predict.
            Default is
            one-step ahead forecast, i.e. np.array([1]).

        Returns
        -------

        y_pred : pandas.Series
            Returns series of predicted values.
        """
        y_pred = super(ThetaForecaster, self)._predict(fh,
                                                       X,
                                                       return_pred_int=False,
                                                       alpha=alpha)

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        if return_pred_int:
            pred_int = self.compute_pred_int(y_pred=y_pred, alpha=alpha)
            return y_pred, pred_int

        return y_pred

    @staticmethod
    def _compute_trend(y):
        # Trend calculated through least squares regression.
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (fh +
                                   (1 -
                                    (1 - self.initial_level_)**n_timepoints) /
                                   self.initial_level_)

        return drift

    def _compute_pred_err(self, alphas):
        """
        Get the prediction errors for the forecast.
        """
        self.check_is_fitted()

        n_timepoints = len(self._y)

        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (n_timepoints - 1))
        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1)

        errors = []
        for alpha in alphas:
            z = _zscore(1 - alpha)
            error = z * sem
            errors.append(
                pd.Series(error, index=self.fh.to_absolute(self.cutoff)))

        return errors

    def update(self, y, X=None, update_params=True):
        super(ThetaForecaster, self).update(
            y, X, update_params=False)  # use custom update_params routine
        if update_params:
            if self.deseasonalize:
                y = self.deseasonalizer_.transform(self._y)  # use updated y
            self.initial_level_ = self._fitted_forecaster.params[
                "smoothing_level"]
            self.trend_ = self._compute_trend(y)
        return self
def test_transform_inverse_transform_equivalence(sp, model):
    transformer = Deseasonalizer(sp=sp, model=model)
    transformer.fit(y_train)
    yit = transformer.inverse_transform(transformer.transform(y_train))
    np.testing.assert_array_equal(y_train.index, yit.index)
    np.testing.assert_array_almost_equal(y_train, yit)
def test_inverse_transform_time_index(sp, model):
    transformer = Deseasonalizer(sp=sp, model=model)
    transformer.fit(y_train)
    yit = transformer.inverse_transform(y_test)
    np.testing.assert_array_equal(yit.index, y_test.index)
示例#9
0
文件: theta.py 项目: ksachdeva/sktime
class ThetaForecaster(ExponentialSmoothing):
    """Theta method for forecasting.

    The theta method as defined in [1]_ is equivalent to simple exponential
    smoothing (SES) with drift (as demonstrated in [2]_).

    The series is tested for seasonality using the test outlined in A&N. If
    deemed seasonal, the series is seasonally adjusted using a classical
    multiplicative decomposition before applying the theta method. The
    resulting forecasts are then reseasonalised.

    In cases where SES results in a constant forecast, the theta forecaster
    will revert to predicting the SES constant plus a linear trend derived
    from the training data.

    Prediction intervals are computed using the underlying state space model.

    Parameters
    ----------
    initial_level : float, optional
        The alpha value of the simple exponential smoothing, if the value is
        set then
        this will be used, otherwise it will be estimated from the data.
    deseasonalize : bool, optional (default=True)
        If True, data is seasonally adjusted.
    sp : int, optional (default=1)
        The number of observations that constitute a seasonal period for a
        multiplicative deseasonaliser, which is used if seasonality is
        detected in the
        training data. Ignored if a deseasonaliser transformer is provided.
        Default is
        1 (no seasonality).

    Attributes
    ----------
    initial_level_ : float
        The estimated alpha value of the SES fit.
    drift_ : float
        The estimated drift of the fitted model.
    se_ : float
        The standard error of the predictions. Used to calculate prediction
        intervals.

    References
    ----------
    .. [1] Assimakopoulos, V. and Nikolopoulos, K. The theta model: a
       decomposition approach to forecasting. International Journal of
       Forecasting 16, 521-530, 2000.
       https://www.sciencedirect.com/science/article/pii/S0169207000000662

    .. [2] `Hyndman, Rob J., and Billah, Baki. Unmasking the Theta method.
       International J. Forecasting, 19, 287-290, 2003.
       https://www.sciencedirect.com/science/article/pii/S0169207001001431

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.theta import ThetaForecaster
    >>> y = load_airline()
    >>> forecaster = ThetaForecaster(sp=12)
    >>> forecaster.fit(y)
    ThetaForecaster(...)
    >>> y_pred = forecaster.predict(fh=[1,2,3])
    """

    _fitted_param_names = ("initial_level", "smoothing_level")
    _tags = {
        "ignores-exogeneous-X": True,
        "capability:pred_int": True,
        "requires-fh-in-fit": False,
        "handles-missing-data": False,
    }

    def __init__(self, initial_level=None, deseasonalize=True, sp=1):

        self.sp = sp
        self.deseasonalize = deseasonalize
        self.deseasonalizer_ = None
        self.trend_ = None
        self.initial_level_ = None
        self.drift_ = None
        self.se_ = None
        super(ThetaForecaster, self).__init__(initial_level=initial_level,
                                              sp=sp)

    def _fit(self, y, X=None, fh=None):
        """Fit to training data.

        Parameters
        ----------
        y : pd.Series
            Target time series to which to fit the forecaster.
        fh : int, list or np.array, optional (default=None)
            The forecasters horizon with the steps ahead to to predict.
        X : pd.DataFrame, optional (default=None)
            Exogenous variables are ignored

        Returns
        -------
        self : returns an instance of self.
        """
        sp = check_sp(self.sp)
        if sp > 1 and not self.deseasonalize:
            warn("`sp` is ignored when `deseasonalise`=False")

        if self.deseasonalize:
            self.deseasonalizer_ = Deseasonalizer(sp=self.sp,
                                                  model="multiplicative")
            y = self.deseasonalizer_.fit_transform(y)

        self.initialization_method = "known" if self.initial_level else "estimated"
        # fit exponential smoothing forecaster
        # find theta lines: Theta lines are just SES + drift
        super(ThetaForecaster, self)._fit(y, fh=fh)
        self.initial_level_ = self._fitted_forecaster.params["smoothing_level"]

        # compute trend
        self.trend_ = self._compute_trend(y)
        return self

    def _predict(self, fh, X=None, return_pred_int=False, alpha=DEFAULT_ALPHA):
        """Make forecasts.

        Parameters
        ----------
        fh : array-like
            The forecasters horizon with the steps ahead to to predict.
            Default is
            one-step ahead forecast, i.e. np.array([1]).

        Returns
        -------
        y_pred : pandas.Series
            Returns series of predicted values.
        """
        y_pred = super(ThetaForecaster, self)._predict(fh,
                                                       X,
                                                       return_pred_int=False,
                                                       alpha=alpha)

        # Add drift.
        drift = self._compute_drift()
        y_pred += drift

        if self.deseasonalize:
            y_pred = self.deseasonalizer_.inverse_transform(y_pred)

        if return_pred_int:
            pred_int = self.compute_pred_int(y_pred=y_pred, alpha=alpha)
            return y_pred, pred_int

        return y_pred

    @staticmethod
    def _compute_trend(y):
        # Trend calculated through least squares regression.
        coefs = _fit_trend(y.values.reshape(1, -1), order=1)
        return coefs[0, 0] / 2

    def _compute_drift(self):
        fh = self.fh.to_relative(self.cutoff)
        if np.isclose(self.initial_level_, 0.0):
            # SES was constant, so revert to simple trend
            drift = self.trend_ * fh
        else:
            # Calculate drift from SES parameters
            n_timepoints = len(self._y)
            drift = self.trend_ * (fh +
                                   (1 -
                                    (1 - self.initial_level_)**n_timepoints) /
                                   self.initial_level_)

        return drift

    def compute_pred_int(self, y_pred, alpha=DEFAULT_ALPHA):
        """
        Compute/return prediction intervals for a forecast.

        Must be run *after* the forecaster has been fitted.

        If alpha is iterable, multiple intervals will be calculated.

        public method including checks & utility
        dispatches to core logic in _compute_pred_int

        Parameters
        ----------
        y_pred : pd.Series
            Point predictions.
        alpha : float or list, optional (default=0.95)
            A significance level or list of significance levels.

        Returns
        -------
        intervals : pd.DataFrame
            A table of upper and lower bounds for each point prediction in
            ``y_pred``. If ``alpha`` was iterable, then ``intervals`` will be a
            list of such tables.
        """
        self.check_is_fitted()
        alphas = check_alpha(alpha)
        errors = self._compute_pred_err(alphas)

        # compute prediction intervals
        pred_int = [
            pd.DataFrame({
                "lower": y_pred - error,
                "upper": y_pred + error
            }) for error in errors
        ]

        # for a single alpha, return single pd.DataFrame
        if isinstance(alpha, float):
            return pred_int[0]

        # otherwise return list of pd.DataFrames
        return pred_int

    def _compute_pred_err(self, alphas):
        """Get the prediction errors for the forecast."""
        self.check_is_fitted()

        n_timepoints = len(self._y)

        self.sigma_ = np.sqrt(self._fitted_forecaster.sse / (n_timepoints - 1))
        sem = self.sigma_ * np.sqrt(
            self.fh.to_relative(self.cutoff) * self.initial_level_**2 + 1)

        errors = []
        for alpha in alphas:
            z = _zscore(1 - alpha)
            error = z * sem
            errors.append(
                pd.Series(error, index=self.fh.to_absolute(self.cutoff)))

        return errors

    def _update(self, y, X=None, update_params=True):
        super(ThetaForecaster, self)._update(
            y, X, update_params=False)  # use custom update_params routine
        if update_params:
            if self.deseasonalize:
                y = self.deseasonalizer_.transform(self._y)  # use updated y
            self.initial_level_ = self._fitted_forecaster.params[
                "smoothing_level"]
            self.trend_ = self._compute_trend(y)
        return self