Exemplo n.º 1
0
def _split_by_fh(y, fh, X=None):
    """Split a time series into train and test parts using a forecasting horizon.

    Both relative and absolute forecasting horizons are supported.

    Parameters
    ----------
    y : object exposing ``.index`` and ``.loc`` (e.g. pd.Series)
        Time series to split.
    fh : forecasting horizon
        Anything accepted by ``check_fh``.
    X : object exposing ``.loc``, optional (default=None)
        Exogenous data. If given, it is first compared against `y` with
        ``check_equal_time_index``.

    Returns
    -------
    y_train, y_test
        If `X` is None.
    y_train, y_test, X_train, X_test
        If `X` is given. Note that ``X_test`` covers the whole test window,
        whereas ``y_test`` only contains the entries selected by `fh`.

    Raises
    ------
    ValueError
        If `fh` is relative and contains values that are not out-of-sample.
    """
    if X is not None:
        check_equal_time_index(y, X)

    fh = check_fh(fh)
    fh_values = fh.to_pandas()
    time_index = y.index

    if not fh.is_relative:
        # absolute horizon: training data is everything strictly before the
        # first horizon value; the test window spans first..last inclusive
        first, last = fh_values.min(), fh_values.max()
        train_index = time_index[time_index < first]
        test_index = time_index[(time_index <= last) & (first <= time_index)]

        # the horizon values are themselves the labels to select from y
        y_test = y.loc[fh_values]
    else:
        if not fh.is_all_out_of_sample():
            raise ValueError("`fh` must only contain out-of-sample values")

        # relative horizon: the last `window` index entries form the test
        # window, everything before it is training data
        window = fh_values.max()
        positions = fh.to_indexer()
        train_index = time_index[:-window]
        test_index = time_index[-window:]

        # pick only the requested steps out of the test window
        y_test = y.loc[test_index[positions]]

    y_train = y.loc[train_index]

    if X is not None:
        return y_train, y_test, X.loc[train_index], X.loc[test_index]
    return y_train, y_test
Exemplo n.º 2
0
def check_y_X(y, X=None, allow_empty=False, allow_constant=True, warn_X=False):
    """Validate the target series and, optionally, the exogenous data.

    Parameters
    ----------
    y : pd.Series
        Target series, validated with ``check_y``.
    X : pd.DataFrame, optional (default=None)
        Exogenous data. If given, it is validated with ``check_X`` and its
        time index is compared to that of `y`.
    allow_empty : bool, optional (default=False)
        If True, empty `y` does not raise an error.
    allow_constant : bool, optional (default=True)
        If True, constant `y` does not raise an error.
    warn_X : bool, optional (default=False)
        Forwarded to ``check_X``; raises a warning if True.

    Returns
    -------
    y
        The value returned by ``check_y``.
    X
        The value returned by ``check_X``, or None if no `X` was passed.

    Raises
    ------
    ValueError
        If y or X are invalid inputs
    """
    checked_y = check_y(y, allow_empty=allow_empty, allow_constant=allow_constant)

    # nothing more to validate when no exogenous data was passed
    if X is None:
        return checked_y, X

    checked_X = check_X(X=X, warn_X=warn_X)
    check_equal_time_index(checked_y, checked_X)
    return checked_y, checked_X
Exemplo n.º 3
0
def mase_loss(y_test, y_pred, y_train, sp=1):
    """Mean absolute scaled error (MASE).

    The mean absolute forecast error on the test set is divided by the
    in-sample mean absolute error of a naive seasonal forecast computed on
    the training data (each value is predicted by the value `sp` steps
    earlier). This scale-free metric can be used to compare forecast methods
    on a single series and also to compare forecast accuracy between series.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.
    y_train : pandas Series of shape = (n_obs,)
        Observed training values.
    sp : int
        Seasonal periodicity of training data.

    Returns
    -------
    loss : float
        MASE loss. ``np.nan`` is returned when the naive seasonal forecast
        has zero mean absolute error on the training data (e.g. flat
        training data), which avoids a division by zero.

    Raises
    ------
    ValueError
        If the time index of `y_train` is not strictly before the time index
        of `y_test`.

    References
    ----------
    .. [1] Hyndman, R. J. (2006). "Another look at measures of forecast
           accuracy", Foresight, Issue 4.
    """
    # validate inputs
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    y_train = check_y(y_train)
    check_equal_time_index(y_test, y_pred)

    # the training data must end before the test data starts
    if y_train is not None:
        check_time_index(y_train.index)
        last_train_time = y_train.index.max()
        first_test_time = y_test.index.min()
        if last_train_time >= first_test_time:
            raise ValueError(
                "Found `y_train` with time index which is not "
                "before time index of `y_test`"
            )

    # in-sample naive seasonal forecast: the value observed `sp` steps earlier
    train_values = np.asarray(y_train)
    naive_forecast = train_values[:-sp]
    naive_abs_errors = np.abs(train_values[sp:] - naive_forecast)
    mae_naive = np.mean(naive_abs_errors)

    # a zero scale would yield a division by zero / np.inf, so report np.nan
    if mae_naive == 0:
        return np.nan

    return np.mean(np.abs(y_test - y_pred)) / mae_naive
Exemplo n.º 4
0
def check_y_X(
    y,
    X=None,
    allow_empty=False,
    allow_constant=True,
    enforce_index_type=None,
):
    """Validate the target series and, optionally, the exogenous data.

    Parameters
    ----------
    y : pd.Series
        Target series, validated with ``check_y``.
    X : pd.DataFrame, optional (default=None)
        Exogenous data. If given, it is validated with ``check_X`` and its
        time index is compared to that of `y`.
    allow_empty : bool, optional (default=False)
        If True, empty `y` does not raise an error.
    allow_constant : bool, optional (default=True)
        If True, constant `y` does not raise an error.
    enforce_index_type : type, optional (default=None)
        type of time index; only passed on to the check of `y`.

    Returns
    -------
    y
        The value returned by ``check_y``.
    X
        The value returned by ``check_X``, or None if no `X` was passed.

    Raises
    ------
    ValueError
        If y or X are invalid inputs
    """
    checked_y = check_y(
        y,
        allow_empty=allow_empty,
        allow_constant=allow_constant,
        enforce_index_type=enforce_index_type,
    )

    # nothing more to validate when no exogenous data was passed
    if X is None:
        return checked_y, X

    # The index type is not enforced on X separately: the index equality
    # check against y below makes that unnecessary.
    checked_X = check_X(X)
    check_equal_time_index(checked_y, checked_X)
    return checked_y, checked_X
Exemplo n.º 5
0
def smape_loss(y_test, y_pred):
    """Symmetric mean absolute percentage error (sMAPE).

    Computed as ``mean(2 * |y_test - y_pred| / (|y_test| + |y_pred|))``.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        sMAPE loss
    """
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    check_equal_time_index(y_test, y_pred)

    abs_error = np.abs(y_test - y_pred)
    scale = np.abs(y_test) + np.abs(y_pred)
    relative_error = 2.0 * abs_error / scale
    return np.mean(relative_error)
Exemplo n.º 6
0
def _split_by_fh(
    y: ACCEPTED_Y_TYPES, fh: FORECASTING_HORIZON_TYPES, X: Optional[pd.DataFrame] = None
) -> SPLIT_TYPE:
    """Split time series into train and test parts with a forecasting horizon.

    Handles both relative and absolute horizons.

    Parameters
    ----------
    y : time series exposing ``.index`` and ``.loc``
        Time series to split.
    fh : forecasting horizon
        Anything accepted by ``check_fh``.
    X : pd.DataFrame, optional (default=None)
        Exogenous data. If given, it is first compared against `y` with
        ``check_equal_time_index``.

    Returns
    -------
    y_train, y_test
        If `X` is None.
    y_train, y_test, X_train, X_test
        If `X` is given. Note that ``X_test`` covers the whole test window,
        whereas ``y_test`` only contains the entries selected by `fh`.

    Raises
    ------
    ValueError
        If `fh` is relative and contains values that are not out-of-sample.
    """
    if X is not None:
        check_equal_time_index(y, X)

    fh = check_fh(fh)
    horizon = fh.to_pandas()
    full_index = y.index

    if not fh.is_relative:
        # absolute horizon: train on everything strictly before the first
        # horizon value; the test window spans lower..upper inclusive
        lower, upper = horizon.min(), horizon.max()
        train_idx = full_index[full_index < lower]
        test_idx = full_index[(full_index <= upper) & (lower <= full_index)]

        # the horizon values are themselves the labels to select from y
        y_test = y.loc[horizon]
    else:
        if not fh.is_all_out_of_sample():
            raise ValueError("`fh` must only contain out-of-sample values")

        # relative horizon: hold out the last `n_holdout` index entries
        n_holdout = horizon.max()
        step_positions = fh.to_indexer()
        train_idx = full_index[:-n_holdout]
        test_idx = full_index[-n_holdout:]

        # pick only the requested steps out of the held-out window
        y_test = y.loc[test_idx[step_positions]]

    y_train = y.loc[train_idx]

    if X is not None:
        return y_train, y_test, X.loc[train_idx], X.loc[test_idx]
    return y_train, y_test
Exemplo n.º 7
0
def mape_loss(y_test, y_pred):
    """Mean absolute percentage error (MAPE).

    MAPE output is a non-negative floating point number where the best value
    is 0.0. There is no limit on how large the error can be, particularly
    when `y_test` values are close to zero. In such cases the function
    returns a large value instead of `inf`, because the denominator is
    bounded below by the float64 machine epsilon.

    Parameters
    ----------
    y_test : pandas Series of shape = (fh,) where fh is the forecasting horizon
        Ground truth (correct) target values.
    y_pred : pandas Series of shape = (fh,)
        Estimated target values.

    Returns
    -------
    loss : float
        MAPE loss expressed as a fractional number rather than percentage point.

    Examples
    --------
    >>> import pandas as pd
    >>> y_test = pd.Series([1, -1, 2])
    >>> y_pred = pd.Series([2, -2, 4])
    >>> mape_loss(y_test, y_pred)
    1.0
    """
    y_test = check_y(y_test)
    y_pred = check_y(y_pred)
    check_equal_time_index(y_test, y_pred)

    # lower bound for the denominator, so zeros in y_test do not produce inf
    smallest_scale = np.finfo(np.float64).eps

    abs_error = np.abs(y_test - y_pred)
    safe_scale = np.maximum(np.abs(y_test), smallest_scale)
    return np.mean(abs_error / safe_scale)
Exemplo n.º 8
0
    def _check_X_y(self, X=None, y=None):
        """Check and coerce X/y for fit/predict/update functions.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D), optional (default=None)
            Time series to check.
        X : pd.DataFrame, or 2D np.array, optional (default=None)
            Exogeneous time series.

        Returns
        -------
        X_inner : Series compatible with self.get_tag("X_inner_mtype") format
            converted/coerced version of X, mtype determined by "X_inner_mtype" tag
            None if X was None
        y_inner : Series compatible with self.get_tag("y_inner_mtype") format
            converted/coerced version of y, mtype determined by "y_inner_mtype" tag
            None if y was None

        Raises
        ------
        TypeError if y or X is not one of the permissible Series mtypes
        TypeError if y is not compatible with self.get_tag("scitype:y")
            if tag value is "univariate", y must be univariate
            if tag value is "multivariate", y must be bi- or higher-variate
            if tag value is "both", y can be either
        TypeError if self.get_tag("X-y-must-have-same-index") is True
            and the index set of X is not a super-set of the index set of y

        Writes to self
        --------------
        _y_mtype_last_seen : str, mtype of y (only set when y is not None)
        _converter_store_y : dict, metadata from conversion for back-conversion
            (passed to ``convert_to`` as its ``store`` argument)
        """
        # validation settings derived from the estimator tags
        must_be_univariate = self.get_tag("scitype:y") == "univariate"
        must_be_multivariate = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # --- validate y and remember which mtype it came in as ---
        if y is not None:
            y = check_series(
                y,
                enforce_univariate=must_be_univariate,
                enforce_multivariate=must_be_multivariate,
                enforce_index_type=index_type,
                allow_None=False,
                allow_empty=True,
                var_name="y",
            )
            self._y_mtype_last_seen = mtype(y, as_scitype="Series")

        # --- validate X and, if the tag requires it, its index against y ---
        if X is not None:
            X = check_series(X, enforce_index_type=index_type, var_name="X")
            if self.get_tag("X-y-must-have-same-index"):
                check_equal_time_index(X, y)

        # --- convert to the inner mtypes declared by the tags ---
        # The conversion metadata for y goes into the store so that a
        # back-conversion is possible later; X is converted without a store.
        y_inner = convert_to(
            y,
            to_type=self.get_tag("y_inner_mtype"),
            as_scitype="Series",
            store=self._converter_store_y,
        )
        X_inner = convert_to(
            X,
            to_type=self.get_tag("X_inner_mtype"),
            as_scitype="Series",
        )

        return X_inner, y_inner
Exemplo n.º 9
0
    def fit(self, y, X=None, fh=None):
        """Fit forecaster to training data.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.array
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogeneous data
        fh : int, list, np.array or ForecastingHorizon, optional (default=None)
            The forecasting horizon with the steps ahead to predict.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        State changes, in order:
        resets the is_fitted flag to False
        stores fh, if passed (via ``self._set_fh``)
        stores the checked data in self._X and self._y
        updates the cutoff from y (via ``self._set_cutoff_from_y``)
        creates fitted model (attributes ending in "_") through ``self._fit``
        sets is_fitted flag to True
        """
        # calling fit always discards a previous fitted state
        self._is_fitted = False
        self._set_fh(fh)

        # validation settings derived from the estimator tags
        must_be_univariate = self.get_tag("scitype:y") == "univariate"
        must_be_multivariate = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # --- input checks and minor coercions on y ---
        y = check_series(
            y,
            enforce_univariate=must_be_univariate,
            enforce_multivariate=must_be_multivariate,
            enforce_index_type=index_type,
            allow_None=False,
            var_name="y",
        )

        # --- input checks and minor coercions on X ---
        X = check_series(X, enforce_index_type=index_type, var_name="X")
        if self.get_tag("X-y-must-have-same-index"):
            check_equal_time_index(X, y)

        # keep the checked (not yet converted) data on the estimator
        self._X, self._y = X, y
        self._set_cutoff_from_y(y)

        # --- convert to the inner mtypes supported by _fit ---
        # if the y/X type is already supported, no conversion takes place
        target_mtype_y = self.get_tag("y_inner_mtype")
        target_mtype_X = self.get_tag("X_inner_mtype")

        y_inner = convert_to(
            y,
            to_type=target_mtype_y,
            as_scitype="Series",
            store=self.converter_store_y,
        )
        X_inner = convert_to(X, to_type=target_mtype_X, as_scitype="Series")

        # checks and conversions complete, delegate to the inner fit
        self._fit(y=y_inner, X=X_inner, fh=fh)

        # must stay last so a failing _fit leaves the estimator unfitted
        self._is_fitted = True
        return self
Exemplo n.º 10
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y : pd.Series
            Time series to iterate over; may be empty.
        cv : temporal cross-validation generator, optional (default=None)
            Validated with ``check_cv``.
        X : pd.DataFrame, optional (default=None)
            Exogeneous data
        update_params : bool, optional (default=True)
            Forwarded to ``self._predict_moving_cutoff``.
        return_pred_int : bool, optional (default=False)
            Whether prediction intervals are requested. Requires the
            "capability:pred_int" tag to be set.
        alpha : optional (default=DEFAULT_ALPHA)
            Forwarded to ``self._predict_moving_cutoff``.

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals

        Raises
        ------
        NotImplementedError
            If `return_pred_int` is True but the estimator does not have the
            "capability:pred_int" tag set.
        """
        self.check_is_fitted()

        # the tag is only consulted when intervals were actually requested
        pred_int_unsupported = return_pred_int and not self.get_tag(
            "capability:pred_int"
        )
        if pred_int_unsupported:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not have the capability to return "
                "prediction intervals. Please set return_pred_int=False. If you "
                "think this estimator should have the capability, please open "
                "an issue on sktime."
            )

        # validation settings derived from the estimator tags
        must_be_univariate = self.get_tag("scitype:y") == "univariate"
        must_be_multivariate = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # --- input checks and minor coercions on y (empty y is allowed) ---
        y = check_series(
            y,
            allow_empty=True,
            enforce_univariate=must_be_univariate,
            enforce_multivariate=must_be_multivariate,
            enforce_index_type=index_type,
            var_name="y",
        )

        # --- input checks and minor coercions on X ---
        X = check_series(X, enforce_index_type=index_type, var_name="X")
        if self.get_tag("X-y-must-have-same-index"):
            check_equal_time_index(X, y)

        splitter = check_cv(cv)

        return self._predict_moving_cutoff(
            y,
            splitter,
            X,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
Exemplo n.º 11
0
    def update(self, y, X=None, update_params=True):
        """Update cutoff value and, optionally, fitted parameters.

        This is useful in an online learning setting where new data is observed as
        time moves on. Updating the cutoff value allows to generate new predictions
        from the most recent time point that was observed. Updating the fitted
        parameters allows to incrementally update the parameters without having to
        completely refit. However, note that if no estimator-specific update method
        has been implemented for updating parameters refitting is the default fall-back
        option.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.array
            Target time series with which to update the forecaster; may be empty.
        X : pd.DataFrame, optional (default=None)
            Exogeneous data
        update_params : bool, optional (default=True)
            whether model parameters should be updated

        Returns
        -------
        self : reference to self

        Notes
        -----
        Update self._y and self._X with `y` and `X`, respectively.
        Updates  self._cutoff to last index seen in `y`. If update_params=True,
        updates fitted model that updates attributes ending in "_".
        """
        self.check_is_fitted()

        # validation settings derived from the estimator tags
        must_be_univariate = self.get_tag("scitype:y") == "univariate"
        must_be_multivariate = self.get_tag("scitype:y") == "multivariate"
        index_type = self.get_tag("enforce_index_type")

        # --- input checks and minor coercions on y (empty y is allowed) ---
        y = check_series(
            y,
            allow_empty=True,
            enforce_univariate=must_be_univariate,
            enforce_multivariate=must_be_multivariate,
            enforce_index_type=index_type,
            var_name="y",
        )

        # --- input checks and minor coercions on X ---
        X = check_series(X, enforce_index_type=index_type, var_name="X")
        if self.get_tag("X-y-must-have-same-index"):
            check_equal_time_index(X, y)

        # record the checked (not yet converted) data on the estimator
        self._update_y_X(y, X)

        # --- convert to the inner mtypes supported by _update ---
        # if the y/X type is already supported, no conversion takes place
        target_mtype_y = self.get_tag("y_inner_mtype")
        target_mtype_X = self.get_tag("X_inner_mtype")

        y_inner = convert_to(
            y,
            to_type=target_mtype_y,
            as_scitype="Series",
            store=self.converter_store_y,
        )
        X_inner = convert_to(X, to_type=target_mtype_X, as_scitype="Series")

        # checks and conversions complete, delegate to the inner update
        self._update(y=y_inner, X=X_inner, update_params=update_params)

        return self