Пример #1
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=True)

        Returns
        -------
        y_pred : pd.Series or pd.DataFrame
        """
        if cv is not None:
            cv = check_cv(cv)
        else:
            cv = SlidingWindowSplitter(
                self.fh.to_relative(self.cutoff),
                window_length=self.window_length_,
                start_with_window=False,
            )
        return self._predict_moving_cutoff(y,
                                           cv,
                                           X,
                                           update_params=update_params)
Пример #2
0
    def update_predict(self,
                       y_test,
                       cv=None,
                       X_test=None,
                       update_params=False,
                       return_pred_int=False,
                       alpha=DEFAULT_ALPHA):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y_test : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X_test : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=False)
        return_pred_int : bool, optional (default=False)
        alpha : int or list of ints, optional (default=None)

        Returns
        -------
        y_pred : pd.Series or pd.DataFrame
        """
        cv = check_cv(cv) if cv is not None else SlidingWindowSplitter(
            self.fh, window_length=self.window_length_)
        return self._predict_moving_cutoff(y_test,
                                           cv,
                                           X=X_test,
                                           update_params=update_params,
                                           return_pred_int=return_pred_int,
                                           alpha=alpha)
Пример #3
0
    def update_predict(
        self,
        y_test,
        cv=None,
        X_test=None,
        update_params=False,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y_test : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X_test : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=False)
        return_pred_int : bool, optional (default=False)
        alpha : int or list of ints, optional (default=None)

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals
        """
        if return_pred_int:
            raise NotImplementedError()
        y_test = check_y(y_test)
        if cv is not None:
            cv = check_cv(cv)
        else:
            cv = SlidingWindowSplitter(start_with_window=True,
                                       window_length=1,
                                       fh=1)

        return self._predict_moving_cutoff(
            y_test,
            X=X_test,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
            cv=cv,
        )
Пример #4
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=True)
        return_pred_int : bool, optional (default=False)
        alpha : int or list of ints, optional (default=None)

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals
        """
        self.check_is_fitted()

        if return_pred_int:
            raise NotImplementedError()
        y = check_y(y)
        cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y,
            cv,
            X,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
Пример #5
0
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    return_data=False,
):
    """Evaluate forecaster using timeseries cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster
    cv : Temporal cross-validation splitter
        Splitter of how to split the data into test data and train data
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, default=None
        Exogenous variables
    strategy : {"refit", "update"}
        Must be "refit" or "update". The strategy defines whether the `forecaster` is
        only fitted on the first train window data and then updated, or always refitted.
    scoring : subclass of sktime.performance_metrics.BaseMetric, default=None.
        Used to get a score function that takes y_pred and y_test arguments
        and accept y_train as keyword argument.
        If None, then uses scoring = MeanAbsolutePercentageError(symmetric=True).
    return_data : bool, default=False
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding each
        refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.model_evaluation import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="mean", sp=12)
    >>> cv = ExpandingWindowSplitter(
    ...     initial_window=24,
    ...     step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    _check_strategy(strategy)
    cv = check_cv(cv, enforce_start_with_window=True)
    scoring = check_scoring(scoring)
    y = check_series(
        y,
        enforce_univariate=forecaster.get_tag("scitype:y") == "univariate",
        enforce_multivariate=forecaster.get_tag("scitype:y") == "multivariate",
    )
    X = check_X(X)

    # Define score name.
    score_name = "test_" + scoring.name

    # Initialize dataframe.
    results = pd.DataFrame()

    # Run temporal cross-validation.
    for i, (train, test) in enumerate(cv.split(y)):
        # split data
        y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh)

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.perf_counter()
        if i == 0 or strategy == "refit":
            forecaster = clone(forecaster)
            forecaster.fit(y_train, X_train, fh=fh)

        else:  # if strategy == "update":
            forecaster.update(y_train, X_train)
        fit_time = time.perf_counter() - start_fit

        pred_type = {
            "pred_quantiles": "forecaster.predict_quantiles",
            "pred_intervals": "forecaster.predict_interval",
            "pred_proba": "forecaster.predict_proba",
            None: "forecaster.predict",
        }
        # predict
        start_pred = time.perf_counter()

        if hasattr(scoring, "metric_args"):
            metric_args = scoring.metric_args

        try:
            scitype = scoring.get_tag("scitype:y_pred")
        except ValueError:
            # If no scitype exists then metric is not proba and no args needed
            scitype = None
            metric_args = {}

        y_pred = eval(pred_type[scitype])(
            fh,
            X_test,
            **metric_args,
        )

        pred_time = time.perf_counter() - start_pred

        # score
        score = scoring(y_test, y_pred, y_train=y_train)

        # save results
        results = results.append(
            {
                score_name: score,
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            },
            ignore_index=True,
        )

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
Пример #6
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make predictions and update model iteratively over the test set.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_".
            Pointers to seen data, self._y and self.X
            self.cutoff, self._is_fitted
            If update_params=True, model attributes ending in "_".

        Writes to self:
            Update self._y and self._X with `y` and `X`, by appending rows.
            Updates self.cutoff and self._cutoff to last index seen in `y`.
            If update_params=True,
                updates fitted model attributes ending in "_".

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Time series to which to fit the forecaster.
            if self.get_tag("scitype:y")=="univariate":
                must have a single column/variable
            if self.get_tag("scitype:y")=="multivariate":
                must have 2 or more columns
            if self.get_tag("scitype:y")=="both": no restrictions apply
        cv : temporal cross-validation generator, optional (default=None)
        X : pd.DataFrame, or 2D np.ndarray optional (default=None)
            Exogeneous time series to fit to and predict from
            if self.get_tag("X-y-must-have-same-index"),
            X.index must contain y.index and fh.index
        update_params : bool, optional (default=True)
        return_pred_int : bool, optional (default=False)
        alpha : int or list of ints, optional (default=None)

        Returns
        -------
        y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
            Point forecasts at fh, with same index as fh
            y_pred has same type as y
        y_pred_int : pd.DataFrame - only if return_pred_int=True
            in this case, return is 2-tuple (otherwise a single y_pred)
            Prediction intervals
        """
        self.check_is_fitted()

        if return_pred_int and not self.get_tag("capability:pred_int"):
            raise NotImplementedError(
                f"{self.__class__.__name__} does not have the capability to return "
                "prediction intervals. Please set return_pred_int=False. If you "
                "think this estimator should have the capability, please open "
                "an issue on sktime."
            )

        # input checks and minor coercions on X, y
        X_inner, y_inner = self._check_X_y(X=X, y=y)

        cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y=y_inner,
            cv=cv,
            X=X_inner,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )
Пример #7
0
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    fit_params=None,
    return_data=False,
):
    """Evaluate forecaster using cross-validation

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables
    cv : Temporal cross-validation splitter
        Splitter of how to split the data into test data and train data
    strategy : str, optional (default="refit")
        Must be "refit" or "update". The strategy defines whether the `forecaster` is
        only fitted on the first train window data and then updated, or always refitted.
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics, optional. Example scoring=sMAPE().
        Used to get a score function that takes y_pred and y_test as arguments,
        by default None (if None, uses sMAPE)
    fit_params : dict, optional (default=None)
        Parameters passed to the `fit` call of the forecaster.
    return_data : bool, optional
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding each
        refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.model_evaluation import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="mean", sp=12)
    >>> cv = ExpandingWindowSplitter(initial_window=24, step_length=12,
    ...                              fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    _check_strategy(strategy)
    cv = check_cv(cv, enforce_start_with_window=True)
    scoring = check_scoring(scoring)
    y, X = check_y_X(y, X)
    fit_params = {} if fit_params is None else fit_params

    # Define score name.
    score_name = "test_" + scoring.name

    # Initialize dataframe.
    results = pd.DataFrame()

    # Run temporal cross-validation.
    for i, (train, test) in enumerate(cv.split(y)):
        # split data
        y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh)

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.time()
        if i == 0 or strategy == "refit":
            forecaster.fit(y_train, X_train, fh=fh, **fit_params)

        else:  # if strategy == "update":
            forecaster.update(y_train, X_train)
        fit_time = time.time() - start_fit

        # predict
        start_pred = time.time()
        y_pred = forecaster.predict(fh, X=X_test)
        pred_time = time.time() - start_pred

        # score
        score = scoring(y_pred, y_test)

        # save results
        results = results.append(
            {
                score_name: score,
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            },
            ignore_index=True,
        )

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
Пример #8
0
def evaluate(forecaster,
             cv,
             y,
             X=None,
             strategy="refit",
             scoring=None,
             return_data=False):
    """Evaluate forecaster using cross-validation

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster
    cv : sktime.SlidingWindowSplitter or sktime.ExpandingWindowSplitter
        Splitter of how to split the data into test data and train data
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables
    strategy : str, optional
        Must be "refit" or "update", by default "refit". The strategy defines
        whether forecaster is only fitted on the first train window data and
        then updated or always refitted.
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics, optional. Example scoring=sMAPE().
        Used to get a score function that takes y_pred and y_test as arguments,
        by default None (if None, uses sMAPE)
    return_data : bool, optional
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding each
        refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.performance_metrics.forecasting import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="drift", sp=12)
    >>> cv = ExpandingWindowSplitter(
        initial_window=24,
        step_length=12,
        fh=[1,2,3,4,5,6,7,8,9,10,11,12]
        )
    >>> evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    cv = check_cv(cv)
    y = check_y(y)
    _check_strategies(strategy)
    scoring = check_scoring(scoring)

    n_splits = cv.get_n_splits(y)
    results = pd.DataFrame()
    cv.start_with_window = True

    for i, (train, test) in enumerate(tqdm(cv.split(y), total=n_splits)):
        # get initial window, if required
        if i == 0 and cv.initial_window and strategy == "update":
            train, test = cv.split_initial(y)
            # this might have to be directly handled in split_initial()
            test = test[:len(cv.fh)]

        # create train/test data
        y_train = y.iloc[train]
        y_test = y.iloc[test]

        X_train = X.iloc[train] if X else None
        X_test = X.iloc[test] if X else None

        # fit/update
        start_fit = time.time()
        if strategy == "refit" or i == 0:
            forecaster.fit(
                y=y_train,
                X=X_train,
                fh=ForecastingHorizon(y_test.index, is_relative=False),
            )
        else:
            # strategy == "update" and i != 0:
            forecaster.update(y=y_train, X=X_train)
        fit_time = time.time() - start_fit

        # predict
        start_pred = time.time()
        y_pred = forecaster.predict(fh=ForecastingHorizon(y_test.index,
                                                          is_relative=False),
                                    X=X_test)
        pred_time = time.time() - start_pred

        # save results
        results = results.append(
            {
                "test_" + scoring.__class__.__name__: scoring(y_pred, y_test),
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            },
            ignore_index=True,
        )

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
Пример #9
0
    def update_predict(
        self,
        y,
        cv=None,
        X=None,
        update_params=True,
        return_pred_int=False,
        alpha=DEFAULT_ALPHA,
    ):
        """Make and update predictions iteratively over the test set.

        Parameters
        ----------
        y : pd.Series
        cv : temporal cross-validation generator, optional (default=None)
        X : pd.DataFrame, optional (default=None)
        update_params : bool, optional (default=True)
        return_pred_int : bool, optional (default=False)
        alpha : int or list of ints, optional (default=None)

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        y_pred_int : pd.DataFrame
            Prediction intervals
        """
        self.check_is_fitted()

        if return_pred_int and not self.get_tag("capability:pred_int"):
            raise NotImplementedError(
                f"{self.__class__.__name__} does not have the capability to return "
                "prediction intervals. Please set return_pred_int=False. If you "
                "think this estimator should have the capability, please open "
                "an issue on sktime.")

        # input checks and minor coercions on X, y
        ###########################################

        # checking y
        enforce_univariate = self.get_tag("scitype:y") == "univariate"
        enforce_multivariate = self.get_tag("scitype:y") == "multivariate"
        enforce_index_type = self.get_tag("enforce_index_type")

        check_y_args = {
            "enforce_univariate": enforce_univariate,
            "enforce_multivariate": enforce_multivariate,
            "enforce_index_type": enforce_index_type,
        }

        # update only for non-empty data
        y = check_series(y, allow_empty=True, **check_y_args, var_name="y")
        # end checking y

        # checking X
        X = check_series(X,
                         enforce_index_type=enforce_index_type,
                         var_name="X")
        if self.get_tag("X-y-must-have-same-index"):
            check_equal_time_index(X, y)
        # end checking X

        cv = check_cv(cv)

        return self._predict_moving_cutoff(
            y,
            cv,
            X,
            update_params=update_params,
            return_pred_int=return_pred_int,
            alpha=alpha,
        )