def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
    X : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=True)

    Returns
    -------
    y_pred : pd.Series or pd.DataFrame
    """
    if cv is not None:
        cv = check_cv(cv)
    else:
        cv = SlidingWindowSplitter(
            self.fh.to_relative(self.cutoff),
            window_length=self.window_length_,
            start_with_window=False,
        )
    return self._predict_moving_cutoff(y, cv, X, update_params=update_params)
def update_predict(
    self,
    y_test,
    cv=None,
    X_test=None,
    update_params=False,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y_test : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
    X_test : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=False)
    return_pred_int : bool, optional (default=False)
    alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : pd.Series or pd.DataFrame
    """
    if cv is not None:
        cv = check_cv(cv)
    else:
        cv = SlidingWindowSplitter(self.fh, window_length=self.window_length_)
    return self._predict_moving_cutoff(
        y_test,
        cv,
        X=X_test,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )
def update_predict(
    self,
    y_test,
    cv=None,
    X_test=None,
    update_params=False,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y_test : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
    X_test : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=False)
    return_pred_int : bool, optional (default=False)
    alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : pd.Series
        Point predictions
    y_pred_int : pd.DataFrame
        Prediction intervals
    """
    if return_pred_int:
        raise NotImplementedError()
    y_test = check_y(y_test)
    if cv is not None:
        cv = check_cv(cv)
    else:
        cv = SlidingWindowSplitter(start_with_window=True, window_length=1, fh=1)
    return self._predict_moving_cutoff(
        y_test,
        X=X_test,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
        cv=cv,
    )
def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
    X : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=True)
    return_pred_int : bool, optional (default=False)
    alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : pd.Series
        Point predictions
    y_pred_int : pd.DataFrame
        Prediction intervals
    """
    self.check_is_fitted()
    if return_pred_int:
        raise NotImplementedError()
    y = check_y(y)
    cv = check_cv(cv)
    return self._predict_moving_cutoff(
        y,
        cv,
        X,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )
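# A minimal usage sketch (illustrative, not part of the source): fit on a
# training window, then call update_predict to roll forward over held-out
# data. The dataset, forecaster settings, splitter parameters, and split
# sizes are assumptions made for the example.
#
#   from sktime.datasets import load_airline
#   from sktime.forecasting.model_selection import SlidingWindowSplitter
#   from sktime.forecasting.naive import NaiveForecaster
#
#   y = load_airline()
#   y_train, y_test = y[:-36], y[-36:]
#   forecaster = NaiveForecaster(strategy="last", sp=12)
#   forecaster.fit(y_train, fh=[1, 2, 3])
#   # predictions are made, and the model updated, at each cutoff in y_test
#   cv = SlidingWindowSplitter(fh=[1, 2, 3], window_length=24)
#   y_pred = forecaster.update_predict(y_test, cv=cv)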
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    return_data=False,
):
    """Evaluate forecaster using timeseries cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster
    cv : Temporal cross-validation splitter
        Splitter of how to split the data into test data and train data
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, default=None
        Exogenous variables
    strategy : {"refit", "update"}
        Must be "refit" or "update". The strategy defines whether the
        `forecaster` is only fitted on the first train window and then
        updated, or always refitted.
    scoring : subclass of sktime.performance_metrics.BaseMetric, default=None
        Used to get a score function that takes y_pred and y_test arguments
        and accepts y_train as keyword argument.
        If None, then uses scoring = MeanAbsolutePercentageError(symmetric=True).
    return_data : bool, default=False
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding
        each refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.forecasting.model_evaluation import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="mean", sp=12)
    >>> cv = ExpandingWindowSplitter(
    ...     initial_window=24,
    ...     step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    _check_strategy(strategy)
    cv = check_cv(cv, enforce_start_with_window=True)
    scoring = check_scoring(scoring)
    y = check_series(
        y,
        enforce_univariate=forecaster.get_tag("scitype:y") == "univariate",
        enforce_multivariate=forecaster.get_tag("scitype:y") == "multivariate",
    )
    X = check_X(X)

    # Define score name.
    score_name = "test_" + scoring.name

    # Collect result rows in a list; a DataFrame is built after the loop
    # (avoids the deprecated DataFrame.append).
    results = []

    # Run temporal cross-validation.
    for i, (train, test) in enumerate(cv.split(y)):
        # split data
        y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh)

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.perf_counter()
        if i == 0 or strategy == "refit":
            forecaster = clone(forecaster)
            forecaster.fit(y_train, X_train, fh=fh)
        else:  # if strategy == "update":
            forecaster.update(y_train, X_train)
        fit_time = time.perf_counter() - start_fit

        # map the metric's y_pred scitype to the matching forecaster method
        pred_type = {
            "pred_quantiles": "predict_quantiles",
            "pred_intervals": "predict_interval",
            "pred_proba": "predict_proba",
            None: "predict",
        }

        # predict
        start_pred = time.perf_counter()

        # probabilistic metrics may carry extra arguments for the predict call;
        # default to no extra arguments so metric_args is always defined
        metric_args = getattr(scoring, "metric_args", {})

        try:
            scitype = scoring.get_tag("scitype:y_pred")
        except ValueError:
            # If no scitype exists then metric is not proba and no args needed
            scitype = None
            metric_args = {}

        # dispatch via getattr rather than eval
        y_pred = getattr(forecaster, pred_type[scitype])(fh, X_test, **metric_args)
        pred_time = time.perf_counter() - start_pred

        # score
        score = scoring(y_test, y_pred, y_train=y_train)

        # save results
        results.append(
            {
                score_name: score,
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            }
        )

    # collect rows into a DataFrame
    results = pd.DataFrame(results)

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
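# A hedged sketch of how the scitype dispatch above could be exercised: a
# probabilistic metric such as PinballLoss carries the "scitype:y_pred" tag
# "pred_quantiles", so evaluate routes the call to predict_quantiles instead
# of predict. Assumes the chosen forecaster supports quantile predictions;
# dataset and splitter settings are illustrative.
#
#   from sktime.datasets import load_airline
#   from sktime.forecasting.model_selection import ExpandingWindowSplitter
#   from sktime.forecasting.naive import NaiveForecaster
#   from sktime.performance_metrics.forecasting.probabilistic import PinballLoss
#
#   y = load_airline()
#   cv = ExpandingWindowSplitter(initial_window=24, step_length=12, fh=list(range(1, 13)))
#   results = evaluate(
#       forecaster=NaiveForecaster(strategy="mean", sp=12),
#       y=y,
#       cv=cv,
#       scoring=PinballLoss(),
#   )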
def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make predictions and update model iteratively over the test set.

    State required:
        Requires state to be "fitted".

    Accesses in self:
        Fitted model attributes ending in "_".
        Pointers to seen data, self._y and self._X
        self.cutoff, self._is_fitted
        If update_params=True, model attributes ending in "_".

    Writes to self:
        Update self._y and self._X with `y` and `X`, by appending rows.
        Updates self.cutoff and self._cutoff to last index seen in `y`.
        If update_params=True, updates fitted model attributes ending in "_".

    Parameters
    ----------
    y : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
        Time series to which to fit the forecaster.
        if self.get_tag("scitype:y")=="univariate":
            must have a single column/variable
        if self.get_tag("scitype:y")=="multivariate":
            must have 2 or more columns
        if self.get_tag("scitype:y")=="both": no restrictions apply
    cv : temporal cross-validation generator, optional (default=None)
    X : pd.DataFrame or 2D np.ndarray, optional (default=None)
        Exogenous time series to fit to and predict from
        if self.get_tag("X-y-must-have-same-index"),
        X.index must contain y.index and fh.index
    update_params : bool, optional (default=True)
    return_pred_int : bool, optional (default=False)
    alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
        Point forecasts at fh, with same index as fh
        y_pred has same type as y
    y_pred_int : pd.DataFrame - only if return_pred_int=True
        in this case, return is 2-tuple (otherwise a single y_pred)
        Prediction intervals
    """
    self.check_is_fitted()

    if return_pred_int and not self.get_tag("capability:pred_int"):
        raise NotImplementedError(
            f"{self.__class__.__name__} does not have the capability to return "
            "prediction intervals. Please set return_pred_int=False. If you "
            "think this estimator should have the capability, please open "
            "an issue on sktime."
        )

    # input checks and minor coercions on X, y
    X_inner, y_inner = self._check_X_y(X=X, y=y)

    cv = check_cv(cv)

    return self._predict_moving_cutoff(
        y=y_inner,
        cv=cv,
        X=X_inner,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )
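# An illustrative sketch of the state contract documented above: after
# update_predict, the forecaster has appended `y` to its seen data and
# advanced its cutoff. The variables y_train and y_test are assumed to come
# from a prior temporal split, as in the earlier example.
#
#   forecaster.fit(y_train, fh=[1, 2, 3])
#   forecaster.cutoff          # last index of y_train
#   y_pred = forecaster.update_predict(y_test)
#   forecaster.cutoff          # now the last index seen in y_test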
def evaluate( forecaster, cv, y, X=None, strategy="refit", scoring=None, fit_params=None, return_data=False, ): """Evaluate forecaster using cross-validation Parameters ---------- forecaster : sktime.forecaster Any forecaster y : pd.Series Target time series to which to fit the forecaster. X : pd.DataFrame, optional (default=None) Exogenous variables cv : Temporal cross-validation splitter Splitter of how to split the data into test data and train data strategy : str, optional (default="refit") Must be "refit" or "update". The strategy defines whether the `forecaster` is only fitted on the first train window data and then updated, or always refitted. scoring : object of class MetricFunctionWrapper from sktime.performance_metrics, optional. Example scoring=sMAPE(). Used to get a score function that takes y_pred and y_test as arguments, by default None (if None, uses sMAPE) fit_params : dict, optional (default=None) Parameters passed to the `fit` call of the forecaster. return_data : bool, optional Returns three additional columns in the DataFrame, by default False. The cells of the columns contain each a pd.Series for y_train, y_pred, y_test. Returns ------- pd.DataFrame DataFrame that contains several columns with information regarding each refit/update and prediction of the forecaster. Examples -------- >>> from sktime.datasets import load_airline >>> from sktime.forecasting.model_evaluation import evaluate >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter >>> from sktime.forecasting.naive import NaiveForecaster >>> y = load_airline() >>> forecaster = NaiveForecaster(strategy="mean", sp=12) >>> cv = ExpandingWindowSplitter(initial_window=24, step_length=12, ... fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) >>> results = evaluate(forecaster=forecaster, y=y, cv=cv) """ _check_strategy(strategy) cv = check_cv(cv, enforce_start_with_window=True) scoring = check_scoring(scoring) y, X = check_y_X(y, X) fit_params = {} if fit_params is None else fit_params # Define score name. score_name = "test_" + scoring.name # Initialize dataframe. results = pd.DataFrame() # Run temporal cross-validation. for i, (train, test) in enumerate(cv.split(y)): # split data y_train, y_test, X_train, X_test = _split(y, X, train, test, cv.fh) # create forecasting horizon fh = ForecastingHorizon(y_test.index, is_relative=False) # fit/update start_fit = time.time() if i == 0 or strategy == "refit": forecaster.fit(y_train, X_train, fh=fh, **fit_params) else: # if strategy == "update": forecaster.update(y_train, X_train) fit_time = time.time() - start_fit # predict start_pred = time.time() y_pred = forecaster.predict(fh, X=X_test) pred_time = time.time() - start_pred # score score = scoring(y_pred, y_test) # save results results = results.append( { score_name: score, "fit_time": fit_time, "pred_time": pred_time, "len_train_window": len(y_train), "cutoff": forecaster.cutoff, "y_train": y_train if return_data else np.nan, "y_test": y_test if return_data else np.nan, "y_pred": y_pred if return_data else np.nan, }, ignore_index=True, ) # post-processing of results if not return_data: results = results.drop(columns=["y_train", "y_test", "y_pred"]) results["len_train_window"] = results["len_train_window"].astype(int) return results
def evaluate(
    forecaster,
    cv,
    y,
    X=None,
    strategy="refit",
    scoring=None,
    return_data=False,
):
    """Evaluate forecaster using cross-validation.

    Parameters
    ----------
    forecaster : sktime.forecaster
        Any forecaster
    cv : sktime.SlidingWindowSplitter or sktime.ExpandingWindowSplitter
        Splitter of how to split the data into test data and train data
    y : pd.Series
        Target time series to which to fit the forecaster.
    X : pd.DataFrame, optional (default=None)
        Exogenous variables
    strategy : str, optional (default="refit")
        Must be "refit" or "update". The strategy defines whether the
        forecaster is only fitted on the first train window and then
        updated, or always refitted.
    scoring : object of class MetricFunctionWrapper from
        sktime.performance_metrics, optional. Example: scoring=sMAPE().
        Used to get a score function that takes y_pred and y_test as
        arguments, by default None (if None, uses sMAPE).
    return_data : bool, optional (default=False)
        Returns three additional columns in the DataFrame, by default False.
        The cells of the columns contain each a pd.Series for y_train,
        y_pred, y_test.

    Returns
    -------
    pd.DataFrame
        DataFrame that contains several columns with information regarding
        each refit/update and prediction of the forecaster.

    Examples
    --------
    >>> from sktime.datasets import load_airline
    >>> from sktime.performance_metrics.forecasting import evaluate
    >>> from sktime.forecasting.model_selection import ExpandingWindowSplitter
    >>> from sktime.forecasting.naive import NaiveForecaster
    >>> y = load_airline()
    >>> forecaster = NaiveForecaster(strategy="drift", sp=12)
    >>> cv = ExpandingWindowSplitter(
    ...     initial_window=24,
    ...     step_length=12,
    ...     fh=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    >>> results = evaluate(forecaster=forecaster, y=y, cv=cv)
    """
    cv = check_cv(cv)
    y = check_y(y)
    _check_strategies(strategy)
    scoring = check_scoring(scoring)

    n_splits = cv.get_n_splits(y)

    # Collect result rows in a list; a DataFrame is built after the loop
    # (avoids the deprecated DataFrame.append).
    results = []

    cv.start_with_window = True

    for i, (train, test) in enumerate(tqdm(cv.split(y), total=n_splits)):
        # get initial window, if required
        if i == 0 and cv.initial_window and strategy == "update":
            train, test = cv.split_initial(y)
            # this might have to be directly handled in split_initial()
            test = test[: len(cv.fh)]

        # create train/test data
        y_train = y.iloc[train]
        y_test = y.iloc[test]
        X_train = X.iloc[train] if X is not None else None
        X_test = X.iloc[test] if X is not None else None

        # create forecasting horizon
        fh = ForecastingHorizon(y_test.index, is_relative=False)

        # fit/update
        start_fit = time.time()
        if strategy == "refit" or i == 0:
            forecaster.fit(y=y_train, X=X_train, fh=fh)
        else:  # strategy == "update" and i != 0:
            forecaster.update(y=y_train, X=X_train)
        fit_time = time.time() - start_fit

        # predict
        start_pred = time.time()
        y_pred = forecaster.predict(fh=fh, X=X_test)
        pred_time = time.time() - start_pred

        # save results
        results.append(
            {
                "test_" + scoring.__class__.__name__: scoring(y_pred, y_test),
                "fit_time": fit_time,
                "pred_time": pred_time,
                "len_train_window": len(y_train),
                "cutoff": forecaster.cutoff,
                "y_train": y_train if return_data else np.nan,
                "y_test": y_test if return_data else np.nan,
                "y_pred": y_pred if return_data else np.nan,
            }
        )

    # collect rows into a DataFrame
    results = pd.DataFrame(results)

    # post-processing of results
    if not return_data:
        results = results.drop(columns=["y_train", "y_test", "y_pred"])
    results["len_train_window"] = results["len_train_window"].astype(int)

    return results
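# A hedged sketch of the "update" strategy handled above: with an
# initial_window set on the splitter, the forecaster is fitted once on the
# initial window and then updated at each subsequent split instead of being
# refitted. Dataset and window sizes are illustrative assumptions.
#
#   from sktime.datasets import load_airline
#   from sktime.forecasting.model_selection import SlidingWindowSplitter
#   from sktime.forecasting.naive import NaiveForecaster
#
#   y = load_airline()
#   cv = SlidingWindowSplitter(fh=[1, 2, 3], window_length=12, initial_window=24)
#   results = evaluate(
#       forecaster=NaiveForecaster(sp=12), y=y, cv=cv, strategy="update"
#   )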
def update_predict(
    self,
    y,
    cv=None,
    X=None,
    update_params=True,
    return_pred_int=False,
    alpha=DEFAULT_ALPHA,
):
    """Make and update predictions iteratively over the test set.

    Parameters
    ----------
    y : pd.Series
    cv : temporal cross-validation generator, optional (default=None)
    X : pd.DataFrame, optional (default=None)
    update_params : bool, optional (default=True)
    return_pred_int : bool, optional (default=False)
    alpha : float or list of floats, optional (default=DEFAULT_ALPHA)

    Returns
    -------
    y_pred : pd.Series
        Point predictions
    y_pred_int : pd.DataFrame
        Prediction intervals
    """
    self.check_is_fitted()

    if return_pred_int and not self.get_tag("capability:pred_int"):
        raise NotImplementedError(
            f"{self.__class__.__name__} does not have the capability to return "
            "prediction intervals. Please set return_pred_int=False. If you "
            "think this estimator should have the capability, please open "
            "an issue on sktime."
        )

    # input checks and minor coercions on X, y
    ###########################################

    # checking y
    enforce_univariate = self.get_tag("scitype:y") == "univariate"
    enforce_multivariate = self.get_tag("scitype:y") == "multivariate"
    enforce_index_type = self.get_tag("enforce_index_type")

    check_y_args = {
        "enforce_univariate": enforce_univariate,
        "enforce_multivariate": enforce_multivariate,
        "enforce_index_type": enforce_index_type,
    }

    # update only for non-empty data
    y = check_series(y, allow_empty=True, **check_y_args, var_name="y")
    # end checking y

    # checking X
    X = check_series(X, enforce_index_type=enforce_index_type, var_name="X")
    if self.get_tag("X-y-must-have-same-index"):
        check_equal_time_index(X, y)
    # end checking X

    cv = check_cv(cv)

    return self._predict_moving_cutoff(
        y,
        cv,
        X,
        update_params=update_params,
        return_pred_int=return_pred_int,
        alpha=alpha,
    )