def fit(self, y, fh=None, X=None):
    """
    Internal fit.

    Fits every estimator listed in ``self.estimators`` on the same target
    series and stores the results in ``self.fitted_estimators_``.

    Parameters
    ----------
    y : pandas.Series
        Target time series to which to fit the forecaster.
    fh : array-like, optional (default=None)
        The forecasting horizon with the steps ahead to predict. It is passed
        through ``validate_fh`` and the validated horizon is handed to every
        estimator.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that if
        an ``ARIMA`` is fit on exogenous features, it must also be provided
        exogenous features for making predictions.

    Returns
    -------
    self : returns an instance of self.
    """
    # validate forecasting horizon
    fh = validate_fh(fh)

    # Start from an empty list on every call: appending to the list left over
    # from a previous fit would keep stale estimators around and make the
    # number of fitted estimators disagree with the number of estimators.
    self.fitted_estimators_ = []

    for _, estimator in self.estimators:
        # TODO implement set/get params interface
        # estimator.set_params(**{"check_input": False})
        fitted_estimator = estimator.fit(y, fh=fh, X=X)
        self.fitted_estimators_.append(fitted_estimator)
    return self
def predict(self, fh=1, X=None):
    """
    Predict using fitted estimator.

    Public entry point: checks that the forecaster has been fitted,
    optionally validates the inputs and then delegates to ``self._predict``.

    Parameters
    ----------
    fh : array-like, optional (default=1)
        The forecasters horizon with the steps ahead to predict. The default
        is a one-step ahead forecast. ``None`` is forwarded to ``_predict``
        without validation.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that if
        provided, the forecaster must also have been fitted on the exogenous
        features.

    Returns
    -------
    Predictions : pandas.Series, shape=(len(fh),)
        Returns series of predicted values.
    """
    check_is_fitted(self, '_is_fitted')

    if self.check_input:
        validate_X(X)

    # Only a horizon that was actually given is validated
    horizon = fh if fh is None else validate_fh(fh)

    # Forward X only when supplied, so estimators whose _predict does not
    # accept exogenous data keep working
    predict_kwargs = {'X': X} if X is not None else {}

    # Estimator specific implementation of the predict method
    return self._predict(fh=horizon, **predict_kwargs)
def split_into_tabular_train_test(x, window_length=None, fh=None, test_size=1):
    """Split a single time series into tabular train and test sets.

    A rolling window is moved over the positions of ``x``; every split yields
    one row of input values and one row of target values. The last
    ``test_size`` rows form the test set, all earlier rows the training set.

    Parameters
    ----------
    x : array-like
        The time series; it is indexed with the position arrays produced by
        the rolling window splitter.
    window_length : int, optional (default=None)
        Window length handed to ``RollingWindowSplit``.
    fh : array-like, optional (default=None)
        Forecasting horizon, validated with ``validate_fh``.
    test_size : int, optional (default=1)
        Number of trailing windows put into the test set.

    Returns
    -------
    x_train, y_train, x_test, y_test : np.ndarray
        Stacked input and target windows for the train and test sets.
    """
    horizon = validate_fh(fh)
    positions = np.arange(len(x))
    splitter = RollingWindowSplit(window_length=window_length, fh=horizon)

    # Collect one input window and one target window per split
    feature_rows = []
    target_rows = []
    for in_idx, out_idx in splitter.split(positions):
        feature_rows.append(x[in_idx])
        target_rows.append(x[out_idx])

    # Stack the windows into tabular arrays
    features = np.array(feature_rows)
    targets = np.array(target_rows)

    # The trailing `test_size` rows are held out for testing
    train_rows = slice(None, -test_size)
    test_rows = slice(-test_size, None)
    return (
        features[train_rows, :],
        targets[train_rows, :],
        features[test_rows, :],
        targets[test_rows, :],
    )
def test_fhs(forecaster, fh):
    """Check length and index of predictions for the given forecasting horizon."""
    model = forecaster()
    model.fit(y, fh=fh)
    forecast = model.predict(fh=fh)

    # Resolve default values so the horizon can be compared against the output
    horizon = validate_fh(fh)

    # One prediction per step of the horizon
    assert len(forecast) == len(horizon)

    # Predictions are indexed relative to the last observed time point
    last_seen = y.iloc[0].index[-1]
    assert_array_equal(forecast.index.values, last_seen + horizon)
def test_EnsembleForecaster_fhs(fh):
    """Check length and index of ensemble predictions, with and without an explicit horizon."""
    ensemble = EnsembleForecaster(
        estimators=[
            ('ses', ExpSmoothingForecaster()),
            ('last', DummyForecaster(strategy='last')),
        ]
    )

    if fh is not None:
        # Validate the horizon first, then fit and predict with it
        horizon = validate_fh(fh)
        ensemble.fit(y, fh=horizon)
        forecast = ensemble.predict(fh=horizon)
    else:
        # Rely on the defaults of fit and predict
        ensemble.fit(y)
        forecast = ensemble.predict()
        # The remaining checks compare against the one-step ahead horizon
        horizon = validate_fh(1)

    # One prediction per step of the horizon
    assert len(forecast) == len(horizon)

    # Predictions are indexed relative to the last observed time point
    last_seen = y.iloc[0].index[-1]
    assert_array_equal(forecast.index.values, last_seen + horizon)
def __init__(self, window_length=None, fh=None):
    # TODO input checks
    # A window length is optional, but when given its type must be an integer type
    window_length_given = window_length is not None
    if window_length_given and not np.issubdtype(type(window_length), np.integer):
        raise ValueError(
            f"Window length must be an integer, but found: {type(window_length)}"
        )

    self.window_length = window_length
    self.fh = validate_fh(fh)

    # Attributes updated in split
    self.n_splits_ = None
    self.window_length_ = None
def predict(self, fh=None, X=None):
    """
    Internal predict using fitted estimator.

    Every fitted estimator predicts the validated horizon; the predictions
    are combined with ``np.average`` using ``self.weights``.

    Parameters
    ----------
    fh : array-like, optional (default=None)
        The forecasters horizon with the steps ahead to predict. It is
        passed through ``validate_fh`` before use.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. Currently ignored,
        see the TODO below.

    Returns
    -------
    Predictions : pandas.Series, shape=(len(fh),)
        Averaged predictions, indexed like the predictions of the individual
        estimators and named after the prediction of the first estimator.

    Raises
    ------
    ValueError
        If the estimators return predictions with differing indexes.
    """
    # TODO pass X only to estimators where the predict method accepts X, currently X is ignored
    fh = validate_fh(fh)

    # One row of predictions per fitted estimator
    y_preds = np.zeros((len(self.fitted_estimators_), len(fh)))
    indexes = []
    name = None
    for i, estimator in enumerate(self.fitted_estimators_):
        y_pred = estimator.predict(fh=fh)
        y_preds[i, :] = y_pred
        indexes.append(y_pred.index)
        if i == 0:
            # The series name has to be taken from the estimator output:
            # rows of the numpy array above do not carry a name.
            name = getattr(y_pred, 'name', None)

    # Check if all predicted horizons are identical
    if not all(index.equals(indexes[0]) for index in indexes):
        raise ValueError('Predicted horizons from estimators do not match')

    # Average predictions over estimators
    avg_preds = np.average(y_preds, axis=0, weights=self.weights)

    # Return average predictions with index
    return pd.Series(avg_preds, index=indexes[0], name=name)
def test_univariate(dynamic, fh):
    """Check that the reduction strategy predicts a series shaped like the held-out test data."""
    horizon = validate_fh(fh)
    n_steps = len(horizon)

    # Hold out the last `n_steps` time points of the series for testing
    y = load_shampoo_sales(return_y_as_dataframe=True)
    n_obs = y.iloc[0, 0].shape[0]
    positions = np.arange(n_obs)
    y_train = select_times(y, positions[:-n_steps])
    y_test = select_times(y, positions[-n_steps:])

    task = ForecastingTask(target="ShampooSales", fh=horizon, metadata=y_train)
    strategy = Forecasting2TSRReductionStrategy(estimator=regressor, dynamic=dynamic)
    strategy.fit(task, y_train)
    y_pred = strategy.predict()

    expected_shape = y_test[task.target].iloc[0].shape
    assert y_pred.shape == expected_shape
def predict(self, fh=None, X=None):
    """
    Predict the forecasting horizon from the last window seen in fit.

    Prediction is either dynamic, making only one-step ahead forecasts and
    feeding each forecast back into the window, or static, using only the
    last window and one fitted estimator per step of the horizon.

    Parameters
    ----------
    fh : array-like, optional (default=None)
        The forecasting horizon with the steps ahead to predict. It is
        passed through ``validate_fh`` before use.
    X : pandas.DataFrame, optional (default=None)
        Exogenous variables; not supported yet.

    Returns
    -------
    y_pred : pandas.Series, shape=(len(fh),)
        Predicted values, named like the last window and indexed by the last
        index value of the last window plus ``fh``.

    Raises
    ------
    NotImplementedError
        If ``X`` is given.
    ValueError
        If a dynamic prediction is requested for a step smaller than 1.
    """
    if X is not None:
        # TODO handle exog data
        raise NotImplementedError()

    # get forecasting horizon
    fh = validate_fh(fh)

    # use last window as test data for prediction, in the nested format
    x_test = pd.DataFrame(pd.Series([self._last_window]))

    if self.dynamic:
        steps = np.asarray(fh, dtype=int)
        if np.any(steps < 1):
            raise ValueError(
                "Dynamic prediction requires all steps of the forecasting horizon (fh) to be >= 1"
            )

        # Roll up to the furthest requested step. Rolling only len(fh) times
        # would return the 1..len(fh) step ahead forecasts even when other
        # steps, e.g. fh=[3], were requested.
        n_steps = int(steps.max()) if steps.size > 0 else 0
        rolled = np.zeros(n_steps)
        for i in range(n_steps):
            # NOTE(review): in dynamic mode `self.estimators_` is used as a
            # single fitted estimator - confirm against the fit implementation
            rolled[i] = self.estimators_.predict(x_test)

            # append prediction to last window and roll window
            window = np.append(x_test.iloc[0, 0].values, rolled[i])[-self.window_length_:]

            # put data into required nested format
            x_test = pd.DataFrame(pd.Series([pd.Series(window)]))

        # keep only the requested steps (step k is stored at position k - 1)
        y_pred = rolled[steps - 1]
    else:
        # Iterate over estimators/forecast horizon
        # Any fh is ignored if specified
        y_pred = np.zeros(len(fh))
        for i, estimator in enumerate(self.estimators_):
            y_pred[i] = estimator.predict(x_test)

    # Add name and forecast index
    index = self._last_window.index[-1] + fh
    name = self._last_window.name
    return pd.Series(y_pred, name=name, index=index)
def fit(self, y, fh=1, X=None):
    """
    Fit forecaster.

    Public entry point: optionally validates the inputs, stores the time
    index of ``y``, delegates to ``self._fit`` and marks the forecaster as
    fitted.

    Parameters
    ----------
    y : pandas.Series
        Target time series to which to fit the forecaster.
    fh : array-like, optional (default=1)
        The forecasters horizon with the steps ahead to predict. The default
        is a one-step ahead forecast. ``None`` is forwarded to ``_fit``
        without validation.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that if
        an ``ARIMA`` is fit on exogenous features, it must also be provided
        exogenous features for making predictions.

    Returns
    -------
    self : returns an instance of self.
    """
    if self.check_input:
        validate_y_X(y, X)

    # Only a horizon that was actually given is validated
    horizon = None if fh is None else validate_fh(fh)

    # Keep index for predicting where forecasters horizon will be relative to y seen in fit
    self._time_index = get_time_index(y)

    # Forward X only when supplied, so estimators that only take y keep working
    fit_kwargs = {'X': X} if X is not None else {}

    # Internal fit
    self._fit(y, fh=horizon, **fit_kwargs)
    self._is_fitted = True
    return self
def fit(self, y, fh=None, X=None):
    """Fit forecaster.

    Validates the forecasting horizon, delegates to ``self._fit`` and marks
    the forecaster as fitted.

    Parameters
    ----------
    y : pandas.Series
        Target time series to which to fit the forecaster.
    fh : array-like, optional (default=None)
        The forecasters horizon with the steps ahead to predict. It may only
        be omitted when ``self.dynamic`` is true.
    X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
        An optional 2-d dataframe of exogenous variables. If provided, these
        variables are used as additional features in the regression
        operation. This should not include a constant or trend. Note that if
        an ``ARIMA`` is fit on exogenous features, it must also be provided
        exogenous features for making predictions.

    Returns
    -------
    self : returns an instance of self.

    Raises
    ------
    ValueError
        If ``fh`` is None while ``self.dynamic`` is false.
    """
    if fh is None:
        # Without a horizon only the dynamic strategy can be fitted
        if not self.dynamic:
            raise ValueError(
                "If dynamic is set to False, forecasting horizon (fh) has to be specified in fit, "
                "as one estimator is fit for each step ahead forecast of the forecasting horizon"
            )
        horizon = None
    else:
        horizon = validate_fh(fh)

    # Forward X only when supplied, so estimators that only take y keep working
    fit_kwargs = {'X': X} if X is not None else {}

    # Internal fit
    self._fit(y, fh=horizon, **fit_kwargs)
    self._is_fitted = True
    return self
def _fit(self, data):
    """
    Internal fit.

    Turns the target series into a tabular regression problem with a rolling
    window and fits either one estimator on one-step ahead targets (dynamic)
    or one estimator per step of the task's forecasting horizon.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data

    Returns
    -------
    self : an instance of self

    Raises
    ------
    NotImplementedError
        If the task defines any feature variables.
    """
    # Select target and feature variables
    target = data[self._task.target]
    if len(self._task.features) > 0:
        # TODO how to handle exogenous variables
        exog = data[self._task.features]  # noqa: F841 - selected but not used yet
        raise NotImplementedError()

    # For dynamic prediction, models are only trained on one-step ahead forecast
    if self.dynamic:
        fh = 1
    else:
        fh = self._task.fh
    fh = validate_fh(fh)

    # Set up window roller
    self.rw = RollingWindowSplit(window_length=self.window_length, fh=fh)

    # Unnest target series
    series = target.iloc[0]
    positions = np.arange(len(series))

    # Transform target series into tabular format using rolling window splits
    feature_rows = []
    target_rows = []
    for feature_window, target_window in self.rw.split(positions):
        feature_rows.append(series[feature_window])
        target_rows.append(series[target_window])

    # Construct nested pandas DataFrame for X: one cell per window
    X = pd.DataFrame(pd.Series(list(np.array(feature_rows))))
    Y = np.array([np.array(row) for row in target_rows])

    # Fitting
    if self.dynamic:
        # Fit a single estimator on the flattened one-step ahead targets
        flat_targets = Y.ravel()
        single = clone(self.estimator)
        single.fit(X, flat_targets)
        self.estimator_ = single
    else:
        # Fit one estimator for each step-ahead forecast; both lists are
        # reset before cloning starts
        self.estimators, self.estimators_ = [], []
        self.estimators = [clone(self.estimator) for _ in range(len(fh))]

        # Each column of Y holds the targets of one step of the horizon
        for step_estimator, step_targets in zip(self.estimators, Y.T):
            step_estimator.fit(X, pd.Series(step_targets))
            self.estimators_.append(step_estimator)

    # Save the last window-length number of observations for predicting
    self.window_length_ = self.rw.get_window_length()
    self._last_window = series.iloc[-self.window_length_:]
    return self
def __init__(self, target, fh=None, features=None, metadata=None):
    # Tag stored on the instance before the base class is initialised
    self._case = "Forecasting"

    # The forecasting horizon is validated once, at construction time
    horizon = validate_fh(fh)
    self._fh = horizon

    # Target, features and metadata are handled by the base class
    base_kwargs = {"features": features, "metadata": metadata}
    super(ForecastingTask, self).__init__(target, **base_kwargs)
def test_validate_fh_bad_input_args(arg):
    """Check that validate_fh raises a ValueError for the given bad argument."""
    bad_horizon = arg
    with raises(ValueError):
        validate_fh(bad_horizon)
def test_validate_fh_default_arg():
    """Check that validate_fh(None) returns an array equal to a single one."""
    validated = validate_fh(None)
    expected = np.ones(1)
    assert_array_equal(expected, validated)