def helper_test_drop(test_case, test_series: TimeSeries):
    """Check `drop_after` and `drop_before` around the timestamp 2013-01-05.

    The assertions require that the cut timestamp itself is excluded from
    both results and that the frequency string is unchanged by either cut.

    Parameters
    ----------
    test_case
        Object exposing the `assertEqual` and `assertTrue` assertion methods.
    test_series
        The series to cut; it is expected to cover 2013-01-05 with at least
        one step on each side (otherwise the checks below cannot hold).
    """
    cut = pd.Timestamp('20130105')

    # Part kept by drop_after: must stop one step before the cut.
    kept_head = test_series.drop_after(cut)
    test_case.assertEqual(kept_head.end_time(), cut - test_series.freq())
    head_strictly_before = np.all(kept_head.time_index() < cut)
    test_case.assertTrue(head_strictly_before)

    # Part kept by drop_before: must start one step after the cut.
    kept_tail = test_series.drop_before(cut)
    test_case.assertEqual(kept_tail.start_time(), cut + test_series.freq())
    tail_strictly_after = np.all(kept_tail.time_index() > cut)
    test_case.assertTrue(tail_strictly_after)

    # Neither cut may alter the frequency of the series.
    for piece in (kept_head, kept_tail):
        test_case.assertEqual(test_series.freq_str(), piece.freq_str())
def helper_test_append_values(test_case, test_series: TimeSeries):
    """Check that `append_values` rebuilds a series and rejects bad indices.

    The series is split after 2013-01-06; appending the values of the second
    part back onto the first part must give the original series, with or
    without an explicit time index and regardless of the order in which
    values and index are passed. Appending with an index that is
    non-consecutive, overlapping, or of another frequency must raise
    `ValueError`.

    Parameters
    ----------
    test_case
        Object exposing the `assertEqual` and `assertRaises` methods.
    test_series
        The series to split and rebuild. The hard-coded dates assume it
        covers early January 2013 -- TODO confirm against the callers.
    """
    # reconstruct series
    head, tail = test_series.split_after(pd.Timestamp('20130106'))
    test_case.assertEqual(head.append_values(tail.values(), tail.time_index()), test_series)
    test_case.assertEqual(head.append_values(tail.values()), test_series)

    # test for equality
    test_case.assertEqual(
        test_series.drop_after(pd.Timestamp('20130105')).append_values(
            test_series.drop_before(pd.Timestamp('20130104')).values()),
        test_series)
    # appending nothing must leave the series unchanged
    test_case.assertEqual(head.append_values([]), head)

    # randomize order: values and index are shuffled with the same permutation
    rd_order = np.random.permutation(range(len(tail.values())))
    test_case.assertEqual(
        head.append_values(tail.values()[rd_order], tail.time_index()[rd_order]),
        test_series)

    # In the checks below the call is made directly inside `assertRaises`:
    # wrapping it in `assertEqual` would add a comparison that can never run
    # when the expected error is raised, and whose own errors could make the
    # check pass for the wrong reason.

    # add non consecutive index
    with test_case.assertRaises(ValueError):
        head.append_values(tail.values(), tail.time_index() + tail.freq())

    # add existing indices
    with test_case.assertRaises(ValueError):
        head.append_values(tail.values(), tail.time_index() - 3 * tail.freq())

    # other frequency
    with test_case.assertRaises(ValueError):
        head.append_values(tail.values(), pd.date_range('20130107', '20130113', freq='2d'))
def historical_forecasts(self,
                         series: TimeSeries,
                         covariates: Optional[TimeSeries] = None,
                         start: Union[pd.Timestamp, float, int] = 0.5,
                         forecast_horizon: int = 1,
                         stride: int = 1,
                         retrain: bool = True,
                         overlap_end: bool = False,
                         last_points_only: bool = True,
                         verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
    """
    Computes the historical forecasts the model would have produced with an expanding training window
    and (by default) returns a time series created from the last point of each of these individual forecasts.

    To this end, it repeatedly builds a training set from the beginning of `series`. It trains the
    current model on the training set, emits a forecast of length equal to `forecast_horizon`, and then
    moves the end of the training set forward by `stride` time steps.

    By default, this method will return a single time series made up of the last point of each
    historical forecast. This time series will thus have a frequency of `series.freq() * stride`.
    If `last_points_only` is set to False, it will instead return a list of the historical forecasts.

    By default, this method always re-trains the models on the entire available history,
    corresponding to an expanding window strategy.
    If `retrain` is set to False (useful for models for which training might be time-consuming, such as
    deep learning models), the model will only be trained on the initial training window
    (up to `start` time stamp), and only if it has not been trained before. Then, at every iteration, the
    newly expanded input sequence will be fed to the model to produce the new output.

    Parameters
    ----------
    series
        The target time series to use to successively train and evaluate the historical forecasts
    covariates
        An optional covariate series. This applies only if the model supports covariates.
        It must have the same time index as `series`.
    start
        The first point of time at which a prediction is computed for a future time.
        This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
        In the case of `float`, the parameter will be treated as the proportion of the time series
        that should lie before the first prediction point.
        In the case of `int`, the parameter will be treated as an integer index to the time index of
        `series` that will be used as first prediction time.
        In case of `pandas.Timestamp`, this time stamp will be used to determine the first prediction time
        directly.
    forecast_horizon
        The forecast horizon for the predictions. Must be strictly positive.
    stride
        The number of time steps between two consecutive predictions. Must be strictly positive.
    retrain
        Whether to retrain the model for every prediction or not. Currently only `TorchForecastingModel`
        instances such as `RNNModel`, `TCNModel`, `NBEATSModel` and `TransformerModel` support
        setting `retrain` to `False`.
    overlap_end
        Whether the returned forecasts can go beyond the series' end or not
    last_points_only
        Whether to retain only the last point of each historical forecast.
        If set to True, the method returns a single `TimeSeries` containing the successive point forecasts.
        Otherwise returns a list of historical `TimeSeries` forecasts.
    verbose
        Whether to print progress

    Returns
    -------
    TimeSeries or List[TimeSeries]
        By default, a single TimeSeries instance created from the last point of each individual forecast.
        If `last_points_only` is set to False, a list of the historical forecasts.

    Raises
    ------
    Whatever `raise_if_not` raises (presumably `ValueError` -- to be confirmed against its definition)
        If `covariates` does not share the time index of `series`, or if `stride` or
        `forecast_horizon` is not strictly positive.
    """
    if covariates is not None:
        raise_if_not(series.has_same_time_as(covariates),
                     'The provided series and covariates must have the same time index.')

    # A non-positive stride never moves the prediction time forward, which would make the loop
    # building `pred_times` below run forever; a non-positive horizon yields no forecast point.
    raise_if_not(stride > 0, 'The provided stride parameter must be a positive integer.')
    raise_if_not(forecast_horizon > 0, 'The provided forecasting horizon must be a positive integer.')

    # prepare the start parameter -> pd.Timestamp
    start = get_timestamp_at_point(start, series)

    # build the prediction times in advance (to be able to use tqdm)
    if not overlap_end:
        last_valid_pred_time = series.time_index()[-1 - forecast_horizon]
    else:
        last_valid_pred_time = series.time_index()[-2]

    # time offset between two consecutive prediction times
    step = series.freq() * stride

    pred_times = [start]
    while pred_times[-1] < last_valid_pred_time:
        # compute the next prediction time and add it to pred times
        pred_times.append(pred_times[-1] + step)

    # the last prediction time computed might have overshot last_valid_pred_time
    if pred_times[-1] > last_valid_pred_time:
        pred_times.pop(-1)

    iterator = _build_tqdm_iterator(pred_times, verbose)

    # Either store the whole forecasts or only the last points of each forecast, depending on last_points_only
    forecasts = []
    last_points_times = []
    last_points_values = []

    # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
    # The supported keyword arguments do not change between iterations, so inspect them only once.
    fit_params = signature(self.fit).parameters
    predict_params = signature(self.predict).parameters
    has_covariates = covariates is not None
    fit_with_covariates = has_covariates and 'covariates' in fit_params
    predict_with_covariates = has_covariates and 'covariates' in predict_params
    predict_with_series = 'series' in predict_params

    # iterate and forecast
    for pred_time in iterator:
        # build the training series (and the matching covariates, if any)
        train = series.drop_after(pred_time)
        train_cov = covariates.drop_after(pred_time) if has_covariates else None

        if retrain:
            if fit_with_covariates:
                self.fit(series=train, covariates=train_cov)
            else:
                self.fit(series=train)

        # pass to `predict` only the arguments its signature declares
        if predict_with_covariates:
            forecast = self.predict(n=forecast_horizon, series=train, covariates=train_cov)
        elif predict_with_series:
            forecast = self.predict(n=forecast_horizon, series=train)
        else:
            forecast = self.predict(n=forecast_horizon)

        if last_points_only:
            last_points_values.append(forecast.values()[-1])
            last_points_times.append(forecast.end_time())
        else:
            forecasts.append(forecast)

    if last_points_only:
        return TimeSeries.from_times_and_values(pd.DatetimeIndex(last_points_times),
                                                np.array(last_points_values),
                                                freq=step)
    return forecasts