def helper_test_shift(test_case, test_series: TimeSeries):
    """Assert the expected behaviour of ``TimeSeries.shift``.

    Parameters
    ----------
    test_case
        The test case object providing the ``assert*`` methods; it must
        also expose a ``series1`` attribute.
    test_series
        The series whose shifted time index is checked.
    """
    # Shifting by zero steps must give back an equal series.
    unshifted = test_case.series1.shift(0)
    test_case.assertTrue(unshifted == test_case.series1)

    # One step forward: the first timestamp disappears and a new one is
    # added one frequency step after the last.
    shifted_forward = test_series.shift(1)
    reference_index = test_series.time_index()
    one_past_end = pd.DatetimeIndex(
        [reference_index[-1] + test_series.freq()])
    expected_forward = reference_index[1:].append(one_past_end)
    test_case.assertTrue(
        shifted_forward.time_index().equals(expected_forward))

    # One step backward: a new timestamp is added one frequency step before
    # the first and the last one disappears.
    shifted_backward = test_series.shift(-1)
    reference_index = test_series.time_index()
    one_before_start = pd.DatetimeIndex(
        [reference_index[0] - test_series.freq()])
    expected_backward = one_before_start.append(reference_index[:-1])
    test_case.assertTrue(
        shifted_backward.time_index().equals(expected_backward))

    # Very large shifts are expected to raise OverflowError.
    with test_case.assertRaises(OverflowError):
        test_series.shift(1e+6)

    monthly_series = TimeSeries.from_times_and_values(
        pd.date_range('20130101', '20130601', freq='m'), range(5))
    with test_case.assertRaises(OverflowError):
        monthly_series.shift(1e+4)

    # A single-point series with an explicit daily frequency can be shifted.
    single_point = TimeSeries.from_times_and_values(
        pd.date_range('20130101', '20130101'), range(1), freq='D')
    moved_point = single_point.shift(1)
    test_case.assertEqual(moved_point.time_index()[0],
                          pd.Timestamp('20130102'))
def helper_test_drop(test_case, test_series: TimeSeries):
    """Assert the expected behaviour of ``drop_after`` and ``drop_before``.

    Parameters
    ----------
    test_case
        The test case object providing the ``assert*`` methods.
    test_series
        The series to truncate around 2013-01-05.
    """
    cutoff = pd.Timestamp('20130105')

    # Everything from the cutoff onwards is removed.
    head = test_series.drop_after(cutoff)
    test_case.assertEqual(head.end_time(), cutoff - test_series.freq())
    test_case.assertTrue(np.all(head.time_index() < cutoff))

    # Everything up to and including the cutoff is removed.
    tail = test_series.drop_before(cutoff)
    test_case.assertEqual(tail.start_time(), cutoff + test_series.freq())
    test_case.assertTrue(np.all(tail.time_index() > cutoff))

    # Truncating must not change the frequency string.
    for truncated in (head, tail):
        test_case.assertEqual(test_series.freq_str(), truncated.freq_str())
def make_and_compare_predictions(
    gathered_stats: TimeSeries,
    predictions_to_make: ModelsToMake,
    prediction_duration_past: pd.Timedelta = _ONE_DAY,
    prediction_duration_future: pd.Timedelta = None,
    metric: Metric = metrics.coefficient_of_variation,
    transform: bool = False,
) -> PredictionEvaluations:
    """Run several forecasts on held-out data and score each of them.

    The trailing ``prediction_duration_past`` of ``gathered_stats`` is split
    off as the "actual" values; the remainder is used for training. When
    ``prediction_duration_future`` is given, the forecasts are extended by
    that duration (converted to a number of steps of the series frequency).

    With ``transform`` set, ``make_forecasts_ensure_positive`` is used
    instead of ``make_forecasts``.
    """
    split_point = gathered_stats.end_time() - prediction_duration_past
    train, actual = gathered_stats.split_after(split_point)

    n_pred: int = len(actual)
    if prediction_duration_future:
        extra_steps = int(prediction_duration_future / gathered_stats.freq())
        n_pred += extra_steps

    # Both forecasting helpers take the same keyword arguments, so only the
    # callable differs between the two modes.
    forecaster = make_forecasts_ensure_positive if transform else make_forecasts
    forecasts = forecaster(
        train=train,
        n_pred=n_pred,
        predictions_to_make=predictions_to_make,
    )

    evaluations = compare_predictions(actual, forecasts, metric)
    return PredictionEvaluations(
        predictions=forecasts,
        evaluations=evaluations,
    )
def _auto_fill(series: TimeSeries, **interpolate_kwargs) -> TimeSeries:
    """
    Fills the missing values of `series` by interpolation, delegating to
    `pandas.DataFrame.interpolate()`.

    Parameters
    ----------
    series
        The time series whose missing values should be filled.
    interpolate_kwargs
        Keyword arguments forwarded to `pandas.DataFrame.interpolate()`.
        `limit_direction` defaults to `'both'` when not given, and `inplace`
        is always forced to `True`.
        See `the documentation
        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html>`_
        for the list of supported parameters.

    Returns
    -------
    TimeSeries
        A new TimeSeries built from the interpolated values, with the time
        index and frequency of `series`.
    """
    interpolate_kwargs.setdefault('limit_direction', 'both')
    # The frame obtained below is interpolated in place, whatever the caller
    # asked for.
    interpolate_kwargs['inplace'] = True

    frame = series.pd_dataframe()
    frame.interpolate(**interpolate_kwargs)

    return TimeSeries.from_times_and_values(
        series.time_index(), frame.values, series.freq())
def auto_arima_analyzed_log(  # noqa: WPS211  # Found too many arguments
    gathered_stats: TimeSeries,
    start_arima_params: ArimaParams = None,
    max_arima_params: ArimaParams = None,
    component_index: int = None,
    seasonal: bool = False,
    seasonal_length: pd.Timedelta = _ONE_DAY,
    **autoarima_kwargs: Mapping[str, Any],
) -> AutoARIMA:
    """ARIMA forecast with auto-optimized params.

    Wraps darts.models.arima.AutoARIMA.

    Parameters
    ----------
    gathered_stats
        The series the model is fitted on.
    start_arima_params
        Starting values for the parameter search. Defaults to
        ``ArimaParams(d=None)``. This object is never modified.
    max_arima_params
        Upper bounds for the parameter search. When omitted, each bound is
        derived from the matching start value as ``max(start * 2, 1)``, or
        ``1`` when the start value cannot be doubled (e.g. ``None``).
    component_index
        Forwarded to ``model.fit``.
    seasonal
        Forwarded to ``AutoARIMA`` as ``seasonal``.
    seasonal_length
        Length of one season; converted to a number of steps of the series
        frequency and passed to ``AutoARIMA`` as ``m``.
    autoarima_kwargs
        Extra keyword arguments forwarded to ``AutoARIMA``.

    Returns
    -------
    AutoARIMA
        The fitted model.
    """
    import copy  # local import: only needed to derive the default bounds

    if start_arima_params is None:
        start_arima_params = ArimaParams(d=None)

    if max_arima_params is None:
        # Derive the bounds on a *copy*. Binding ``max_arima_params`` to the
        # very same object as ``start_arima_params`` would make the loop
        # below overwrite the start values too (turning ``d=None`` into 1)
        # and silently mutate an object owned by the caller.
        max_arima_params = copy.copy(start_arima_params)
        for param_name, start_value in asdict(start_arima_params).items():
            try:
                upper_bound = max(start_value * 2, 1)
            except TypeError:
                # Start value is not numeric (e.g. None).
                upper_bound = 1
            setattr(max_arima_params, param_name, upper_bound)

    model: AutoARIMA = AutoARIMA(  # type: ignore  # untyped function
        start_p=start_arima_params.p,
        d=start_arima_params.d,
        start_q=start_arima_params.q,
        max_p=max_arima_params.p,
        max_d=max_arima_params.d,
        max_q=max_arima_params.q,
        seasonal=seasonal,
        m=int(seasonal_length / gathered_stats.freq()),
        **autoarima_kwargs,
    )
    model.fit(gathered_stats, component_index=component_index)
    return model
def calculate_distance_wlen(series: TimeSeries, peak_params: PeakParams) -> PeakParams:
    """Calculate the distance and wlen parameters for peak-finding.

    Both parameters are expressed as a multiple of
    ``interval / series.freq()``, i.e. the number of series steps that fit
    into ``peak_params.interval``:

    * ``distance`` defaults to one such interval,
    * ``wlen`` defaults to three such intervals.

    A parameter is only filled in when it is currently falsy (``None``,
    ``0``, ...); values already set are left untouched. ``peak_params`` is
    updated in place and also returned.
    """
    if not peak_params.distance:
        peak_params.distance = peak_params.interval / series.freq()
    if not peak_params.wlen:
        # Convert to a step count before scaling, so that ``wlen`` uses the
        # same unit as ``distance`` instead of remaining a raw duration.
        peak_params.wlen = 3 * (peak_params.interval / series.freq())
    return peak_params
def helper_test_append(test_case, test_series: TimeSeries):
    """Assert the expected behaviour of ``TimeSeries.append``.

    Parameters
    ----------
    test_case
        The test case object providing the ``assert*`` methods.
    test_series
        The series that is split and re-assembled.
    """
    # Splitting and appending the two halves must rebuild the original
    # series, frequency included.
    first_part, second_part = test_series.split_after(
        pd.Timestamp('20130106'))
    test_case.assertEqual(first_part.append(second_part), test_series)
    test_case.assertEqual(first_part.append(second_part).freq(),
                          test_series.freq())

    # Appending a series that would leave a gap is rejected.
    gapped_tail = test_series.drop_before(pd.Timestamp('20130107'))
    with test_case.assertRaises(ValueError):
        first_part.append(gapped_tail)

    # Appending a series with a different frequency is rejected.
    other_freq_series = TimeSeries.from_times_and_values(
        pd.date_range('20130107', '20130507', freq='30D'), range(5))
    with test_case.assertRaises(ValueError):
        first_part.append(other_freq_series)
def _const_fill(series: TimeSeries, fill: float = 0) -> TimeSeries:
    """
    Replaces every missing value of `series` with the constant `fill`.

    Parameters
    ----------
    series
        The TimeSeries whose missing values should be replaced.
    fill
        The replacement value (zero by default).

    Returns
    -------
    TimeSeries
        A TimeSeries with the time index and frequency of `series`, where
        all missing values are set to `fill`.
    """
    filled_frame = series.pd_dataframe().fillna(value=fill)
    return TimeSeries.from_times_and_values(
        series.time_index(), filled_frame, series.freq())
def inverse_transform(self, series: TimeSeries, *args, **kwargs) -> TimeSeries:
    """
    Applies the inverse transformation of the wrapped transformer to a
    time series.

    Parameters
    ----------
    series
        The time series to inverse transform

    Returns
    -------
    TimeSeries
        A new time series holding the inverse-transformed values, with the
        time index and frequency of `series`.
    """
    # The parent implementation is invoked first; its return value is not
    # used here.
    super().inverse_transform(series, *args, **kwargs)

    # The transformer is given a 2-D array with one column per component.
    value_matrix = series.values().reshape((-1, series.width))
    restored_values = self.transformer.inverse_transform(value_matrix)

    return TimeSeries.from_times_and_values(
        series.time_index(), restored_values, series.freq())
def transform(self, series: TimeSeries, *args, **kwargs) -> TimeSeries:
    """
    Transforms a time series with this (fitted) scaler and returns the
    result as a new time series.

    Series with confidence intervals are not handled - the intervals are
    discarded.

    Parameters
    ----------
    series
        The time series to transform

    Returns
    -------
    TimeSeries
        A new time series holding the transformed values, with the time
        index and frequency of `series`.
    """
    # The parent implementation is invoked first; its return value is not
    # used here.
    super().transform(series, *args, **kwargs)

    # The transformer is given a 2-D array with one column per component.
    value_matrix = series.values().reshape((-1, series.width))
    scaled_values = self.transformer.transform(value_matrix)

    return TimeSeries.from_times_and_values(
        series.time_index(), scaled_values, series.freq())
def extract_subseries(series: TimeSeries, min_gap_size: Optional[int] = 1) -> List[TimeSeries]:
    """
    Splits the series into a list of sub-series, cutting at every
    significant gap of missing values.

    Parameters
    ----------
    series
        The TimeSeries to partition into sub-series
    min_gap_size
        The minimum number of contiguous missing values for a gap to be
        considered significant. Defaults to 1.

    Returns
    -------
    subseries
        A list of TimeSeries, sub-series without significant gaps of
        missing values
    """
    # Missing values at either extreme of the series are dropped first.
    stripped = series.strip()
    freq = stripped.freq()

    # Nothing to split when no value is missing.
    if stripped.pd_dataframe().isna().sum().sum() == 0:
        return [stripped]

    significant_gaps = stripped.gaps().query(f'gap_size>={min_gap_size}')

    # A sub-series starts one step after a gap ends ...
    start_times = [stripped.start_time()]
    start_times.extend((significant_gaps['gap_end'] + freq).to_list())

    # ... and ends one step before the next gap starts.
    end_times = (significant_gaps['gap_start'] - freq).to_list()
    end_times.append(stripped.end_time() + freq)

    return [stripped[start:end] for start, end in zip(start_times, end_times)]
def historical_forecasts(self,
                         series: TimeSeries,
                         covariates: Optional[TimeSeries] = None,
                         start: Union[pd.Timestamp, float, int] = 0.5,
                         forecast_horizon: int = 1,
                         stride: int = 1,
                         retrain: bool = True,
                         overlap_end: bool = False,
                         last_points_only: bool = True,
                         verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
    """
    Computes the forecasts this model would have produced historically,
    using an expanding training window.

    Starting at `start`, a training set is repeatedly built from the
    beginning of `series` up to the current prediction time. The model is
    (optionally) trained on it and emits a forecast of length
    `forecast_horizon`; the end of the training set is then advanced by
    `stride` time steps.

    By default a single time series made of the last point of each forecast
    is returned; it therefore has a frequency of `series.freq() * stride`.
    With `last_points_only` set to False, the list of individual forecasts
    is returned instead.

    By default the model is re-trained on the whole available history at
    every step. With `retrain` set to False (useful for models that are
    expensive to train, such as deep learning models), the model is only
    trained on the initial training window (up to `start`), and only if it
    has not been trained before; at every iteration the newly expanded
    input sequence is then fed to the model to produce the new output.

    Parameters
    ----------
    series
        The target time series used to successively train and evaluate the
        historical forecasts
    covariates
        An optional covariate series. It is only used if the model supports
        covariates, and it must have the same time index as `series`.
    start
        The first point of time at which a prediction is computed for a
        future time. Three data types are supported:
        a `float` is the proportion of the time series that should lie
        before the first prediction point; an `int` is an index into the
        time index of `series`; a `pandas.Timestamp` is used directly as
        the first prediction time.
    forecast_horizon
        The forecast horizon for the predictions
    stride
        The number of time steps between two consecutive predictions.
    retrain
        Whether to retrain the model for every prediction or not. Currently
        only `TorchForecastingModel` instances such as `RNNModel`,
        `TCNModel`, `NBEATSModel` and `TransformerModel` support setting
        `retrain` to `False`.
    overlap_end
        Whether the returned forecasts can go beyond the series' end or not
    last_points_only
        Whether to retain only the last point of each historical forecast.
        If set to True, the method returns a single `TimeSeries` containing
        the successive point forecasts. Otherwise returns a list of
        historical `TimeSeries` forecasts.
    verbose
        Whether to print progress

    Returns
    -------
    TimeSeries or List[TimeSeries]
        By default, a single TimeSeries instance created from the last
        point of each individual forecast. If `last_points_only` is set to
        False, a list of the historical forecasts.
    """
    has_covariates = covariates is not None
    if has_covariates:
        raise_if_not(
            series.has_same_time_as(covariates),
            'The provided series and covariates must have the same time index.'
        )

    # Normalise `start` (float / int / Timestamp) into a pd.Timestamp.
    start = get_timestamp_at_point(start, series)

    # All prediction times are computed up front so that a progress bar can
    # be shown over them.
    if overlap_end:
        last_valid_pred_time = series.time_index()[-2]
    else:
        last_valid_pred_time = series.time_index()[-1 - forecast_horizon]

    step = series.freq() * stride
    pred_times = [start]
    while pred_times[-1] < last_valid_pred_time:
        pred_times.append(pred_times[-1] + step)

    # The final step may have gone past the last valid prediction time.
    if pred_times[-1] > last_valid_pred_time:
        pred_times.pop(-1)

    iterator = _build_tqdm_iterator(pred_times, verbose)

    # Depending on `last_points_only`, either whole forecasts or only the
    # last point of each forecast are collected.
    forecasts = []
    last_points_times = []
    last_points_values = []

    # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
    fit_parameters = signature(self.fit).parameters
    predict_parameters = signature(self.predict).parameters
    fit_with_covariates = has_covariates and 'covariates' in fit_parameters
    predict_with_covariates = has_covariates and 'covariates' in predict_parameters
    predict_with_series = 'series' in predict_parameters

    for pred_time in iterator:
        # Training data: everything strictly before the prediction time.
        train = series.drop_after(pred_time)
        if has_covariates:
            train_cov = covariates.drop_after(pred_time)

        if retrain:
            if fit_with_covariates:
                self.fit(series=train, covariates=train_cov)
            else:
                self.fit(series=train)

        if predict_with_covariates:
            forecast = self.predict(n=forecast_horizon, series=train, covariates=train_cov)
        elif predict_with_series:
            forecast = self.predict(n=forecast_horizon, series=train)
        else:
            forecast = self.predict(n=forecast_horizon)

        if last_points_only:
            last_points_values.append(forecast.values()[-1])
            last_points_times.append(forecast.end_time())
        else:
            forecasts.append(forecast)

    if not last_points_only:
        return forecasts

    return TimeSeries.from_times_and_values(
        pd.DatetimeIndex(last_points_times),
        np.array(last_points_values),
        freq=step)
def ts_inverse_transform(series: TimeSeries, transformer, *args, **kwargs) -> TimeSeries:
    """
    Applies `transformer.inverse_transform` to the values of `series`.

    Parameters
    ----------
    series
        The time series to inverse transform
    transformer
        An object exposing an `inverse_transform` method; it receives the
        series values reshaped to `(-1, series.width)`.
    args, kwargs
        Accepted but not used by this function.

    Returns
    -------
    TimeSeries
        A new time series holding the inverse-transformed values, with the
        time index and frequency of `series`.
    """
    value_matrix = series.values().reshape((-1, series.width))
    restored_values = transformer.inverse_transform(value_matrix)
    return TimeSeries.from_times_and_values(
        series.time_index(), restored_values, series.freq())
def ts_transform(series: TimeSeries, transformer) -> TimeSeries:
    """
    Applies `transformer.transform` to the values of `series`.

    Parameters
    ----------
    series
        The time series to transform
    transformer
        An object exposing a `transform` method; it receives the series
        values reshaped to `(-1, series.width)`.

    Returns
    -------
    TimeSeries
        A new time series holding the transformed values, with the time
        index and frequency of `series`.
    """
    value_matrix = series.values().reshape((-1, series.width))
    transformed_values = transformer.transform(value_matrix)
    return TimeSeries.from_times_and_values(
        series.time_index(), transformed_values, series.freq())