Example #1
    def helper_test_shift(test_case, test_series: TimeSeries):
        seriesA = test_series.shift(0)
        test_case.assertTrue(seriesA == test_series)

        seriesB = test_series.shift(1)
        test_case.assertTrue(seriesB.time_index().equals(
            test_series.time_index()[1:].append(
                pd.DatetimeIndex(
                    [test_series.time_index()[-1] + test_series.freq()]))))

        seriesC = test_series.shift(-1)
        test_case.assertTrue(seriesC.time_index().equals(
            pd.DatetimeIndex([
                test_series.time_index()[0] - test_series.freq()
            ]).append(test_series.time_index()[:-1])))

        with test_case.assertRaises(OverflowError):
            test_series.shift(1e+6)

        seriesM = TimeSeries.from_times_and_values(
            pd.date_range('20130101', '20130601', freq='m'), range(5))
        with test_case.assertRaises(OverflowError):
            seriesM.shift(1e+4)

        seriesD = TimeSeries.from_times_and_values(pd.date_range(
            '20130101', '20130101'),
                                                   range(1),
                                                   freq='D')
        seriesE = seriesD.shift(1)
        test_case.assertEqual(seriesE.time_index()[0],
                              pd.Timestamp('20130102'))
Example #2
    def helper_test_drop(test_case, test_series: TimeSeries):
        seriesA = test_series.drop_after(pd.Timestamp('20130105'))
        test_case.assertEqual(seriesA.end_time(),
                              pd.Timestamp('20130105') - test_series.freq())
        test_case.assertTrue(
            np.all(seriesA.time_index() < pd.Timestamp('20130105')))

        seriesB = test_series.drop_before(pd.Timestamp('20130105'))
        test_case.assertEqual(seriesB.start_time(),
                              pd.Timestamp('20130105') + test_series.freq())
        test_case.assertTrue(
            np.all(seriesB.time_index() > pd.Timestamp('20130105')))

        test_case.assertEqual(test_series.freq_str(), seriesA.freq_str())
        test_case.assertEqual(test_series.freq_str(), seriesB.freq_str())
Example #3
def make_and_compare_predictions(
    gathered_stats: TimeSeries,
    predictions_to_make: ModelsToMake,
    prediction_duration_past: pd.Timedelta = _ONE_DAY,
    prediction_duration_future: pd.Timedelta = None,
    metric: Metric = metrics.coefficient_of_variation,
    transform: bool = False,
) -> PredictionEvaluations:
    """Run multiple forecasts and compare their accuracy."""
    train, actual = gathered_stats.split_after(
        gathered_stats.end_time() - prediction_duration_past, )
    n_pred: int = len(actual)
    if prediction_duration_future:
        n_pred += int(prediction_duration_future / gathered_stats.freq())
    if transform:
        forecasts = make_forecasts_ensure_positive(
            train=train,
            n_pred=n_pred,
            predictions_to_make=predictions_to_make,
        )
    else:
        forecasts = make_forecasts(
            train=train,
            n_pred=n_pred,
            predictions_to_make=predictions_to_make,
        )
    return PredictionEvaluations(
        predictions=forecasts,
        evaluations=compare_predictions(actual, forecasts, metric),
    )
Example #4
def _auto_fill(series: TimeSeries, **interpolate_kwargs) -> TimeSeries:
    """
    This function fills the missing values in the TimeSeries `series`,
    using the `pandas.Dataframe.interpolate()` method.

    Parameters
    ----------
    series
        The time series
    interpolate_kwargs
        Keyword arguments for `pandas.Dataframe.interpolate()`.
        See `the documentation
        <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.interpolate.html>`_
        for the list of supported parameters.
    Returns
    -------
    TimeSeries
        A new TimeSeries with all missing values filled according to the rules above.
    """

    series_temp = series.pd_dataframe()

    # pandas interpolate wrapper, with chosen `method`
    if 'limit_direction' not in interpolate_kwargs:
        interpolate_kwargs['limit_direction'] = 'both'
    interpolate_kwargs['inplace'] = True
    series_temp.interpolate(**interpolate_kwargs)

    return TimeSeries.from_times_and_values(series.time_index(), series_temp.values, series.freq())
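For context, a minimal usage sketch of `_auto_fill` on hypothetical data (it assumes the same darts version as the snippet, where `TimeSeries.from_times_and_values` accepts a pandas DatetimeIndex and a values array that may contain NaNs):

import numpy as np
import pandas as pd

# Hypothetical daily series with a two-point gap in the middle.
times = pd.date_range('20130101', periods=6, freq='D')
values = np.array([1.0, 2.0, np.nan, np.nan, 5.0, 6.0])
series = TimeSeries.from_times_and_values(times, values)

# Any extra kwargs (e.g. method='spline', order=2) are forwarded to
# pandas.DataFrame.interpolate().
filled = _auto_fill(series, method='linear')
# The gap is expected to be filled with 3.0 and 4.0 by linear interpolation.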
Example #5
def auto_arima_analyzed_log(  # noqa: WPS211  # Found too many arguments
    gathered_stats: TimeSeries,
    start_arima_params: ArimaParams = None,
    max_arima_params: ArimaParams = None,
    component_index: int = None,
    seasonal: bool = False,
    seasonal_length: pd.Timedelta = _ONE_DAY,
    **autoarima_kwargs: Mapping[str, Any],
) -> AutoARIMA:
    """ARIMA forecast with atuo-optimized params.

    Wraps darts.models.arima.AutoARIMA.
    """
    if start_arima_params is None:
        start_arima_params = ArimaParams(d=None)
    if max_arima_params is None:
        # Copy start_arima_params so that doubling the maxima below does not
        # also mutate the start parameters (they would otherwise be the same object).
        max_arima_params = ArimaParams(**asdict(start_arima_params))
        for param_name, start_value in asdict(max_arima_params).items():
            try:
                setattr(max_arima_params, param_name, max(start_value * 2, 1))
            except TypeError:  # start_value is None
                setattr(max_arima_params, param_name, 1)
    model: AutoARIMA = AutoARIMA(  # type: ignore  # untyped function
        start_p=start_arima_params.p,
        d=start_arima_params.d,
        start_q=start_arima_params.q,
        max_p=max_arima_params.p,
        max_d=max_arima_params.d,
        max_q=max_arima_params.q,
        seasonal=seasonal,
        m=int(seasonal_length / gathered_stats.freq()),
        **autoarima_kwargs,
    )
    model.fit(gathered_stats, component_index=component_index)
    return model
Example #6
def calculate_distance_wlen(series: TimeSeries,
                            peak_params: PeakParams) -> PeakParams:
    """Calculate the distance and wlen parameters for peak-finding.

    These parameters are calculated as a multiple of ``interval/series.freq()``
    """
    if not peak_params.distance:
        peak_params.distance = peak_params.interval / series.freq()
    if not peak_params.wlen:
        peak_params.wlen = peak_params.interval * 3
    return peak_params
Example #7
    def helper_test_append(test_case, test_series: TimeSeries):
        # reconstruct series
        seriesA, seriesB = test_series.split_after(pd.Timestamp('20130106'))
        test_case.assertEqual(seriesA.append(seriesB), test_series)
        test_case.assertEqual(
            seriesA.append(seriesB).freq(), test_series.freq())

        # Creating a gap is not allowed
        seriesC = test_series.drop_before(pd.Timestamp('20130107'))
        with test_case.assertRaises(ValueError):
            seriesA.append(seriesC)

        # Changing the frequency is not allowed
        seriesM = TimeSeries.from_times_and_values(
            pd.date_range('20130107', '20130507', freq='30D'), range(5))
        with test_case.assertRaises(ValueError):
            seriesA.append(seriesM)
Example #8
def _const_fill(series: TimeSeries, fill: float = 0) -> TimeSeries:
    """
    Fills the missing values of `series` with only the value provided (default zeroes).

    Parameters
    ----------
    series
        The TimeSeries to check for missing values.
    fill
        The value used to replace the missing values.

    Returns
    -------
    TimeSeries
        A TimeSeries, `series` with all missing values set to `fill`.
    """

    return TimeSeries.from_times_and_values(series.time_index(),
                                            series.pd_dataframe().fillna(value=fill),
                                            series.freq())
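A similar hedged sketch for `_const_fill`, again on hypothetical data:

import numpy as np
import pandas as pd

# Hypothetical series where missing readings should become a sentinel value.
times = pd.date_range('20130101', periods=4, freq='D')
series = TimeSeries.from_times_and_values(times, np.array([10.0, np.nan, np.nan, 13.0]))

filled = _const_fill(series, fill=0.0)  # both NaNs are replaced by 0.0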
Example #9
    def inverse_transform(self, series: TimeSeries, *args,
                          **kwargs) -> TimeSeries:
        """
        Performs the inverse transformation on a time series

        Parameters
        ----------
        series
            The time series to inverse transform

        Returns
        -------
        TimeSeries
            The inverse-transformed time series
        """
        super().inverse_transform(series, *args, **kwargs)
        return TimeSeries.from_times_and_values(
            series.time_index(),
            self.transformer.inverse_transform(series.values().reshape(
                (-1, series.width))), series.freq())
Example #10
    def transform(self, series: TimeSeries, *args, **kwargs) -> TimeSeries:
        """
        Returns a new time series, transformed with this (fitted) scaler.
        This does not handle series with confidence intervals - the intervals are discarded.

        Parameters
        ----------
        series
            The time series to transform

        Returns
        -------
        TimeSeries
            A new time series, transformed with this (fitted) scaler.
        """
        super().transform(series, *args, **kwargs)
        return TimeSeries.from_times_and_values(
            series.time_index(),
            self.transformer.transform(series.values().reshape(
                (-1, series.width))), series.freq())
Example #11
def extract_subseries(series: TimeSeries,
                      min_gap_size: Optional[int] = 1) -> List[TimeSeries]:
    """
    Partitions the series into a sequence of sub-series by using significant gaps of missing values

    Parameters
    ----------
    series
        The TimeSeries to partition into sub-series

    min_gap_size
        The minimum number of contiguous missing values to consider a gap as significant. Defaults to 1.

    Returns
    -------
    subseries
        A list of TimeSeries, sub-series without significant gaps of missing values
    """

    # Remove null values from the series extremes
    series = series.strip()
    freq = series.freq()

    if series.pd_dataframe().isna().sum().sum() == 0:
        return [series]

    # Get start/end times of sub-series without gaps of missing values
    gaps_df = series.gaps().query(f'gap_size>={min_gap_size}')
    start_times = [series.start_time()] + (gaps_df['gap_end'] + freq).to_list()
    end_times = (gaps_df['gap_start'] -
                 freq).to_list() + [series.end_time() + freq]

    subseries = []
    for start, end in zip(start_times, end_times):
        subseries.append(series[start:end])

    return subseries
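A minimal usage sketch for `extract_subseries` on hypothetical data (assuming the darts version of these snippets, where `gaps()` reports `gap_start`, `gap_end` and `gap_size`):

import numpy as np
import pandas as pd

# Hypothetical daily series with a two-step gap.
times = pd.date_range('20130101', periods=8, freq='D')
values = np.array([1.0, 2.0, 3.0, np.nan, np.nan, 6.0, 7.0, 8.0])
series = TimeSeries.from_times_and_values(times, values)

parts = extract_subseries(series, min_gap_size=2)
# Expected result: two sub-series, one covering Jan 1-3 and one covering Jan 6-8.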
Example #12
    def historical_forecasts(
            self,
            series: TimeSeries,
            covariates: Optional[TimeSeries] = None,
            start: Union[pd.Timestamp, float, int] = 0.5,
            forecast_horizon: int = 1,
            stride: int = 1,
            retrain: bool = True,
            overlap_end: bool = False,
            last_points_only: bool = True,
            verbose: bool = False) -> Union[TimeSeries, List[TimeSeries]]:
        """
        Computes the historical forecasts the model would have produced with an expanding training window
        and (by default) returns a time series created from the last point of each of these individual forecasts.
        To this end, it repeatedly builds a training set from the beginning of `series`. It trains the
        current model on the training set, emits a forecast of length equal to `forecast_horizon`, and then moves
        the end of the training set forward by `stride` time steps.

        By default, this method will return a single time series made up of the last point of each
        historical forecast. This time series will thus have a frequency of `series.freq() * stride`.
        If `last_points_only` is set to False, it will instead return a list of the historical forecasts.

        By default, this method always re-trains the models on the entire available history,
        corresponding to an expanding window strategy.
        If `retrain` is set to False (useful for models for which training might be time-consuming, such as
        deep learning models), the model will only be trained on the initial training window
        (up to `start` time stamp), and only if it has not been trained before. Then, at every iteration, the
        newly expanded input sequence will be fed to the model to produce the new output.

        Parameters
        ----------
        series
            The target time series to use to successively train and evaluate the historical forecasts
        covariates
            An optional covariate series. This applies only if the model supports covariates.
        start
            The first point of time at which a prediction is computed for a future time.
            This parameter supports 3 different data types: `float`, `int` and `pandas.Timestamp`.
            In the case of `float`, the parameter will be treated as the proportion of the time series
            that should lie before the first prediction point.
            In the case of `int`, the parameter will be treated as an integer index to the time index of
            `series` that will be used as first prediction time.
            In case of `pandas.Timestamp`, this time stamp will be used to determine the first prediction time
            directly.
        forecast_horizon
            The forecast horizon for the predictions
        stride
            The number of time steps between two consecutive predictions.
        retrain
            Whether to retrain the model for every prediction or not. Currently only `TorchForecastingModel`
            instances such as `RNNModel`, `TCNModel`, `NBEATSModel` and `TransformerModel` support
            setting `retrain` to `False`.
        overlap_end
            Whether the returned forecasts can go beyond the series' end or not
        last_points_only
            Whether to retain only the last point of each historical forecast.
            If set to True, the method returns a single `TimeSeries` containing the successive point forecasts.
            Otherwise returns a list of historical `TimeSeries` forecasts.
        verbose
            Whether to print progress

        Returns
        -------
        TimeSeries or List[TimeSeries]
            By default, a single TimeSeries instance created from the last point of each individual forecast.
            If `last_points_only` is set to False, a list of the historical forecasts.
        """

        if covariates is not None:
            raise_if_not(
                series.has_same_time_as(covariates),
                'The provided series and covariates must have the same time index.'
            )

        # prepare the start parameter -> pd.Timestamp
        start = get_timestamp_at_point(start, series)

        # build the prediction times in advance (to be able to use tqdm)
        if not overlap_end:
            last_valid_pred_time = series.time_index()[-1 - forecast_horizon]
        else:
            last_valid_pred_time = series.time_index()[-2]

        pred_times = [start]
        while pred_times[-1] < last_valid_pred_time:
            # compute the next prediction time and add it to pred times
            pred_times.append(pred_times[-1] + series.freq() * stride)

        # the last prediction time computed might have overshot last_valid_pred_time
        if pred_times[-1] > last_valid_pred_time:
            pred_times.pop(-1)

        iterator = _build_tqdm_iterator(pred_times, verbose)

        # Either store the whole forecasts or only the last points of each forecast, depending on last_points_only
        forecasts = []

        last_points_times = []
        last_points_values = []

        # TODO: We should find a better object oriented way of handling covariates in GlobalForecastingModel
        fit_signature = signature(self.fit)
        predict_signature = signature(self.predict)

        # iterate and forecast
        for pred_time in iterator:
            train = series.drop_after(pred_time)  # build the training series
            if covariates is not None:
                train_cov = covariates.drop_after(pred_time)

            if retrain:
                if covariates is not None and 'covariates' in fit_signature.parameters:
                    self.fit(series=train, covariates=train_cov)
                else:
                    self.fit(series=train)

            if covariates is not None and 'covariates' in predict_signature.parameters:
                forecast = self.predict(n=forecast_horizon,
                                        series=train,
                                        covariates=train_cov)
            else:
                if 'series' in predict_signature.parameters:
                    forecast = self.predict(n=forecast_horizon, series=train)
                else:
                    forecast = self.predict(n=forecast_horizon)

            if last_points_only:
                last_points_values.append(forecast.values()[-1])
                last_points_times.append(forecast.end_time())
            else:
                forecasts.append(forecast)

        if last_points_only:
            return TimeSeries.from_times_and_values(
                pd.DatetimeIndex(last_points_times),
                np.array(last_points_values),
                freq=series.freq() * stride)
        return forecasts
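A hedged usage sketch of `historical_forecasts` with a simple darts model (here `ExponentialSmoothing`; any model exposing `fit`/`predict` as described above should work):

import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import ExponentialSmoothing

# Hypothetical sine wave sampled daily.
times = pd.date_range('20130101', periods=100, freq='D')
series = TimeSeries.from_times_and_values(times, np.sin(np.arange(100) / 5.0))

model = ExponentialSmoothing()
backtest = model.historical_forecasts(
    series,
    start=0.5,              # begin forecasting halfway through the series
    forecast_horizon=3,     # each forecast looks 3 steps ahead
    stride=1,
    retrain=True,           # expanding-window retraining at every step
    last_points_only=True,  # keep only the 3-step-ahead point of each forecast
)
# `backtest` is a single TimeSeries of 3-step-ahead point forecasts.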
Example #13
def ts_inverse_transform(series: TimeSeries, transformer, *args,
                         **kwargs) -> TimeSeries:
    return TimeSeries.from_times_and_values(
        series.time_index(),
        transformer.inverse_transform(series.values().reshape(
            (-1, series.width))), series.freq())
Example #14
def ts_transform(series: TimeSeries, transformer) -> TimeSeries:
    return TimeSeries.from_times_and_values(
        series.time_index(),
        transformer.transform(series.values().reshape((-1, series.width))),
        series.freq())
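These last two helpers apply an already-fitted sklearn-style transformer column-wise and rebuild the TimeSeries. A minimal round-trip sketch, assuming scikit-learn's `MinMaxScaler` and hypothetical data:

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

times = pd.date_range('20130101', periods=5, freq='D')
series = TimeSeries.from_times_and_values(times, np.arange(1.0, 6.0))

scaler = MinMaxScaler()
scaler.fit(series.values().reshape((-1, series.width)))  # fit on the raw values

scaled = ts_transform(series, scaler)            # values mapped into [0, 1]
restored = ts_inverse_transform(scaled, scaler)  # recovers the original values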