示例#1
0
def simulate_forecast_ar(series: TimeSeries,
                         model: AutoRegressiveModel,
                         start: pd.Timestamp,
                         fcast_horizon_n: int,
                         trim_to_series: bool = True,
                         verbose: bool = False) -> TimeSeries:
    """
    Returns a TimeSeries containing the forecasts that would have been obtained from a given AutoRegressiveModel,
    on a given forecast time horizon.

    For every prediction time (starting at `start` and advancing by `series.freq()`), the model is re-fitted on
    `series.drop_end(pred_time)` and asked to predict `fcast_horizon_n` points. Only the last point of each
    prediction (and its timestamp) is kept in the returned series.

    :param series: the main series to forecast
    :param model: the AutoRegressiveModel to use
    :param start: when the forecasts start (i.e., the first time at which a prediction is produced for a future time)
    :param fcast_horizon_n: the forecast horizon
    :param trim_to_series: whether the returned predicted series has the end trimmed to match the end of the main series
    :param verbose: whether to show progress; forwarded to `_build_iterator`. This function itself prints nothing.
    :return: a TimeSeries built from the last predicted value and timestamp of every simulated forecast
    :raises AssertionError: if `start` is not in `series`, or if `start` is the last timestamp of `series`
    """
    # Explicit raises instead of `assert` so the checks are not stripped under `python -O`.
    # AssertionError is kept so that existing callers observe the same exception type.
    if start not in series:
        raise AssertionError(
            'The provided start timestamp is not in the time series.')
    if start == series.end_time():
        raise AssertionError(
            'The provided start timestamp is the last timestamp of the time series'
        )

    # index (from the end) of the last timestamp that bounds the prediction times
    last_pred_index = -fcast_horizon_n - 2 if trim_to_series else -2
    last_pred_time = series.time_index()[last_pred_index]

    # build the prediction times in advance (to be able to use tqdm)
    # NOTE: the `<=` test means the final entry is one step past `last_pred_time`
    pred_times = [start]
    while pred_times[-1] <= last_pred_time:
        pred_times.append(pred_times[-1] + series.freq())

    # what we'll return
    values = []
    times = []

    # Progress reporting is delegated entirely to the iterator; no output is
    # produced here when `verbose` is False.
    for pred_time in _build_iterator(pred_times, verbose):
        train = series.drop_end(pred_time)  # build the training series

        model.fit(train)
        pred = model.predict(fcast_horizon_n)
        values.append(pred.values()[-1])  # store the N-th point
        times.append(pred.end_time())  # store the N-th timestamp

    return TimeSeries.from_times_and_values(pd.DatetimeIndex(times),
                                            np.array(values))
示例#2
0
def get_train_val_series(
        series: TimeSeries,
        start: pd.Timestamp,
        nr_points_val: int,
        nr_steps_iter: int = 1) -> List[Tuple[TimeSeries, TimeSeries]]:
    """
    Returns a list of (training_set, validation_set) pairs for backtesting.

    The series is repeatedly split with `series.split_after(...)` at a moving timestamp that begins at `start`
    and advances by `nr_steps_iter * series.freq()`. The validation set is the first `nr_points_val` points of
    the second half of the split. Iteration stops at the first validation set that has fewer than
    `nr_points_val` points.

    .. todo: this is expanding training window, implement optional sliding window

    :param series: The full time series needs to be split
    :param start: the start time of the earliest validation set
    :param nr_points_val: the number of points in the validation sets (must be strictly positive)
    :param nr_steps_iter: the number of time steps to iterate between the successive validation sets
                          (must be strictly positive)
    :return: a list of (training_set, validation_set) pairs
    :raises: whatever `raise_if_not` raises when `start` is not in `series`, when `start` is the last timestamp
             of `series`, or when `nr_points_val` / `nr_steps_iter` is not strictly positive
    """

    raise_if_not(start in series,
                 'The provided start timestamp is not in the time series.',
                 logger)
    raise_if_not(
        start != series.end_time(),
        'The provided start timestamp is the last timestamp of the time series',
        logger)
    # A non-positive validation size makes the `len(val) < nr_points_val` stop
    # condition below unreachable.
    raise_if_not(
        nr_points_val > 0,
        'The number of points in the validation sets must be strictly positive.',
        logger)
    # A zero step would never move the split point (infinite loop); a negative
    # one would walk backwards, away from the end of the series.
    raise_if_not(
        nr_steps_iter > 0,
        'The number of time steps between validation sets must be strictly positive.',
        logger)
    # TODO: maybe also check that valset_duration >= series frequency

    step = nr_steps_iter * series.freq()
    curr_val_start: pd.Timestamp = start
    series_pairs = []

    while True:
        train_series, val_series_all = series.split_after(curr_val_start)
        val_series = val_series_all.slice_n_points_after(
            val_series_all.start_time(), nr_points_val)

        # stop at the first validation set that cannot be filled completely
        if len(val_series) < nr_points_val:
            break

        series_pairs.append((train_series, val_series))
        curr_val_start = curr_val_start + step

    return series_pairs
示例#3
0
def simulate_forecast_regr(feature_series: List[TimeSeries],
                           target_series: TimeSeries,
                           model: RegressiveModel,
                           start: pd.Timestamp,
                           fcast_horizon_n: int,
                           trim_to_series: bool = True,
                           verbose=False) -> TimeSeries:
    """
    Simulates the forecasts a RegressiveModel would have produced over time, for a given forecast horizon.

    At each prediction time (from `start`, advancing by `target_series.freq()`), the model is fitted on the
    feature and target series passed through `drop_after(pred_time)`, then asked to predict from the
    `fcast_horizon_n` feature points sliced after `pred_time + target_series.freq()`. Only the last value
    and last timestamp of each prediction are retained.

    .. todo: review and add to documentation.
    .. todo: optionally also return weights, when those are available in model
    .. todo: (getattr(model.model, 'coef_', None) is not None)

    :param feature_series: the feature time series of the regressive model
    :param target_series: the target time series of the regressive model (i.e., the series to predict)
    :param model: the RegressiveModel to use
    :param start: when the forecasts start (i.e., the first time at which a prediction is produced for a future time)
    :param fcast_horizon_n: the forecast horizon
    :param trim_to_series: whether the returned predicted series has the end trimmed to match the end of the main series
    :param verbose: whether to print progress (forwarded to `build_tqdm_iterator`)
    :return: a TimeSeries assembled from the last predicted value and timestamp of every simulated forecast
    """
    # --- input validation -------------------------------------------------
    index_matches = [
        features.has_same_time_as(target_series)
        for features in feature_series
    ]
    raise_if_not(all(index_matches),
                 'All provided time series must have the same time index',
                 logger)
    raise_if_not(start in target_series,
                 'The provided start timestamp is not in the time series.',
                 logger)
    raise_if_not(
        start != target_series.end_time(),
        'The provided start timestamp is the last timestamp of the time series',
        logger)

    # --- prediction schedule ----------------------------------------------
    if trim_to_series:
        bound_position = -fcast_horizon_n - 2
    else:
        bound_position = -2
    last_pred_time = target_series.time_index()[bound_position]

    # The schedule is materialised up front so it can be wrapped by tqdm.
    cursor = start
    pred_times = [cursor]
    while cursor <= last_pred_time:
        cursor = cursor + target_series.freq()
        pred_times.append(cursor)

    # --- simulate one forecast per scheduled time -------------------------
    forecast_values = []
    forecast_times = []

    for pred_time in build_tqdm_iterator(pred_times, verbose):
        # training inputs for this prediction time
        fit_features = [
            features.drop_after(pred_time) for features in feature_series
        ]
        fit_target = target_series.drop_after(pred_time)

        # feature windows the model predicts from
        horizon_features = []
        for features in feature_series:
            window = features.slice_n_points_after(
                pred_time + target_series.freq(), fcast_horizon_n)
            horizon_features.append(window)

        model.fit(fit_features, fit_target)
        forecast = model.predict(horizon_features)

        # keep only the N-th (last) point of the forecast and its timestamp
        forecast_values.append(forecast.values()[-1])
        forecast_times.append(forecast.end_time())

    result_index = pd.DatetimeIndex(forecast_times)
    result_values = np.array(forecast_values)
    return TimeSeries.from_times_and_values(result_index, result_values)