示例#1
0
    def __call__(self, inference_data_loader: InferenceDataLoader,
                 prediction_net: BlockType, input_names: List[str], freq: str,
                 output_transform: Optional[OutputTransform],
                 num_samples: Optional[int], **kwargs) -> Iterator[Forecast]:
        """
        Run the prediction network over every batch of the loader and yield
        one QuantileForecast per series in the batch.
        """
        for batch in inference_data_loader:
            net_inputs = [batch[name] for name in input_names]
            predictions = prediction_net(*net_inputs).asnumpy()
            if output_transform is not None:
                predictions = output_transform(batch, predictions)

            if num_samples:
                # Quantile forecasts are deterministic, so a sample count is meaningless.
                log_once(
                    "Forecast is not sample based. Ignoring parameter `num_samples` from predict method."
                )

            idx = -1
            for idx, prediction in enumerate(predictions):
                item_id = (batch[FieldName.ITEM_ID][idx]
                           if FieldName.ITEM_ID in batch else None)
                info = batch["info"][idx] if "info" in batch else None
                yield QuantileForecast(
                    prediction,
                    start_date=batch["forecast_start"][idx],
                    freq=freq,
                    item_id=item_id,
                    info=info,
                    forecast_keys=self.quantiles,
                )
            # Every series in the batch must have produced exactly one forecast.
            assert idx + 1 == len(batch["forecast_start"])
示例#2
0
    def __call__(self, inference_data_loader: DataLoader, prediction_net,
                 input_names: List[str], freq: str,
                 output_transform: Optional[OutputTransform],
                 num_samples: Optional[int], **kwargs) -> Iterator[Forecast]:
        """
        Iterate the data loader, run the network on each batch, and yield a
        QuantileForecast for every series in every batch.
        """
        for batch in inference_data_loader:
            net_args = [batch[name] for name in input_names]
            batch_outputs = predict_to_numpy(prediction_net, net_args)
            if output_transform is not None:
                batch_outputs = output_transform(batch, batch_outputs)

            if num_samples:
                # Quantile output is deterministic; warn that `num_samples` is ignored.
                log_once(NOT_SAMPLE_BASED_MSG)

            pos = -1
            for pos, arrays in enumerate(batch_outputs):
                item_id = (batch[FieldName.ITEM_ID][pos]
                           if FieldName.ITEM_ID in batch else None)
                info = batch["info"][pos] if "info" in batch else None
                yield QuantileForecast(
                    arrays,
                    start_date=batch["forecast_start"][pos],
                    freq=freq,
                    item_id=item_id,
                    info=info,
                    forecast_keys=self.quantiles,
                )
            # Sanity check: one forecast was emitted per batch entry.
            assert pos + 1 == len(batch["forecast_start"])
示例#3
0
 def to_quantile_forecast(self, quantiles: List[Union[float, str]]):
     """
     Convert this forecast into a QuantileForecast evaluated at the given
     quantile levels, carrying over start date, item id, and info.
     """
     arrays = np.array([self.quantile(level) for level in quantiles])
     return QuantileForecast(
         forecast_arrays=arrays,
         forecast_keys=quantiles,
         start_date=self.start_date,
         item_id=self.item_id,
         info=self.info,
     )
示例#4
0
 def get(self, index: int) -> QuantileForecast:
     """
     Return the quantile forecast stored at the given index.

     Intended mainly for visualizing a single forecast.
     """
     start = pd.Timestamp(self.start_dates[index], freq=self.freq)
     return QuantileForecast(
         forecast_arrays=self.values[index],
         start_date=start,
         freq=self.freq.freqstr,  # type: ignore
         item_id=self.item_ids[index],
         forecast_keys=self.quantiles,
     )
示例#5
0
    def _gluonts_forecasts_to_data_frame(
            self, forecasts: List[Forecast],
            quantile_levels: List[float]) -> TimeSeriesDataFrame:
        """
        Convert a list of GluonTS forecasts into a long-format
        TimeSeriesDataFrame with one column per quantile plus a mean column.

        SampleForecasts are first converted to QuantileForecasts (their sample
        means are preserved); for forecasts without an explicit mean, the 0.5
        quantile is used as the mean.
        """
        # if predictions are gluonts SampleForecasts, convert them to quantile forecasts
        # but save the means
        forecast_means = []
        # GluonTS forecast keys are strings, so normalize the levels once here.
        quantiles = [str(q) for q in quantile_levels]

        # NOTE(review): assumes the list is homogeneous — only the first
        # element's type is inspected.
        if isinstance(forecasts[0], SampleForecast):
            transformed_targets = []
            for forecast in forecasts:
                tmp = []
                for quantile in quantiles:
                    tmp.append(forecast.quantile(quantile))
                transformed_targets.append(
                    QuantileForecast(
                        forecast_arrays=np.array(tmp),
                        start_date=forecast.start_date,
                        freq=forecast.freq,
                        forecast_keys=quantiles,
                        item_id=forecast.item_id,
                    ))
                # Keep the sample mean so it is not lost in the conversion.
                forecast_means.append(forecast.mean)

            forecasts = copy.deepcopy(transformed_targets)

        # sanity check to ensure all quantiles are accounted for
        assert all(q in forecasts[0].forecast_keys for q in quantiles), (
            "Some forecast quantiles are missing from GluonTS forecast outputs. Was"
            " the model trained to forecast all quantiles?")
        result_dfs = []
        item_ids = (d.item_id for d in forecasts)

        for i, item_id in enumerate(item_ids):
            # `forecast_means` is only populated in the SampleForecast branch
            # above; otherwise fall back to the median.
            item_forecast_dict = dict(
                mean=forecast_means[i] if forecast_means else (
                    forecasts[i].
                    quantile(0.5)  # assign P50 to mean if mean is missing
                ))
            for quantile in quantiles:
                item_forecast_dict[quantile] = forecasts[i].quantile(
                    str(quantile))

            # One frame per item: quantile columns + item id + timestamp index.
            df = pd.DataFrame(item_forecast_dict)
            df[ITEMID] = item_id
            df[TIMESTAMP] = pd.date_range(
                start=forecasts[i].start_date,
                periods=self.prediction_length,
                freq=self.freq,
            )
            result_dfs.append(df)

        return TimeSeriesDataFrame.from_data_frame(pd.concat(result_dfs))
示例#6
0
def test_infer_quantile_forecast(
    quantile_predictions,
    inference_quantiles,
    inferred_quantile_predictions,
):
    """Check that QuantileForecast.quantile matches the expected predictions."""
    tol = 1e-5
    forecast_keys = list(quantile_predictions.keys())
    output = np.array(list(quantile_predictions.values()))
    quantile_forecast = QuantileForecast(
        output,
        start_date=Timestamp(0),
        freq="h",
        forecast_keys=forecast_keys,
    )
    if len(forecast_keys) == 1:
        # Singleton case: only the matching quantile can be compared directly.
        for q in inference_quantiles:
            if forecast_keys[0] != str(q):
                continue
            deviation = sum(
                inferred_quantile_predictions[str(q)]
                - quantile_forecast.quantile(q)
            )
            assert (
                deviation < tol
            ), f"infer_quantile_forecast failed for singleton quantile."
    else:
        # Multi-quantile case: aggregate the deviation over all requested levels.
        total_deviation = sum(
            sum(
                inferred_quantile_predictions[str(q)]
                - quantile_forecast.quantile(q)
            )
            for q in inference_quantiles
        )
        assert total_deviation < tol, f"infer_quantile_forecast failed."
示例#7
0
def test_evaluation_with_QuantileForecast():
    """Evaluator must produce a finite wQuantileLoss for a pure QuantileForecast."""
    target = [2.4, 1.0, 3.0, 4.4, 5.5, 4.9] * 11
    index = pd.period_range(start="2012-01-11", freq="1D", periods=len(target))
    ts = pd.Series(index=index, data=target)

    evaluator = Evaluator(quantiles=("0.1", "0.2", "0.5"))

    # A single forecast carrying only the median.
    median_array = np.array([[2.4, 9.0, 3.0, 2.4, 5.5, 4.9] * 10])
    forecasts = [
        QuantileForecast(
            start_date=pd.Period("2012-01-11", freq="D"),
            forecast_arrays=median_array,
            forecast_keys=["0.5"],
        )
    ]

    agg_metric, _ = evaluator(iter([ts]), iter(forecasts))

    assert np.isfinite(agg_metric["wQuantileLoss[0.5]"])
示例#8
0
def test_evaluation_with_QuantileForecast():
    """Evaluator should yield a finite weighted quantile loss for a QuantileForecast."""
    target = [2.4, 1.0, 3.0, 4.4, 5.5, 4.9] * 10
    index = pd.date_range(start='2012-01-01', freq='1D', periods=len(target))
    ts = pd.Series(index=index, data=target)

    evaluator = Evaluator(quantiles=('0.1', '0.2', '0.5'))

    # Median-only forecast covering the same horizon as the target.
    forecasts = [
        QuantileForecast(
            start_date=pd.Timestamp('2012-01-01'),
            freq='D',
            forecast_arrays=np.array([[2.4, 9.0, 3.0, 2.4, 5.5, 4.9] * 10]),
            forecast_keys=['0.5'],
        )
    ]

    agg_metric, _ = evaluator(iter([ts]), iter(forecasts))

    assert np.isfinite(agg_metric['wQuantileLoss[0.5]'])
示例#9
0
 def _to_forecast(
     self,
     ag_output: np.ndarray,
     start_timestamp: pd.Period,
     item_id=None,
 ) -> Forecast:
     """
     Wrap a raw prediction array in a GluonTS Forecast: a QuantileForecast
     when quantiles were predicted, otherwise a single-sample SampleForecast.
     """
     if not self.quantiles_to_predict:
         # Point prediction: expose it as a single sample path.
         samples = ag_output.reshape((1, self.prediction_length))
         return SampleForecast(
             start_date=start_timestamp,
             item_id=item_id,
             samples=samples,
         )
     # Transpose so quantile levels index the first axis (presumably
     # matching self.forecast_keys — verify against the producer).
     arrays = ag_output.transpose()
     return QuantileForecast(
         start_date=start_timestamp,
         item_id=item_id,
         forecast_arrays=arrays,
         forecast_keys=self.forecast_keys,
     )
示例#10
0
    SampleForecast,
    DistributionForecast,
)

from gluonts.mx.distribution import Uniform

# 99 evenly spaced quantile levels: 0.01 … 0.99.
QUANTILES = np.arange(1, 100) / 100
# 101 sample paths of length 1 with values 0.00 … 1.00.
SAMPLES = np.arange(101).reshape(101, 1) / 100
START_DATE = pd.Timestamp(2017, 1, 1, 12)
FREQ = "1D"

# One fixture per Forecast subclass; all three describe (approximately) the
# same Uniform(0, 1) distribution over a single time step.
FORECASTS = {
    "QuantileForecast":
    QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=np.array(QUANTILES, str),
        freq=FREQ,
    ),
    "SampleForecast":
    SampleForecast(samples=SAMPLES, start_date=START_DATE, freq=FREQ),
    "DistributionForecast":
    DistributionForecast(
        distribution=Uniform(low=mx.nd.zeros(1), high=mx.nd.ones(1)),
        start_date=START_DATE,
        freq=FREQ,
    ),
}


@pytest.mark.parametrize("name", FORECASTS.keys())
def test_Forecast(name):
示例#11
0
def generate_forecasts(
    predictor: Predictor,
    dataset: Dataset,
    num_samples: int = 100,
    parallelize: bool = False,
) -> Tuple[QuantileForecasts, float]:
    """
    Generates the predictions of the given predictor for the provided dataset.
    The returned prediction object provides the forecasts along with some
    metadata.

    Args:
        predictor: The predictor which is used to make forecasts.
        dataset: The GluonTS dataset which is used for testing.
        num_samples: The number of samples to use for making predictions.
        parallelize: Whether predictions ought to be parallelized.

    Returns:
        The forecasts for the dataset.
        The average latency for generating a single forecast.
    """
    if parallelize:
        predictor = ParallelizedPredictor(predictor,
                                          num_workers=os.cpu_count())

    # First, perform the predictions...
    tic = time.time()
    # Second return value (the ground-truth iterator) is not needed here.
    forecast_pred, _ = make_evaluation_predictions(dataset, predictor,
                                                   num_samples)

    # ...and compute the quantiles
    # Deciles "0.1" … "0.9" as strings, matching GluonTS forecast keys.
    quantiles = [f"0.{i+1}" for i in range(9)]
    forecasts = []
    for i, forecast in tqdm(
            enumerate(forecast_pred),
            total=maybe_len(dataset),
            disable=not env.use_tqdm,
    ):
        result = None
        # Reuse the forecast directly only when it already carries exactly
        # the decile keys we need.
        if isinstance(forecast, QuantileForecast):
            if forecast.forecast_keys == quantiles:
                result = forecast
        elif isinstance(forecast, SampleForecast):
            quantile_forecast = forecast.to_quantile_forecast(
                quantiles)  # type: ignore
            result = quantile_forecast

        if result is None:
            # If none of the above checks added a quantile forecast, we resort to a method that
            # should work on all types of forecasts
            result = QuantileForecast(
                forecast_arrays=np.stack(
                    [forecast.quantile(q) for q in quantiles], axis=0),
                start_date=forecast.start_date,
                freq=forecast.freq,
                forecast_keys=quantiles,
                item_id=forecast.item_id,
            )

        if result.item_id is None:
            # Fall back to the positional index as the item id.
            result.item_id = i
        forecasts.append(result)

    toc = time.time()

    # Then, we compute the prediction latency
    latency = (toc - tic) / len(dataset)
    if parallelize:
        # We observed that N CPUs only brought a speedup of ~N/2
        latency = latency * (cast(int, os.cpu_count()) / 2)

    # And convert the list of forecasts into a QuantileForecasts object
    quantile_forecasts = QuantileForecasts(
        values=np.stack([f.forecast_array for f in forecasts]),
        start_dates=np.array([f.start_date for f in forecasts]),
        item_ids=np.array([str(f.item_id) for f in forecasts]),
        freq=to_offset(forecasts[0].freq),  # type: ignore
        quantiles=forecasts[0].forecast_keys,
    )
    return quantile_forecasts, latency
示例#12
0
    def predict(
        self,
        dataset: Dataset,
        num_samples: int = 100,
        intervals: Optional[List] = None,
        save_info: bool = False,
        **kwargs,
    ) -> Iterator[Union[SampleForecast, QuantileForecast]]:
        """
        Run the underlying R forecast method on every series in the dataset
        and yield one forecast per series: a QuantileForecast for quantile
        methods, otherwise a SampleForecast.

        Args:
            dataset: Series to forecast.
            num_samples: Number of sample paths requested from R (ignored by
                point/quantile methods, which override `output_types`).
            intervals: Prediction-interval levels passed to R for quantile
                methods; defaults to 0..90 in steps of 10 when omitted.
            save_info: Whether to attach the R console output to each
                SampleForecast's `info`.
        """
        if self.method_name in POINT_FORECAST_METHODS:
            print("Overriding `output_types` to `mean` since"
                  f" {self.method_name} is a point forecast method.")
        elif self.method_name in QUANTILE_FORECAST_METHODS:
            print("Overriding `output_types` to `quantiles` since "
                  f"{self.method_name} is a quantile forecast method.")

        for data in dataset:
            if self.trunc_length:
                # Keep only the most recent `trunc_length` observations.
                data["target"] = data["target"][-self.trunc_length:]

            params = self.params.copy()
            params["num_samples"] = num_samples

            if self.method_name in POINT_FORECAST_METHODS:
                params["output_types"] = ["mean"]
            elif self.method_name in QUANTILE_FORECAST_METHODS:
                params["output_types"] = ["quantiles", "mean"]
                if intervals is None:
                    # This corresponds to quantiles: 0.05 to 0.95 in steps of 0.05.
                    params["intervals"] = list(range(0, 100, 10))
                else:
                    # R expects the interval levels sorted ascending.
                    params["intervals"] = np.sort(intervals).tolist()

            forecast_dict, console_output = self._run_r_forecast(
                data, params, save_info=save_info)

            if self.method_name in QUANTILE_FORECAST_METHODS:
                quantile_forecasts_dict = forecast_dict["quantiles"]

                yield QuantileForecast(
                    forecast_arrays=np.array(
                        list(quantile_forecasts_dict.values())),
                    forecast_keys=list(quantile_forecasts_dict.keys()),
                    start_date=forecast_start(data),
                    freq=self.freq,
                    item_id=data.get("item_id", None),
                )
            else:
                if self.method_name in POINT_FORECAST_METHODS:
                    # Handling special cases outside of R is better, since it is more visible and is easier to change.
                    # Repeat mean forecasts `num_samples` times.
                    samples = np.reshape(
                        forecast_dict["mean"] * params["num_samples"],
                        (params["num_samples"], self.prediction_length),
                    )
                else:
                    samples = np.array(forecast_dict["samples"])

                expected_shape = (
                    params["num_samples"],
                    self.prediction_length,
                )
                assert (
                    samples.shape == expected_shape
                ), f"Expected shape {expected_shape} but found {samples.shape}"
                info = ({
                    "console_output": "\n".join(console_output)
                } if save_info else None)
                yield SampleForecast(
                    samples,
                    forecast_start(data),
                    self.freq,
                    info=info,
                    item_id=data.get("item_id", None),
                )