def __call__(
    self,
    inference_data_loader: InferenceDataLoader,
    prediction_net: BlockType,
    input_names: List[str],
    freq: str,
    output_transform: Optional[OutputTransform],
    num_samples: Optional[int],
    **kwargs,
) -> Iterator[Forecast]:
    """Yield one ``QuantileForecast`` per series in every batch of the loader."""
    for batch in inference_data_loader:
        net_args = [batch[name] for name in input_names]
        batch_outputs = prediction_net(*net_args).asnumpy()
        if output_transform is not None:
            batch_outputs = output_transform(batch, batch_outputs)
        if num_samples:
            # Quantile forecasts carry no sample paths; warn once and ignore.
            log_once(
                "Forecast is not sample based. Ignoring parameter `num_samples` from predict method."
            )
        emitted = 0
        for idx, arrays in enumerate(batch_outputs):
            item_id = (
                batch[FieldName.ITEM_ID][idx]
                if FieldName.ITEM_ID in batch
                else None
            )
            info = batch["info"][idx] if "info" in batch else None
            yield QuantileForecast(
                arrays,
                start_date=batch["forecast_start"][idx],
                freq=freq,
                item_id=item_id,
                info=info,
                forecast_keys=self.quantiles,
            )
            emitted += 1
        # Every entry of the batch must produce exactly one forecast.
        assert emitted == len(batch["forecast_start"])
def __call__(
    self,
    inference_data_loader: DataLoader,
    prediction_net,
    input_names: List[str],
    freq: str,
    output_transform: Optional[OutputTransform],
    num_samples: Optional[int],
    **kwargs,
) -> Iterator[Forecast]:
    """Yield one ``QuantileForecast`` per series in every batch of the loader."""
    for batch in inference_data_loader:
        model_inputs = [batch[name] for name in input_names]
        raw_outputs = predict_to_numpy(prediction_net, model_inputs)
        if output_transform is not None:
            raw_outputs = output_transform(batch, raw_outputs)
        if num_samples:
            # Quantile forecasts are not sample based; warn once and ignore.
            log_once(NOT_SAMPLE_BASED_MSG)
        count = 0
        for position, forecast_arrays in enumerate(raw_outputs):
            item_id = (
                batch[FieldName.ITEM_ID][position]
                if FieldName.ITEM_ID in batch
                else None
            )
            extra_info = batch["info"][position] if "info" in batch else None
            yield QuantileForecast(
                forecast_arrays,
                start_date=batch["forecast_start"][position],
                freq=freq,
                item_id=item_id,
                info=extra_info,
                forecast_keys=self.quantiles,
            )
            count += 1
        # Sanity check: one forecast per batch entry.
        assert count == len(batch["forecast_start"])
def to_quantile_forecast(self, quantiles: List[Union[float, str]]):
    """
    Materialize the requested quantile levels of this forecast as a
    ``QuantileForecast`` (one row per level, in the given order).
    """
    arrays = np.array([self.quantile(level) for level in quantiles])
    return QuantileForecast(
        forecast_arrays=arrays,
        forecast_keys=quantiles,
        start_date=self.start_date,
        item_id=self.item_id,
        info=self.info,
    )
def get(self, index: int) -> QuantileForecast:
    """
    Return the quantile forecast at the specified index.

    This method should typically only be used for visualizing single
    forecasts.
    """
    start = pd.Timestamp(self.start_dates[index], freq=self.freq)
    return QuantileForecast(
        forecast_arrays=self.values[index],
        start_date=start,
        freq=self.freq.freqstr,  # type: ignore
        item_id=self.item_ids[index],
        forecast_keys=self.quantiles,
    )
def _gluonts_forecasts_to_data_frame(
        self, forecasts: List[Forecast],
        quantile_levels: List[float]) -> TimeSeriesDataFrame:
    """
    Convert a list of GluonTS forecasts into a ``TimeSeriesDataFrame`` with one
    column per requested quantile plus a ``mean`` column, indexed by item id
    and timestamp.

    Sample-based forecasts are first collapsed to quantile forecasts; their
    empirical means are preserved separately so the ``mean`` column stays a
    true mean rather than the median.
    """
    # if predictions are gluonts SampleForecasts, convert them to quantile forecasts
    # but save the means
    forecast_means = []
    quantiles = [str(q) for q in quantile_levels]
    # NOTE(review): assumes the list is homogeneous — only the first element's
    # type is checked. Confirm callers never mix forecast types.
    if isinstance(forecasts[0], SampleForecast):
        transformed_targets = []
        for forecast in forecasts:
            tmp = []
            for quantile in quantiles:
                tmp.append(forecast.quantile(quantile))
            transformed_targets.append(
                QuantileForecast(
                    forecast_arrays=np.array(tmp),
                    start_date=forecast.start_date,
                    freq=forecast.freq,
                    forecast_keys=quantiles,
                    item_id=forecast.item_id,
                ))
            # Keep the sample mean; QuantileForecast cannot represent it.
            forecast_means.append(forecast.mean)
        forecasts = copy.deepcopy(transformed_targets)
    # sanity check to ensure all quantiles are accounted for
    assert all(q in forecasts[0].forecast_keys for q in quantiles), (
        "Some forecast quantiles are missing from GluonTS forecast outputs. Was"
        " the model trained to forecast all quantiles?")
    result_dfs = []
    item_ids = (d.item_id for d in forecasts)
    for i, item_id in enumerate(item_ids):
        # forecast_means is only populated in the SampleForecast branch above;
        # otherwise fall back to the median as the point forecast.
        item_forecast_dict = dict(
            mean=forecast_means[i] if forecast_means else (
                forecasts[i].
                quantile(0.5)  # assign P50 to mean if mean is missing
            ))
        for quantile in quantiles:
            item_forecast_dict[quantile] = forecasts[i].quantile(
                str(quantile))
        df = pd.DataFrame(item_forecast_dict)
        df[ITEMID] = item_id
        # One row per forecasted step, starting at the forecast start date.
        df[TIMESTAMP] = pd.date_range(
            start=forecasts[i].start_date,
            periods=self.prediction_length,
            freq=self.freq,
        )
        result_dfs.append(df)
    return TimeSeriesDataFrame.from_data_frame(pd.concat(result_dfs))
def test_evaluation_with_QuantileForecast():
    """Evaluator yields a finite weighted quantile loss for a median-only
    QuantileForecast, even when other requested quantiles are absent."""
    values = [2.4, 1.0, 3.0, 4.4, 5.5, 4.9] * 11
    timeline = pd.period_range(
        start="2012-01-11", freq="1D", periods=len(values)
    )
    series = pd.Series(data=values, index=timeline)
    median_forecast = QuantileForecast(
        start_date=pd.Period("2012-01-11", freq="D"),
        forecast_arrays=np.array([[2.4, 9.0, 3.0, 2.4, 5.5, 4.9] * 10]),
        forecast_keys=["0.5"],
    )
    evaluator = Evaluator(quantiles=("0.1", "0.2", "0.5"))
    agg_metric, _ = evaluator(iter([series]), iter([median_forecast]))
    assert np.isfinite(agg_metric["wQuantileLoss[0.5]"])
def test_evaluation_with_QuantileForecast():
    """Evaluator yields a finite weighted quantile loss for a median-only
    QuantileForecast, even when other requested quantiles are absent."""
    values = [2.4, 1.0, 3.0, 4.4, 5.5, 4.9] * 10
    timeline = pd.date_range(
        start='2012-01-01', freq='1D', periods=len(values)
    )
    series = pd.Series(data=values, index=timeline)
    median_forecast = QuantileForecast(
        start_date=pd.Timestamp('2012-01-01'),
        freq='D',
        forecast_arrays=np.array([[2.4, 9.0, 3.0, 2.4, 5.5, 4.9] * 10]),
        forecast_keys=['0.5'],
    )
    evaluator = Evaluator(quantiles=('0.1', '0.2', '0.5'))
    agg_metric, _ = evaluator(iter([series]), iter([median_forecast]))
    assert np.isfinite(agg_metric['wQuantileLoss[0.5]'])
def _to_forecast(
    self,
    ag_output: np.ndarray,
    start_timestamp: pd.Period,
    item_id=None,
) -> Forecast:
    """Wrap a raw AutoGluon prediction array in the matching GluonTS forecast."""
    if not self.quantiles_to_predict:
        # Point forecast: expose the prediction as a single "sample" row.
        flat = ag_output.reshape((1, self.prediction_length))
        return SampleForecast(
            start_date=start_timestamp,
            item_id=item_id,
            samples=flat,
        )
    # Quantile forecast: transpose so rows line up with self.forecast_keys.
    return QuantileForecast(
        start_date=start_timestamp,
        item_id=item_id,
        forecast_arrays=ag_output.transpose(),
        forecast_keys=self.forecast_keys,
    )
def test_infer_quantile_forecast(
    quantile_predictions,
    inference_quantiles,
    inferred_quantile_predictions,
):
    """
    Check that ``QuantileForecast.quantile`` reproduces the expected
    (possibly inferred) predictions for each requested quantile level.

    Fix vs. original: the assertions compared a *signed sum* of differences
    against ``tol``, so positive and negative errors could cancel (and any
    net-negative error passed trivially). Compare elementwise max-abs error
    instead. Also dropped ``f`` prefixes from placeholder-free strings (F541).
    """
    tol = 1e-5
    forecast_keys = list(quantile_predictions.keys())
    output = np.array(list(quantile_predictions.values()))
    quantile_forecast = QuantileForecast(
        output,
        start_date=Timestamp(0),
        freq="h",
        forecast_keys=forecast_keys,
    )
    if len(forecast_keys) == 1:
        # Singleton case: only the stored level can be checked against
        # the inferred predictions.
        for q in inference_quantiles:
            if forecast_keys[0] == str(q):
                diff = np.asarray(
                    inferred_quantile_predictions[str(q)]
                    - quantile_forecast.quantile(q)
                )
                assert np.max(np.abs(diff)) < tol, (
                    "infer_quantile_forecast failed for singleton quantile."
                )
    else:
        max_err = max(
            np.max(
                np.abs(
                    np.asarray(
                        inferred_quantile_predictions[str(q)]
                        - quantile_forecast.quantile(q)
                    )
                )
            )
            for q in inference_quantiles
        )
        assert max_err < tol, "infer_quantile_forecast failed."
    SampleForecast,
    DistributionForecast,
)
from gluonts.mx.distribution import Uniform

# Fixture data: 99 quantile levels (0.01 .. 0.99) and 101 evenly spaced
# sample paths over [0, 1], all sharing one start date and daily frequency.
QUANTILES = np.arange(1, 100) / 100
SAMPLES = np.arange(101).reshape(101, 1) / 100
START_DATE = pd.Timestamp(2017, 1, 1, 12)
FREQ = "1D"

# One representative instance of each Forecast subtype under test, keyed by
# class name so parametrized tests can report which type failed.
FORECASTS = {
    "QuantileForecast": QuantileForecast(
        forecast_arrays=QUANTILES.reshape(-1, 1),
        start_date=START_DATE,
        forecast_keys=np.array(QUANTILES, str),
        freq=FREQ,
    ),
    "SampleForecast": SampleForecast(
        samples=SAMPLES, start_date=START_DATE, freq=FREQ
    ),
    "DistributionForecast": DistributionForecast(
        distribution=Uniform(low=mx.nd.zeros(1), high=mx.nd.ones(1)),
        start_date=START_DATE,
        freq=FREQ,
    ),
}


@pytest.mark.parametrize("name", FORECASTS.keys())
def test_Forecast(name):
def generate_forecasts(
    predictor: Predictor,
    dataset: Dataset,
    num_samples: int = 100,
    parallelize: bool = False,
) -> Tuple[QuantileForecasts, float]:
    """
    Generates the predictions of the given predictor for the provided dataset.
    The returned prediction object provides the forecasts along with some
    metadata.

    Args:
        predictor: The predictor which is used to make forecasts.
        dataset: The GluonTS dataset which is used for testing.
        num_samples: The number of samples to use for making predictions.
        parallelize: Whether predictions ought to be parallelized.

    Returns:
        The forecasts for the dataset.
        The average latency for generating a single forecast.
    """
    if parallelize:
        predictor = ParallelizedPredictor(
            predictor, num_workers=os.cpu_count()
        )

    # First, perform the predictions...
    tic = time.time()
    forecast_pred, _ = make_evaluation_predictions(
        dataset, predictor, num_samples
    )

    # ...and compute the quantiles (deciles 0.1 .. 0.9 as string keys)
    quantiles = [f"0.{i+1}" for i in range(9)]
    forecasts = []
    for i, forecast in tqdm(
        enumerate(forecast_pred),
        total=maybe_len(dataset),
        disable=not env.use_tqdm,
    ):
        result = None
        if isinstance(forecast, QuantileForecast):
            # Reuse the forecast as-is only when its keys match exactly.
            if forecast.forecast_keys == quantiles:
                result = forecast
        elif isinstance(forecast, SampleForecast):
            quantile_forecast = forecast.to_quantile_forecast(
                quantiles)  # type: ignore
            result = quantile_forecast
        if result is None:
            # If none of the above checks added a quantile forecast, we resort
            # to a method that should work on all types of forecasts
            result = QuantileForecast(
                forecast_arrays=np.stack(
                    [forecast.quantile(q) for q in quantiles], axis=0),
                start_date=forecast.start_date,
                freq=forecast.freq,
                forecast_keys=quantiles,
                item_id=forecast.item_id,
            )
        if result.item_id is None:
            # Fall back to the positional index as a stable item identifier.
            result.item_id = i
        forecasts.append(result)
    toc = time.time()

    # Then, we compute the prediction latency
    latency = (toc - tic) / len(dataset)
    if parallelize:
        # We observed that N CPUs only brought a speedup of ~N/2
        latency = latency * (cast(int, os.cpu_count()) / 2)

    # And convert the list of forecasts into a QuantileForecasts object
    quantile_forecasts = QuantileForecasts(
        values=np.stack([f.forecast_array for f in forecasts]),
        start_dates=np.array([f.start_date for f in forecasts]),
        item_ids=np.array([str(f.item_id) for f in forecasts]),
        freq=to_offset(forecasts[0].freq),  # type: ignore
        quantiles=forecasts[0].forecast_keys,
    )
    return quantile_forecasts, latency
def predict(
    self,
    dataset: Dataset,
    num_samples: int = 100,
    intervals: Optional[List] = None,
    save_info: bool = False,
    **kwargs,
) -> Iterator[Union[SampleForecast, QuantileForecast]]:
    """
    Run the R forecast method over each series in the dataset, yielding a
    QuantileForecast for quantile methods and a SampleForecast otherwise.

    Parameters
    ----------
    dataset
        Series to forecast; each entry's "target" may be truncated to
        ``self.trunc_length`` trailing values.
    num_samples
        Number of sample paths requested from R (ignored by point methods,
        which replicate the mean instead).
    intervals
        Prediction-interval widths (percent) for quantile methods; defaults
        to 0, 10, ..., 90.
    save_info
        If True, attach the R console output to each SampleForecast's info.
    """
    if self.method_name in POINT_FORECAST_METHODS:
        print("Overriding `output_types` to `mean` since"
              f" {self.method_name} is a point forecast method.")
    elif self.method_name in QUANTILE_FORECAST_METHODS:
        print("Overriding `output_types` to `quantiles` since "
              f"{self.method_name} is a quantile forecast method.")
    for data in dataset:
        if self.trunc_length:
            # Keep only the most recent trunc_length observations.
            data["target"] = data["target"][-self.trunc_length:]
        params = self.params.copy()
        params["num_samples"] = num_samples
        if self.method_name in POINT_FORECAST_METHODS:
            params["output_types"] = ["mean"]
        elif self.method_name in QUANTILE_FORECAST_METHODS:
            params["output_types"] = ["quantiles", "mean"]
            # NOTE(review): interval handling appears specific to quantile
            # methods — confirm this nesting against the original file, as
            # the flattened source is ambiguous here.
            if intervals is None:
                # This corresponds to quantiles: 0.05 to 0.95 in steps
                # of 0.05.
                params["intervals"] = list(range(0, 100, 10))
            else:
                params["intervals"] = np.sort(intervals).tolist()
        forecast_dict, console_output = self._run_r_forecast(
            data, params, save_info=save_info)
        if self.method_name in QUANTILE_FORECAST_METHODS:
            # R returns a mapping from quantile key to array of predictions.
            quantile_forecasts_dict = forecast_dict["quantiles"]
            yield QuantileForecast(
                forecast_arrays=np.array(
                    list(quantile_forecasts_dict.values())),
                forecast_keys=list(quantile_forecasts_dict.keys()),
                start_date=forecast_start(data),
                freq=self.freq,
                item_id=data.get("item_id", None),
            )
        else:
            if self.method_name in POINT_FORECAST_METHODS:
                # Handling special cases outside of R is better, since it
                # is more visible and is easier to change.
                # Repeat mean forecasts `num_samples` times.
                samples = np.reshape(
                    forecast_dict["mean"] * params["num_samples"],
                    (params["num_samples"], self.prediction_length),
                )
            else:
                samples = np.array(forecast_dict["samples"])
            expected_shape = (
                params["num_samples"],
                self.prediction_length,
            )
            assert (
                samples.shape == expected_shape
            ), f"Expected shape {expected_shape} but found {samples.shape}"
            info = ({
                "console_output": "\n".join(console_output)
            } if save_info else None)
            yield SampleForecast(
                samples,
                forecast_start(data),
                self.freq,
                info=info,
                item_id=data.get("item_id", None),
            )