Пример #1
0
def test_seasonal_naive(predictor_cls, freq: str):
    """Check shapes, start dates and (for SeasonalNaivePredictor) values
    of forecasts produced on a randomly generated dataset."""
    predictor = predictor_cls(
        freq=freq,
        prediction_length=PREDICTION_LENGTH,
        season_length=SEASON_LENGTH,
    )
    dataset = list(
        generate_random_dataset(
            num_ts=NUM_TS,
            start_time=START_TIME,
            freq=freq,
            min_length=MIN_LENGTH,
            max_length=MAX_LENGTH,
        )
    )

    # get forecasts
    forecasts = list(predictor.predict(dataset))

    assert len(dataset) == NUM_TS
    assert len(forecasts) == NUM_TS

    # check forecasts are as expected
    for data, forecast in zip(dataset, forecasts):
        assert forecast.samples.shape == (1, PREDICTION_LENGTH)

        assert forecast.start_date == forecast_start(data)

        # specifically for the seasonal naive we can test the supposed result
        # directly: the prediction cycles through the last season of the
        # target. Modular indexing (instead of the former slice
        # `target[-S : -S + P]`) keeps the reference correct even when
        # PREDICTION_LENGTH >= SEASON_LENGTH, where that slice would be
        # empty or truncated.
        if predictor_cls == SeasonalNaivePredictor:
            target = data["target"]
            ref = [
                target[len(target) - SEASON_LENGTH + k % SEASON_LENGTH]
                for k in range(PREDICTION_LENGTH)
            ]
            assert np.allclose(forecast.samples[0], ref)
Пример #2
0
def test_forecasts(method_name):
    """End-to-end smoke test of RForecastPredictor on the "constant" dataset:
    forecast types, metadata and backtest metrics."""
    if method_name == "mlp":
        # https://stackoverflow.com/questions/56254321/error-in-ifncol-matrix-rep-argument-is-of-length-zero
        # https://cran.r-project.org/web/packages/neuralnet/index.html
        #   published before the bug fix: https://github.com/bips-hb/neuralnet/pull/21
        # The issue is still open on nnfor package: https://github.com/trnnick/nnfor/issues/8
        # TODO: look for a workaround.
        pytest.xfail(
            "MLP currently does not work because "
            "the `neuralnet` package is not yet updated with a known bug fix in ` bips-hb/neuralnet`"
        )

    dataset = datasets.get_dataset("constant")

    train_dataset = dataset.train
    test_dataset = dataset.test
    metadata = dataset.metadata

    freq = metadata.freq
    prediction_length = metadata.prediction_length

    predictor = RForecastPredictor(
        freq=freq,
        prediction_length=prediction_length,
        method_name=method_name,
    )
    predictions = list(predictor.predict(train_dataset))

    # quantile methods yield QuantileForecast, everything else SampleForecast
    if method_name in QUANTILE_FORECAST_METHODS:
        expected_type = QuantileForecast
    else:
        expected_type = SampleForecast

    assert all(
        isinstance(prediction, expected_type) for prediction in predictions
    )
    assert all(prediction.freq == freq for prediction in predictions)
    assert all(
        prediction.prediction_length == prediction_length
        for prediction in predictions
    )
    assert all(
        prediction.start_date == forecast_start(data)
        for data, prediction in zip(train_dataset, predictions)
    )

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_dataset,
        predictor=predictor,
        evaluator=Evaluator(),
    )
    for metric in ("mean_wQuantileLoss", "NRMSE", "RMSE"):
        assert agg_metrics[metric] < TOLERANCE
Пример #3
0
    def predict(
        self, dataset: Dataset, num_samples: Optional[int] = None
    ) -> Iterator[Forecast]:
        """
        Yield a ``RotbaumForecast`` for each time series in ``dataset``.

        Each series is featurized over its trailing
        ``preprocess_object.context_window_size`` time steps and handed,
        together with the fitted models, to the forecast object.

        Parameters
        ----------
        dataset
            Collection of time-series entries; each entry must contain a
            "target" array.
        num_samples
            Ignored — the underlying forecast is not sample based. Passing a
            truthy value logs a one-time notice.
        """
        context_length = self.preprocess_object.context_window_size

        # NOTE(review): a falsy value (e.g. 0) also skips the notice;
        # presumably callers only ever pass None or a positive int.
        if num_samples:
            log_once(
                "Forecast is not sample based. Ignoring parameter `num_samples` from predict method."
            )

        for ts in dataset:
            # Featurize only the trailing context window of the target.
            featurized_data = self.preprocess_object.make_features(
                ts, starting_index=len(ts["target"]) - context_length
            )
            yield RotbaumForecast(
                self.model_list,
                [featurized_data],
                start_date=forecast_start(ts),
                prediction_length=self.prediction_length,
                freq=self.freq,
            )
Пример #4
0
    def predict(
        self,
        dataset: Dataset,
        num_samples: int = 100,
        save_info: bool = False,
        **kwargs,
    ) -> Iterator[SampleForecast]:
        """Yield one ``SampleForecast`` per dataset entry, with samples
        produced by the external R forecast call."""
        for entry in dataset:
            if isinstance(entry, dict):
                data = entry
            else:
                data = entry.data
                if self.trunc_length:
                    # keep only the most recent observations
                    data = data[-self.trunc_length:]

            params = self.params.copy()
            params["num_samples"] = num_samples

            forecast_dict, console_output = self._run_r_forecast(
                data, params, save_info=save_info
            )

            samples = np.array(forecast_dict["samples"])
            expected_shape = (params["num_samples"], self.prediction_length)
            assert samples.shape == expected_shape, (
                f"Expected shape {expected_shape} but found {samples.shape}"
            )

            if save_info:
                info = {"console_output": "\n".join(console_output)}
            else:
                info = None

            yield SampleForecast(
                samples, forecast_start(data), self.freq, info=info
            )
Пример #5
0
 def predict_item(self, item: DataEntry) -> SampleForecast:
     """Return a forecast where every sample path is the constant
     ``self.value``."""
     shape = (self.num_samples, self.prediction_length)
     return SampleForecast(
         samples=np.full(shape, self.value),
         start_date=forecast_start(item),
         freq=self.freq,
         item_id=item.get("id"),
     )
Пример #6
0
    def predict_item(self, item: DataEntry) -> Forecast:
        """Run the Naive2 method on the item's target and wrap the point
        forecast as a single-sample ``SampleForecast``."""
        target = item["target"]
        start = forecast_start(item)

        assert len(target) >= 1, "all time series should have at least one data point"

        point_forecast = naive_2(target, self.prediction_length, self.freq)

        return SampleForecast(np.array([point_forecast]), start, self.freq)
Пример #7
0
    def predict_item(self, item: DataEntry) -> Forecast:
        """Naively repeat the last ``prediction_length`` target values.

        The single predicted path is broadcast (without copying) into
        ``num_samples`` identical sample paths.
        """
        last_window = item["target"][-self.prediction_length:]

        samples = np.broadcast_to(
            array=np.expand_dims(last_window, axis=0),
            shape=(self.num_samples, self.prediction_length),
        )

        return SampleForecast(
            samples=samples,
            start_date=forecast_start(item),
            freq=self.freq,
            item_id=item.get(FieldName.ITEM_ID),
        )
Пример #8
0
    def predict_item(self, item: DataEntry) -> SampleForecast:
        """Draw Gaussian samples matched to the (nan-aware) mean and
        standard deviation of the context window."""
        target = item["target"]
        if self.context_length is not None:
            target = target[-self.context_length:]

        # scale standard-normal draws to the window's empirical moments
        loc = np.nanmean(target)
        scale = np.nanstd(target)
        samples = scale * np.random.standard_normal(self.shape) + loc

        return SampleForecast(
            samples=samples,
            start_date=forecast_start(item),
            freq=self.freq,
            item_id=item.get(FieldName.ITEM_ID),
        )
Пример #9
0
    def predict_item(self, item: DataEntry) -> SampleForecast:
        """Recursive moving-average forecast.

        Each predicted step is the nan-aware mean over the trailing window
        of the history, which grows with every prediction appended to it.
        """
        # assumes item["target"] is a numpy array — TODO confirm
        history = item["target"].tolist()

        for _ in range(self.prediction_length):
            if self.context_length is None:
                window = history
            else:
                window = history[-self.context_length:]

            history.append(np.nanmean(window))

        prediction = history[-self.prediction_length:]

        return SampleForecast(
            samples=np.array([prediction]),
            start_date=forecast_start(item),
            freq=self.freq,
            item_id=item.get(FieldName.ITEM_ID),
        )
Пример #10
0
    def predict_item(self, item: DataEntry) -> Forecast:
        """Seasonal-naive forecast: cycle through the last full season.

        Falls back to the overall mean when the series is shorter than one
        season.
        """
        target = np.asarray(item["target"], np.float32)
        start = forecast_start(item)

        assert len(target) >= 1, "all time series should have at least one data point"

        if len(target) >= self.season_length:
            # np.resize repeats the last season cyclically up to
            # prediction_length, i.e. season[k % season_length] for each k.
            season = target[-self.season_length:]
            samples = np.resize(season, self.prediction_length).reshape(
                (1, self.prediction_length)
            )
        else:
            samples = np.full(
                shape=(1, self.prediction_length), fill_value=target.mean()
            )

        return SampleForecast(samples, start, self.freq)
Пример #11
0
    def predict(
        self,
        dataset: Dataset,
        num_samples: int = 1,
        save_info: bool = False,
        **kwargs,
    ) -> Iterator[SampleForecast]:
        """
        Yield one single-sample ``SampleForecast`` per dataset entry using
        Croston's method.

        Croston is a point-forecast (non-probabilistic) method, so
        ``num_samples`` is forced to 1; a warning is logged if the caller
        requested anything else.
        """
        if num_samples != 1:
            num_samples = 1
            logger.warning(
                "num_samples changed to 1 because Croston is non-probabilistic"
            )

        for entry in dataset:
            if isinstance(entry, dict):
                data = entry
            else:
                data = entry.data
                if self.trunc_length:
                    # keep only the most recent observations
                    data = data[-self.trunc_length:]

            params = self.params.copy()
            params["num_samples"] = num_samples
            forecast_dict = self._run_croston_forecast(data, params)
            samples = np.array(forecast_dict["samples"])
            expected_shape = (params["num_samples"], self.prediction_length)
            assert (
                samples.shape == expected_shape
            ), f"Expected shape {expected_shape} but found {samples.shape}"

            yield SampleForecast(
                samples,
                forecast_start(data),
                self.freq,
                # .get avoids a KeyError for entries without an item_id,
                # matching the other predictors, which use
                # data.get("item_id", None).
                item_id=data.get("item_id"),
            )
Пример #12
0
    def predict(
        self,
        dataset: Dataset,
        num_samples: int = 100,
        intervals: Optional[List] = None,
        save_info: bool = False,
        **kwargs,
    ) -> Iterator[Union[SampleForecast, QuantileForecast]]:
        """
        Yield one forecast per dataset entry.

        Quantile methods produce ``QuantileForecast``; point and sample
        methods produce ``SampleForecast``.

        Parameters
        ----------
        dataset
            Entries with a "target" array and optionally an "item_id".
        num_samples
            Number of sample paths. For point-forecast methods the mean is
            repeated this many times.
        intervals
            Prediction-interval widths (percent) for quantile methods;
            defaults to 0, 10, ..., 90.
        save_info
            When True, attach the R console output to sample forecasts.
        """
        if self.method_name in POINT_FORECAST_METHODS:
            print("Overriding `output_types` to `mean` since"
                  f" {self.method_name} is a point forecast method.")
        elif self.method_name in QUANTILE_FORECAST_METHODS:
            print("Overriding `output_types` to `quantiles` since "
                  f"{self.method_name} is a quantile forecast method.")

        for data in dataset:
            if self.trunc_length:
                # Copy the entry instead of truncating data["target"] in
                # place: mutating the original dict would silently shorten
                # the caller's dataset entries.
                data = dict(
                    data, target=data["target"][-self.trunc_length:]
                )

            params = self.params.copy()
            params["num_samples"] = num_samples

            if self.method_name in POINT_FORECAST_METHODS:
                params["output_types"] = ["mean"]
            elif self.method_name in QUANTILE_FORECAST_METHODS:
                params["output_types"] = ["quantiles", "mean"]
                if intervals is None:
                    # This corresponds to quantiles: 0.05 to 0.95 in steps of 0.05.
                    params["intervals"] = list(range(0, 100, 10))
                else:
                    params["intervals"] = np.sort(intervals).tolist()

            forecast_dict, console_output = self._run_r_forecast(
                data, params, save_info=save_info)

            if self.method_name in QUANTILE_FORECAST_METHODS:
                quantile_forecasts_dict = forecast_dict["quantiles"]

                yield QuantileForecast(
                    forecast_arrays=np.array(
                        list(quantile_forecasts_dict.values())),
                    forecast_keys=list(quantile_forecasts_dict.keys()),
                    start_date=forecast_start(data),
                    freq=self.freq,
                    item_id=data.get("item_id", None),
                )
            else:
                if self.method_name in POINT_FORECAST_METHODS:
                    # Handling special cases outside of R is better, since it is more visible and is easier to change.
                    # Repeat mean forecasts `num_samples` times
                    # (list repetition, then reshape into rows).
                    samples = np.reshape(
                        forecast_dict["mean"] * params["num_samples"],
                        (params["num_samples"], self.prediction_length),
                    )
                else:
                    samples = np.array(forecast_dict["samples"])

                expected_shape = (
                    params["num_samples"],
                    self.prediction_length,
                )
                assert (
                    samples.shape == expected_shape
                ), f"Expected shape {expected_shape} but found {samples.shape}"
                info = ({
                    "console_output": "\n".join(console_output)
                } if save_info else None)
                yield SampleForecast(
                    samples,
                    forecast_start(data),
                    self.freq,
                    info=info,
                    item_id=data.get("item_id", None),
                )