def predict_item(self, item, trained_model): """Compute quantiles using the confidence intervals of autoarima. Args: item (DataEntry): One timeseries. trained_model (STLForecastResults): Trained STL model. Returns: SampleForecast of quantiles. """ target_length = len(item[TIMESERIES_KEYS.TARGET]) start_date = frequency_add(item[TIMESERIES_KEYS.START], target_length) samples = [] for alpha in np.arange(0.02, 1.01, 0.02): predictions = trained_model.get_prediction( start=target_length, end=target_length + self.prediction_length - 1) confidence_intervals = predictions.conf_int(alpha=alpha) samples += [ confidence_intervals["lower"].values, confidence_intervals["upper"].values ] return SampleForecast(samples=np.stack(samples), start_date=start_date, freq=self.freq)
def predict_item(self, item, trained_model): """Compute quantiles using the confidence intervals of autoarima. Args: item (DataEntry): One timeseries. trained_model (pm.auto_arima): Trained autoarima model. Returns: SampleForecast of quantiles. """ start_date = frequency_add(item[TIMESERIES_KEYS.START], len(item[TIMESERIES_KEYS.TARGET])) prediction_external_features = self._set_prediction_external_features( item) samples = [] for alpha in np.arange(0.02, 1.01, 0.02): confidence_intervals = trained_model.predict( n_periods=self.prediction_length, X=prediction_external_features, return_conf_int=True, alpha=alpha)[1] samples += [confidence_intervals[:, 0], confidence_intervals[:, 1]] return SampleForecast(samples=np.stack(samples), start_date=start_date, freq=self.freq)
def predict_item(self, item: DataEntry) -> SampleForecast:
    if self.context_length is not None:
        target = item["target"][-self.context_length :]
    else:
        target = item["target"]

    mean = np.nanmean(target)
    std = np.nanstd(target)
    normal = np.random.standard_normal(self.shape)
    start_date = frequency_add(item["start"], len(item["target"]))

    return SampleForecast(
        samples=std * normal + mean,
        start_date=start_date,
        freq=self.freq,
        item_id=item.get(FieldName.ITEM_ID),
    )
def predict(self, dataset: Dataset, **kwargs) -> Iterator[SampleForecast]:
    for item in dataset:
        if self.context_length is not None:
            target = item["target"][-self.context_length :]
        else:
            target = item["target"]

        mean = np.nanmean(target)
        std = np.nanstd(target)
        normal = np.random.standard_normal(self.shape)
        # the forecast starts right after the full observed series, even when
        # the statistics are computed on a truncated context window
        start_date = frequency_add(item["start"], len(item["target"]))

        yield SampleForecast(
            samples=std * normal + mean,
            start_date=start_date,
            freq=self.freq,
            item_id=item.get("id"),
        )
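# Standalone sketch of the per-item logic shared by the two methods above
# (values and shapes are illustrative; no class plumbing).
import numpy as np

target = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
context_length = 3                        # mirrors self.context_length
num_samples, prediction_length = 100, 2   # mirrors self.shape

window = target[-context_length:]         # NaNs are ignored by the nan-aware stats
mean, std = np.nanmean(window), np.nanstd(window)

# every forecast sample path is an i.i.d. Gaussian draw with the window's moments
sample_paths = std * np.random.standard_normal((num_samples, prediction_length)) + mean
print(sample_paths.shape)  # (100, 2)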
def generate_lstnet_dataset(dataset_path: Path, dataset_name: str):
    ds_info = datasets_info[dataset_name]

    os.makedirs(dataset_path, exist_ok=True)
    with open(dataset_path / "metadata.json", "w") as f:
        f.write(
            json.dumps(
                metadata(
                    cardinality=ds_info.num_series,
                    freq=ds_info.freq,
                    prediction_length=ds_info.prediction_length,
                )
            )
        )

    train_file = dataset_path / "train" / "data.json"
    test_file = dataset_path / "test" / "data.json"

    time_index = pd.date_range(
        start=ds_info.start_date,
        freq=ds_info.freq,
        periods=ds_info.num_time_steps,
    )

    df = pd.read_csv(ds_info.url, header=None)
    assert df.shape == (
        ds_info.num_time_steps,
        ds_info.num_series,
    ), f"expected num_time_steps/num_series {(ds_info.num_time_steps, ds_info.num_series)} but got {df.shape}"

    timeseries = load_from_pandas(df=df, time_index=time_index, agg_freq=ds_info.agg_freq)

    # the last date seen during training
    ts_index = timeseries[0].index
    training_end = ts_index[int(len(ts_index) * (8 / 10))]

    train_ts = []
    for cat, ts in enumerate(timeseries):
        sliced_ts = ts[:training_end]
        if len(sliced_ts) > 0:
            train_ts.append(
                to_dict(
                    target_values=sliced_ts.values,
                    start=sliced_ts.index[0],
                    cat=[cat],
                )
            )

    assert len(train_ts) == ds_info.num_series
    save_to_file(train_file, train_ts)

    # time of the first prediction for each rolling evaluation window
    prediction_dates = [
        frequency_add(training_end, i * ds_info.prediction_length)
        for i in range(ds_info.rolling_evaluations)
    ]

    test_ts = []
    for prediction_start_date in prediction_dates:
        for cat, ts in enumerate(timeseries):
            prediction_end_date = frequency_add(prediction_start_date, ds_info.prediction_length)
            sliced_ts = ts[:prediction_end_date]
            test_ts.append(
                to_dict(
                    target_values=sliced_ts.values,
                    start=sliced_ts.index[0],
                    cat=[cat],
                )
            )

    assert len(test_ts) == ds_info.num_series * ds_info.rolling_evaluations
    save_to_file(test_file, test_ts)
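# Hypothetical invocation (dataset name/path are assumptions; datasets_info must
# contain a matching entry with num_series, freq, start_date, num_time_steps,
# prediction_length, rolling_evaluations, url, and agg_freq).
from pathlib import Path

generate_lstnet_dataset(Path("datasets") / "electricity", "electricity")
# writes:
#   datasets/electricity/metadata.json
#   datasets/electricity/train/data.json  -- first 80% of every series
#   datasets/electricity/test/data.json   -- one slice per rolling evaluation window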