def predict(self):
    """Forecast future values from the gluon dataset used for training.

    Runs ``self.predictor`` on ``self.gluon_dataset``, converts the forecasts to timeseries
    and concatenates them (across identifiers and quantiles) into ``self.forecasts_df``.
    ``self.time_column_name`` and ``self.identifiers_columns`` are read from the first
    entry of ``self.gluon_dataset.list_data``. When ``self.include_history`` is truthy the
    historical rows are merged in and every row is tagged with its origin. Finally the
    "index" column is renamed to the time column name.
    """
    predicted = list(self.predictor.predict(self.gluon_dataset))
    timeseries = self._compute_forecasts_timeseries(predicted)
    per_identifiers = concat_timeseries_per_identifiers(timeseries)
    self.forecasts_df = concat_all_timeseries(per_identifiers)

    # Column metadata is taken from the first entry of the dataset only.
    first_entry = self.gluon_dataset.list_data[0]
    self.time_column_name = first_entry[TIMESERIES_KEYS.TIME_COLUMN_NAME]
    if TIMESERIES_KEYS.IDENTIFIERS in first_entry:
        self.identifiers_columns = list(first_entry[TIMESERIES_KEYS.IDENTIFIERS].keys())
    else:
        self.identifiers_columns = []

    if self.include_history:
        self.forecasts_df = self._include_history(
            self.frequency,
            history_length_limit=self.history_length_limit,
        )
        # NOTE(review): add_row_origin presumably relies on the merge indicator column
        # produced by _include_history ("both" / "left_only") - confirm against its definition.
        self.forecasts_df = add_row_origin(
            self.forecasts_df,
            both=ROW_ORIGIN.FORECAST,
            left_only=ROW_ORIGIN.HISTORY,
        )

    time_column_mapping = {"index": self.time_column_name}
    self.forecasts_df = self.forecasts_df.rename(columns=time_column_mapping)
def _include_history(self, frequency, history_length_limit=None):
    """Left-merge the training history with the forecasts dataframe.

    Args:
        frequency (str): Forwarded to ``_retrieve_history_timeseries``; used to reconstruct the
            date range (a gluon ListDataset only stores the start date).
        history_length_limit (int): Maximum number of historical values to retrieve per
            timeseries. Defaults to None, which means all of them.

    Returns:
        DataFrame holding every historical row, joined with ``self.forecasts_df`` on the
        "index" column plus the identifiers columns. Because the merge is done with
        ``indicator=True``, it also carries the pandas merge indicator column.
    """
    timeseries = self._retrieve_history_timeseries(frequency, history_length_limit)
    per_identifiers = concat_timeseries_per_identifiers(timeseries)
    history_df = concat_all_timeseries(per_identifiers)

    # History is the left side, so rows without a matching forecast are kept.
    merge_keys = ["index"] + self.identifiers_columns
    return history_df.merge(
        self.forecasts_df,
        on=merge_keys,
        how="left",
        indicator=True,
    )
def train_evaluate(self, train_list_dataset, test_list_dataset, make_forecasts=False, retrain=False):
    """Train on train_list_dataset, evaluate on test_list_dataset, optionally retrain.

    The time spent training the evaluation predictor and computing the evaluation predictions
    is stored in ``self.evaluation_time`` (seconds).

    Args:
        train_list_dataset (gluonts.dataset.common.ListDataset): ListDataset created with the
            GluonDataset class.
        test_list_dataset (gluonts.dataset.common.ListDataset): ListDataset created with the
            GluonDataset class.
        make_forecasts (bool, optional): Whether to build and return the evaluation forecasts.
            Defaults to False.
        retrain (bool, optional): Whether to call ``self.train`` on test_list_dataset once the
            evaluation is done. Defaults to False.

    Returns:
        Evaluation metrics DataFrame for each target and aggregated.
        List of timeseries identifiers column names (empty list if none found in
        train_list_dataset).
        Only when make_forecasts is True: DataFrame of predictions for the last
        prediction_length timesteps of the test_list_dataset timeseries.
    """
    logger.info(f"Evaluating {self.get_label()} model performance...")

    started_at = perf_counter()
    predictor = self._train_estimator(train_list_dataset)
    evaluation_output = self._make_evaluation_predictions(predictor, test_list_dataset)
    agg_metrics, item_metrics, forecasts = evaluation_output
    self.evaluation_time = perf_counter() - started_at

    logger.info(
        f"Evaluating {self.get_label()} model performance: Done in {self.evaluation_time:.2f} seconds"
    )

    if retrain:
        self.train(test_list_dataset)

    metrics, identifiers_columns = self._format_metrics(agg_metrics, item_metrics, train_list_dataset)

    # Without forecasts requested, only the metrics and identifiers are returned.
    if not make_forecasts:
        return metrics, identifiers_columns

    median_timeseries = self._compute_median_forecasts_timeseries(forecasts, train_list_dataset)
    per_identifiers = concat_timeseries_per_identifiers(median_timeseries)
    forecasts_df = concat_all_timeseries(per_identifiers)
    return metrics, identifiers_columns, forecasts_df
def predict(self):
    """Forecast future values from the gluon dataset used for training.

    When ``self.model_name`` is set, the model cannot use external features and the first
    dataset entry carries them, the prediction runs on a copy of the dataset returned by
    ``remove_unused_external_features``; otherwise it runs on ``self.gluon_dataset`` itself.
    The forecasts are converted to timeseries and concatenated (across identifiers and
    quantiles) into ``self.forecasts_df``. ``self.time_column_name`` and
    ``self.identifiers_columns`` are read from the first entry of
    ``self.gluon_dataset.list_data``. When ``self.include_history`` is truthy the historical
    rows are merged in and every row is tagged with its origin. Finally the "index" column is
    renamed to the time column name.
    """
    model_handler = ModelHandler(self.model_name)

    prediction_dataset = self.gluon_dataset
    if (
        self.model_name
        and not model_handler.can_use_external_feature()
        and TIMESERIES_KEYS.FEAT_DYNAMIC_REAL in self.gluon_dataset.list_data[0]
    ):
        # remove external features from the ListDataset used for predictions if the model cannot use them
        prediction_dataset = remove_unused_external_features(self.gluon_dataset, self.frequency)

    predicted = list(self.predictor.predict(prediction_dataset))
    timeseries = self._compute_forecasts_timeseries(predicted)
    per_identifiers = concat_timeseries_per_identifiers(timeseries)
    self.forecasts_df = concat_all_timeseries(per_identifiers)

    # Column metadata always comes from the original dataset, first entry only.
    first_entry = self.gluon_dataset.list_data[0]
    self.time_column_name = first_entry[TIMESERIES_KEYS.TIME_COLUMN_NAME]
    if TIMESERIES_KEYS.IDENTIFIERS in first_entry:
        self.identifiers_columns = list(first_entry[TIMESERIES_KEYS.IDENTIFIERS].keys())
    else:
        self.identifiers_columns = []

    if self.include_history:
        self.forecasts_df = self._include_history(
            self.frequency,
            history_length_limit=self.history_length_limit,
        )
        # NOTE(review): add_row_origin presumably relies on the merge indicator column
        # produced by _include_history ("both" / "left_only") - confirm against its definition.
        self.forecasts_df = add_row_origin(
            self.forecasts_df,
            both=ROW_ORIGIN.FORECAST,
            left_only=ROW_ORIGIN.HISTORY,
        )

    time_column_mapping = {"index": self.time_column_name}
    self.forecasts_df = self.forecasts_df.rename(columns=time_column_mapping)