def dataset_info(self, train_ds: Dataset, test_ds: Dataset) -> DatasetInfo:
    return DatasetInfo(
        name=f'RecipeDataset({repr(self.recipe)})',
        metadata=self.metadata,
        prediction_length=self.prediction_length,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[
            {
                "item": str(i),
                "start": start_date,
                "target": [float(i)] * 24,
                "feat_static_cat": [i],
                "feat_static_real": [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                "item": str(i),
                "start": start_date,
                "target": [float(i)] * 30,
                "feat_static_cat": [i],
                "feat_static_real": [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    start_date = "2000-01-01 00:00:00"

    train_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 24,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    test_ds = ListDataset(
        data_iter=[
            {
                FieldName.ITEM_ID: str(i),
                FieldName.START: start_date,
                FieldName.TARGET: [float(i)] * 30,
                FieldName.FEAT_STATIC_CAT: [i],
                FieldName.FEAT_STATIC_REAL: [float(i)],
            }
            for i in range(10)
        ],
        freq=metadata.freq,
    )

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )

    return info, train_ds, test_ds
def test_forecast_parser():
    # verify that the logged estimator, datasets and metrics can be recovered
    # from their string representation
    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.freq, dataset_info.prediction_length
    )
    assert repr(estimator) == repr(load_code(repr(estimator)))

    predictor = estimator.train(training_data=train_ds)

    stats = calculate_dataset_statistics(train_ds)
    assert stats == eval(
        repr(stats), globals(), {"gluonts": gluonts}
    )  # TODO: use load

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, _ = backtest_metrics(test_ds, predictor, evaluator)

    # reset infinite metrics to 0 (otherwise the assertion below fails)
    for key, val in agg_metrics.items():
        if not math.isfinite(val):
            agg_metrics[key] = 0.0

    assert agg_metrics == load_code(dump_code(agg_metrics))
def compute_stats(dataset: Optional[str], data_path: str, output_path: str):
    """
    Computes simple dataset features either for a single dataset or all
    datasets in the registry.
    """
    source = Path(data_path)
    target = Path(output_path)
    target.mkdir(parents=True, exist_ok=True)

    if dataset is None:
        dataset_list = list(DATASET_REGISTRY.items())
    else:
        dataset_list = [(dataset, DATASET_REGISTRY[dataset])]

    for dataset_name, config in tqdm(dataset_list):
        file = target / f"{dataset_name}.json"
        if file.exists():
            continue
        stats = calculate_dataset_statistics(
            config(source).data.train(val=False).gluonts()
        )
        with file.open("w+") as f:
            json.dump(
                {
                    "integer_dataset": stats.integer_dataset,
                    "mean_target_length": stats.mean_target_length,
                    "num_time_observations": stats.num_time_observations,
                    "num_time_series": stats.num_time_series,
                },
                f,
            )
def derive_auto_fields(cls, train_iter):
    stats = calculate_dataset_statistics(train_iter)

    return {
        "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0,
        "use_feat_static_cat": bool(stats.feat_static_cat),
        "cardinality": [len(cats) for cats in stats.feat_static_cat],
    }
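# A minimal, hedged sketch (not part of the original snippets) showing how the
# auto-derived fields above can be computed for the constant_dataset() defined
# earlier and spliced into an estimator constructor. DeepAREstimator and its
# use_feat_static_cat / cardinality arguments are assumed from gluonts and may
# differ across versions; the feat_static_cat attribute of DatasetStatistics is
# named `cats` in older releases.
from gluonts.dataset.stat import calculate_dataset_statistics
from gluonts.model.deepar import DeepAREstimator

_, train_ds, _ = constant_dataset()
stats = calculate_dataset_statistics(train_ds)

estimator = DeepAREstimator(
    freq="1H",
    prediction_length=2,
    # one static categorical feature with 10 distinct values in this dataset
    use_feat_static_cat=bool(stats.feat_static_cat),
    cardinality=[len(cats) for cats in stats.feat_static_cat],
)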
def run_train(
    forecaster: Estimator,
    train_dataset: Dataset,
    validation_dataset: Optional[Dataset],
) -> Predictor:
    log.metric(
        "train_dataset_stats", calculate_dataset_statistics(train_dataset)
    )
    return forecaster.train(train_dataset, validation_dataset)
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    with caplog.at_level(logging.DEBUG):
        dataset_info, train_ds, test_ds = constant_dataset()

        estimator = make_estimator(
            dataset_info.metadata.freq, dataset_info.prediction_length
        )
        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
        backtest_metrics(train_ds, test_ds, estimator, evaluator)
        train_stats = calculate_dataset_statistics(train_ds)
        test_stats = calculate_dataset_statistics(test_ds)

    log_info = BacktestInformation.make_from_log_contents(caplog.text)

    assert train_stats == log_info.train_dataset_stats
    assert test_stats == log_info.test_dataset_stats
    assert equals(estimator, log_info.estimator)

    print(log_info)
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    caplog.set_level(logging.DEBUG, logger='log.txt')

    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.time_granularity, dataset_info.prediction_length
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    backtest_metrics(train_ds, test_ds, estimator, evaluator)
    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    log_file = str(Path(__file__).parent / 'log.txt')
    log_info = BacktestInformation.make_from_log(log_file)

    assert train_stats == log_info.train_dataset_stats
    assert test_stats == log_info.test_dataset_stats
    assert equals(estimator, log_info.estimator)

    print(log_info)
def test_dataset_statistics(self) -> None:
    n = 2
    T = 10

    # use integers to avoid float conversion that can fail comparison
    np.random.seed(0)
    targets = np.random.randint(0, 10, (n, T))

    scale_histogram = ScaleHistogram()
    for i in range(n):
        scale_histogram.add(targets[i, :])
    scale_histogram.add([])

    expected = DatasetStatistics(
        integer_dataset=True,
        num_time_series=n + 1,
        num_time_observations=targets.size,
        mean_target_length=T * 2 / 3,
        min_target=targets.min(),
        mean_target=targets.mean(),
        mean_abs_target=targets.mean(),
        max_target=targets.max(),
        cats=[{0}, {1, 2}],
        num_dynamic_feat=2,
        num_missing_values=0,
        scale_histogram=scale_histogram,
    )

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset,
        [
            make_time_series(
                target=targets[0, :], cat=[0, 1], num_dynamic_feat=2
            ),
            make_time_series(
                target=targets[1, :], cat=[0, 2], num_dynamic_feat=2
            ),
            make_time_series(
                target=np.array([]), cat=[0, 2], num_dynamic_feat=2
            ),
        ],
    )

    found = calculate_dataset_statistics(timeseries)

    assert expected == found
def derive_auto_fields(cls, train_iter):
    stats = calculate_dataset_statistics(train_iter)

    auto_fields = {
        "use_feat_dynamic_real": stats.num_feat_dynamic_real > 0,
        "use_feat_static_cat": bool(stats.feat_static_cat),
        "cardinality": [len(cats) for cats in stats.feat_static_cat],
    }

    logger = logging.getLogger(__name__)
    logger.info(
        f"gluonts[from_inputs]: use_feat_dynamic_real set to "
        f"'{auto_fields['use_feat_dynamic_real']}', use_feat_static_cat set to "
        f"'{auto_fields['use_feat_static_cat']}', and cardinality set to "
        f"'{auto_fields['cardinality']}'"
    )

    return auto_fields
def get_bucket_samplers(
    ts_datasets: List[ListDataset],
) -> List[BucketInstanceSampler]:
    """
    Generate a BucketInstanceSampler for each dataset in ts_datasets, based on
    the distribution of that dataset.

    Arguments:
        ts_datasets {List[ListDataset]} -- GluonTS dataset objects

    Returns:
        List[BucketInstanceSampler] -- list of samplers in which, for each
        sampler, the probability of sampling from bucket i is the inverse of
        the number of elements in that bucket
    """
    dataset_stats = [
        calculate_dataset_statistics(ts_dataset) for ts_dataset in ts_datasets
    ]
    return [
        BucketInstanceSampler(stats.scale_histogram) for stats in dataset_stats
    ]
def test_BucketInstanceSampler():
    N = 6
    train_length = 2
    pred_length = 1
    ds = make_dataset(N, train_length)

    dataset_stats = calculate_dataset_statistics(ds)

    t = transform.Chain(
        trans=[
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.BucketInstanceSampler(
                    dataset_stats.scale_histogram
                ),
                past_length=train_length,
                future_length=pred_length,
                pick_incomplete=True,
            )
        ]
    )

    assert_serializable(t)

    scale_hist = ScaleHistogram()

    repetition = 200
    for i in range(repetition):
        for data in t(iter(ds), is_train=True):
            target_values = data["past_target"]
            # for simplicity, discard values that are zeros to avoid confusion
            # with padding
            target_values = target_values[target_values > 0]
            scale_hist.add(target_values)

    expected_values = {i: repetition for i in range(1, N)}
    found_values = scale_hist.bin_counts

    for i in range(1, N):
        # sampled counts should be within 30% of the expected counts
        # (the original assertion had the comparison inside abs(), which
        # always evaluated to a truthy value)
        assert abs(expected_values[i] - found_values[i]) < expected_values[i] * 0.3
def test_dataset_histogram(self) -> None:
    # generates 2 ** N - 1 timeseries with constant increasing values
    N = 6
    n = 2 ** N - 1
    T = 5
    targets = np.ones((n, T))
    for i in range(0, n):
        targets[i, :] = targets[i, :] * i

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset, [make_time_series(target=targets[i, :]) for i in range(n)]
    )

    found = calculate_dataset_statistics(timeseries)

    hist = found.scale_histogram.bin_counts
    for i in range(0, N):
        assert i in hist
        assert hist[i] == 2 ** i
def test_dataset_statistics_exceptions(self) -> None:
    def check_error_message(expected_regex, dataset) -> None:
        with self.assertRaisesRegex(GluonTSDataError, expected_regex):
            calculate_dataset_statistics(dataset)

    check_error_message('Time series dataset is empty!', [])

    check_error_message(
        'Only empty time series found in the dataset!',
        [make_time_series(target=np.random.randint(0, 10, 0))],
    )

    # different number of dynamic_feat
    check_error_message(
        'Found instances with different number of features in '
        'dynamic_feat, found one with 2 and another with 1.',
        [
            make_time_series(num_dynamic_feat=2),
            make_time_series(num_dynamic_feat=1),
        ],
    )

    # different number of dynamic_feat
    check_error_message(
        'Found instances with different number of features in '
        'dynamic_feat, found one with 0 and another with 1.',
        [
            make_time_series(num_dynamic_feat=0),
            make_time_series(num_dynamic_feat=1),
        ],
    )

    # dynamic_feat present for some instances only
    check_error_message(
        'dynamic_feat was found for some instances but not others.',
        [
            make_time_series(num_dynamic_feat=1),
            make_time_series(num_dynamic_feat=0),
        ],
    )

    # infinite target
    # check_error_message(
    #     'Target values have to be finite (e.g., not "inf", "-inf", '
    #     '"nan", or null) and cannot exceed single precision floating '
    #     'point range.',
    #     [make_time_series(target=np.full(20, np.inf))]
    # )

    # infinite dynamic_feat
    inf_dynamic_feat = np.full((2, len(target)), np.inf)
    check_error_message(
        'Features values have to be finite and cannot exceed single '
        'precision floating point range.',
        [
            ts(
                start=start,
                target=target,
                dynamic_feat=inf_dynamic_feat,
                cat=[0, 1],
            )
        ],
    )

    # cat different length
    check_error_message(
        'Not all cat vectors have the same length 2 != 1.',
        [ts(start, target, [0, 1]), ts(start, target, [1])],
    )

    # dynamic_feat different length from target
    check_error_message(
        'Each feature in dynamic_feat has to have the same length as the '
        'target. Found an instance with dynamic_feat of length 1 and a '
        'target of length 20.',
        [
            ts(start, target, [0, 1], dynamic_feat=np.ones((1, 1))),
            ts(start, target, [1], dynamic_feat=np.ones((1, 1))),
        ],
    )

    calculate_dataset_statistics(
        # FIXME: the cast below is a hack to make mypy happy
        cast(
            Dataset,
            [
                make_time_series(num_dynamic_feat=2),
                make_time_series(num_dynamic_feat=2),
            ],
        )
    )

    calculate_dataset_statistics(
        # FIXME: the cast below is a hack to make mypy happy
        cast(
            Dataset,
            [
                make_time_series(num_dynamic_feat=0),
                make_time_series(num_dynamic_feat=0),
            ],
        )
    )
def calc_stats(self) -> DatasetStatistics:
    return calculate_dataset_statistics(self)
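# A minimal, hedged sketch (not from the original snippets): calling
# calculate_dataset_statistics directly on a small ListDataset and reading the
# same fields that compute_stats above serializes. Import paths are assumed
# from gluonts (gluonts.dataset.common / gluonts.dataset.stat) and may vary by
# version.
from gluonts.dataset.common import ListDataset
from gluonts.dataset.stat import calculate_dataset_statistics

ds = ListDataset(
    data_iter=[
        {"start": "2000-01-01 00:00:00", "target": [float(i)] * 24}
        for i in range(10)
    ],
    freq="1H",
)

stats = calculate_dataset_statistics(ds)
print(stats.num_time_series)        # expected: 10
print(stats.num_time_observations)  # expected: 240 (10 series x 24 points)
print(stats.integer_dataset)        # expected: True, targets take whole-number values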
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_eval_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: bool = False,
):
    """
    Parameters
    ----------
    train_dataset
        Dataset to use for training.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_eval_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        predictor on `test_dataset` with the provided `evaluator`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                float_type=forecaster.float_type,
            )

            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_eval_samples=num_eval_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=len(test_dataset)
    )

    # we only log aggregate metrics for now, as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
def test_dataset_statistics(self) -> None:
    num_time_series = 3
    num_time_observations = 10
    num_feat_dynamic_real = 2
    num_past_feat_dynamic_real = 3
    num_feat_dynamic_cat = 2
    num_missing_values = 0

    # use integers to avoid float conversion that can fail comparison
    np.random.seed(0)
    targets = np.random.randint(
        0, 10, (num_time_series - 1, num_time_observations)
    )

    scale_histogram = ScaleHistogram()
    for i in range(num_time_series - 1):
        scale_histogram.add(targets[i, :])
    scale_histogram.add([])

    expected = DatasetStatistics(
        integer_dataset=True,
        num_time_series=num_time_series,  # includes empty array
        num_time_observations=targets.size,
        mean_target_length=num_time_observations
        * (num_time_series - 1)
        / num_time_series,
        max_target_length=num_time_observations,
        min_target=targets.min(),
        mean_target=targets.mean(),
        mean_abs_target=targets.mean(),
        max_target=targets.max(),
        feat_static_real=[{0.1}, {0.2, 0.3}],
        feat_static_cat=[{1}, {2, 3}],
        num_feat_dynamic_real=num_feat_dynamic_real,
        num_past_feat_dynamic_real=num_past_feat_dynamic_real,
        num_feat_dynamic_cat=num_feat_dynamic_cat,
        num_missing_values=num_missing_values,
        scale_histogram=scale_histogram,
    )

    # FIXME: the cast below is a hack to make mypy happy
    timeseries = cast(
        Dataset,
        [
            make_time_series(
                target=targets[0, :],
                feat_static_cat=[1, 2],
                feat_static_real=[0.1, 0.2],
                num_feat_dynamic_cat=num_feat_dynamic_cat,
                num_feat_dynamic_real=num_feat_dynamic_real,
                num_past_feat_dynamic_real=num_past_feat_dynamic_real,
            ),
            make_time_series(
                target=targets[1, :],
                feat_static_cat=[1, 3],
                feat_static_real=[0.1, 0.3],
                num_feat_dynamic_cat=num_feat_dynamic_cat,
                num_feat_dynamic_real=num_feat_dynamic_real,
                num_past_feat_dynamic_real=num_past_feat_dynamic_real,
            ),
            make_time_series(
                target=np.array([]),
                feat_static_cat=[1, 3],
                feat_static_real=[0.1, 0.3],
                num_feat_dynamic_cat=num_feat_dynamic_cat,
                num_feat_dynamic_real=num_feat_dynamic_real,
                num_past_feat_dynamic_real=num_past_feat_dynamic_real,
            ),
        ],
    )

    found = calculate_dataset_statistics(timeseries)

    assert expected == found
def check_error_message(expected_regex, dataset) -> None:
    with self.assertRaisesRegex(GluonTSDataError, expected_regex):
        calculate_dataset_statistics(dataset)
def test_dataset_statistics_exceptions(self) -> None:
    def check_error_message(expected_regex, dataset) -> None:
        with self.assertRaisesRegex(GluonTSDataError, expected_regex):
            calculate_dataset_statistics(dataset)

    check_error_message("Time series dataset is empty!", [])

    check_error_message(
        "Only empty time series found in the dataset!",
        [make_time_series(target=np.random.randint(0, 10, 0))],
    )

    # infinite target
    # check_error_message(
    #     "Target values have to be finite (e.g., not inf, -inf, "
    #     "or None) and cannot exceed single precision floating "
    #     "point range.",
    #     [make_time_series(target=np.full(20, np.inf))]
    # )

    # different number of feat_dynamic_{cat, real}
    check_error_message(
        "Found instances with different number of features in "
        "feat_dynamic_cat, found one with 2 and another with 1.",
        [
            make_time_series(num_feat_dynamic_cat=2),
            make_time_series(num_feat_dynamic_cat=1),
        ],
    )

    check_error_message(
        "Found instances with different number of features in "
        "feat_dynamic_cat, found one with 0 and another with 1.",
        [
            make_time_series(num_feat_dynamic_cat=0),
            make_time_series(num_feat_dynamic_cat=1),
        ],
    )

    check_error_message(
        "feat_dynamic_cat was found for some instances but not others.",
        [
            make_time_series(num_feat_dynamic_cat=1),
            make_time_series(num_feat_dynamic_cat=0),
        ],
    )

    check_error_message(
        "Found instances with different number of features in "
        "feat_dynamic_real, found one with 2 and another with 1.",
        [
            make_time_series(num_feat_dynamic_real=2),
            make_time_series(num_feat_dynamic_real=1),
        ],
    )

    check_error_message(
        "Found instances with different number of features in "
        "feat_dynamic_real, found one with 0 and another with 1.",
        [
            make_time_series(num_feat_dynamic_real=0),
            make_time_series(num_feat_dynamic_real=1),
        ],
    )

    check_error_message(
        "feat_dynamic_real was found for some instances but not others.",
        [
            make_time_series(num_feat_dynamic_real=1),
            make_time_series(num_feat_dynamic_real=0),
        ],
    )

    # infinite feat_dynamic_{cat, real}
    inf_dynamic_feat = np.full((2, len(target)), np.inf)
    check_error_message(
        "Features values have to be finite and cannot exceed single "
        "precision floating point range.",
        [
            ts(
                start,
                target,
                feat_dynamic_cat=inf_dynamic_feat,
                feat_static_cat=[0, 1],
            )
        ],
    )

    check_error_message(
        "Features values have to be finite and cannot exceed single "
        "precision floating point range.",
        [
            ts(
                start,
                target,
                feat_dynamic_real=inf_dynamic_feat,
                feat_static_cat=[0, 1],
            )
        ],
    )

    # feat_dynamic_{cat, real} different length from target
    check_error_message(
        "Each feature in feat_dynamic_cat has to have the same length as the "
        "target. Found an instance with feat_dynamic_cat of length 1 and a "
        "target of length 20.",
        [
            ts(
                start=start,
                target=target,
                feat_static_cat=[0, 1],
                feat_dynamic_cat=np.ones((1, 1)),
            )
        ],
    )

    check_error_message(
        "Each feature in feat_dynamic_real has to have the same length as the "
        "target. Found an instance with feat_dynamic_real of length 1 and a "
        "target of length 20.",
        [
            ts(
                start=start,
                target=target,
                feat_static_cat=[0, 1],
                feat_dynamic_real=np.ones((1, 1)),
            )
        ],
    )

    # feat_static_{cat, real} different length
    check_error_message(
        "Not all feat_static_cat vectors have the same length 2 != 1.",
        [
            ts(start=start, target=target, feat_static_cat=[0, 1]),
            ts(start=start, target=target, feat_static_cat=[1]),
        ],
    )

    check_error_message(
        "Not all feat_static_real vectors have the same length 2 != 1.",
        [
            ts(start=start, target=target, feat_static_real=[0, 1]),
            ts(start=start, target=target, feat_static_real=[1]),
        ],
    )

    calculate_dataset_statistics(
        # FIXME: the cast below is a hack to make mypy happy
        cast(
            Dataset,
            [
                make_time_series(num_feat_dynamic_cat=2),
                make_time_series(num_feat_dynamic_cat=2),
            ],
        )
    )

    calculate_dataset_statistics(
        # FIXME: the cast below is a hack to make mypy happy
        cast(
            Dataset,
            [
                make_time_series(num_feat_dynamic_cat=0),
                make_time_series(num_feat_dynamic_cat=0),
            ],
        )
    )
def backtest_metrics(
    train_dataset: Optional[Dataset],
    test_dataset: Dataset,
    forecaster: Union[Estimator, Predictor],
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
    use_symbol_block_predictor: Optional[bool] = False,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    **kwargs,
):
    """
    Parameters
    ----------
    train_dataset
        Dataset to use for training.
    test_dataset
        Dataset to use for testing.
    forecaster
        An estimator or a predictor to use for generating predictions.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.
    use_symbol_block_predictor
        Use a :class:`SymbolBlockPredictor` during testing.
    num_workers
        The number of multiprocessing workers to use for data preprocessing.
        By default 0, in which case no multiprocessing is used.
    num_prefetch
        The number of batches to prefetch; only effective if `num_workers` > 0.
        If greater than 0, it allows worker processes to prefetch batches
        before data is requested from the iterator. Larger values give
        smoother throughput but consume more shared memory, while values that
        are too small may defeat the purpose of using multiple workers; in
        that case, try reducing `num_workers`. Defaults to `num_workers * 2`.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        training `forecaster` on `train_dataset` and evaluating the resulting
        predictor on `test_dataset` with the provided `evaluator`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    if train_dataset is not None:
        train_statistics = calculate_dataset_statistics(train_dataset)
        serialize_message(logger, train_dataset_stats_key, train_statistics)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    if isinstance(forecaster, Estimator):
        serialize_message(logger, estimator_key, forecaster)
        assert train_dataset is not None
        predictor = forecaster.train(train_dataset)

        if isinstance(forecaster, GluonEstimator) and isinstance(
            predictor, GluonPredictor
        ):
            inference_data_loader = InferenceDataLoader(
                dataset=test_dataset,
                transform=predictor.input_transform,
                batch_size=forecaster.trainer.batch_size,
                ctx=forecaster.trainer.ctx,
                dtype=forecaster.dtype,
                num_workers=num_workers,
                num_prefetch=num_prefetch,
                **kwargs,
            )

            if forecaster.trainer.hybridize:
                predictor.hybridize(batch=next(iter(inference_data_loader)))

            if use_symbol_block_predictor:
                predictor = predictor.as_symbol_block_predictor(
                    batch=next(iter(inference_data_loader))
                )
    else:
        predictor = forecaster

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now, as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics
def backtest_metrics(
    test_dataset: Dataset,
    predictor: Predictor,
    evaluator=Evaluator(
        quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
    ),
    num_samples: int = 100,
    logging_file: Optional[str] = None,
):
    """
    Parameters
    ----------
    test_dataset
        Dataset to use for testing.
    predictor
        The predictor to test.
    evaluator
        Evaluator to use.
    num_samples
        Number of samples to use when generating sample-based forecasts.
    logging_file
        If specified, information of the backtest is redirected to this file.

    Returns
    -------
    tuple
        A tuple of aggregate metrics and per-time-series metrics obtained by
        evaluating `predictor` on `test_dataset` with the provided `evaluator`.
    """
    if logging_file is not None:
        log_formatter = logging.Formatter(
            "[%(asctime)s %(levelname)s %(thread)d] %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
        logger = logging.getLogger(__name__)
        handler = logging.FileHandler(logging_file)
        handler.setFormatter(log_formatter)
        logger.addHandler(handler)
    else:
        logger = logging.getLogger(__name__)

    test_statistics = calculate_dataset_statistics(test_dataset)
    serialize_message(logger, test_dataset_stats_key, test_statistics)

    forecast_it, ts_it = make_evaluation_predictions(
        test_dataset, predictor=predictor, num_samples=num_samples
    )

    agg_metrics, item_metrics = evaluator(
        ts_it, forecast_it, num_series=maybe_len(test_dataset)
    )

    # we only log aggregate metrics for now, as item metrics may be very large
    for name, value in agg_metrics.items():
        serialize_message(logger, f"metric-{name}", value)

    if logging_file is not None:
        # Close the file handler to avoid leaving the file open.
        # https://stackoverflow.com/questions/24816456/python-logging-wont-shutdown
        logger.removeHandler(handler)
        del logger, handler

    return agg_metrics, item_metrics