def test_metrics_multivariate_custom_eval_fn(
    timeseries,
    res,
    has_nans,
    eval_dims,
    input_type,
    eval_name,
    eval_fn,
    agg_str,
    fcst_type,
):
    """Check that a MultivariateEvaluator configured with a custom metric
    reproduces the expected aggregate scores.

    Only metrics that appear in ``res`` are compared; ``np.isclose`` with
    ``equal_nan=True`` is used so an expected NaN matches an obtained NaN.
    """
    ts_datastructure = pd.DataFrame
    evaluator = MultivariateEvaluator(
        quantiles=QUANTILES,
        eval_dims=eval_dims,
        target_agg_funcs={"sum": np.sum},
        custom_eval_fn={eval_name: [eval_fn, agg_str, fcst_type]},
    )
    agg_metrics, item_metrics = calculate_metrics(
        timeseries,
        evaluator,
        ts_datastructure,
        has_nans=has_nans,
        forecaster=naive_multivariate_forecaster,
        input_type=input_type,
    )
    for metric, score in agg_metrics.items():
        # `metric in res` — membership on the dict directly; `res.keys()`
        # was redundant.
        if metric in res:
            assert np.isclose(score, res[metric], equal_nan=True), (
                "Scores for the metric {} do not match: \nexpected: {} "
                "\nobtained: {}".format(metric, res[metric], score)
            )
def test_metrics_multivariate(
    timeseries, res, has_nans, eval_dims, input_type
):
    """Check that a MultivariateEvaluator (with a "sum" target aggregation)
    reproduces the expected aggregate scores.

    Only metrics that appear in ``res`` are compared; scores must match the
    expectation to within an absolute tolerance of 0.001.
    """
    ts_datastructure = pd.DataFrame
    evaluator = MultivariateEvaluator(
        quantiles=QUANTILES,
        eval_dims=eval_dims,
        target_agg_funcs={"sum": np.sum},
    )
    agg_metrics, item_metrics = calculate_metrics(
        timeseries,
        evaluator,
        ts_datastructure,
        has_nans=has_nans,
        forecaster=naive_multivariate_forecaster,
        input_type=input_type,
    )
    for metric, score in agg_metrics.items():
        # `metric in res` — membership on the dict directly; `res.keys()`
        # was redundant.
        if metric in res:
            assert abs(score - res[metric]) < 0.001, (
                "Scores for the metric {} do not match: \nexpected: {} "
                "\nobtained: {}".format(metric, res[metric], score)
            )
def test_smoke(
    hybridize: bool, target_dim_sample: int, use_marginal_transformation: bool
):
    """Train a tiny GPVAR model for two epochs and sanity-check that the
    backtested ND metric stays below a loose bound."""
    batches_per_epoch = 1
    trainer = Trainer(
        epochs=2,
        batch_size=10,
        learning_rate=1e-4,
        num_batches_per_epoch=batches_per_epoch,
        hybridize=hybridize,
    )
    estimator = GPVAREstimator(
        distr_output=LowrankGPOutput(rank=2),
        num_cells=1,
        num_layers=1,
        pick_incomplete=True,
        prediction_length=metadata.prediction_length,
        target_dim=target_dim,
        target_dim_sample=target_dim_sample,
        freq=metadata.freq,
        use_marginal_transformation=use_marginal_transformation,
        trainer=trainer,
    )
    predictor = estimator.train(training_data=dataset.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        num_samples=10,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 2.5
def test_lstnet(
    skip_size,
    ar_window,
    lead_time,
    prediction_length,
    hybridize,
    scaling,
    dtype,
):
    """Train LSTNet for one epoch, round-trip it through serialization, and
    validate forecast shapes, start dates, and the aggregate ND metric."""
    estimator = LSTNetEstimator(
        skip_size=skip_size,
        ar_window=ar_window,
        num_series=NUM_SERIES,
        channels=6,
        kernel_size=2,
        context_length=4,
        freq=freq,
        lead_time=lead_time,
        prediction_length=prediction_length,
        trainer=Trainer(
            epochs=1, batch_size=2, learning_rate=0.01, hybridize=hybridize
        ),
        scaling=scaling,
        dtype=dtype,
    )
    predictor = estimator.train(dataset.train)

    # The predictor must survive a serialize/deserialize round trip intact.
    with tempfile.TemporaryDirectory() as directory:
        predictor.serialize(Path(directory))
        restored = Predictor.deserialize(Path(directory))
        assert predictor == restored

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test, predictor=predictor, num_samples=NUM_SAMPLES
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    assert len(forecasts) == len(tss) == len(dataset.test)

    first_entry = dataset.test.list_data[0]
    # The expected forecast start is invariant across forecasts — compute once.
    expected_start = pd.period_range(
        start=first_entry["start"],
        periods=first_entry["target"].shape[1],  # number of test periods
        freq=freq,
    )[-prediction_length]

    for forecast in forecasts:
        assert forecast.freq == freq
        assert forecast.samples.shape == (
            NUM_SAMPLES,
            prediction_length,
            NUM_SERIES,
        )
        assert forecast.start_date == expected_start

    evaluator = MultivariateEvaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(dataset.test)
    )
    assert agg_metrics["ND"] < 1.0
def evaluate(
    model: Predictor, test_data: ListDataset, num_samples: int
) -> Tuple[List[Forecast], List[pd.Series], Dict[str, float], pd.DataFrame]:
    """Run ``model`` over ``test_data`` and compute multivariate metrics.

    Returns the forecasts, the reference time series, the aggregate metrics
    dict, and the per-item metrics frame.
    """
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_data, predictor=model, num_samples=num_samples
    )
    forecasts, tss = list(forecast_it), list(ts_it)

    evaluator = MultivariateEvaluator()
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(test_data)
    )
    return forecasts, tss, agg_metrics, item_metrics
def __init__(
    self,
    predictor: Predictor,
    test_dataset: ListDataset,
    original_dataset: ListDataset,
    multivariate: bool = True,
    num_samples: int = 100,
) -> None:
    """Store the predictor and datasets and pick a matching evaluator.

    The last ``prediction_length`` values of each original series become the
    evaluation targets; everything before that is kept as past data. Invalid
    values (NaN/inf) in the originals are masked out.
    """
    self.predictor = predictor
    self.test_dataset = test_dataset
    self.original_series = list(original_dataset)
    self.num_samples = num_samples
    self.agg_metrics = None
    self.ind_metrics = None

    masked = [
        np.ma.masked_invalid(entry['target'])
        for entry in self.original_series
    ]

    if not multivariate:
        self.evaluator = Evaluator()
    elif masked[0].ndim > 1:
        # Multi-dimensional targets: evaluate only the first dimension and
        # keep just that dimension of each series.
        self.evaluator = MultivariateEvaluator(eval_dims=[0])
        masked = [series[0] for series in masked]
    else:
        self.evaluator = MultivariateEvaluator()

    horizon = self.predictor.prediction_length
    self.targets = [series[-horizon:] for series in masked]
    self.past_datas = [series[:-horizon] for series in masked]
def test_lstnet(skip_size, ar_window, horizon, prediction_length, hybridize, dtype):
    """Train LSTNet for one epoch (horizon or prediction-length mode) and
    validate forecast shapes, start dates, and the aggregate ND metric.
    """
    estimator = LSTNetEstimator(
        skip_size=skip_size,
        ar_window=ar_window,
        # Use the shared NUM_SERIES constant instead of a hard-coded 10, so
        # this stays consistent with the shape assertions below (which already
        # check against NUM_SERIES).
        num_series=NUM_SERIES,
        channels=6,
        kernel_size=3,
        context_length=4,
        freq=freq,
        horizon=horizon,
        prediction_length=prediction_length,
        trainer=Trainer(
            epochs=1, batch_size=2, learning_rate=0.01, hybridize=hybridize
        ),
        dtype=dtype,
    )
    predictor = estimator.train(dataset.train)
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test, predictor=predictor, num_samples=NUM_SAMPLES
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)
    assert len(forecasts) == len(tss) == len(dataset.test)

    test_ds = dataset.test.list_data[0]
    for fct in forecasts:
        assert fct.freq == freq
        if estimator.horizon:
            # Horizon mode emits a single-step forecast per sample.
            assert fct.samples.shape == (NUM_SAMPLES, 1, NUM_SERIES)
        else:
            assert fct.samples.shape == (
                NUM_SAMPLES,
                prediction_length,
                NUM_SERIES,
            )
        # NOTE(review): `closed=` is deprecated/removed in newer pandas in
        # favor of `inclusive=` — confirm the pinned pandas version.
        assert (fct.start_date == pd.date_range(
            start=str(test_ds["start"]),
            periods=test_ds["target"].shape[1],  # number of test periods
            freq=freq,
            closed="right",
        )[-(horizon or prediction_length)])

    evaluator = MultivariateEvaluator(
        quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    )
    agg_metrics, item_metrics = evaluator(
        iter(tss), iter(forecasts), num_series=len(dataset.test)
    )
    assert agg_metrics["ND"] < 1.5
def test_deepvar(
    distr_output,
    num_batches_per_epoch,
    Estimator,
    hybridize,
    use_marginal_transformation,
):
    """Train a DeepVAR-family estimator for one epoch with a tiny learning
    rate and sanity-check that the backtested ND metric stays below 1.5.
    """
    estimator = Estimator(
        num_cells=20,
        num_layers=1,
        pick_incomplete=True,
        target_dim=target_dim,
        prediction_length=metadata.prediction_length,
        # Removed a commented-out duplicate `target_dim=` kwarg that was
        # dead code shadowing the argument above.
        freq=metadata.freq,
        distr_output=distr_output,
        scaling=False,
        use_marginal_transformation=use_marginal_transformation,
        trainer=Trainer(
            epochs=1,
            batch_size=8,
            learning_rate=1e-10,
            minimum_learning_rate=1e-13,
            num_batches_per_epoch=num_batches_per_epoch,
            hybridize=hybridize,
        ),
    )
    predictor = estimator.train(training_data=dataset.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=dataset.test,
        predictor=predictor,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 1.5
def test_deepvar_hierarchical(
    likelihood_weight,
    CRPS_weight,
    sample_LH,
    coherent_train_samples,
    coherent_pred_samples,
    warmstart_epoch_frac,
):
    """Train a hierarchical DeepVAR model briefly and sanity-check that the
    backtested ND metric stays below 1.5."""
    meta = train_datasets.metadata
    estimator = DeepVARHierarchicalEstimator(
        freq=meta.freq,
        prediction_length=prediction_length,
        target_dim=meta.S.shape[0],
        S=meta.S,
        likelihood_weight=likelihood_weight,
        CRPS_weight=CRPS_weight,
        sample_LH=sample_LH,
        coherent_train_samples=coherent_train_samples,
        coherent_pred_samples=coherent_pred_samples,
        warmstart_epoch_frac=warmstart_epoch_frac,
        trainer=Trainer(epochs=10, num_batches_per_epoch=1, hybridize=False),
        num_samples_for_loss=10,
    )
    predictor = estimator.train(training_data=train_datasets.train)
    agg_metrics, _ = backtest_metrics(
        test_dataset=train_datasets.test,
        predictor=predictor,
        evaluator=MultivariateEvaluator(
            quantiles=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        ),
    )
    assert agg_metrics["ND"] < 1.5