def load_multivariate_constant_dataset():
    # NUM_SERIES is a module-level constant in the original test file
    metadata, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=NUM_SERIES)
    grouper_test = MultivariateGrouper(max_target_dim=NUM_SERIES)
    return TrainDatasets(
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )
def load_multivariate_constant_dataset():
    dataset_info, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=10)
    grouper_test = MultivariateGrouper(num_test_dates=1, max_target_dim=10)
    metadata = dataset_info.metadata
    metadata.prediction_length = dataset_info.prediction_length
    return TrainDatasets(
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )
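# Usage sketch (hedged: the 2D target layout is the documented behavior of
# MultivariateGrouper, but this shape check is illustrative, not part of the
# original tests): the grouper stacks the univariate constant series into a
# single multivariate entry with one row per original series.
def example_grouped_shapes():
    datasets = load_multivariate_constant_dataset()
    for entry in datasets.train:
        assert entry["target"].ndim == 2  # (num_series, num_timesteps)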
def test_shuffle_iter() -> None:
    # test with range data
    data = [{str(i): str(i)} for i in range(20)]
    shuffled_data = ShuffleIter(
        base_iterator=iter(data), shuffle_buffer_length=10
    )
    assert len(list(shuffled_data)) == 20

    # test with constant gluonts dataset
    ds_info, train_ds, test_ds = constant_dataset()
    base_iter, base_iter_backup = itertools.tee(iter(train_ds), 2)
    shuffled_data = ShuffleIter(
        base_iterator=base_iter, shuffle_buffer_length=5
    )
    assert len(list(shuffled_data)) == len(list(base_iter_backup))
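# Minimal sketch of the shuffle-buffer technique exercised above (an
# assumption about how ShuffleIter works internally, not the library code):
# keep up to `buffer_length` elements in memory and emit them in random
# order, so the stream is shuffled without materializing the whole dataset.
import random


def buffered_shuffle_sketch(iterator, buffer_length):
    buffer = []
    for element in iterator:
        buffer.append(element)
        if len(buffer) >= buffer_length:
            yield buffer.pop(random.randrange(len(buffer)))
    while buffer:  # drain whatever is left once the source is exhausted
        yield buffer.pop(random.randrange(len(buffer)))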
def test_max_normalize():
    info, train_ds, test_ds = constant_dataset()
    datasets = TrainDatasets(info.metadata, train_ds, test_ds)
    normalize = MaxNormalize(datasets).apply()
    assert normalize.datasets.metadata == datasets.metadata
    for i, train_data in enumerate(normalize.datasets.train):
        train = train_data["target"]
        if i == 0:
            assert np.all(train == np.zeros(len(train), dtype=np.float32))
        else:
            assert np.all(train == np.ones(len(train), dtype=np.float32))
    assert normalize.datasets.test is not None
    for i, test_data in enumerate(normalize.datasets.test):
        test = test_data["target"]
        if i == 0:
            assert np.all(test == np.zeros(len(test), dtype=np.float32))
        else:
            assert np.all(test == np.ones(len(test), dtype=np.float32))
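# Why the assertions above hold (a sketch of max-normalization as assumed
# here, not the MaxNormalize source): series i of constant_dataset() is
# constant with value i, and dividing a series by its maximum maps any
# constant nonzero series to all ones, while the all-zero series (i == 0)
# is left unchanged.
import numpy as np


def max_normalize_sketch(target):
    m = target.max()
    return target / m if m != 0 else target


assert np.all(max_normalize_sketch(np.zeros(5, dtype=np.float32)) == 0.0)
assert np.all(max_normalize_sketch(np.full(5, 10.0, dtype=np.float32)) == 1.0)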
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    with caplog.at_level(logging.DEBUG):
        dataset_info, train_ds, test_ds = constant_dataset()

        estimator = make_estimator(
            dataset_info.metadata.freq, dataset_info.prediction_length
        )
        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
        backtest_metrics(train_ds, test_ds, estimator, evaluator)
        train_stats = calculate_dataset_statistics(train_ds)
        test_stats = calculate_dataset_statistics(test_ds)

        log_info = BacktestInformation.make_from_log_contents(caplog.text)

        assert train_stats == log_info.train_dataset_stats
        assert test_stats == log_info.test_dataset_stats
        assert equals(estimator, log_info.estimator)

        print(log_info)
def test_general_functionality() -> None:
    ds_info, train_ds, test_ds = constant_dataset()
    freq = ds_info.metadata.freq
    prediction_length = ds_info.prediction_length

    trainer = Trainer(epochs=3, num_batches_per_epoch=5)
    estimator = DeepAREstimator(
        prediction_length=prediction_length, freq=freq, trainer=trainer
    )
    predictor = estimator.train(training_data=train_ds)

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=False),
    )

    # just some sanity check
    assert (
        agg_metrics is not None and item_metrics is not None
    ), "Metrics should not be None if everything went smoothly."
def test_forecast_parser():
    # verify that the logged estimator, dataset statistics and metrics can be
    # recovered from their string representation
    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.freq, dataset_info.prediction_length
    )
    assert repr(estimator) == repr(load_code(repr(estimator)))

    stats = calculate_dataset_statistics(train_ds)
    assert stats == eval(repr(stats))  # TODO: use load

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, _ = backtest_metrics(train_ds, test_ds, estimator, evaluator)

    # reset infinite metrics to 0 (otherwise the assertion below fails)
    for key, val in agg_metrics.items():
        if not math.isfinite(val):
            agg_metrics[key] = 0.0

    assert agg_metrics == load_code(dump_code(agg_metrics))
def test_benchmark(caplog):
    # makes sure that information logged can be reconstructed from previous
    # logs
    caplog.set_level(logging.DEBUG, logger="log.txt")

    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.time_granularity, dataset_info.prediction_length
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    backtest_metrics(train_ds, test_ds, estimator, evaluator)
    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    log_file = str(Path(__file__).parent / "log.txt")
    log_info = BacktestInformation.make_from_log(log_file)

    assert train_stats == log_info.train_dataset_stats
    assert test_stats == log_info.test_dataset_stats
    assert equals(estimator, log_info.estimator)

    print(log_info)
def test_appendix_c():
    """
    Test the GluonTS paper examples from https://arxiv.org/abs/1906.05264,
    Appendix C.
    """
    from typing import List

    from mxnet import gluon

    from gluonts.core.component import validated
    from gluonts.model.estimator import GluonEstimator
    from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
    from gluonts.support.util import copy_parameters
    from gluonts.trainer import Trainer
    from gluonts.transform import (
        ExpectedNumInstanceSampler,
        FieldName,
        InstanceSplitter,
        Transformation,
    )

    class MyTrainNetwork(gluon.HybridBlock):
        def __init__(self, prediction_length, cells, act_type, **kwargs):
            super().__init__(**kwargs)
            self.prediction_length = prediction_length
            with self.name_scope():
                # Set up a network that predicts the target
                self.nn = gluon.nn.HybridSequential()
                for c in cells:
                    self.nn.add(gluon.nn.Dense(units=c, activation=act_type))
                self.nn.add(
                    gluon.nn.Dense(
                        units=self.prediction_length, activation=act_type
                    )
                )

        def hybrid_forward(self, F, past_target, future_target):
            prediction = self.nn(past_target)
            # calculate L1 loss to learn the median
            return (prediction - future_target).abs().mean(axis=-1)

    class MyPredNetwork(MyTrainNetwork):
        # The prediction network only receives
        # past target and returns predictions
        def hybrid_forward(self, F, past_target):
            prediction = self.nn(past_target)
            return prediction.expand_dims(axis=1)

    class MyEstimator(GluonEstimator):
        @validated()
        def __init__(
            self,
            freq: str,
            prediction_length: int,
            act_type: str = "relu",
            context_length: int = 30,
            cells: List[int] = [40, 40, 40],
            trainer: Trainer = Trainer(epochs=10),
        ) -> None:
            super().__init__(trainer=trainer)
            self.freq = freq
            self.prediction_length = prediction_length
            self.act_type = act_type
            self.context_length = context_length
            self.cells = cells

        def create_training_network(self) -> MyTrainNetwork:
            return MyTrainNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

        def create_predictor(
            self,
            transformation: Transformation,
            trained_network: gluon.HybridBlock,
        ) -> Predictor:
            prediction_network = MyPredNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )
            copy_parameters(trained_network, prediction_network)
            return RepresentableBlockPredictor(
                input_transform=transformation,
                prediction_net=prediction_network,
                batch_size=self.trainer.batch_size,
                freq=self.freq,
                prediction_length=self.prediction_length,
                ctx=self.trainer.ctx,
            )

        def create_transformation(self):
            # Model specific input transform
            # Here we use a transformation that randomly
            # selects training samples from all series.
            return InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.context_length,
                future_length=self.prediction_length,
            )

    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = MyEstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
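    # Sanity check added for illustration (mirrors the check used in
    # test_general_functionality above; not part of the original appendix
    # example).
    assert agg_metrics is not None and item_metrics is not None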
    ref = data["target"][-SEASON_LENGTH : -SEASON_LENGTH + PREDICTION_LENGTH]

    assert forecast.start_date == forecast_start(data)

    # specifically for the seasonal naive we can test the expected result
    # directly
    if predictor_cls == SeasonalNaivePredictor:
        assert np.allclose(forecast.samples[0], ref)


# CONSTANT DATASET TESTS:

dataset_info, constant_train_ds, constant_test_ds = constant_dataset()
CONSTANT_DATASET_FREQ = dataset_info.metadata.freq
CONSTANT_DATASET_PREDICTION_LENGTH = dataset_info.prediction_length


def seasonal_naive_predictor():
    return (
        SeasonalNaivePredictor,
        dict(prediction_length=CONSTANT_DATASET_PREDICTION_LENGTH),
    )


def naive_2_predictor():
    return (
        Naive2Predictor,
        dict(prediction_length=CONSTANT_DATASET_PREDICTION_LENGTH),
    )
from itertools import islice

import mxnet as mx

from gluonts.dataset.artificial import constant_dataset
from gluonts.dataset.loader import TrainDataLoader
from gluonts.distribution import StudentT, StudentTOutput
from gluonts.model.deepar import DeepAREstimator
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.trainer import Trainer

ds_info, train_ds, test_ds = constant_dataset()
freq = ds_info.metadata.time_granularity
prediction_length = ds_info.prediction_length


def test_shape():
    """
    Makes sure additional tensors can be accessed and have expected shapes
    """
    prediction_length = ds_info.prediction_length
    estimator = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
        trainer=Trainer(epochs=1, num_batches_per_epoch=1),
        distr_output=StudentTOutput(),
    )

    training_transformation, trained_net = estimator.train_model(train_ds)

    # todo adapt loader to anomaly detection use-case
    batch_size = 2
def generate_dataset(name):
    dataset = None
    if name == "constant":
        _, _, dataset = constant_dataset()
    elif name == "varying":
        # Tests edge cases
        # t0: start time of target
        # ts: start time of desired range
        # te: end time of desired range
        # t1: end time of target
        # ts < te, t0 <= t1
        #
        # start time index of rolling window is 20
        # end time index of rolling window is 24
        # ts = 2000-01-01 20:00:00
        # te = 2000-01-02 00:00:00
        ds_list = [
            {  # test 1: ends after end time, t1 > te
                "target": [0.0] * 30,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 2: ends at the end time, te == t1
                "target": [0.0] * 25,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 3: ends between start and end times, ts < t1 < te
                "target": [0.0] * 23,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 4: ends on start time, ts == t1
                "target": [0.0] * 20,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 5: ends before start time, t1 < ts
                "target": [0.0] * 15,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 6: starts on start, ends after end, ts == t0, t1 > te
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 1, 20, 0),
            },
            {  # test 7: starts in between ts and te, ts < t0 < te < t1
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 1, 22, 0),
            },
            {  # test 8: starts on end time, te == t0
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 2, 0, 0),
            },
            {  # test 9: starts after end time, te < t0
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 2, 1, 0),
            },
            {  # test 10: starts after ts & ends before te, ts < t0 < t1 < te
                "target": [0.0] * 3,
                "start": pd.Timestamp(2000, 1, 1, 21, 0),
            },
        ]
        dataset = ListDataset(ds_list, "H")
    else:
        # the original `pytest.raises(ValueError)` statement was a no-op;
        # raise instead so callers can assert on the error
        raise ValueError(f"unknown dataset name: {name}")
    return dataset
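# Hedged usage sketch: the "varying" dataset above is built to exercise a
# rolling-window splitter over ts=2000-01-01 20:00 .. te=2000-01-02 00:00.
# generate_rolling_dataset and StepStrategy are assumed to be importable from
# gluonts.dataset.rolling_dataset; treat this as a sketch of the intended use,
# not a verified call signature.
def example_rolling_usage():
    from gluonts.dataset.rolling_dataset import (
        StepStrategy,
        generate_rolling_dataset,
    )

    return generate_rolling_dataset(
        dataset=generate_dataset("varying"),
        strategy=StepStrategy(prediction_length=2),
        start_time=pd.Timestamp("2000-01-01 20:00", freq="H"),
        end_time=pd.Timestamp("2000-01-02 00:00", freq="H"),
    )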
)


@pytest.mark.parametrize(
    "data, n, expected",
    [([1, 2, 3], 7, [1, 2, 3, 1, 2, 3, 1]), ([], 4, [])],
)
def test_cyclic(data: Iterable, n: int, expected: List) -> None:
    cyclic_data = Cyclic(data)
    actual = list(itertools.islice(cyclic_data, n))
    assert actual == expected


@pytest.mark.parametrize(
    "data",
    [
        range(20),
        constant_dataset()[1],
    ],
)
def test_pseudo_shuffled(data: Iterable) -> None:
    list_data = list(data)
    shuffled_iter = PseudoShuffled(iter(list_data), shuffle_buffer_length=5)
    shuffled_data = list(shuffled_iter)
    assert len(shuffled_data) == len(list_data)
    assert all(d in shuffled_data for d in list_data)


@pytest.mark.parametrize(
    "data, expected_elements_per_iteration",
    [
        (Cached(range(4)), (list(range(4)),) * 5),
        (batcher(range(10), 3), ([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]], [])),
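# Minimal sketch of the cycling behavior tested above (an assumption about
# what Cyclic does, not the library implementation): restart iteration over
# the underlying iterable indefinitely, terminating only when it is empty,
# which matches both parametrized cases ([1, 2, 3] cycles; [] yields nothing).
def cyclic_sketch(iterable):
    while True:
        yielded = False
        for element in iterable:
            yielded = True
            yield element
        if not yielded:  # an empty iterable would otherwise spin forever
            return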
def default_list_dataset():
    yield constant_dataset()[1]
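# Hedged context: a generator like the one above is typically registered as a
# pytest fixture; the decorator and consuming test below are illustrative
# assumptions, not part of the original file.
#
# @pytest.fixture
# def default_list_dataset():
#     yield constant_dataset()[1]
#
# def test_uses_dataset(default_list_dataset):
#     assert len(list(default_list_dataset)) > 0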
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

import itertools
from typing import Iterable, List

import pytest

from gluonts.dataset.artificial import constant_dataset
from gluonts.itertools import cyclic, pseudo_shuffled


@pytest.mark.parametrize(
    "data, n, expected",
    [([1, 2, 3], 7, [1, 2, 3, 1, 2, 3, 1]), ([], 4, [])],
)
def test_cyclic(data: Iterable, n: int, expected: List) -> None:
    cyclic_data = cyclic(data)
    actual = list(itertools.islice(cyclic_data, n))
    assert actual == expected


@pytest.mark.parametrize("data", [range(20), constant_dataset()[1]])
def test_pseudo_shuffled(data: Iterable) -> None:
    list_data = list(data)
    shuffled_iter = pseudo_shuffled(iter(list_data), shuffle_buffer_length=5)
    shuffled_data = list(shuffled_iter)
    assert len(shuffled_data) == len(list_data)
    assert all(d in shuffled_data for d in list_data)
def __enter__(self):
    return constant_dataset()[1]
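# Context for the method above (a hedged sketch: the enclosing class name and
# its __exit__ behavior are assumptions, since only __enter__ appears here):
class ConstantDatasetContext:
    """Context manager yielding the training split of constant_dataset()."""

    def __enter__(self):
        return constant_dataset()[1]

    def __exit__(self, exc_type, exc_value, traceback):
        return False  # do not suppress exceptions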