Пример #1
0
def load_multivariate_constant_dataset():
    """Group the constant dataset's series into one multivariate dataset."""
    metadata, train_ds, test_ds = constant_dataset()
    train_grouper = MultivariateGrouper(max_target_dim=NUM_SERIES)
    test_grouper = MultivariateGrouper(max_target_dim=NUM_SERIES)
    grouped_train = train_grouper(train_ds)
    grouped_test = test_grouper(test_ds)
    return TrainDatasets(
        metadata=metadata, train=grouped_train, test=grouped_test
    )
Пример #2
0
def load_multivariate_constant_dataset():
    """Build a multivariate TrainDatasets from the constant dataset.

    The metadata is augmented in place with the dataset's prediction
    length before being returned alongside the grouped train/test splits.
    """
    dataset_info, train_ds, test_ds = constant_dataset()
    grouper_train = MultivariateGrouper(max_target_dim=10)
    grouper_test = MultivariateGrouper(num_test_dates=1, max_target_dim=10)
    metadata = dataset_info.metadata
    metadata.prediction_length = dataset_info.prediction_length
    return TrainDatasets(
        # use the local `metadata` consistently; the original re-read
        # dataset_info.metadata, which worked only because it aliases
        # the object mutated above
        metadata=metadata,
        train=grouper_train(train_ds),
        test=grouper_test(test_ds),
    )
Пример #3
0
def test_shuffle_iter() -> None:
    """ShuffleIter must yield exactly as many elements as its base iterator."""
    # a small synthetic dataset built from a range
    records = [{str(i): str(i)} for i in range(20)]
    shuffled = ShuffleIter(
        base_iterator=iter(records), shuffle_buffer_length=10
    )
    assert len(list(shuffled)) == 20

    # same check against the constant gluonts dataset
    _, train_ds, _ = constant_dataset()
    first_copy, second_copy = itertools.tee(iter(train_ds), 2)
    shuffled = ShuffleIter(base_iterator=first_copy, shuffle_buffer_length=5)
    assert len(list(shuffled)) == len(list(second_copy))
Пример #4
0
def test_max_normalize():
    """After MaxNormalize, each constant series is all zeros or all ones."""
    info, train_ds, test_ds = constant_dataset()
    datasets = TrainDatasets(info.metadata, train_ds, test_ds)
    normalize = MaxNormalize(datasets).apply()

    # metadata must survive normalization untouched
    assert normalize.datasets.metadata == datasets.metadata

    def expected(idx, length):
        # series 0 is the all-zero series; every other series is constant
        # non-zero and therefore normalizes to all ones
        fill = np.zeros if idx == 0 else np.ones
        return fill(length, dtype=np.float32)

    for idx, entry in enumerate(normalize.datasets.train):
        target = entry["target"]
        assert np.all(target == expected(idx, len(target)))

    assert normalize.datasets.test is not None
    for idx, entry in enumerate(normalize.datasets.test):
        target = entry["target"]
        assert np.all(target == expected(idx, len(target)))
Пример #5
0
def test_benchmark(caplog):
    """A backtest's logged output can be parsed back into a
    BacktestInformation that matches the original run."""
    with caplog.at_level(logging.DEBUG):
        dataset_info, train_ds, test_ds = constant_dataset()

        estimator = make_estimator(
            dataset_info.metadata.freq, dataset_info.prediction_length
        )
        evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
        backtest_metrics(train_ds, test_ds, estimator, evaluator)
        train_stats = calculate_dataset_statistics(train_ds)
        test_stats = calculate_dataset_statistics(test_ds)

    recovered = BacktestInformation.make_from_log_contents(caplog.text)

    assert recovered.train_dataset_stats == train_stats
    assert recovered.test_dataset_stats == test_stats
    assert equals(estimator, recovered.estimator)

    print(recovered)
def test_general_functionality() -> None:
    """End-to-end smoke test: train DeepAR on the constant dataset and
    backtest the resulting predictor."""
    ds_info, train_ds, test_ds = constant_dataset()

    estimator = DeepAREstimator(
        prediction_length=ds_info.prediction_length,
        freq=ds_info.metadata.freq,
        trainer=Trainer(epochs=3, num_batches_per_epoch=5),
    )

    predictor = estimator.train(training_data=train_ds)

    agg_metrics, item_metrics = backtest_metrics(
        test_dataset=test_ds,
        predictor=predictor,
        evaluator=Evaluator(calculate_owa=False),
    )

    # just some sanity check
    assert agg_metrics is not None and item_metrics is not None, \
        "Metrics should not be None if everything went smooth."
Пример #7
0
def test_forecast_parser():
    """Estimator, dataset statistics and metrics survive a round trip
    through their string representations."""
    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.freq, dataset_info.prediction_length
    )
    assert repr(load_code(repr(estimator))) == repr(estimator)

    stats = calculate_dataset_statistics(train_ds)
    assert eval(repr(stats)) == stats  # TODO: use load

    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, _ = backtest_metrics(train_ds, test_ds, estimator, evaluator)

    # infinite metrics cannot round-trip; zero them out first
    for name in agg_metrics:
        if not math.isfinite(agg_metrics[name]):
            agg_metrics[name] = 0.0

    assert load_code(dump_code(agg_metrics)) == agg_metrics
Пример #8
0
def test_benchmark(caplog):
    """A benchmark run logged to a file can be reconstructed from that log."""
    caplog.set_level(logging.DEBUG, logger='log.txt')

    dataset_info, train_ds, test_ds = constant_dataset()

    estimator = make_estimator(
        dataset_info.metadata.time_granularity,
        dataset_info.prediction_length,
    )
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    backtest_metrics(train_ds, test_ds, estimator, evaluator)

    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    log_path = Path(__file__).parent / 'log.txt'
    recovered = BacktestInformation.make_from_log(str(log_path))

    assert recovered.train_dataset_stats == train_stats
    assert recovered.test_dataset_stats == test_stats
    assert equals(estimator, recovered.estimator)

    print(recovered)
Пример #9
0
def test_appendix_c():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Appendix C
    """
    from typing import List
    from mxnet import gluon
    from gluonts.model.estimator import GluonEstimator
    from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
    from gluonts.trainer import Trainer
    from gluonts.transform import (
        InstanceSplitter,
        FieldName,
        Transformation,
        ExpectedNumInstanceSampler,
    )
    from gluonts.core.component import validated
    from gluonts.support.util import copy_parameters

    class MyTrainNetwork(gluon.HybridBlock):
        """Feed-forward network trained with an L1 (median) loss."""

        def __init__(self, prediction_length, cells, act_type, **kwargs):
            super().__init__(**kwargs)
            self.prediction_length = prediction_length
            with self.name_scope():
                # Set up a network that predicts the target
                self.nn = gluon.nn.HybridSequential()
                for c in cells:
                    self.nn.add(gluon.nn.Dense(units=c, activation=act_type))
                    # NOTE(review): this output-sized Dense sits inside the
                    # loop, so one is appended after every hidden layer —
                    # this mirrors the paper's listing; confirm it is
                    # intentional rather than an indentation slip.
                    self.nn.add(
                        gluon.nn.Dense(units=self.prediction_length,
                                       activation=act_type))

        def hybrid_forward(self, F, past_target, future_target):
            prediction = self.nn(past_target)
            # calculate L1 loss to learn the median
            return (prediction - future_target).abs().mean(axis=-1)

    class MyPredNetwork(MyTrainNetwork):
        # The prediction network only receives
        # past target and returns predictions
        def hybrid_forward(self, F, past_target):
            prediction = self.nn(past_target)
            # add a singleton sample axis on top of the network output
            return prediction.expand_dims(axis=1)

    class MyEstimator(GluonEstimator):
        """Minimal GluonEstimator wiring the train/pred networks together."""

        @validated()
        def __init__(
                self,
                freq: str,
                prediction_length: int,
                act_type: str = "relu",
                context_length: int = 30,
                cells: List[int] = [40, 40, 40],
                trainer: Trainer = Trainer(epochs=10),
        ) -> None:
            super().__init__(trainer=trainer)
            self.freq = freq
            self.prediction_length = prediction_length
            self.act_type = act_type
            self.context_length = context_length
            self.cells = cells

        def create_training_network(self) -> MyTrainNetwork:
            # Network used during training; consumes future_target for loss.
            return MyTrainNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

        def create_predictor(
            self,
            transformation: Transformation,
            trained_network: gluon.HybridBlock,
        ) -> Predictor:
            # Build an inference-only network with the same architecture
            # and copy the trained weights into it.
            prediction_network = MyPredNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

            copy_parameters(trained_network, prediction_network)

            return RepresentableBlockPredictor(
                input_transform=transformation,
                prediction_net=prediction_network,
                batch_size=self.trainer.batch_size,
                freq=self.freq,
                prediction_length=self.prediction_length,
                ctx=self.trainer.ctx,
            )

        def create_transformation(self):
            # Model specific input transform
            # Here we use a transformation that randomly
            # selects training samples from all series.
            return InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.context_length,
                future_length=self.prediction_length,
            )

    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    # Train the custom estimator for one epoch on the constant dataset and
    # backtest it end-to-end.
    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = MyEstimator(
        freq=meta.time_granularity,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)

    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
Пример #10
0
        ref = data["target"][
            -SEASON_LENGTH : -SEASON_LENGTH + PREDICTION_LENGTH
        ]

        assert forecast.start_date == forecast_start(data)

        # specifically for the seasonal naive we can test the supposed result directly
        if predictor_cls == SeasonalNaivePredictor:
            assert np.allclose(forecast.samples[0], ref)


# CONSTANT DATASET TESTS:


# Shared fixtures for the constant-dataset tests below: the dataset info,
# its train/test splits, and frequency / prediction-length constants
# derived from the metadata.
dataset_info, constant_train_ds, constant_test_ds = constant_dataset()
CONSTANT_DATASET_FREQ = dataset_info.metadata.freq
CONSTANT_DATASET_PREDICTION_LENGTH = dataset_info.prediction_length


def seasonal_naive_predictor():
    """Return the SeasonalNaivePredictor class with its constructor kwargs."""
    params = dict(prediction_length=CONSTANT_DATASET_PREDICTION_LENGTH)
    return SeasonalNaivePredictor, params


def naive_2_predictor():
    return (
        Naive2Predictor,
        dict(prediction_length=CONSTANT_DATASET_PREDICTION_LENGTH),
from itertools import islice

from gluonts.distribution import StudentTOutput, StudentT
from gluonts.dataset.artificial import constant_dataset
from gluonts.dataset.loader import TrainDataLoader
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.model.deepar import DeepAREstimator
import mxnet as mx
from gluonts.trainer import Trainer

# Module-level fixtures built once from the constant dataset.
# NOTE(review): `time_granularity` appears to be an older name for the
# frequency field (`freq` elsewhere) — verify against the gluonts version.
ds_info, train_ds, test_ds = constant_dataset()
freq = ds_info.metadata.time_granularity
prediction_length = ds_info.prediction_length


def test_shape():
    """
    Makes sure additional tensors can be accessed and have expected shapes
    """
    prediction_length = ds_info.prediction_length
    estimator = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
        trainer=Trainer(epochs=1, num_batches_per_epoch=1),
        distr_output=StudentTOutput(),
    )

    training_transformation, trained_net = estimator.train_model(train_ds)

    # todo adapt loader to anomaly detection use-case
    batch_size = 2
Пример #12
0
def generate_dataset(name):
    """Return a test dataset selected by ``name``.

    ``"constant"`` yields the constant dataset's test split; ``"varying"``
    yields a hand-built ListDataset covering the edge cases of a rolling
    window spanning time indices 20..24. Any other name raises ValueError.
    """
    dataset = None
    if name == "constant":
        _, _, dataset = constant_dataset()
    elif name == "varying":
        # Tests edge cases
        # t0: start time of target
        # ts: start time of desired range
        # te: end time of desired range
        # t1: end time of target
        # ts < te, t0 <= t1
        #
        # start time index of rolling window is 20
        # end time index of rolling window is 24
        # ts = 2000-01-01 20:00:00
        # te = 2000-01-02 00:00:00

        ds_list = [
            {  # test 1: ends after end time, te > t1
                "target": [0.0] * 30,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 2: ends at the end time, te == t1
                "target": [0.0] * 25,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 3: ends between start and end times, ts < t1 < te
                "target": [0.0] * 23,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 4: ends on start time, ts == t1
                "target": [0.0] * 20,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 5: ends before start time, t1 < ts
                "target": [0.0] * 15,
                "start": pd.Timestamp(2000, 1, 1, 0, 0),
            },
            {  # test 6: starts on start ends after end, ts == t0, te > t1
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 1, 20, 0),
            },
            {  # test 7: starts in between ts and te, ts < t0 < te < t1
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 1, 22, 0),
            },
            {  # test 8: starts on end time, te == t0
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 2, 0, 0),
            },
            {  # test 9: starts after end time, te < t0
                "target": [0.0] * 10,
                "start": pd.Timestamp(2000, 1, 2, 1, 0),
            },
            {  # test 10: starts after ts & ends before te, ts < t0 < t1 < te
                "target": [0.0] * 3,
                "start": pd.Timestamp(2000, 1, 1, 21, 0),
            },
        ]
        dataset = ListDataset(ds_list, "H")
    else:
        # BUG FIX: the original called `pytest.raises(ValueError)` here,
        # which only constructs a context manager and raises nothing, so
        # unknown names silently returned None. Fail loudly instead.
        raise ValueError(f"unknown dataset name: {name}")
    return dataset
Пример #13
0
)


@pytest.mark.parametrize(
    "data, n, expected",
    [([1, 2, 3], 7, [1, 2, 3, 1, 2, 3, 1]), ([], 4, [])],
)
def test_cyclic(data: Iterable, n: int, expected: List) -> None:
    """Cyclic repeats its data forever; the first n items must match."""
    assert list(itertools.islice(Cyclic(data), n)) == expected


@pytest.mark.parametrize(
    "data",
    [
        range(20),
        constant_dataset()[1],
    ],
)
def test_pseudo_shuffled(data: Iterable) -> None:
    """Shuffling must preserve the multiset of elements."""
    original = list(data)
    shuffled = list(PseudoShuffled(iter(original), shuffle_buffer_length=5))
    assert len(shuffled) == len(original)
    assert all(item in shuffled for item in original)


@pytest.mark.parametrize(
    "data, expected_elements_per_iteration",
    [
        (Cached(range(4)), (list(range(4)), ) * 5),
        (batcher(range(10), 3), ([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]], [])),
Пример #14
0
def default_list_dataset():
    """Yield the train split of the constant dataset fixture."""
    _, train_split, _ = constant_dataset()
    yield train_split
Пример #15
0
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from typing import Iterable, List
import itertools

import pytest

from gluonts.dataset.artificial import constant_dataset
from gluonts.itertools import pseudo_shuffled, cyclic


@pytest.mark.parametrize(
    "data, n, expected", [([1, 2, 3], 7, [1, 2, 3, 1, 2, 3, 1]), ([], 4, [])]
)
def test_cyclic(data: Iterable, n: int, expected: List) -> None:
    """cyclic() repeats the data; the first n items must equal expected."""
    assert list(itertools.islice(cyclic(data), n)) == expected


@pytest.mark.parametrize("data", [range(20), constant_dataset()[1]])
def test_pseudo_shuffled(data: Iterable) -> None:
    """pseudo_shuffled keeps exactly the original elements."""
    original = list(data)
    result = list(pseudo_shuffled(iter(original), shuffle_buffer_length=5))
    assert len(result) == len(original)
    assert all(element in result for element in original)
Пример #16
0
 def __enter__(self):
     """Enter the context, returning the constant dataset's train split."""
     _, train_split, _ = constant_dataset()
     return train_split