# NOTE: the module-level constants (CD_NUM_STEPS, CD_NUM_TIME_SERIES,
# CD_MAX_LEN_MULTIPLICATION_FACTOR, SPLITTING_SAMPLE_PROBABILITY, CONTEXT_LEN,
# BATCH_SIZE) are assumed to be defined elsewhere in this test module; the
# import paths below are those of the MXNet-based GluonTS releases that
# provide this ValidationDataLoader API and may differ in other versions.
import random

import numpy as np
import pandas as pd
from mxnet.context import current_context

from gluonts.dataset.artificial import ConstantDataset
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import ValidationDataLoader
from gluonts.transform import Chain, InstanceSplitter, UniformSplitSampler

_data_cache = None  # module-level cache, populated on first call


def get_dataset_and_transformation():
    # don't recompute, since this is expensive
    global _data_cache
    if _data_cache is not None:
        return _data_cache

    # create constant dataset with each time series having
    # variable length and unique constant integer entries
    dataset = ConstantDataset(
        num_steps=CD_NUM_STEPS, num_timeseries=CD_NUM_TIME_SERIES
    )
    list_dataset = list(dataset.train)
    for i, ts in enumerate(list_dataset):
        ts["start"] = pd.Timestamp(ts_input=ts["start"], freq=dataset.freq)
        # repeat the target a random number of times to vary the ts lengths
        ts["target"] = np.array(
            ts["target"] * random.randint(1, CD_MAX_LEN_MULTIPLICATION_FACTOR)
        )
    list_dataset = ListDataset(data_iter=list_dataset, freq=dataset.freq)
    list_dataset_pred_length = dataset.prediction_length

    # use every possible time point to split the time series
    transformation = Chain(
        [
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=UniformSplitSampler(
                    p=SPLITTING_SAMPLE_PROBABILITY  # THIS IS IMPORTANT FOR THE TEST
                ),
                past_length=CONTEXT_LEN,
                future_length=list_dataset_pred_length,
                dummy_value=1.0,
            ),
        ]
    )

    # reference: validation dataset processed without multiprocessing
    train_data_transformed_original = list(
        ValidationDataLoader(
            dataset=list_dataset,
            transform=transformation,
            batch_size=BATCH_SIZE,
            num_workers=0,  # single-process: this is the crucial difference
            ctx=current_context(),
        )
    )

    _data_cache = (
        list_dataset,
        transformation,
        list_dataset_pred_length,
        train_data_transformed_original,
    )

    return _data_cache
Example #2
from collections import OrderedDict
from functools import partial

from gluonts.dataset.artificial import ConstantDataset
from gluonts.dataset.common import TrainDatasets, load_datasets
from gluonts.dataset.repository._artificial import generate_artificial_dataset
from gluonts.dataset.repository._gp_copula_2019 import (
    generate_gp_copula_dataset,
)
from gluonts.dataset.repository._lstnet import generate_lstnet_dataset
from gluonts.dataset.repository._m3 import generate_m3_dataset
from gluonts.dataset.repository._m4 import generate_m4_dataset
from gluonts.dataset.repository._m5 import generate_m5_dataset
from gluonts.dataset.repository._tsf_datasets import (
    generate_forecasting_dataset,
)
from gluonts.support.util import get_download_path

dataset_recipes = OrderedDict({
    # each recipe generates a dataset given a path
    "constant": partial(generate_artificial_dataset, dataset=ConstantDataset()),
    "exchange_rate": partial(generate_lstnet_dataset, dataset_name="exchange_rate"),
    "solar-energy": partial(generate_lstnet_dataset, dataset_name="solar-energy"),
    "electricity": partial(generate_lstnet_dataset, dataset_name="electricity"),
    "traffic": partial(generate_lstnet_dataset, dataset_name="traffic"),
    "exchange_rate_nips": partial(generate_gp_copula_dataset, dataset_name="exchange_rate_nips"),
    "electricity_nips": partial(generate_gp_copula_dataset, dataset_name="electricity_nips"),
    "traffic_nips": partial(generate_gp_copula_dataset, dataset_name="traffic_nips"),
    "solar_nips":