import random

import numpy as np
import pandas as pd
from mxnet.context import current_context

from gluonts.dataset.artificial import ConstantDataset
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import ValidationDataLoader
from gluonts.transform import Chain, InstanceSplitter, UniformSplitSampler

# the test-level constants (CD_NUM_STEPS, CD_NUM_TIME_SERIES,
# CD_MAX_LEN_MULTIPLICATION_FACTOR, CONTEXT_LEN, SPLITTING_SAMPLE_PROBABILITY,
# BATCH_SIZE) are assumed to be defined elsewhere in this test module;
# only the cache default is spelled out here
_data_cache = None


def get_dataset_and_transformation():
    # don't recompute, since it is expensive
    global _data_cache
    if _data_cache is not None:
        return _data_cache

    # create a constant dataset with each time series having
    # variable length and unique constant integer entries
    dataset = ConstantDataset(
        num_steps=CD_NUM_STEPS, num_timeseries=CD_NUM_TIME_SERIES
    )
    list_dataset = list(dataset.train)
    for i, ts in enumerate(list_dataset):
        ts["start"] = pd.Timestamp(ts_input=ts["start"], freq=dataset.freq)
        # get randomness in the ts lengths
        ts["target"] = np.array(
            ts["target"] * random.randint(1, CD_MAX_LEN_MULTIPLICATION_FACTOR)
        )
    list_dataset = ListDataset(data_iter=list_dataset, freq=dataset.freq)
    list_dataset_pred_length = dataset.prediction_length

    # use every possible time point to split the time series
    transformation = Chain(
        [
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=UniformSplitSampler(
                    p=SPLITTING_SAMPLE_PROBABILITY  # THIS IS IMPORTANT FOR THE TEST
                ),
                past_length=CONTEXT_LEN,
                future_length=list_dataset_pred_length,
                dummy_value=1.0,
            ),
        ]
    )

    # reference: validation dataset processed without multiprocessing
    train_data_transformed_original = list(
        ValidationDataLoader(
            dataset=list_dataset,
            transform=transformation,
            batch_size=BATCH_SIZE,
            num_workers=0,  # this is the crucial difference
            ctx=current_context(),
        )
    )

    _data_cache = (
        list_dataset,
        transformation,
        list_dataset_pred_length,
        train_data_transformed_original,
    )
    return _data_cache
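
# A minimal sketch (an assumption, not part of the original test file) of how the
# cached tuple above might be consumed: build the same ValidationDataLoader with
# multiprocessing workers enabled and check that it yields the same total number
# of instances as the single-process baseline. The worker count and the counting
# helper are illustrative choices, not values taken from the source.
def test_validation_loader_instance_count_sketch():
    (
        list_dataset,
        transformation,
        pred_length,
        train_data_transformed_original,
    ) = get_dataset_and_transformation()

    # same loader as above, but exercising the multiprocessing code path
    train_data_transformed_mp = list(
        ValidationDataLoader(
            dataset=list_dataset,
            transform=transformation,
            batch_size=BATCH_SIZE,
            num_workers=2,  # assumption: any value > 0 enables multiprocessing
            ctx=current_context(),
        )
    )

    def num_instances(batches):
        # each batch is a dict of arrays; the first axis is the batch dimension
        return sum(batch["past_target"].shape[0] for batch in batches)

    # a validation loader must emit every instance exactly once,
    # independently of the number of workers
    assert num_instances(train_data_transformed_mp) == num_instances(
        train_data_transformed_original
    )
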
from collections import OrderedDict
from functools import partial

from gluonts.dataset.artificial import ConstantDataset
from gluonts.dataset.common import TrainDatasets, load_datasets
from gluonts.dataset.repository._artificial import generate_artificial_dataset
from gluonts.dataset.repository._gp_copula_2019 import (
    generate_gp_copula_dataset,
)
from gluonts.dataset.repository._lstnet import generate_lstnet_dataset
from gluonts.dataset.repository._m3 import generate_m3_dataset
from gluonts.dataset.repository._m4 import generate_m4_dataset
from gluonts.dataset.repository._m5 import generate_m5_dataset
from gluonts.dataset.repository._tsf_datasets import (
    generate_forecasting_dataset,
)
from gluonts.support.util import get_download_path

dataset_recipes = OrderedDict({
    # each recipe generates a dataset given a path
    "constant": partial(
        generate_artificial_dataset, dataset=ConstantDataset()
    ),
    "exchange_rate": partial(
        generate_lstnet_dataset, dataset_name="exchange_rate"
    ),
    "solar-energy": partial(
        generate_lstnet_dataset, dataset_name="solar-energy"
    ),
    "electricity": partial(
        generate_lstnet_dataset, dataset_name="electricity"
    ),
    "traffic": partial(generate_lstnet_dataset, dataset_name="traffic"),
    "exchange_rate_nips": partial(
        generate_gp_copula_dataset, dataset_name="exchange_rate_nips"
    ),
    "electricity_nips": partial(
        generate_gp_copula_dataset, dataset_name="electricity_nips"
    ),
    "traffic_nips": partial(
        generate_gp_copula_dataset, dataset_name="traffic_nips"
    ),
    "solar_nips": partial(
        generate_gp_copula_dataset, dataset_name="solar_nips"
    ),
    # further recipes (m3, m4, m5, tsf forecasting datasets, ...) are
    # registered in the same fashion
})
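
# A minimal sketch (an assumption, not part of the original module) of how a
# recipe from `dataset_recipes` might be materialized: look up the generator by
# name and invoke it with a target directory. The helper name
# `materialize_if_missing` and the `dataset_path=` keyword are illustrative
# assumptions; the real repository module wires this up through its own helpers.
def materialize_if_missing(dataset_name, path=None):
    # default to the shared gluonts download location
    base_path = path if path is not None else get_download_path() / "datasets"
    dataset_path = base_path / dataset_name

    if not dataset_path.exists():
        recipe = dataset_recipes[dataset_name]
        recipe(dataset_path=dataset_path)  # assumed call signature, see note above

    return dataset_path


# example (hypothetical) usage:
# materialize_if_missing("electricity_nips")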