def test_recipe_dataset(recipe) -> None:
    """Check the train/test splits produced by a ``RecipeDataset``.

    A dataset is generated from ``recipe`` with ``max_train_length=20``,
    ``prediction_length=10`` and ``num_timeseries=10``, using a randomised
    ``trim_length_fun``. The test then verifies that:

    * every test series has at least ``prediction_length`` target values,
    * every test series is at least ``prediction_length`` values longer than
      the train series at the same position, and
    * the train split contains ``num_timeseries`` series.

    Args:
        recipe: The recipe handed to ``RecipeDataset``.
    """
    prediction_length = 10
    num_timeseries = 10

    def trim_length(x, **kwargs):
        # Draw a geometric trim length with mean ``train_length / 2`` and cap
        # it at ``train_length``.
        train_length = kwargs["train_length"]
        drawn = int(np.random.geometric(1 / (train_length / 2)))
        return np.minimum(drawn, train_length)

    data = RecipeDataset(
        recipe=recipe,
        metadata=MetaData(
            freq="D",
            feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
            feat_static_cat=[
                CategoricalFeatureInfo(name="foo", cardinality=10)
            ],
            feat_dynamic_real=[BasicFeatureInfo(name="binary_causal")],
        ),
        max_train_length=20,
        prediction_length=prediction_length,
        num_timeseries=num_timeseries,
        trim_length_fun=trim_length,
    )

    generated = data.generate()
    # Materialise each split exactly once and reuse the lists below.
    generated_train = list(generated.train)
    generated_test = list(generated.test)

    train_lengths = np.array([len(x["target"]) for x in generated_train])
    test_lengths = np.array([len(x["target"]) for x in generated_test])

    assert np.all(test_lengths >= prediction_length)
    assert np.all(test_lengths - train_lengths >= prediction_length)
    # Count the already-materialised list instead of iterating the train
    # split a second time.
    assert len(generated_train) == num_timeseries
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Build a small hourly dataset whose series are all constant.

    Ten series are created. Series ``i`` has every target value equal to
    ``float(i)`` and carries ``i`` as its static categorical feature and
    ``float(i)`` as its static real feature. Train series hold 24 values and
    test series hold 30; all start at ``2000-01-01 00:00:00``.

    Returns:
        A ``(info, train_ds, test_ds)`` tuple. ``info`` is a ``DatasetInfo``
        with prediction length 2, the shared metadata, and the results of
        ``calculate_dataset_statistics`` for each split.
    """
    n_series = 10
    first_timestamp = '2000-01-01 00:00:00'

    meta = MetaData(
        freq='1H',
        feat_static_cat=[
            CategoricalFeatureInfo(
                name='feat_static_cat_000', cardinality='10'
            )
        ],
        feat_static_real=[BasicFeatureInfo(name='feat_static_real_000')],
    )

    def _constant_series(length):
        # The two splits differ only in how many target values each has.
        entries = []
        for idx in range(n_series):
            entries.append(
                {
                    'item': str(idx),
                    'start': first_timestamp,
                    'target': [float(idx)] * length,
                    'feat_static_cat': [idx],
                    'feat_static_real': [float(idx)],
                }
            )
        return ListDataset(data_iter=entries, freq=meta.freq)

    train_ds = _constant_series(24)
    test_ds = _constant_series(30)

    train_stats = calculate_dataset_statistics(train_ds)
    test_stats = calculate_dataset_statistics(test_ds)

    info = DatasetInfo(
        name='constant_dataset',
        metadata=meta,
        prediction_length=2,
        train_statistics=train_stats,
        test_statistics=test_stats,
    )
    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Return a constant-valued hourly dataset with its ``DatasetInfo``.

    Both splits contain ten series starting at ``2000-01-01 00:00:00``.
    Series ``i`` repeats ``float(i)`` as its target (24 values in the train
    split, 30 in the test split) and uses ``i`` / ``float(i)`` as its static
    categorical / static real feature.

    Returns:
        ``(info, train_ds, test_ds)``, where ``info`` has prediction length 2
        and holds the metadata plus the statistics returned by
        ``calculate_dataset_statistics`` for the train and test splits.
    """
    series_ids = range(10)
    start_date = "2000-01-01 00:00:00"

    static_cat_info = CategoricalFeatureInfo(
        name="feat_static_cat_000", cardinality="10"
    )
    static_real_info = BasicFeatureInfo(name="feat_static_real_000")
    metadata = MetaData(
        freq="1H",
        feat_static_cat=[static_cat_info],
        feat_static_real=[static_real_info],
    )

    def make_split(num_points):
        # Same records for both splits apart from the target length.
        return ListDataset(
            data_iter=[
                {
                    "item": str(k),
                    "start": start_date,
                    "target": [float(k)] * num_points,
                    "feat_static_cat": [k],
                    "feat_static_real": [float(k)],
                }
                for k in series_ids
            ],
            freq=metadata.freq,
        )

    train_ds, test_ds = make_split(24), make_split(30)

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def constant_dataset() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Create train/test ``ListDataset`` objects made of constant series.

    Ten hourly series are produced, all starting at ``2000-01-01 00:00:00``
    and keyed by ``FieldName`` constants. Series ``i`` has a target of
    ``float(i)`` repeated 24 times (train) or 30 times (test), with ``[i]``
    as static categorical feature and ``[float(i)]`` as static real feature.

    Returns:
        A tuple ``(info, train_ds, test_ds)``. ``info`` is a ``DatasetInfo``
        named ``"constant_dataset"`` with prediction length 2, carrying the
        metadata and the ``calculate_dataset_statistics`` result per split.
    """
    num_series = 10
    start_date = "2000-01-01 00:00:00"

    metadata = MetaData(
        freq="1H",
        feat_static_cat=[
            CategoricalFeatureInfo(
                name="feat_static_cat_000", cardinality="10"
            )
        ],
        feat_static_real=[BasicFeatureInfo(name="feat_static_real_000")],
    )

    def _record(index, target_length):
        # A single series whose value and features all derive from its index.
        level = float(index)
        return {
            FieldName.ITEM_ID: str(index),
            FieldName.START: start_date,
            FieldName.TARGET: [level] * target_length,
            FieldName.FEAT_STATIC_CAT: [index],
            FieldName.FEAT_STATIC_REAL: [float(index)],
        }

    def _dataset(target_length):
        records = [_record(n, target_length) for n in range(num_series)]
        return ListDataset(data_iter=records, freq=metadata.freq)

    train_ds = _dataset(24)
    test_ds = _dataset(30)

    info = DatasetInfo(
        name="constant_dataset",
        metadata=metadata,
        prediction_length=2,
        train_statistics=calculate_dataset_statistics(train_ds),
        test_statistics=calculate_dataset_statistics(test_ds),
    )
    return info, train_ds, test_ds
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate the default synthetic dataset from a fixed recipe.

    The recipe defines three fields, in this order:

    * ``FieldName.TARGET``: ``LinearTrend() + RandomGaussian()``;
    * ``FieldName.FEAT_STATIC_CAT``: ``RandomCat([10])``;
    * ``FieldName.FEAT_STATIC_REAL``: a ``ForEachCat`` over the static
      categorical field wrapping ``RandomGaussian(1, (10,))``, plus
      ``RandomGaussian(0.1, (10,))``.

    The ``RecipeDataset`` uses daily frequency, ``max_train_length=20``,
    ``prediction_length=10`` and ``num_timeseries=10``, together with a
    randomised ``trim_length_fun``.

    Returns:
        ``(info, train, test)``, where ``info`` comes from
        ``RecipeDataset.dataset_info`` applied to the generated splits.
    """
    target_term = LinearTrend() + RandomGaussian()
    static_cat_term = RandomCat([10])
    static_real_term = ForEachCat(
        RandomGaussian(1, (10,)), FieldName.FEAT_STATIC_CAT
    ) + RandomGaussian(0.1, (10,))
    recipe = [
        (FieldName.TARGET, target_term),
        (FieldName.FEAT_STATIC_CAT, static_cat_term),
        (FieldName.FEAT_STATIC_REAL, static_real_term),
    ]

    metadata = MetaData(
        freq="D",
        feat_static_real=[BasicFeatureInfo(name=FieldName.FEAT_STATIC_REAL)],
        feat_static_cat=[
            CategoricalFeatureInfo(
                name=FieldName.FEAT_STATIC_CAT, cardinality=10
            )
        ],
        feat_dynamic_real=[
            BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
        ],
    )

    def _trim_length(x, **kwargs):
        # Geometric draw with mean ``train_length / 2``, capped at
        # ``train_length``.
        train_length = kwargs["train_length"]
        drawn = int(np.random.geometric(1 / (train_length / 2)))
        return np.minimum(drawn, train_length)

    data = RecipeDataset(
        recipe=recipe,
        metadata=metadata,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=_trim_length,
    )

    splits = data.generate()
    # The declared return type requires a concrete test dataset.
    assert splits.test is not None

    info = data.dataset_info(splits.train, splits.test)
    return info, splits.train, splits.test
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate the default synthetic dataset from a fixed recipe.

    The recipe fills ``'target'`` with ``LinearTrend() + RandomGaussian()``,
    ``'feat_static_cat'`` with ``RandomCat([10])`` and ``'feat_static_real'``
    with a ``ForEachCat`` over ``'feat_static_cat'`` wrapping
    ``RandomGaussian(1, 10)``, plus ``RandomGaussian(0.1, 10)``.

    The ``RecipeDataset`` is configured with ``time_granularity='D'``,
    ``max_train_length=20``, ``prediction_length=10`` and
    ``num_timeseries=10``, together with a randomised ``trim_length_fun``.

    Returns:
        ``(info, train, test)``, where ``info`` comes from
        ``RecipeDataset.dataset_info`` applied to the generated splits.
    """
    trend_plus_noise = LinearTrend() + RandomGaussian()
    category_sampler = RandomCat([10])
    per_category_real = ForEachCat(
        RandomGaussian(1, 10), 'feat_static_cat'
    ) + RandomGaussian(0.1, 10)
    recipe = [
        ('target', trend_plus_noise),
        ('feat_static_cat', category_sampler),
        ('feat_static_real', per_category_real),
    ]

    metadata = MetaData(
        time_granularity='D',
        feat_static_real=[BasicFeatureInfo(name='feat_static_real')],
        feat_static_cat=[
            CategoricalFeatureInfo(name='feat_static_cat', cardinality=10)
        ],
        feat_dynamic_real=[BasicFeatureInfo(name='feat_dynamic_real')],
    )

    def _trim_length(x, **kwargs):
        # Geometric draw with mean ``train_length / 2``, capped at
        # ``train_length``.
        limit = kwargs['train_length']
        sampled = int(np.random.geometric(1 / (limit / 2)))
        return np.minimum(sampled, limit)

    data = RecipeDataset(
        recipe=recipe,
        metadata=metadata,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=_trim_length,
    )

    generated = data.generate()
    # The declared return type requires a concrete test dataset.
    assert generated.test is not None

    info = data.dataset_info(generated.train, generated.test)
    return info, generated.train, generated.test
# Test data include fcast_length which are ground truths. test_data = df2gluonts(processed_df_fill, cat_inverted_idx, fcast_len=0, freq=freq, ts_id=['sku', 'Label', 'Custname'], static_cat=['sku', 'Label', 'Custname']) gluonts_datasets = TrainDatasets( metadata=MetaData( freq=freq, target={'name': 'quantity'}, feat_static_cat=[ # Add 'unknown'. CategoricalFeatureInfo(name=k, cardinality=len(v) + 1) for k, v in cat_inverted_idx.items() ], prediction_length=fcast_length), train=train_data, test=test_data) # %% epochs = 20 metric = [ { "Name": "train:loss", "Regex": r"Epoch\[\d+\] Evaluation metric 'epoch_loss'=(\S+)" }, {