def test_make_get_and_cast_tensors_fn(self):
  """Checks tensor selection, dtype casting and renaming of the returned fn."""
  tensors = {
      't1': tf.constant(value=0, dtype=tf.int32),
      't2': tf.constant(value=-1.0, dtype=tf.float32),
      't3': tf.constant(value=1.0, dtype=tf.float64),
  }
  # Selection only (all dtypes None): behaves like a plain get_tensors_fn.
  select_fn = base.make_get_and_cast_tensors_fn(output_tensors={
      't1': None,
      't2': None,
  })
  self.assertTrue(callable(select_fn))
  self.assertEqual(select_fn(tensors), {
      't1': tensors['t1'],
      't2': tensors['t2']
  })
  # Both selected tensors casted to a different dtype.
  cast_fn = base.make_get_and_cast_tensors_fn(output_tensors={
      't1': tf.float64,
      't2': tf.float64,
  })
  self.assertTrue(callable(cast_fn))
  casted = cast_fn(tensors)
  self.assertSetEqual(set(casted.keys()), {'t1', 't2'})
  self.assertEqual(casted['t1'].dtype, tf.float64)
  self.assertEqual(casted['t2'].dtype, tf.float64)
  # Mixed spec: rename-and-cast, cast-only, and plain pass-through.
  mixed_fn = base.make_get_and_cast_tensors_fn(
      output_tensors={
          't1': ('t1_new_name', tf.float64),
          't2': tf.float64,
          't3': None,
      })
  self.assertTrue(callable(mixed_fn))
  mixed = mixed_fn(tensors)
  self.assertSetEqual(set(mixed.keys()), {'t1_new_name', 't2', 't3'})
  self.assertEqual(mixed['t1_new_name'].dtype, tf.float64)
  self.assertEqual(mixed['t2'].dtype, tf.float64)
  self.assertEqual(mixed['t3'].dtype, tensors['t3'].dtype)
  # Asking for an input key that does not exist raises when the fn is applied.
  missing_key_fn = base.make_get_and_cast_tensors_fn(output_tensors={
      't1': None,
      't25': ('t2', tf.float32),
  })
  self.assertTrue(callable(missing_key_fn))
  with self.assertRaises(KeyError):
    missing_key_fn(tensors)
def __init__(self, subset="rgb", data_key="image", data_dir=None):
  """Initializes EuroSAT data, splitting the TFDS train split 3 ways.

  Args:
    subset: EuroSAT TFDS config name (e.g. "rgb").
    data_key: feature key holding the image; "sentinel2" images carry 13
      channels, any other value is assumed to be 3-channel.
    data_dir: optional directory where TFDS data is stored.
  """
  dataset_name = "eurosat/{}:2.*.*".format(subset)
  dataset_builder = tfds.builder(dataset_name, data_dir=data_dir)
  dataset_builder.download_and_prepare()

  # Example counts are retrieved from the tensorflow dataset info.
  num_examples = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
  train_count = num_examples * TRAIN_SPLIT_PERCENT // 100
  val_count = num_examples * VALIDATION_SPLIT_PERCENT // 100
  # Bug fix: the "test" split below is "train[{}:]", i.e. everything after
  # train+val, so its size is the remainder. The previous
  # `num_examples * TEST_SPLIT_PERCENT // 100` undercounted whenever
  # num_examples is not divisible by 100.
  test_count = num_examples - train_count - val_count

  tfds_splits = {
      "train": "train[:{}]".format(train_count),
      "val": "train[{}:{}]".format(train_count, train_count + val_count),
      "trainval": "train[:{}]".format(train_count + val_count),
      "test": "train[{}:]".format(train_count + val_count),
      "train800": "train[:800]",
      "val200": "train[{}:{}]".format(train_count, train_count + 200),
      "train800val200": "train[:800]+train[{}:{}]".format(
          train_count, train_count + 200),
  }
  # Creates a dict with example counts for each split.
  num_samples_splits = {
      "train": train_count,
      "val": val_count,
      "trainval": train_count + val_count,
      "test": test_count,
      "train800": 800,
      "val200": 200,
      "train800val200": 1000,
  }

  num_channels = 3
  if data_key == "sentinel2":
    num_channels = 13

  super(EurosatData, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=100,
      shuffle_buffer_size=10000,
      # Rename the requested data_key feature to "image"; keep dtypes.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          data_key: ("image", None),
          "label": ("label", None),
      }),
      image_key=data_key,
      num_channels=num_channels,
      num_classes=dataset_builder.info.features["label"].num_classes)
def __init__(self, data_dir=None):
  """Initializes SVHN data from the `svhn_cropped` TFDS dataset."""
  dataset_builder = tfds.builder("svhn_cropped:3.*.*", data_dir=data_dir)
  dataset_builder.download_and_prepare()

  # Split sizes come from the TFDS dataset metadata.
  trainval_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
  test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples

  # "train" is a fixed percentage of the original training set; "val" is the
  # remainder, so the two always add up to trainval_count exactly.
  train_count = TRAIN_SPLIT_PERCENT * trainval_count // 100
  num_samples_splits = {
      "train": train_count,
      "val": trainval_count - train_count,
      "trainval": trainval_count,
      "test": test_count,
      "train800": 800,
      "val200": 200,
      "train800val200": 1000,
  }

  # The validation set is carved out of the original training split, and the
  # remaining examples form "train"; "trainval" is the untouched original
  # training set.
  tfds_splits = {
      "train": "train[:{}]".format(train_count),
      "val": "train[{}:]".format(train_count),
      "trainval": "train",
      "test": "test",
      "train800": "train[:800]",
      "val200": "train[{}:{}]".format(train_count, train_count + 200),
      "train800val200": "train[:800]+train[{}:{}]".format(
          train_count, train_count + 200),
  }

  super(SvhnData, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Note: Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes)
def __init__(self, data_dir=None):
  """Initializes RESISC-45 data, splitting the TFDS train split 3 ways.

  Args:
    data_dir: optional directory where TFDS data is stored.
  """
  dataset_builder = tfds.builder("resisc45:3.*.*", data_dir=data_dir)
  dataset_builder.download_and_prepare()

  # Example counts are retrieved from the tensorflow dataset info.
  num_examples = dataset_builder.info.splits["train"].num_examples
  train_count = num_examples * TRAIN_SPLIT_PERCENT // 100
  val_count = num_examples * VALIDATION_SPLIT_PERCENT // 100
  # Bug fix: the "test" split below is "train[{}:]", i.e. everything after
  # train+val, so its size is the remainder. The previous
  # `num_examples * TEST_SPLIT_PERCENT // 100` undercounted whenever
  # num_examples is not divisible by 100.
  test_count = num_examples - train_count - val_count

  tfds_splits = {
      "train": "train[:{}]".format(train_count),
      "val": "train[{}:{}]".format(train_count, train_count + val_count),
      "trainval": "train[:{}]".format(train_count + val_count),
      "test": "train[{}:]".format(train_count + val_count),
      "train800": "train[:800]",
      "val200": "train[{}:{}]".format(train_count, train_count + 200),
      "train800val200": "train[:800]+train[{}:{}]".format(train_count,
                                                          train_count + 200),
  }
  # Creates a dict with example counts for each split.
  num_samples_splits = {
      "train": train_count,
      "val": val_count,
      "trainval": train_count + val_count,
      "test": test_count,
      "train800": 800,
      "val200": 200,
      "train800val200": 1000,
  }

  super(Resisc45Data, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Note: Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes)
def __init__(self, year=2017, data_dir=None):
  """Initializes iNaturalist competition data for a supported year."""
  supported_years = [2017]
  if year not in supported_years:
    raise ValueError(
        "Only competitions from years {!r} are supported, but {!r} was given"
        .format(supported_years, year))
  dataset_builder = tfds.builder("i_naturalist{}:0.1.0".format(year),
                                 data_dir=data_dir)

  # Example counts are retrieved from the tensorflow dataset info. The
  # original validation split serves as the held-out "test" set, so "val" is
  # carved out of the original training split instead.
  trainval_count = dataset_builder.info.splits[
      tfds.Split.TRAIN].num_examples
  train_count = int(round(trainval_count * TRAIN_SPLIT_PERCENT / 100.0))
  test_count = dataset_builder.info.splits[
      tfds.Split.VALIDATION].num_examples

  tfds_splits = {
      "train": "train[:{}%]".format(TRAIN_SPLIT_PERCENT),
      "val": "train[{}%:]".format(TRAIN_SPLIT_PERCENT),
      "trainval": "train",
      "test": "validation"
  }
  # Creates a dict with example counts for each split; "val" is the part of
  # the original training split not used by "train".
  num_samples_splits = {
      "train": train_count,
      "val": trainval_count - train_count,
      "trainval": trainval_count,
      "test": test_count
  }

  super(INaturalistData, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes,
      image_key="image")
def __init__(self, data_dir=None):
  """Initializes DMLab data, which ships with native TFDS splits."""
  dataset_builder = tfds.builder("dmlab:2.0.1", data_dir=data_dir)

  # Split sizes come from the TFDS dataset metadata; DMLab already provides
  # separate train/validation/test splits, so no slicing of "train" is needed.
  splits_info = dataset_builder.info.splits
  train_count = splits_info["train"].num_examples
  val_count = splits_info["validation"].num_examples
  test_count = splits_info["test"].num_examples

  tfds_splits = {
      "train": "train",
      "val": "validation",
      "trainval": "train+validation",
      "test": "test",
      "train800": "train[:800]",
      "val200": "validation[:200]",
      "train800val200": "train[:800]+validation[:200]",
  }
  # Creates a dict with example counts for each split.
  num_samples_splits = {
      "train": train_count,
      "val": val_count,
      "trainval": train_count + val_count,
      "test": test_count,
      "train800": 800,
      "val200": 200,
      "train800val200": 1000,
  }

  super(DmlabData, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes,
      image_key="image")
def __init__(self, data_dir=None):
  """Initializes Oxford Flowers-102 data using its native TFDS splits."""
  dataset_builder = tfds.builder("oxford_flowers102:2.*.*", data_dir=data_dir)
  dataset_builder.download_and_prepare()

  # The dataset ships with train/validation/test splits, so they map
  # directly; "trainval" concatenates the first two.
  tfds_splits = {
      "train": "train",
      "val": "validation",
      "trainval": "train+validation",
      "test": "test",
  }

  # Split sizes come from the TFDS dataset metadata.
  splits_info = dataset_builder.info.splits
  train_count = splits_info[tfds.Split.TRAIN].num_examples
  val_count = splits_info[tfds.Split.VALIDATION].num_examples
  test_count = splits_info[tfds.Split.TEST].num_examples
  num_samples_splits = {
      "train": train_count,
      "val": val_count,
      "trainval": train_count + val_count,
      "test": test_count
  }

  super(OxfordFlowers102Data, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Note: Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes)
def __init__(self, data_dir=None, train_split_percent=None):
  """Initializes Oxford Flowers-102 data, optionally re-splitting train/val.

  Args:
    data_dir: optional directory where TFDS data is stored.
    train_split_percent: optional int in (0, 100). When set, "train" takes
      the first train_split_percent% of both the original train and
      validation splits, and "val" takes the remainder of each; when unset,
      the native TFDS train/validation splits are used as-is.
  """
  dataset_builder = tfds.builder("oxford_flowers102:2.*.*", data_dir=data_dir)
  dataset_builder.download_and_prepare()

  # Example counts are retrieved from the tensorflow dataset info.
  train_count = dataset_builder.info.splits[
      tfds.Split.TRAIN].num_examples
  val_count = dataset_builder.info.splits[
      tfds.Split.VALIDATION].num_examples
  test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples

  if train_split_percent:
    # Bug fix: "val" previously used "train[-{s}%:]+validation[-{s}%:]" with
    # s = train_split_percent, i.e. the *last s%* of each split. That both
    # overlaps the "train" slice (the first s%) and contradicts the
    # (100 - s)% example count declared below. "val" must be the complement
    # of "train": everything from s% onward.
    tfds_splits = {
        "train":
            "train[:{s}%]+validation[:{s}%]".format(s=train_split_percent),
        "val":
            "train[{s}%:]+validation[{s}%:]".format(s=train_split_percent),
        "trainval": "train+validation",
        "test": "test",
        "train800": "train[:800]",
        "val200": "validation[:200]",
        "train800val200": "train[:800]+validation[:200]",
    }
    # NOTE(review): these counts are approximations — TFDS percent slicing
    # rounds per split, so the true sizes can differ by a few examples when
    # the split sizes are not multiples of 100; confirm against consumers
    # that require exact counts.
    num_samples_splits = {
        "train": (((train_count + val_count) // 100) * train_split_percent),
        "val": (((train_count + val_count) // 100) *
                (100 - train_split_percent)),
        "trainval": train_count + val_count,
        "test": test_count,
        "train800": 800,
        "val200": 200,
        "train800val200": 1000,
    }
  else:
    tfds_splits = {
        "train": "train",
        "val": "validation",
        "trainval": "train+validation",
        "test": "test",
        "train800": "train[:800]",
        "val200": "validation[:200]",
        "train800val200": "train[:800]+validation[:200]",
    }
    num_samples_splits = {
        "train": train_count,
        "val": val_count,
        "trainval": train_count + val_count,
        "test": test_count,
        "train800": 800,
        "val200": 200,
        "train800val200": 1000,
    }

  super(OxfordFlowers102Data, self).__init__(
      dataset_builder=dataset_builder,
      tfds_splits=tfds_splits,
      num_samples_splits=num_samples_splits,
      num_preprocessing_threads=400,
      shuffle_buffer_size=10000,
      # Note: Rename tensors but keep their original types.
      base_preprocess_fn=base.make_get_and_cast_tensors_fn({
          "image": ("image", None),
          "label": ("label", None),
      }),
      num_classes=dataset_builder.info.features["label"].num_classes)