Example #1
    def test_make_get_and_cast_tensors_fn(self):
        input_dict = {
            't1': tf.constant(value=0, dtype=tf.int32),
            't2': tf.constant(value=-1.0, dtype=tf.float32),
            't3': tf.constant(value=1.0, dtype=tf.float64),
        }
        # Equivalent to get_tensors_fn.
        fn = base.make_get_and_cast_tensors_fn(output_tensors={
            't1': None,
            't2': None,
        })
        self.assertTrue(callable(fn))
        self.assertEqual(fn(input_dict), {
            't1': input_dict['t1'],
            't2': input_dict['t2']
        })

        # Cast to different type.
        fn = base.make_get_and_cast_tensors_fn(output_tensors={
            't1': tf.float64,
            't2': tf.float64,
        })
        self.assertTrue(callable(fn))
        output_dict = fn(input_dict)
        self.assertSetEqual(set(output_dict.keys()), {'t1', 't2'})
        self.assertEqual(output_dict['t1'].dtype, tf.float64)
        self.assertEqual(output_dict['t2'].dtype, tf.float64)

        # General case.
        fn = base.make_get_and_cast_tensors_fn(
            output_tensors={
                't1': ('t1_new_name', tf.float64),
                't2': tf.float64,
                't3': None,
            })
        self.assertTrue(callable(fn))
        output_dict = fn(input_dict)
        self.assertSetEqual(set(output_dict.keys()),
                            {'t1_new_name', 't2', 't3'})
        self.assertEqual(output_dict['t1_new_name'].dtype, tf.float64)
        self.assertEqual(output_dict['t2'].dtype, tf.float64)
        self.assertEqual(output_dict['t3'].dtype, input_dict['t3'].dtype)

        # Input key does not exist, so applying the function raises KeyError.
        fn = base.make_get_and_cast_tensors_fn(output_tensors={
            't1': None,
            't25': ('t2', tf.float32),
        })
        self.assertTrue(callable(fn))
        with self.assertRaises(KeyError):
            fn(input_dict)
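
For reference, here is a minimal sketch of the behavior the test above pins down. This is an illustrative reimplementation inferred from the assertions, not the library's actual source: each value in output_tensors may be None (keep the tensor unchanged), a dtype (cast), or a (new_name, dtype) pair (rename, and cast unless the dtype is None).

import tensorflow as tf

def make_get_and_cast_tensors_fn_sketch(output_tensors):
  # Illustrative only; inferred from the test, not the real implementation.
  def fn(tensors):
    out = {}
    for key, spec in output_tensors.items():
      new_name, dtype = spec if isinstance(spec, tuple) else (key, spec)
      tensor = tensors[key]  # A missing key raises KeyError, per the test.
      out[new_name] = tensor if dtype is None else tf.cast(tensor, dtype)
    return out
  return fn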
Example #2
  def __init__(self, subset="rgb", data_key="image", data_dir=None):
    dataset_name = "eurosat/{}:2.*.*".format(subset)
    dataset_builder = tfds.builder(dataset_name, data_dir=data_dir)
    dataset_builder.download_and_prepare()

    # Example counts are retrieved from the tensorflow dataset info.
    num_examples = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
    train_count = num_examples * TRAIN_SPLIT_PERCENT // 100
    val_count = num_examples * VALIDATION_SPLIT_PERCENT // 100
    test_count = num_examples * TEST_SPLIT_PERCENT // 100

    tfds_splits = {
        "train":
            "train[:{}]".format(train_count),
        "val":
            "train[{}:{}]".format(train_count, train_count+val_count),
        "trainval":
            "train[:{}]".format(train_count+val_count),
        "test":
            "train[{}:]".format(train_count+val_count),
        "train800":
            "train[:800]",
        "val200":
            "train[{}:{}]".format(train_count, train_count+200),
        "train800val200":
            "train[:800]+train[{}:{}]".format(train_count, train_count+200),
    }

    # Creates a dict with example counts for each split.
    num_samples_splits = {
        "train": train_count,
        "val": val_count,
        "trainval": train_count + val_count,
        "test": test_count,
        "train800": 800,
        "val200": 200,
        "train800val200": 1000,
    }

    num_channels = 3
    if data_key == "sentinel2":
      num_channels = 13

    super(EurosatData, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=100,
        shuffle_buffer_size=10000,
        base_preprocess_fn=base.make_get_and_cast_tensors_fn({
            data_key: ("image", None),
            "label": ("label", None),
        }),
        image_key=data_key,
        num_channels=num_channels,
        num_classes=dataset_builder.info.features["label"].num_classes)
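
The split arithmetic above carves train/val/test out of EuroSAT's single native "train" split. A quick standalone check of the resulting slice strings, assuming the module-level constants (defined outside this snippet) are TRAIN/VALIDATION/TEST = 60/20/20:

num_examples = 27000                    # eurosat ships one "train" split
train_count = num_examples * 60 // 100  # 16200 -> "train[:16200]"
val_count = num_examples * 20 // 100    # 5400  -> "train[16200:21600]"
test_count = num_examples * 20 // 100   # 5400  -> "train[21600:]"
assert train_count + val_count + test_count == num_examples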
Example #3
  def __init__(self, data_dir=None):
    dataset_builder = tfds.builder("svhn_cropped:3.*.*", data_dir=data_dir)
    dataset_builder.download_and_prepare()

    # Example counts are retrieved from the tensorflow dataset info.
    trainval_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
    test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples

    # Creates a dict with example counts for each split.
    num_samples_splits = {
        # Calculates the train/val split example count based on percent.
        "train": TRAIN_SPLIT_PERCENT * trainval_count // 100,
        "val": trainval_count - TRAIN_SPLIT_PERCENT * trainval_count // 100,
        "trainval": trainval_count,
        "test": test_count,
        "train800": 800,
        "val200": 200,
        "train800val200": 1000,
    }

    # Defines dataset specific train/val/trainval/test splits.
    # The validation set is split out of the original training set, and the
    # remaining examples are used as the "train" split. The "trainval" split
    # corresponds to the original training set.
    tfds_splits = {
        "train":
            "train[:{}]".format(num_samples_splits["train"]),
        "val":
            "train[{}:]".format(num_samples_splits["train"]),
        "trainval":
            "train",
        "test":
            "test",
        "train800":
            "train[:800]",
        "val200":
            "train[{}:{}]".format(num_samples_splits["train"],
                                  num_samples_splits["train"] + 200),
        "train800val200":
            "train[:800]+train[{}:{}]".format(
                num_samples_splits["train"], num_samples_splits["train"] + 200),
    }

    super(SvhnData, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=400,
        shuffle_buffer_size=10000,
        # Note: Rename tensors but keep their original types.
        base_preprocess_fn=base.make_get_and_cast_tensors_fn({
            "image": ("image", None),
            "label": ("label", None),
        }),
        num_classes=dataset_builder.info.features["label"].num_classes)
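
Note that "val" is computed as the remainder of the training set rather than as its own percentage, so train and val always tile the original split exactly, even when the example count is not divisible by 100. A sketch with SVHN's actual training-set size, assuming TRAIN_SPLIT_PERCENT = 90 (the constant is defined outside this snippet):

trainval_count = 73257                # svhn_cropped "train" split size
train = 90 * trainval_count // 100    # 65931 -> "train[:65931]"
val = trainval_count - train          # 7326  -> "train[65931:]"
assert train + val == trainval_count  # nothing lost to flooring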
Example #4
    def __init__(self, data_dir=None):
        dataset_builder = tfds.builder("resisc45:3.*.*", data_dir=data_dir)
        dataset_builder.download_and_prepare()

        # Example counts are retrieved from the tensorflow dataset info.
        num_examples = dataset_builder.info.splits["train"].num_examples
        train_count = num_examples * TRAIN_SPLIT_PERCENT // 100
        val_count = num_examples * VALIDATION_SPLIT_PERCENT // 100
        test_count = num_examples * TEST_SPLIT_PERCENT // 100

        tfds_splits = {
            "train":
                "train[:{}]".format(train_count),
            "val":
                "train[{}:{}]".format(train_count, train_count + val_count),
            "trainval":
                "train[:{}]".format(train_count + val_count),
            "test":
                "train[{}:]".format(train_count + val_count),
            "train800":
                "train[:800]",
            "val200":
                "train[{}:{}]".format(train_count, train_count + 200),
            "train800val200":
                "train[:800]+train[{}:{}]".format(train_count,
                                                  train_count + 200),
        }

        # Creates a dict with example counts for each split.
        num_samples_splits = {
            "train": train_count,
            "val": val_count,
            "trainval": train_count + val_count,
            "test": test_count,
            "train800": 800,
            "val200": 200,
            "train800val200": 1000,
        }

        super(Resisc45Data, self).__init__(
            dataset_builder=dataset_builder,
            tfds_splits=tfds_splits,
            num_samples_splits=num_samples_splits,
            num_preprocessing_threads=400,
            shuffle_buffer_size=10000,
            # Note: Rename tensors but keep their original types.
            base_preprocess_fn=base.make_get_and_cast_tensors_fn({
                "image": ("image", None),
                "label": ("label", None),
            }),
            num_classes=dataset_builder.info.features["label"].num_classes)
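
One subtlety: test_count is computed with floor division, but the "test" slice string is open-ended ("train[{}:]"), so any examples dropped by flooring still land in the test split. With RESISC45's 31500 examples and assumed 60/20/20 constants, the numbers happen to line up exactly:

num_examples = 31500                    # resisc45: 45 classes x 700 images
train_count = num_examples * 60 // 100  # 18900
val_count = num_examples * 20 // 100    # 6300
test_count = num_examples * 20 // 100   # 6300
# "train[25200:]" holds 31500 - 25200 = 6300 examples, matching test_count;
# with a total not divisible by 100, the open-ended slice would hold more.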
Example #5
    def __init__(self, year=2017, data_dir=None):
        supported_years = [2017]
        if year not in supported_years:
            raise ValueError(
                "Only competitions from years {!r} are supported, but {!r} was given"
                .format(supported_years, year))
        dataset_builder = tfds.builder("i_naturalist{}:0.1.0".format(year),
                                       data_dir=data_dir)

        tfds_splits = {
            "train": "train[:{}%]".format(TRAIN_SPLIT_PERCENT),
            "val": "train[{}%:]".format(TRAIN_SPLIT_PERCENT),
            "trainval": "train",
            "test": "validation"
        }

        # Example counts are retrieved from the tensorflow dataset info.
        trainval_count = dataset_builder.info.splits[
            tfds.Split.TRAIN].num_examples
        train_count = int(round(trainval_count * TRAIN_SPLIT_PERCENT / 100.0))
        val_count = trainval_count - train_count
        test_count = dataset_builder.info.splits[
            tfds.Split.VALIDATION].num_examples

        # Creates a dict with example counts for each split.
        num_samples_splits = {
            "train": train_count,
            "val": val_count,
            "trainval": trainval_count,
            "test": test_count
        }

        super(INaturalistData, self).__init__(
            dataset_builder=dataset_builder,
            tfds_splits=tfds_splits,
            num_samples_splits=num_samples_splits,
            num_preprocessing_threads=400,
            shuffle_buffer_size=10000,
            base_preprocess_fn=base.make_get_and_cast_tensors_fn({
                "image": ("image", None),
                "label": ("label", None),
            }),
            num_classes=dataset_builder.info.features["label"].num_classes,
            image_key="image")
Example #6
    def __init__(self, data_dir=None):

        dataset_builder = tfds.builder("dmlab:2.0.1", data_dir=data_dir)

        tfds_splits = {
            "train": "train",
            "val": "validation",
            "trainval": "train+validation",
            "test": "test",
            "train800": "train[:800]",
            "val200": "validation[:200]",
            "train800val200": "train[:800]+validation[:200]",
        }

        # Example counts are retrieved from the tensorflow dataset info.
        train_count = dataset_builder.info.splits["train"].num_examples
        val_count = dataset_builder.info.splits["validation"].num_examples
        test_count = dataset_builder.info.splits["test"].num_examples

        # Creates a dict with example counts for each split.
        num_samples_splits = {
            "train": train_count,
            "val": val_count,
            "trainval": train_count + val_count,
            "test": test_count,
            "train800": 800,
            "val200": 200,
            "train800val200": 1000,
        }

        super(DmlabData, self).__init__(
            dataset_builder=dataset_builder,
            tfds_splits=tfds_splits,
            num_samples_splits=num_samples_splits,
            num_preprocessing_threads=400,
            shuffle_buffer_size=10000,
            base_preprocess_fn=base.make_get_and_cast_tensors_fn({
                "image": ("image", None),
                "label": ("label", None),
            }),
            num_classes=dataset_builder.info.features["label"].num_classes,
            image_key="image")
Example #7
  def __init__(self, data_dir=None):
    dataset_builder = tfds.builder("oxford_flowers102:2.*.*", data_dir=data_dir)
    dataset_builder.download_and_prepare()

    tfds_splits = {
        "train": "train",
        "val": "validation",
        "trainval": "train+validation",
        "test": "test",
    }

    # Example counts are retrieved from the tensorflow dataset info.
    train_count = dataset_builder.info.splits[tfds.Split.TRAIN].num_examples
    val_count = dataset_builder.info.splits[tfds.Split.VALIDATION].num_examples
    test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples

    # Creates a dict with example counts for each split.
    num_samples_splits = {
        "train": train_count,
        "val": val_count,
        "trainval": train_count + val_count,
        "test": test_count
    }

    super(OxfordFlowers102Data, self).__init__(
        dataset_builder=dataset_builder,
        tfds_splits=tfds_splits,
        num_samples_splits=num_samples_splits,
        num_preprocessing_threads=400,
        shuffle_buffer_size=10000,
        # Note: Rename tensors but keep their original types.
        base_preprocess_fn=base.make_get_and_cast_tensors_fn({
            "image": ("image", None),
            "label": ("label", None),
        }),
        num_classes=dataset_builder.info.features["label"].num_classes)
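
Oxford Flowers-102 is unusual in that the native training side is tiny (10 images per class in each of train and validation), which is why "trainval" is built by concatenating the two splits rather than slicing one. The counts can be checked directly against the builder metadata:

import tensorflow_datasets as tfds

builder = tfds.builder("oxford_flowers102:2.*.*")
builder.download_and_prepare()
print(builder.info.splits["train"].num_examples)       # 1020
print(builder.info.splits["validation"].num_examples)  # 1020
print(builder.info.splits["test"].num_examples)        # 6149
print(builder.info.features["label"].num_classes)      # 102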
Example #8
    def __init__(self, data_dir=None, train_split_percent=None):
        dataset_builder = tfds.builder("oxford_flowers102:2.*.*",
                                       data_dir=data_dir)
        dataset_builder.download_and_prepare()

        # Example counts are retrieved from the tensorflow dataset info.
        train_count = dataset_builder.info.splits[
            tfds.Split.TRAIN].num_examples
        val_count = dataset_builder.info.splits[
            tfds.Split.VALIDATION].num_examples
        test_count = dataset_builder.info.splits[tfds.Split.TEST].num_examples

        if train_split_percent:
            tfds_splits = {
                "train":
                    "train[:{s}%]+validation[:{s}%]".format(
                        s=train_split_percent),
                # Complement of "train": the remaining (100 - s)% of each
                # native split.
                "val":
                    "train[{s}%:]+validation[{s}%:]".format(
                        s=train_split_percent),
                "trainval":
                    "train+validation",
                "test":
                    "test",
                "train800":
                    "train[:800]",
                "val200":
                    "validation[:200]",
                "train800val200":
                    "train[:800]+validation[:200]",
            }
            num_samples_splits = {
                # Multiply before the integer division so the counts are not
                # floored to a multiple of the percentage.
                "train":
                    (train_count + val_count) * train_split_percent // 100,
                "val":
                    (train_count + val_count) *
                    (100 - train_split_percent) // 100,
                "trainval":
                    train_count + val_count,
                "test":
                    test_count,
                "train800":
                    800,
                "val200":
                    200,
                "train800val200":
                    1000,
            }
        else:
            tfds_splits = {
                "train": "train",
                "val": "validation",
                "trainval": "train+validation",
                "test": "test",
                "train800": "train[:800]",
                "val200": "validation[:200]",
                "train800val200": "train[:800]+validation[:200]",
            }
            num_samples_splits = {
                "train": train_count,
                "val": val_count,
                "trainval": train_count + val_count,
                "test": test_count,
                "train800": 800,
                "val200": 200,
                "train800val200": 1000,
            }

        super(OxfordFlowers102Data, self).__init__(
            dataset_builder=dataset_builder,
            tfds_splits=tfds_splits,
            num_samples_splits=num_samples_splits,
            num_preprocessing_threads=400,
            shuffle_buffer_size=10000,
            # Note: Rename tensors but keep their original types.
            base_preprocess_fn=base.make_get_and_cast_tensors_fn({
                "image": ("image", None),
                "label": ("label", None),
            }),
            num_classes=dataset_builder.info.features["label"].num_classes)
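
Because the native train and validation splits are the same size (1020 each), taking s% of each split equals s% of their union, so the percent-based slice strings and the count arithmetic above stay in agreement. A quick check with an illustrative train_split_percent of 90:

train_count = val_count = 1020  # oxford_flowers102 native split sizes
s = 90
# "train[:90%]+validation[:90%]" -> 918 + 918 examples under TFDS rounding.
n_train = (train_count + val_count) * s // 100        # 1836
n_val = (train_count + val_count) * (100 - s) // 100  # 204
assert n_train + n_val == train_count + val_count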