Example #1
    def _make_train_and_valid_dataloader(self):
        if self._data_augmentation:
            transform = training_transform_augmented
        else:
            transform = training_transform_not_augmented

        train_dataset = datasets.CIFAR10(
            root=config.get_data_dir(),
            train=True,
            download=True,
            transform=transform,
        )
        valid_dataset = datasets.CIFAR10(
            root=config.get_data_dir(),
            train=True,
            download=True,
            transform=training_transform_not_augmented,
        )
        train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(train_dataset, valid_dataset)
        return train_loader, valid_loader
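The helper _make_train_and_valid_dataloader_helper that the PyTorch examples call is not shown on this page. Below is a minimal sketch of what such a helper plausibly does, under the assumption that the validation set is a held-out slice of the training split selected via samplers (the function name and the batch_size/valid_size parameters are made up for the sketch, not DeepOBS API):

import torch
from torch.utils.data import DataLoader, SubsetRandomSampler

def make_train_and_valid_dataloader(train_dataset, valid_dataset,
                                    batch_size, valid_size):
    # Both datasets wrap the same underlying training split; a random index
    # permutation decides which examples go to the validation loader.
    indices = torch.randperm(len(train_dataset)).tolist()
    valid_sampler = SubsetRandomSampler(indices[:valid_size])
    train_sampler = SubsetRandomSampler(indices[valid_size:])
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              sampler=train_sampler)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                              sampler=valid_sampler)
    return train_loader, valid_loader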
Example #2
 def _make_train_and_valid_dataloader(self):
     transform = transforms.ToTensor()
     train_dataset = datasets.MNIST(root=config.get_data_dir(),
                                    train=True,
                                    download=True,
                                    transform=transform)
     valid_dataset = datasets.MNIST(root=config.get_data_dir(),
                                    train=True,
                                    download=True,
                                    transform=transform)
     train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
         train_dataset, valid_dataset)
     return train_loader, valid_loader
Example #3
def load_label_dict(dataset):
    """Get dict that translates from label number to humanly-readable class
    (e.g. from 1 -> automobile on cifar 10)

    Args:
        dataset (str): Name of the dataset.

    Returns:
        dict: Dictionary that translates from class number to class label.

    """
    if dataset == "cifar10":
        with open(
                os.path.join(config.get_data_dir(),
                             "cifar-10/batches.meta.txt")) as lookup_file:
            label_dict = lookup_file.readlines()
    elif dataset == "cifar100":
        with open(
                os.path.join(config.get_data_dir(),
                             "cifar-100/fine_label_names.txt")) as lookup_file:
            label_dict = lookup_file.readlines()
    elif dataset == "fmnist":
        label_dict = dict([
            (0, "T-shirt"),
            (1, "Trouser"),
            (2, "Pullover"),
            (3, "Dress"),
            (4, "Coat"),
            (5, "Sandal"),
            (6, "Shirt"),
            (7, "Sneaker"),
            (8, "Bag"),
            (9, "Ankle boot"),
        ])
    elif dataset == "imagenet":
        label_file = os.path.join(
            os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__))),
            "imagenet_labels.txt",
        )
        # Read one class name per line; the line number is the label.
        label_dict = {}
        with open(label_file) as f:
            for i, line in enumerate(f):
                label_dict[i] = line.rstrip()
    else:
        label_dict = IdentityDict()
    return label_dict
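A short usage sketch for load_label_dict as defined above. The Fashion-MNIST branch needs no files on disk, so it is the easiest one to try; for dataset names that are not handled, the function falls back to an IdentityDict, which presumably maps each label number to itself:

label_dict = load_label_dict("fmnist")
print(label_dict[0])   # T-shirt
print(label_dict[9])   # Ankle boot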
Example #4
 def _make_test_dataloader(self):
     transform = training_transform_not_augmented
     test_dataset = datasets.CIFAR100(root=config.get_data_dir(),
                                      train=False,
                                      download=True,
                                      transform=transform)
     return self._make_dataloader(test_dataset, sampler=None)
Example #5
    def _make_train_datasets(self):
        """Creates the three MNIST datasets stemming from the training
        part of the data set, i.e. the training set, the training
        evaluation set, and the validation set.

        Returns:
          A tf.data.Dataset instance with batches of training data.
          A tf.data.Dataset instance with batches of training eval data.
          A tf.data.Dataset instance with batches of validation data.
        """
        data_dir = config.get_data_dir()
        train_images_file = os.path.join(
            data_dir, "mnist", "train-images-idx3-ubyte.gz"
        )
        train_labels_file = os.path.join(
            data_dir, "mnist", "train-labels-idx1-ubyte.gz"
        )

        data = self._load_dataset(train_images_file, train_labels_file)
        valid_data = data.take(self._train_eval_size)
        train_data = data.skip(self._train_eval_size)

        train_data = self._make_dataset(train_data, shuffle=True)
        train_eval_data = train_data.take(
            self._train_eval_size // self._batch_size
        )

        valid_data = self._make_dataset(valid_data, shuffle=False)

        return train_data, train_eval_data, valid_data
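The take/skip pattern above carves the validation examples off the front of the loaded training stream and leaves the rest for training, before the training part is shuffled and batched. A tiny self-contained illustration of that split with tf.data (the toy sizes are made up for the example):

import tensorflow as tf

data = tf.data.Dataset.range(10)   # stand-in for the loaded training examples
valid_data = data.take(3)          # first 3 examples form the validation set
train_data = data.skip(3)          # remaining 7 examples form the training set

print(list(valid_data.as_numpy_iterator()))   # [0, 1, 2]
print(list(train_data.as_numpy_iterator()))   # [3, 4, 5, 6, 7, 8, 9]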
Example #6
File: svhn.py  Project: f-dangel/DeepOBS
    def _make_train_datasets(self):
        """Creates the three SVHN datasets stemming from the training
        part of the data set, i.e. the training set, the training
        evaluation set, and the validation set.

        Returns:
          A tf.data.Dataset instance with batches of training data.
          A tf.data.Dataset instance with batches of training eval data.
          A tf.data.Dataset instance with batches of validation data.
        """
        pattern = os.path.join(config.get_data_dir(), "svhn",
                               "data_batch_*.bin")

        data = self._load_dataset(pattern)
        valid_data = data.take(self._train_eval_size)
        train_data = data.skip(self._train_eval_size)

        train_data = self._make_dataset(
            train_data,
            data_augmentation=self._data_augmentation,
            shuffle=True)
        train_eval_data = train_data.take(self._train_eval_size //
                                          self._batch_size)

        valid_data = self._make_dataset(valid_data,
                                        data_augmentation=False,
                                        shuffle=False)

        return train_data, train_eval_data, valid_data
Example #7
 def _make_test_dataloader(self):
     transform = transforms.ToTensor()
     test_dataset = datasets.MNIST(root=config.get_data_dir(),
                                   train=False,
                                   download=True,
                                   transform=transform)
     return self._make_dataloader(test_dataset, sampler=None)
Example #8
File: svhn.py  Project: jotaf98/DeepOBS
 def _make_test_dataloader(self):
     transform = training_transform_not_augmented
     test_dataset = datasets.SVHN(root=config.get_data_dir(),
                                  split='test',
                                  download=True,
                                  transform=transform)
     return self._make_dataloader(test_dataset, sampler=None)
Example #9
    def _make_train_dataset(self):
        """Creates the Tolstoi training dataset.

        Returns:
          A tf.data.Dataset instance with batches of training data.
        """
        filepath = os.path.join(config.get_data_dir(), "tolstoi", "train.npy")
        return self._make_dataset(filepath)
Example #10
        def _make_test_dataloader(self):
            test_dataset = datasets.CIFAR10(
                root=config.get_data_dir(),
                train=False,
                download=True,
                transform=self._transform,
            )

            return self._make_dataloader(test_dataset, sampler=None)
Example #11
File: svhn.py  Project: jotaf98/DeepOBS
 def _make_train_and_valid_dataloader(self):
     if self._data_augmentation:
         transform = training_transform_augmented
     else:
         transform = training_transform_not_augmented
     train_dataset = datasets.SVHN(root=config.get_data_dir(),
                                   split='train',
                                   download=True,
                                   transform=transform)
     # we want the validation set to be of the same size as the test set, so we do NOT use the 'extra' dataset that is available for SVHN
     valid_dataset = datasets.SVHN(
         root=config.get_data_dir(),
         split='train',
         download=True,
         transform=training_transform_not_augmented)
     train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
         train_dataset, valid_dataset)
     return train_loader, valid_loader
Example #12
    def _make_test_dataset(self):
        """Creates the CIFAR-100 test dataset.

        Returns:
          A tf.data.Dataset instance with batches of test data.
        """
        pattern = os.path.join(config.get_data_dir(), "cifar-100", "test.bin")
        return self._make_dataset(
            pattern, data_augmentation=False, shuffle=False)
Example #13
        def _make_train_and_valid_dataloader(self):
            train_dataset = datasets.CIFAR10(
                root=config.get_data_dir(),
                train=True,
                download=True,
                transform=self._transform,
            )
            valid_dataset = datasets.CIFAR10(
                root=config.get_data_dir(),
                train=True,
                download=True,
                transform=self._transform,
            )

            train_loader, valid_loader = self._make_train_and_valid_dataloader_helper(
                train_dataset, valid_dataset)

            return train_loader, valid_loader
Example #14
    def _make_train_dataset(self):
        """Creates the CIFAR-100 training dataset.

        Returns:
          A tf.data.Dataset instance with batches of training data.
        """
        pattern = os.path.join(config.get_data_dir(), "cifar-100", "train.bin")
        return self._make_dataset(
            pattern, data_augmentation=self._data_augmentation, shuffle=True)
Example #15
    def _make_test_dataset(self):
        """Creates the Tolstoi test dataset.

        Returns:
          A tf.data.Dataset instance with batches of test data.
        """
        filepath = os.path.join(config.get_data_dir(), "tolstoi", "test.npy")

        data = np.load(filepath)

        return self._make_dataset(data)
Example #16
File: mnist.py  Project: jotaf98/DeepOBS
    def _make_train_dataset(self):
        """Creates the MNIST training dataset.

        Returns:
          A tf.data.Dataset instance with batches of training data.
        """
        data_dir = config.get_data_dir()
        train_images_file = os.path.join(data_dir, "mnist",
                                         "train-images-idx3-ubyte.gz")
        train_labels_file = os.path.join(data_dir, "mnist",
                                         "train-labels-idx1-ubyte.gz")
        return self._make_dataset(
            train_images_file, train_labels_file, shuffle=True)
Example #17
File: svhn.py  Project: f-dangel/DeepOBS
    def _make_test_dataset(self):
        """Creates the SVHN test dataset.

        Returns:
          A tf.data.Dataset instance with batches of test data.
        """
        pattern = os.path.join(config.get_data_dir(), "svhn", "test_batch.bin")

        test_data = self._load_dataset(pattern)

        return self._make_dataset(test_data,
                                  data_augmentation=False,
                                  shuffle=False)
Example #18
File: mnist.py  Project: jotaf98/DeepOBS
    def _make_test_dataset(self):
        """Creates the MNIST test dataset.

        Returns:
          A tf.data.Dataset instance with batches of test data.
        """
        data_dir = config.get_data_dir()
        test_images_file = os.path.join(data_dir, "mnist",
                                        "t10k-images-idx3-ubyte.gz")
        test_labels_file = os.path.join(data_dir, "mnist",
                                        "t10k-labels-idx1-ubyte.gz")

        return self._make_dataset(
            test_images_file, test_labels_file, shuffle=False)
Example #19
def load_label_dict(dataset):
    """Get dict that translates from label number to humanly-readable class
    (e.g. from 1 -> automobile on cifar 10)

    Args:
        dataset (str): Name of the dataset.

    Returns:
        dict: Dictionary that translates from class number to class label.

    """
    if dataset == "tolstoi":
        filepath = os.path.join(config.get_data_dir(), "tolstoi/vocab.pkl")
        with open(filepath, "rb") as pickle_file:
            label_dict = pickle.load(pickle_file)
    else:
        label_dict = IdentityDict()
    return label_dict
Example #20
    def _make_test_dataset(self):
        """Creates the ImageNet test dataset.

        Returns:
          A tf.data.Dataset instance with batches of test data.
        """
        pattern = os.path.join(config.get_data_dir(), "imagenet",
                               "validation-*")

        test_data = self._load_dataset(pattern)

        return self._make_dataset(
            test_data,
            per_image_standardization=True,
            random_crop=False,
            random_flip_left_right=False,
            distort_color=False,
            shuffle=False,
        )
Example #21
    def _make_train_datasets(self):
        """Creates the three ImageNet datasets stemming from the training
        part of the data set, i.e. the training set, the training
        evaluation set, and the validation set.

        Returns:
          A tf.data.Dataset instance with batches of training data.
          A tf.data.Dataset instance with batches of training eval data.
          A tf.data.Dataset instance with batches of validation data.
        """
        pattern = os.path.join(config.get_data_dir(), "imagenet", "train-*")

        data = self._load_dataset(pattern)
        valid_data = data.take(self._train_eval_size)
        train_data = data.skip(self._train_eval_size)

        train_data = self._make_dataset(
            train_data,
            per_image_standardization=True,
            random_crop=self._data_augmentation,
            random_flip_left_right=self._data_augmentation,
            distort_color=False,
            shuffle=True,
        )
        train_eval_data = train_data.take(self._train_eval_size //
                                          self._batch_size)

        valid_data = self._make_dataset(
            valid_data,
            per_image_standardization=True,
            random_crop=False,
            random_flip_left_right=False,
            distort_color=False,
            shuffle=False,
        )

        return train_data, train_eval_data, valid_data
Example #22
    def _make_train_datasets(self):
        """Creates the three Tolstoi datasets stemming from the training
        part of the data set, i.e. the training set, the training
        evaluation set, and the validation set.

        Returns:
          A tf.data.Dataset instance with batches of training data.
          A tf.data.Dataset instance with batches of training eval data.
          A tf.data.Dataset instance with batches of validation data.
        """
        filepath = os.path.join(config.get_data_dir(), "tolstoi", "train.npy")

        data = np.load(filepath)

        valid_data = data[0:self._train_eval_size]
        train_data = data[self._train_eval_size:]

        train_data = self._make_dataset(train_data)
        train_eval_data = train_data.take(
            self._train_eval_size // (self._batch_size * self._seq_length))

        valid_data = self._make_dataset(valid_data)

        return train_data, train_eval_data, valid_data