def create_validation_dataloader(data_dir,
                                 val_dir,
                                 batch_size,
                                 workers,
                                 num_classes=1000):
    """
    Configure Imagenet validation dataloader

    Creates :class:`torch.utils.data.DataLoader` using :class:`CachedDatasetFolder`
    or :class:`HDF5Dataset` pre-configured for the validation cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param val_dir: The directory or hdf5 group containing the validation data
    :param batch_size: Images per batch
    :param workers: How many data loading subprocesses to use
    :param num_classes: Limit the dataset size to the given number of classes
    :return: torch.utils.data.DataLoader
    """

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ])
    if h5py.is_hdf5(data_dir):
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=val_dir,
                                  classes=classes,
                                  transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=val_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, val_dir),
                                      num_classes=num_classes,
                                      transform=transform)
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers,
        pin_memory=False,
    )
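A minimal usage sketch (hypothetical path and settings, not values from the
source; these snippets also presume the usual imports such as os, h5py,
torch, and torchvision.transforms):

# Passing an hdf5 file selects the HDF5Dataset branch above.
val_loader = create_validation_dataloader(data_dir="/data/imagenet.hdf5",
                                          val_dir="val",
                                          batch_size=128,
                                          workers=4,
                                          num_classes=100)
for images, targets in val_loader:
    pass  # run the evaluation step here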
Example #2
    def get_train_dataset(self, iteration):
        if self.train_dataset is None:
            if self.use_auto_augment:
                transform = transforms.Compose([
                    RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    aa.ImageNetPolicy(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225],
                                         inplace=True),
                ])
            else:
                transform = transforms.Compose([
                    RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225],
                                         inplace=True),
                ])

            self.train_dataset = HDF5Dataset(
                hdf5_file=os.path.expanduser(
                    "~/nta/data/imagenet/imagenet.hdf5"),
                root="train",
                classes=IMAGENET_CLASS_SUBSETS[100],
                transform=transform)

        return self.train_dataset
Example #3
def create_validation_dataset(data_dir, val_dir, num_classes=1000):
    """
    Configure Imagenet validation dataset

    Creates :class:`CachedDatasetFolder` or :class:`HDF5Dataset` pre-configured
    for the validation cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param val_dir: The directory or hdf5 group containing the validation data
    :param num_classes: Limit the dataset size to the given number of classes
    :return: CachedDatasetFolder or HDF5Dataset
    """

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ])
    if h5py.is_hdf5(data_dir):
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=val_dir,
                                  classes=classes,
                                  transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=val_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, val_dir),
                                      num_classes=num_classes,
                                      transform=transform)
    return dataset
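Unlike the dataloader helper above, this function returns the bare dataset,
so you wrap it yourself; a sketch with a hypothetical directory path and
loader settings:

# Passing a plain directory selects the CachedDatasetFolder branch above.
dataset = create_validation_dataset(data_dir="/data/imagenet",
                                    val_dir="val",
                                    num_classes=1000)
val_loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=128,
                                         shuffle=False,
                                         num_workers=4)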
Example #4
    def get_test_dataset(self, noise_level=0.0):
        assert noise_level == 0.0

        if self.test_dataset is None:
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225],
                                     inplace=True),
            ])

            self.test_dataset = HDF5Dataset(
                hdf5_file=os.path.expanduser(
                    "~/nta/data/imagenet/imagenet.hdf5"),
                root="val",
                classes=IMAGENET_CLASS_SUBSETS[100],
                transform=transform)

        return self.test_dataset
Example #5
def create_train_dataloader(
    data_dir,
    train_dir,
    batch_size,
    workers,
    distributed,
    num_classes=1000,
    use_auto_augment=False,
):
    """
    Configure Imagenet training dataloader

    Creates :class:`torch.utils.data.DataLoader` using :class:`CachedDatasetFolder`
    or :class:`HDF5Dataset` pre-configured for the training cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param batch_size: Images per batch
    :param workers: How many data loading subprocesses to use
    :param distributed: Whether or not to use `DistributedSampler`
    :param num_classes: Limit the dataset size to the given number of classes
    :param use_auto_augment: Whether to apply the AutoAugment Imagenet policy
    :return: torch.utils.data.DataLoader
    """
    if use_auto_augment:
        transform = transforms.Compose([
            RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            ImageNetPolicy(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ])
    else:
        transform = transforms.Compose([
            RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ])
    if h5py.is_hdf5(data_dir):
        # Use fixed Imagenet classes if mapping is available
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  classes=classes,
                                  transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform)
    if distributed:
        train_sampler = DistributedSampler(dataset)
    else:
        train_sampler = None

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=train_sampler is None,
        num_workers=workers,
        sampler=train_sampler,
        pin_memory=torch.cuda.is_available(),
    )
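A usage sketch with illustrative values. With distributed=True each process
receives a DistributedSampler shard; torch.distributed must already be
initialized, and calling train_loader.sampler.set_epoch(epoch) once per
epoch reshuffles the shards:

train_loader = create_train_dataloader(data_dir="/data/imagenet.hdf5",
                                       train_dir="train",
                                       batch_size=256,
                                       workers=8,
                                       distributed=False,
                                       num_classes=100,
                                       use_auto_augment=True)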
Example #6
def create_train_dataset(data_dir,
                         train_dir,
                         num_classes=1000,
                         use_auto_augment=False,
                         sample_transform=None,
                         target_transform=None,
                         replicas_per_sample=1):
    """
    Configure Imagenet training dataset

    Creates :class:`CachedDatasetFolder` or :class:`HDF5Dataset` pre-configured
    for the training cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param num_classes: Limit the dataset size to the given number of classes
    :param use_auto_augment: Whether to apply the AutoAugment Imagenet policy
    :param sample_transform: List of transforms acting on the samples
                             to be added to the defaults below
    :param target_transform: List of transforms acting on the targets
    :param replicas_per_sample: Number of replicas to create per sample
                                in the batch (each replica is transformed
                                independently). Used in maxup.

    :return: CachedDatasetFolder or HDF5Dataset
    """
    if use_auto_augment:
        transform = transforms.Compose([
            RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            ImageNetPolicy(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ])
    else:
        transform = transforms.Compose([
            RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ])

    transform = transforms.Compose([transform] + (sample_transform or []))

    if h5py.is_hdf5(data_dir):
        # Use fixed Imagenet classes if mapping is available
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  classes=classes,
                                  transform=transform,
                                  target_transform=target_transform,
                                  replicas_per_sample=replicas_per_sample)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform,
                                  target_transform=target_transform,
                                  replicas_per_sample=replicas_per_sample)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform,
                                      target_transform=target_transform)
    return dataset
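A sketch of the maxup-style options; the extra transform and replica count
are illustrative assumptions. Since sample_transform is composed after
ToTensor/Normalize it must operate on tensors, and note that the
CachedDatasetFolder branch above does not receive replicas_per_sample:

extra = [transforms.RandomErasing(p=0.5)]  # tensor-level augmentation
dataset = create_train_dataset(data_dir="/data/imagenet.hdf5",
                               train_dir="train",
                               num_classes=100,
                               sample_transform=extra,
                               replicas_per_sample=2)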
Example #7
def _create_train_dataloader(data_dir,
                             train_dir,
                             batch_size,
                             workers,
                             distributed,
                             progressive_resize,
                             num_classes=1000):
    """
    Configure Imagenet training dataloader

    Creates :class:`torch.utils.data.DataLoader` using :class:`CachedDatasetFolder`
    or :class:`HDF5Dataset` pre-configured for the training cycle, with an
    optional :class:`ProgressiveRandomResizedCrop` schedule where the image
    size can vary at different epochs during the cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param batch_size: Images per batch
    :param workers: How many data loading subprocesses to use
    :param distributed: Whether or not to use `DistributedSampler`
    :param progressive_resize: Dictionary containing the progressive resize schedule
    :param num_classes: Limit the dataset size to the given number of classes
    :return: torch.utils.data.DataLoader
    """
    if progressive_resize is None:
        # Standard size for all epochs
        resize_transform = RandomResizedCrop(224)
    else:
        # Convert progressive_resize dict from {str:int} to {int:int}
        progressive_resize = {int(k): v for k, v in progressive_resize.items()}
        resize_transform = ProgressiveRandomResizedCrop(
            progressive_resize=progressive_resize)

    transform = transforms.Compose([
        resize_transform,
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    if h5py.is_hdf5(data_dir):
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  classes=classes,
                                  transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir,
                                  root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform)
    if distributed:
        train_sampler = DistributedSampler(dataset)
    else:
        train_sampler = None

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=train_sampler is None,
        num_workers=workers,
        sampler=train_sampler,
        pin_memory=torch.cuda.is_available(),
    )
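A sketch of the progressive resize schedule; the epochs and crop sizes are
hypothetical. Keys may arrive as strings (e.g. from a JSON config) and are
converted to ints before building ProgressiveRandomResizedCrop:

# Crop at 160px from epoch 0, then switch to 224px from epoch 14 onward.
train_loader = _create_train_dataloader(data_dir="/data/imagenet.hdf5",
                                        train_dir="train",
                                        batch_size=256,
                                        workers=8,
                                        distributed=False,
                                        progressive_resize={"0": 160,
                                                            "14": 224})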