def create_validation_dataloader(data_dir, val_dir, batch_size, workers,
                                 num_classes=1000):
    """
    Configure Imagenet validation dataloader

    Creates :class:`torch.utils.data.DataLoader` using
    :class:`CachedDatasetFolder` or :class:`HDF5Dataset` pre-configured for
    the validation cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param val_dir: The directory containing or hdf5 group the validation data
    :param batch_size: Images per batch
    :param workers: how many data loading subprocesses to use
    :param num_classes: Limit the dataset size to the given number of classes

    :return: torch.utils.data.DataLoader
    """
    # Standard Imagenet eval preprocessing: resize, center crop, normalize.
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ])
    if h5py.is_hdf5(data_dir):
        # Use the fixed class subset when a mapping exists for this size so
        # validation classes match the ones used for training.
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir, root=val_dir,
                                  classes=classes, transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir, root=val_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, val_dir),
                                      num_classes=num_classes,
                                      transform=transform)
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=workers,
        # Pin memory when CUDA is available, matching the training
        # dataloader; the original hard-coded False, giving up the faster
        # page-locked host-to-device transfers on GPU runs.
        pin_memory=torch.cuda.is_available(),
    )
def get_train_dataset(self, iteration):
    """Return (building and caching on first use) the training dataset."""
    if self.train_dataset is None:
        # Shared augmentation prefix; AutoAugment is optionally inserted
        # between the flip and the tensor conversion.
        pipeline = [RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip()]
        if self.use_auto_augment:
            pipeline.append(aa.ImageNetPolicy())
        pipeline += [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ]
        self.train_dataset = HDF5Dataset(
            hdf5_file=os.path.expanduser(
                "~/nta/data/imagenet/imagenet.hdf5"),
            root="train",
            classes=IMAGENET_CLASS_SUBSETS[100],
            transform=transforms.Compose(transforms=pipeline))
    return self.train_dataset
def create_validation_dataset(data_dir, val_dir, num_classes=1000):
    """
    Configure Imagenet validation dataset

    Creates :class:`CachedDatasetFolder` or :class:`HDF5Dataset`
    pre-configured for the validation cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param val_dir: The directory containing or hdf5 group the validation data
    :param num_classes: Limit the dataset size to the given number of classes

    :return: CachedDatasetFolder or HDF5Dataset
    """
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ])
    # Plain directory layout: delegate to the cached folder dataset.
    if not h5py.is_hdf5(data_dir):
        return CachedDatasetFolder(root=os.path.join(data_dir, val_dir),
                                   num_classes=num_classes,
                                   transform=eval_transform)
    # HDF5 layout: prefer the fixed class subset when one is defined for
    # the requested size, otherwise fall back to a numeric limit.
    kwargs = dict(hdf5_file=data_dir, root=val_dir, transform=eval_transform)
    if num_classes in IMAGENET_NUM_CLASSES:
        kwargs["classes"] = IMAGENET_NUM_CLASSES[num_classes]
    else:
        kwargs["num_classes"] = num_classes
    return HDF5Dataset(**kwargs)
def get_test_dataset(self, noise_level=0.0):
    """Return (building and caching on first use) the clean validation set."""
    # Noisy evaluation is not supported by this experiment variant.
    assert noise_level == 0.0
    if self.test_dataset is None:
        eval_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ])
        self.test_dataset = HDF5Dataset(
            hdf5_file=os.path.expanduser(
                "~/nta/data/imagenet/imagenet.hdf5"),
            root="val",
            classes=IMAGENET_CLASS_SUBSETS[100],
            transform=eval_transform)
    return self.test_dataset
def create_train_dataloader(
    data_dir, train_dir, batch_size, workers, distributed, num_classes=1000,
    use_auto_augment=False,
):
    """
    Configure Imagenet training dataloader

    Creates :class:`torch.utils.data.DataLoader` using
    :class:`CachedDatasetFolder` or :class:`HDF5Dataset` pre-configured for
    the training cycle

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param batch_size: Images per batch
    :param workers: how many data loading subprocesses to use
    :param distributed: Whether or not to use `DistributedSampler`
    :param num_classes: Limit the dataset size to the given number of classes
    :param use_auto_augment: Whether to insert the AutoAugment ImageNet
        policy between the flip and the tensor conversion

    :return: torch.utils.data.DataLoader
    """
    # Build the augmentation pipeline once; the two original branches were
    # identical except for the optional ImageNetPolicy step.
    pipeline = [RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
    if use_auto_augment:
        pipeline.append(ImageNetPolicy())
    pipeline += [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ]
    transform = transforms.Compose(transforms=pipeline)

    if h5py.is_hdf5(data_dir):
        # Use fixed Imagenet classes if mapping is available
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  classes=classes, transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform)

    if distributed:
        train_sampler = DistributedSampler(dataset)
    else:
        train_sampler = None

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        # DataLoader forbids shuffle together with an explicit sampler.
        shuffle=train_sampler is None,
        num_workers=workers,
        sampler=train_sampler,
        pin_memory=torch.cuda.is_available(),
    )
def create_train_dataset(data_dir, train_dir, num_classes=1000,
                         use_auto_augment=False, sample_transform=None,
                         target_transform=None, replicas_per_sample=1):
    """
    Configure Imagenet training dataset

    Creates :class:`CachedDatasetFolder` :class:`HDF5Dataset` pre-configured
    for the training cycle

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param num_classes: Limit the dataset size to the given number of classes
    :param use_auto_augment: Whether to insert the AutoAugment ImageNet
        policy between the flip and the tensor conversion
    :param sample_transform: List of transforms acting on the samples
                             to be added to the defaults below
    :param target_transform: List of transforms acting on the targets
    :param replicas_per_sample: Number of replicas to create per sample
                                in the batch (each replica is transformed
                                independently). Used in maxup.

    :return: CachedDatasetFolder or HDF5Dataset
    """
    # Single flat pipeline instead of the original duplicated branches and
    # nested Compose(Compose(...), *sample_transform) — same transforms in
    # the same order.
    pipeline = [RandomResizedCrop(224), transforms.RandomHorizontalFlip()]
    if use_auto_augment:
        pipeline.append(ImageNetPolicy())
    pipeline += [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225],
                             inplace=True),
    ]
    transform = transforms.Compose(
        transforms=pipeline + list(sample_transform or []))

    if h5py.is_hdf5(data_dir):
        # Use fixed Imagenet classes if mapping is available
        if num_classes in IMAGENET_NUM_CLASSES:
            classes = IMAGENET_NUM_CLASSES[num_classes]
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  classes=classes, transform=transform,
                                  target_transform=target_transform,
                                  replicas_per_sample=replicas_per_sample)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform,
                                  target_transform=target_transform,
                                  replicas_per_sample=replicas_per_sample)
    else:
        # NOTE(review): replicas_per_sample is not forwarded here — looks
        # like CachedDatasetFolder may not support it; confirm maxup is
        # only used with the HDF5 path.
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform,
                                      target_transform=target_transform)
    return dataset
def _create_train_dataloader(data_dir, train_dir, batch_size, workers,
                             distributed, progressive_resize,
                             num_classes=1000):
    """
    Configure Imagenet training dataloader

    Creates :class:`torch.utils.data.DataLoader` using
    :class:`CachedDatasetFolder` or :class:`HDF5Dataset` pre-configured for
    the training cycle, with an optional
    :class:`ProgressiveRandomResizedCrop` schedule letting the image size
    vary by epoch over the cycle.

    :param data_dir: The directory or hdf5 file containing the dataset
    :param train_dir: The directory or hdf5 group containing the training data
    :param batch_size: Images per batch
    :param workers: how many data loading subprocesses to use
    :param distributed: Whether or not to use `DistributedSampler`
    :param progressive_resize: Dictionary containing the progressive resize
        schedule, or None for a fixed 224px crop
    :param num_classes: Limit the dataset size to the given number of classes

    :return: torch.utils.data.DataLoader
    """
    if progressive_resize is None:
        # Fixed crop size for every epoch.
        crop = RandomResizedCrop(224)
    else:
        # Schedule keys arrive as strings (e.g. from JSON config); the
        # transform expects integer epoch numbers.
        schedule = {int(epoch): size
                    for epoch, size in progressive_resize.items()}
        crop = ProgressiveRandomResizedCrop(progressive_resize=schedule)

    transform = transforms.Compose(transforms=[
        crop,
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    if h5py.is_hdf5(data_dir):
        # Prefer the fixed class subset when one is defined for this size.
        if num_classes in IMAGENET_NUM_CLASSES:
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  classes=IMAGENET_NUM_CLASSES[num_classes],
                                  transform=transform)
        else:
            dataset = HDF5Dataset(hdf5_file=data_dir, root=train_dir,
                                  num_classes=num_classes,
                                  transform=transform)
    else:
        dataset = CachedDatasetFolder(root=os.path.join(data_dir, train_dir),
                                      num_classes=num_classes,
                                      transform=transform)

    sampler = DistributedSampler(dataset) if distributed else None
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        # Shuffle only when no distributed sampler is in charge of ordering.
        shuffle=sampler is None,
        num_workers=workers,
        sampler=sampler,
        pin_memory=torch.cuda.is_available(),
    )