def build_transform(is_train, config):
    """Build the image preprocessing pipeline described by ``config``.

    Args:
        is_train: When truthy, the training pipeline is produced by
            ``create_transform`` (or ``create_transform_record`` when
            ``config.DISTILL.ENABLED`` is set); otherwise the evaluation
            pipeline is assembled by hand.
        config: Config node. Reads ``DATA.IMG_SIZE``, ``DATA.INTERPOLATION``,
            ``DATA.MEAN_AND_STD_TYPE``, ``AUG.*``, ``DISTILL.ENABLED`` and
            ``TEST.CROP``.

    Returns:
        Whatever the training factory returns, or a ``transforms.Compose``
        for evaluation.

    Raises:
        KeyError: if ``DATA.MEAN_AND_STD_TYPE`` is not ``default``,
            ``inception`` or ``clip``.
    """
    img_size = config.DATA.IMG_SIZE
    needs_resize = img_size > 32

    # (mean, std) per normalisation preset, in RGB order.
    norm_presets = {
        'default': (IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
        'inception': (IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD),
        'clip': ((0.48145466, 0.4578275, 0.40821073),
                 (0.26862954, 0.26130258, 0.27577711)),
    }
    mean, std = norm_presets[config.DATA.MEAN_AND_STD_TYPE]

    if not is_train:
        steps = []
        if needs_resize:
            resample = _pil_interp(config.DATA.INTERPOLATION)
            if config.TEST.CROP:
                # Resize first so the crop keeps the same ratio as 256 -> 224.
                enlarged = int((256 / 224) * config.DATA.IMG_SIZE)
                steps.append(transforms.Resize(enlarged, interpolation=resample))
                steps.append(transforms.CenterCrop(config.DATA.IMG_SIZE))
            else:
                steps.append(
                    transforms.Resize(
                        (config.DATA.IMG_SIZE, config.DATA.IMG_SIZE),
                        interpolation=resample,
                    )
                )
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize(mean, std))
        return transforms.Compose(steps)

    # Training: this should always dispatch to transforms_imagenet_train.
    factory = create_transform_record if config.DISTILL.ENABLED else create_transform
    jitter = config.AUG.COLOR_JITTER if config.AUG.COLOR_JITTER > 0 else None
    policy = config.AUG.AUTO_AUGMENT if config.AUG.AUTO_AUGMENT != 'none' else None
    train_tf = factory(
        input_size=config.DATA.IMG_SIZE,
        is_training=True,
        color_jitter=jitter,
        auto_augment=policy,
        re_prob=config.AUG.REPROB,
        re_mode=config.AUG.REMODE,
        re_count=config.AUG.RECOUNT,
        interpolation=config.DATA.INTERPOLATION,
        mean=mean,
        std=std,
    )
    if not needs_resize:
        # Small inputs: replace RandomResizedCropAndInterpolation with a
        # padded RandomCrop.
        train_tf.transforms[0] = transforms.RandomCrop(
            config.DATA.IMG_SIZE, padding=4)
    return train_tf
def imagenet_no_augment_transform(
    size: Union[Sequence, int] = 224, interpolation: str = "bilinear"
) -> T.Compose:
    """
    The default image transform without data augmentation.

    It is often useful for testing models on Imagenet.
    It sequentially resizes the image and takes a central cropping.

    Args:
        size: Target size handed to both ``T.Resize`` and ``T.CenterCrop``.
        interpolation: Name of the interpolation mode, converted with
            ``_pil_interp`` before being passed to ``T.Resize``.

    Returns:
        A ``T.Compose`` of the resize and centre-crop steps.
    """
    # Convert the mode name exactly once. The previous code passed the already
    # converted value through _pil_interp a second time.
    # NOTE(review): with a timm-style _pil_interp (string -> PIL constant,
    # anything unrecognised -> bilinear) that second call would silently drop
    # the requested mode — confirm against the local helper.
    resample = _pil_interp(interpolation)
    return T.Compose([T.Resize(size, resample), T.CenterCrop(size)])
def build_transform(is_train, config):
    """Build the train or eval preprocessing pipeline for ``config``.

    Normalisation statistics are selected from ``config.DATA.DATASET`` and
    applied to *both* pipelines. Previously only the eval pipeline used the
    CIFAR-100 statistics, while training fell back to ``create_transform``'s
    defaults, so the two pipelines normalised differently.

    Args:
        is_train: When truthy, build the training pipeline through
            ``create_transform``; otherwise assemble the eval pipeline.
        config: Config node. Reads ``DATA.IMG_SIZE``, ``DATA.INTERPOLATION``,
            ``DATA.DATASET``, ``AUG.*`` and ``TEST.CROP``.

    Returns:
        The transform from ``create_transform`` (training) or a
        ``transforms.Compose`` (evaluation).
    """
    resize_im = config.DATA.IMG_SIZE > 32

    CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095,
                           0.4409178433670343)
    CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883,
                          0.27615047132568404)
    dataset_stats = {
        'imagenet': (IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
        'cifar100': (CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD),
    }
    # None for datasets without known statistics; those keep the previous
    # behaviour (factory defaults for training, no Normalize step for eval).
    stats = dataset_stats.get(config.DATA.DATASET)

    if is_train:
        norm_kwargs = {}
        if stats is not None:
            norm_kwargs = dict(mean=stats[0], std=stats[1])
        # this should always dispatch to transforms_imagenet_train
        transform = create_transform(
            input_size=config.DATA.IMG_SIZE,
            is_training=True,
            color_jitter=config.AUG.COLOR_JITTER
            if config.AUG.COLOR_JITTER > 0 else None,
            auto_augment=config.AUG.AUTO_AUGMENT
            if config.AUG.AUTO_AUGMENT != 'none' else None,
            re_prob=config.AUG.REPROB,
            re_mode=config.AUG.REMODE,
            re_count=config.AUG.RECOUNT,
            interpolation=config.DATA.INTERPOLATION,
            **norm_kwargs,
        )
        if not resize_im:
            # replace RandomResizedCropAndInterpolation with RandomCrop
            transform.transforms[0] = transforms.RandomCrop(
                config.DATA.IMG_SIZE, padding=4)
        return transform

    t = []
    if resize_im:
        if config.TEST.CROP:
            # to maintain same ratio w.r.t. 224 images
            size = int((256 / 224) * config.DATA.IMG_SIZE)
            t.append(
                transforms.Resize(
                    size,
                    interpolation=_pil_interp(config.DATA.INTERPOLATION)))
            t.append(transforms.CenterCrop(config.DATA.IMG_SIZE))
        else:
            t.append(
                transforms.Resize(
                    (config.DATA.IMG_SIZE, config.DATA.IMG_SIZE),
                    interpolation=_pil_interp(config.DATA.INTERPOLATION)))

    t.append(transforms.ToTensor())
    if stats is not None:
        t.append(transforms.Normalize(*stats))
    return transforms.Compose(t)
def transforms_imagenet_eval(img_size=224,
                             crop_pct=None,
                             interpolation='bilinear',
                             use_prefetcher=False,
                             mean=IMAGENET_DEFAULT_MEAN,
                             std=IMAGENET_DEFAULT_STD):
    """Build the evaluation pipeline: resize, centre crop, then conversion.

    Args:
        img_size: Target size, either an int or a 2-element tuple/list.
            Lists are now accepted as well as tuples; previously a list fell
            through to the scalar branch and failed on ``img_size / crop_pct``.
        crop_pct: Accepted for interface compatibility but overridden below.
        interpolation: Interpolation name converted with ``_pil_interp``.
        use_prefetcher: When truthy, end with ``ToNumpy()`` instead of
            ``ToTensor`` + ``Normalize``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel std for ``Normalize``.

    Returns:
        A ``transforms.Compose`` of the assembled steps.
    """
    # NOTE(review): the caller-supplied crop_pct is unconditionally replaced
    # by 1, so the resize target equals the crop size. This looks deliberate;
    # confirm before honouring the argument.
    crop_pct = 1

    if isinstance(img_size, (tuple, list)):
        assert len(img_size) == 2
        if img_size[-1] == img_size[-2]:
            # fall-back to older behaviour so Resize scales to shortest edge
            # if target is square
            scale_size = int(math.floor(img_size[0] / crop_pct))
        else:
            scale_size = tuple([int(x / crop_pct) for x in img_size])
    else:
        scale_size = int(math.floor(img_size / crop_pct))

    tfl = [
        transforms.Resize(scale_size, _pil_interp(interpolation)),
        transforms.CenterCrop(img_size),
    ]
    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        tfl += [ToNumpy()]
    else:
        tfl += [
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor(mean),
                                 std=torch.tensor(std)),
        ]

    return transforms.Compose(tfl)
def imagenet_augment_transform(
    size: int = 224,
    scale: Optional[float] = None,
    ratio: Optional[float] = None,
    interpolation: str = "random",
    hflip: Union[float, bool] = 0.5,
    vflip: Union[float, bool] = False,
    color_jitter: Union[Sequence, float] = 0.4,
    auto_augment: Optional[str] = None,
    mean: Optional[Sequence[float]] = IMAGENET_DEFAULT_MEAN,
) -> T.Compose:
    """
    The default image transform with data augmentation.

    It is often useful for training models on Imagenet.
    Adapted from:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/data/transforms_factory.py

    Args:
        size: Output size of the random resized crop; an int or a tuple/list.
        scale: Crop scale range; falls back to ``(0.08, 1.0)`` when falsy.
        ratio: Crop aspect-ratio range; falls back to ``(3/4, 4/3)`` when falsy.
        interpolation: Interpolation name for the crop. It is also passed to
            the auto-augment policy unless it is ``"random"``.
        hflip: Horizontal-flip probability; the step is skipped when falsy.
        vflip: Vertical-flip probability; the step is skipped when falsy.
        color_jitter: Scalar (reused for brightness, contrast and saturation)
            or a 3/4-element sequence. Only applied when ``auto_augment`` is
            not set.
        auto_augment: Policy string; names starting with ``"rand"`` use
            ``rand_augment_transform``, all others ``auto_augment_transform``.
        mean: Channel means used to derive the auto-augment ``img_mean``.
            ``None`` now falls back to ``IMAGENET_DEFAULT_MEAN`` instead of
            raising ``TypeError``.

    Returns:
        A ``T.Compose`` of the selected augmentation steps. No tensor
        conversion or normalisation is appended here.
    """
    scale = tuple(scale or (0.08, 1.0))  # default imagenet scale range
    ratio = tuple(ratio or (3.0 / 4.0, 4.0 / 3.0))  # default imagenet ratio range

    tfl = [RandomResizedCropAndInterpolation(size, scale, ratio, interpolation)]
    if hflip and hflip > 0:
        tfl.append(T.RandomHorizontalFlip(p=hflip))
    if vflip and vflip > 0.0:
        tfl.append(T.RandomVerticalFlip(p=vflip))

    if auto_augment:
        assert isinstance(auto_augment, str)
        if mean is None:
            # The signature allows None; use the default instead of crashing
            # while iterating over it.
            mean = IMAGENET_DEFAULT_MEAN
        size_min = min(size) if isinstance(size, (tuple, list)) else size
        aa_params = dict(
            translate_const=int(size_min * 0.45),
            img_mean=tuple(min(255, round(255 * x)) for x in mean),
        )
        if interpolation and interpolation != "random":
            aa_params["interpolation"] = _pil_interp(interpolation)
        if auto_augment.startswith("rand"):
            tfl.append(rand_augment_transform(auto_augment, aa_params))
        else:
            tfl.append(auto_augment_transform(auto_augment, aa_params))
    elif color_jitter is not None:
        # color jitter is enabled when not using AA
        if isinstance(color_jitter, (list, tuple)):
            # color jitter should be a 3-tuple/list if spec
            # brightness/contrast/saturation, or 4 if also augmenting hue
            assert len(color_jitter) in (3, 4)
        else:
            # if it's a scalar, duplicate for brightness, contrast, and
            # saturation, no hue
            color_jitter = (float(color_jitter), ) * 3
        tfl.append(T.ColorJitter(*color_jitter))

    return T.Compose(tfl)
def val_dataloader(self):
    """Create the validation ``DataLoader`` over ``<data_path>/val``.

    The preprocessing is driven by ``self.model_cfg``: each target edge is
    divided by ``crop_pct`` to get the resize size, then the image is
    centre-cropped to the target size, converted to a tensor and normalised
    with the configured mean/std.

    Returns:
        A non-shuffling ``torch.utils.data.DataLoader`` using
        ``self.batch_size`` and ``self.workers``.
    """
    cfg = self.model_cfg
    crop_pct = cfg['crop_pct']
    # Drop the leading entry of input_size and keep the remaining dimensions.
    target_size = cfg['input_size'][1:]
    resize_size = tuple(int(edge / crop_pct) for edge in target_size)

    normalize = transforms.Normalize(mean=cfg['mean'], std=cfg['std'])
    val_dir = os.path.join(self.data_path, 'val')

    pipeline = transforms.Compose([
        transforms.Resize(resize_size, _pil_interp(cfg['interpolation'])),
        transforms.CenterCrop(target_size),
        transforms.ToTensor(),
        normalize,
    ])
    dataset = datasets.ImageFolder(val_dir, pipeline)

    return torch.utils.data.DataLoader(
        dataset,
        batch_size=self.batch_size,
        shuffle=False,
        num_workers=self.workers,
    )
def transforms_noaug_train(
        img_size=224,
        interpolation='bilinear',
        use_prefetcher=False,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
):
    """Build a training pipeline without augmentation: resize + centre crop.

    Args:
        img_size: Size passed to both ``Resize`` and ``CenterCrop``.
        interpolation: Interpolation name; ``'random'`` is replaced by
            ``'bilinear'`` because random interpolation is not supported here.
        use_prefetcher: When truthy, end with ``ToNumpy()`` instead of
            ``ToTensor`` + ``Normalize``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel std for ``Normalize``.

    Returns:
        A ``transforms.Compose`` of the assembled steps.
    """
    # random interpolation not supported with no-aug
    mode_name = 'bilinear' if interpolation == 'random' else interpolation

    steps = [
        transforms.Resize(img_size, _pil_interp(mode_name)),
        transforms.CenterCrop(img_size),
    ]
    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        steps.append(ToNumpy())
    else:
        steps.extend([
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor(mean),
                                 std=torch.tensor(std)),
        ])
    return transforms.Compose(steps)
def instantiate_transforms(cfg: DictConfig, global_config: DictConfig = None):
    """Load an individual transformation from its config node.

    Nodes whose ``_target_`` is ``"aa"`` are built as an auto-augment style
    policy from ``cfg.policy`` and the ``global_config.input`` settings
    (``input_size``, ``mean``, ``interpolation``). Every other node is passed
    to ``instantiate`` unchanged.

    Args:
        cfg: Config node of the transform.
        global_config: Global config; it is dereferenced (``.input``) only
            for ``"aa"`` nodes, so it must not be ``None`` in that case.

    Returns:
        The constructed transform.
    """
    if cfg._target_ != "aa":
        return instantiate(cfg)

    input_cfg = global_config.input
    aa_params = {
        "translate_const": int(input_cfg.input_size * 0.45),
        "img_mean": tuple(
            min(255, round(255 * channel)) for channel in input_cfg.mean),
    }
    interp_name = input_cfg.interpolation
    if interp_name and interp_name != "random":
        aa_params["interpolation"] = _pil_interp(interp_name)

    # Pick the policy factory from the policy-name prefix.
    policy = cfg.policy
    if policy.startswith("rand"):
        return rand_augment_transform(policy, aa_params)
    if policy.startswith("augmix"):
        aa_params["translate_pct"] = 0.3
        return augment_and_mix_transform(policy, aa_params)
    return auto_augment_transform(policy, aa_params)
def build_transform(is_train, config):
    """Build the train or eval preprocessing pipeline for ``config``.

    Both pipelines normalise with ``config.DATA.MEAN`` / ``config.DATA.STD``.
    Previously only the eval pipeline used them, while training relied on
    ``create_transform``'s defaults, so a non-default MEAN/STD produced
    differently normalised train and eval inputs.

    Args:
        is_train: When truthy, build the training pipeline through
            ``create_transform``; otherwise assemble the eval pipeline.
        config: Config node. Reads ``DATA.IMG_SIZE``, ``DATA.INTERPOLATION``,
            ``DATA.MEAN``, ``DATA.STD``, ``AUG.*`` and ``TEST.CROP``.

    Returns:
        The transform from ``create_transform`` (training) or a
        ``transforms.Compose`` (evaluation).
    """
    resize_im = config.DATA.IMG_SIZE > 32
    if is_train:
        # this should always dispatch to transforms_imagenet_train
        transform = create_transform(
            input_size=config.DATA.IMG_SIZE,
            is_training=True,
            hflip=config.AUG.RANDOM_HORIZONTAL_FLIP,
            vflip=config.AUG.RANDOM_VERTICAL_FLIP,
            scale=config.AUG.SCALE,
            color_jitter=config.AUG.COLOR_JITTER
            if config.AUG.COLOR_JITTER > 0 else None,
            auto_augment=config.AUG.AUTO_AUGMENT
            if config.AUG.AUTO_AUGMENT != 'none' else None,
            re_prob=config.AUG.REPROB,
            re_mode=config.AUG.REMODE,
            re_count=config.AUG.RECOUNT,
            interpolation=config.DATA.INTERPOLATION,
            # Keep training normalisation in sync with the eval branch below.
            mean=config.DATA.MEAN,
            std=config.DATA.STD,
        )
        if not resize_im:
            # replace RandomResizedCropAndInterpolation with RandomCrop
            transform.transforms[0] = transforms.RandomCrop(
                config.DATA.IMG_SIZE, padding=4)
        return transform

    t = []
    if resize_im:
        if config.TEST.CROP:
            # to maintain same ratio w.r.t. 224 images
            size = int((256 / 224) * config.DATA.IMG_SIZE)
            t.append(
                transforms.Resize(
                    size,
                    interpolation=_pil_interp(config.DATA.INTERPOLATION)))
            t.append(transforms.CenterCrop(config.DATA.IMG_SIZE))
        else:
            t.append(
                transforms.Resize(
                    (config.DATA.IMG_SIZE, config.DATA.IMG_SIZE),
                    interpolation=_pil_interp(config.DATA.INTERPOLATION)))

    t.append(transforms.ToTensor())
    t.append(transforms.Normalize(config.DATA.MEAN, config.DATA.STD))
    return transforms.Compose(t)
def build_train_transform(self,
                          image_size=None,
                          print_log=True,
                          auto_augment='rand-m9-mstd0.5'):
    """Compose the training pipeline: crop, flip, RandAugment, tensor, normalise.

    Args:
        image_size: Target size; defaults to ``self.image_size``. A ``list``
            selects ``MyRandomResizedCrop`` (and prints its settings); any
            other value selects ``transforms.RandomResizedCrop``.
        print_log: Accepted but not read by this body.
        auto_augment: Policy string passed to ``rand_augment_transform``.

    Returns:
        A ``transforms.Compose`` ending in ``ToTensor`` and ``self.normalize``.
    """
    size = self.image_size if image_size is None else image_size

    if isinstance(size, list):
        crop_cls = MyRandomResizedCrop
        candidates_msg = ('Use MyRandomResizedCrop: %s, \t %s' %
                          MyRandomResizedCrop.get_candidate_image_size())
        flags_msg = 'sync=%s, continuous=%s' % (
            MyRandomResizedCrop.SYNC_DISTRIBUTED,
            MyRandomResizedCrop.CONTINUOUS)
        print(candidates_msg, flags_msg)
        shortest_edge = min(size)
    else:
        crop_cls = transforms.RandomResizedCrop
        shortest_edge = size

    pipeline = []
    pipeline.append(crop_cls(size, scale=(self.resize_scale, 1.0)))
    pipeline.append(transforms.RandomHorizontalFlip())

    # Hard-coded channel means scaled to 0..255 for the augment fill value
    # (presumably dataset statistics — confirm).
    channel_means = [
        0.48933587508932375, 0.5183537408957618, 0.5387914411673883
    ]
    aa_params = {
        'translate_const': int(shortest_edge * 0.45),
        'img_mean': tuple(min(255, round(255 * m)) for m in channel_means),
    }
    aa_params['interpolation'] = _pil_interp('bicubic')
    # RandAugment is used here; no separate colour-jitter step is added.
    pipeline.append(rand_augment_transform(auto_augment, aa_params))

    pipeline.append(transforms.ToTensor())
    pipeline.append(self.normalize)
    return transforms.Compose(pipeline)
def transforms_imagenet_eval(img_size=224,
                             crop_pct=None,
                             interpolation='bilinear',
                             use_prefetcher=False,
                             mean=IMAGENET_DEFAULT_MEAN,
                             std=IMAGENET_DEFAULT_STD):
    """Build the evaluation pipeline for this file.

    Steps, in order: resize, centre crop, random resized crop, colour jitter
    (0.4 for brightness/contrast/saturation), random horizontal flip
    (p=0.5), then either ``ToNumpy()`` or ``ToTensor`` + ``Normalize``.

    NOTE(review): despite the "eval" name this pipeline applies random
    augmentations — confirm that this is intended.

    Args:
        img_size: Target size, an int or a 2-element tuple/list.
        crop_pct: Crop fraction; falsy values fall back to
            ``DEFAULT_CROP_PCT``.
        interpolation: Interpolation name used for the resize and for the
            random resized crop.
        use_prefetcher: When truthy, end with ``ToNumpy()``.
        mean: Per-channel mean for ``Normalize``.
        std: Per-channel std for ``Normalize``.

    Returns:
        A ``transforms.Compose`` of the assembled steps.
    """
    pct = crop_pct or DEFAULT_CROP_PCT
    rrc_scale = (0.08, 1.0)  # default imagenet scale range
    rrc_ratio = (3. / 4., 4. / 3.)  # default imagenet ratio range

    if not isinstance(img_size, (tuple, list)):
        resize_to = int(math.floor(img_size / pct))
    else:
        assert len(img_size) == 2
        if img_size[-1] == img_size[-2]:
            # fall-back to older behaviour so Resize scales to shortest edge
            # if target is square
            resize_to = int(math.floor(img_size[0] / pct))
        else:
            resize_to = tuple(int(edge / pct) for edge in img_size)

    steps = [
        transforms.Resize(resize_to, _pil_interp(interpolation)),
        transforms.CenterCrop(img_size),
        RandomResizedCropAndInterpolation(img_size,
                                          scale=rrc_scale,
                                          ratio=rrc_ratio,
                                          interpolation=interpolation),
        transforms.ColorJitter(0.4, 0.4, 0.4),
        transforms.RandomHorizontalFlip(p=0.5),
    ]

    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        steps.append(ToNumpy())
    else:
        steps.extend([
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor(mean),
                                 std=torch.tensor(std)),
        ])

    return transforms.Compose(steps)
def transforms_imagenet_train(
        img_size=224,
        scale=(0.08, 1.0),
        color_jitter=0.4,
        auto_augment=None,
        interpolation='random',
        use_prefetcher=False,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        re_prob=0.,
        re_mode='const',
        re_count=1,
        re_num_splits=0,
        separate=False,
        squish=False,
        do_8_rotations=False,
):
    """Build the training pipeline in three stages.

    * primary: geometric step (``squish`` -> plain bilinear ``Resize`` to a
      square/tuple size, otherwise ``RandomResizedCropAndInterpolation``)
      followed by ``RandomRotation()`` when ``do_8_rotations`` is set, else a
      random horizontal flip;
    * secondary: an auto-augment policy chosen by the ``auto_augment`` prefix
      (``rand`` / ``augmix`` / other), or colour jitter when no policy is given
      and ``color_jitter`` is not ``None``;
    * final: ``ToNumpy()`` for the prefetcher path, otherwise ``ToTensor`` +
      ``Normalize`` and, when ``re_prob > 0``, ``RandomErasing``.

    If ``separate`` is truthy the three stages are returned as a tuple of
    three ``Compose`` objects, for use in a mixing dataset that passes all
    data through the first (primary, 'clean') transform, a portion through the
    secondary transform, and normalizes/converts both with the final one.
    Otherwise a single ``Compose`` of all stages is returned.
    """
    # --- primary stage ----------------------------------------------------
    target_size = img_size
    if squish:
        if not isinstance(target_size, tuple):
            target_size = (target_size, target_size)
        geometric = transforms.Resize(target_size, _pil_interp('bilinear'))
    else:
        geometric = RandomResizedCropAndInterpolation(
            target_size, scale=scale, interpolation=interpolation)

    orientation = (RandomRotation()
                   if do_8_rotations else transforms.RandomHorizontalFlip())
    primary = [geometric, orientation]

    # --- secondary stage --------------------------------------------------
    secondary = []
    if auto_augment:
        assert isinstance(auto_augment, str)
        shortest_edge = (min(target_size)
                         if isinstance(target_size, tuple) else target_size)
        aa_params = {
            'translate_const': int(shortest_edge * 0.45),
            'img_mean': tuple(min(255, round(255 * m)) for m in mean),
        }
        if interpolation and interpolation != 'random':
            aa_params['interpolation'] = _pil_interp(interpolation)

        if auto_augment.startswith('rand'):
            secondary.append(rand_augment_transform(auto_augment, aa_params))
        elif auto_augment.startswith('augmix'):
            aa_params['translate_pct'] = 0.3
            secondary.append(
                augment_and_mix_transform(auto_augment, aa_params))
        else:
            secondary.append(auto_augment_transform(auto_augment, aa_params))
    elif color_jitter is not None:
        # color jitter is enabled when not using AA
        if isinstance(color_jitter, (list, tuple)):
            # a 3-tuple/list specifies brightness/contrast/saturation,
            # a 4th entry also augments hue
            assert len(color_jitter) in (3, 4)
            jitter_args = color_jitter
        else:
            # if it's a scalar, duplicate for brightness, contrast, and
            # saturation, no hue
            jitter_args = (float(color_jitter), ) * 3
        secondary.append(transforms.ColorJitter(*jitter_args))

    # --- final stage ------------------------------------------------------
    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        final = [ToNumpy()]
    else:
        final = [
            transforms.ToTensor(),
            transforms.Normalize(mean=torch.tensor(mean),
                                 std=torch.tensor(std)),
        ]
        if re_prob > 0.:
            final.append(
                RandomErasing(re_prob,
                              mode=re_mode,
                              max_count=re_count,
                              num_splits=re_num_splits,
                              device='cpu'))

    if separate:
        return (transforms.Compose(primary), transforms.Compose(secondary),
                transforms.Compose(final))
    return transforms.Compose(primary + secondary + final)