def __getitem__(self, index):
    """Load the sample at ``index`` together with its class label.

    Args:
        index: int, position of the sample in ``self.sample_list`` /
            ``self.label_list``.

    Returns:
        image: the processed image. When ``self.multi_scale`` is falsy this
            is the output of the Resize -> (optional ImageNetPolicy) ->
            ToTensor -> Normalize pipeline; when it is truthy the image is
            only resized and returned as a NumPy array.
        label: the label for this index, converted with
            ``torch.tensor(...).long()``.
    """
    image_path = os.path.join(self.data_root, self.sample_list[index])
    image = Image.open(image_path).convert('RGB')
    target = self.label_list[index]

    if self.transforms:
        # The user-supplied transforms are fed an ndarray; the result is
        # wrapped back into a PIL image for the torchvision steps below.
        augmented = self.transforms(np.asarray(image))
        image = Image.fromarray(augmented)

    resize = T.Resize(self.size, interpolation=3)
    if self.multi_scale:
        # Multi-scale mode: only resize here and hand back an ndarray.
        # NOTE(review): tensor conversion/normalisation presumably happens
        # later per batch -- confirm against the training loop.
        image = np.asarray(resize(image))
    else:
        # Fixed-size mode: resize to the configured size and convert to a
        # normalised tensor right away.
        steps = [resize]
        if self.auto_aug:
            steps.append(ImageNetPolicy())
        steps += [T.ToTensor(), T.Normalize(self.mean, self.std)]
        image = T.Compose(steps)(image)

    return image, torch.tensor(target).long()
def train(self, settings):
    """Create a reader for training.

    Args:
        settings: arguments. This method reads ``data_dir``, ``use_mixup``
            and ``batch_size``, plus ``use_aa`` when it is present.

    Returns:
        The train reader, wrapped by ``fluid.io.batch`` with
        ``drop_last=True``.

    Raises:
        AssertionError: if ``<data_dir>/train_list.txt`` is not a file.
    """
    global policy

    list_path = os.path.join(settings.data_dir, 'train_list.txt')
    assert os.path.isfile(list_path), (
        "{} doesn't exist, please check data list path".format(list_path))

    # The auto-augment policy is published through the module-level
    # ``policy`` name rather than passed to the reader creator.
    if 'use_aa' in settings and settings.use_aa:
        policy = ImageNetPolicy()

    train_reader = self._reader_creator(
        settings,
        list_path,
        'train',
        shuffle=True,
        color_jitter=False,
        rotate=False,
        data_dir=settings.data_dir)

    # Equality with True (not plain truthiness) is kept on purpose so the
    # set of values that enable mixup stays exactly as before.
    if settings.use_mixup == True:  # noqa: E712
        train_reader = create_mixup_reader(settings, train_reader)

    # The configured batch size is divided by the CUDA device count.
    device_count = paddle.fluid.core.get_cuda_device_count()
    per_device_batch = int(settings.batch_size / device_count)
    return fluid.io.batch(
        train_reader, batch_size=per_device_batch, drop_last=True)
def __init__(self, image_file, autoaugment=False):
    """Read the sample list and build the training transform pipeline.

    Args:
        image_file: path to a text file; its lines are read into
            ``self.data``.
        autoaugment: when truthy, ``ImageNetPolicy`` replaces the
            rotation + colour-jitter steps of the pipeline.
    """
    super(ImageNetTrainingDataset, self).__init__()
    self.image_file = image_file

    with open(self.image_file, "r") as handle:
        self.data = handle.readlines()

    # Shuffle the dataset (ten passes, as in the original implementation).
    for _ in range(10):
        random.shuffle(self.data)

    self.imagenet_normalization_paramters = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # Resize first, then crop; the concrete sizes come from the model-size
    # config (original note: resize to 512, then crop to 448).
    self.model_size = ModelSize("resnet50_448")
    size_choice = self.model_size.imagesize_choice()
    self.BASE_RESIZE_SIZE = size_choice["resize"]
    self.INPUT_SIZE = size_choice["input"]

    # Colour-jitter strengths used by the non-autoaugment pipeline.
    self.BRIGHTNESS = 0.4
    self.HUE = 0.1
    self.CONTRAST = 0.4
    self.SATURATION = 0.4

    # autoaugment
    self.Autoaugment = autoaugment
    self.index_sampler = list(range(len(self.data)))

    # Current augmentation: resize, random crop and random horizontal flip,
    # followed by either the auto-augment policy or rotation + colour
    # jitter, then tensor conversion and normalisation.
    pipeline = [
        transforms.Resize(
            (self.BASE_RESIZE_SIZE, self.BASE_RESIZE_SIZE), Image.BILINEAR),
        transforms.RandomCrop(self.INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
    ]
    if self.Autoaugment:
        pipeline.append(ImageNetPolicy())
    else:
        pipeline.append(transforms.RandomRotation(degrees=15))
        pipeline.append(
            transforms.ColorJitter(brightness=self.BRIGHTNESS,
                                   contrast=self.CONTRAST,
                                   hue=self.HUE,
                                   saturation=self.SATURATION))
    pipeline.extend(
        [transforms.ToTensor(), self.imagenet_normalization_paramters])
    self.image_transforms = transforms.Compose(pipeline)
def batch_augmentation(race, age, aug_ratio, total_data, aug_data,
                       autoaugment=False):
    """Augment every sample of one (race, age) group ``aug_ratio`` times.

    For each round and each sample in ``total_data[race][age]`` the image at
    ``sample[0]`` is opened, augmented, and one tab-separated line is
    written to ``aug_data``.

    NOTE(review): the augmented image is not saved anywhere in this
    function; only the list entry naming ``<path>aug_<round>.jpg`` is
    written -- confirm that this is intended.

    Args:
        race: first-level key into ``total_data``.
        age: second-level key into ``total_data``.
        aug_ratio: number of augmentation rounds.
        total_data: nested mapping ``total_data[race][age]`` -> iterable of
            samples; indices 0..3 of each sample are used, index 0 being
            the image path.
        aug_data: object with a ``write`` method receiving the list lines.
        autoaugment: when truthy use ``ImageNetPolicy``; otherwise use
            ``random_augmentation`` with the settings below.
    """
    # ---------------------
    #  RANDOM SETTINGS
    # ---------------------
    random_settings = dict(
        brightness=(0.5, 2.0),
        contrast=(0.5, 2.0),
        saturation=(0.7, 1.8),
        hue=(-0.08, 0.08),
        erase_p=0.0,
        degrees=45,
        translate=(0.25, 0.25),
        scale=(0.6, 1.8),
        shear=30,
        fillcolor=0,
        h_flip=0.5,
    )

    for round_idx in range(aug_ratio):
        for sample in total_data[race][age]:
            picture = Image.open(sample[0])
            # Single-channel ('L') images are expanded to RGB first.
            if picture.mode == 'L':
                picture = picture.convert("RGB")

            if autoaugment:
                picture = ImageNetPolicy()(picture)
            else:
                picture = random_augmentation(picture, **random_settings)

            augmented_name = sample[0] + 'aug_{}.jpg'.format(round_idx)
            aug_data.write('{}\t{}\t{}\t{}\n'.format(
                augmented_name, sample[1], sample[2], sample[3]))
class TinyImagenetDataset(Dataset):
    """Dataset over every ``.JPEG`` file found below a directory.

    File paths and labels are kept in a DataFrame with the columns
    ``path`` and ``label``.
    """

    _root: Path
    _df: DataFrame

    # Pipeline used by __getitem__ when no transform was passed in.
    _auto_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        ImageNetPolicy(),
        transforms.ToTensor(),
        transforms.Normalize([0.4802, 0.4481, 0.3975],
                             [0.2302, 0.2265, 0.2262]),
    ])

    def __init__(self, path, transform):
        """Index the images under ``path``.

        Args:
            path: directory that is walked recursively for files whose
                name contains ``.JPEG``.
            transform: callable invoked as ``transform(image=ndarray)`` and
                expected to return a mapping with an ``'image'`` entry; a
                falsy value selects the built-in ``_auto_transform``.

        Raises:
            NotADirectoryError: if ``path`` is not a directory.
        """
        self._transform = transform
        if not os.path.isdir(path):
            raise NotADirectoryError(f"{path} is not a directory.")

        image_paths = []
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                if ".JPEG" in filename:
                    image_paths.append(os.path.join(dirpath, filename))

        # The label comes from the file-name prefix before the first "_";
        # when that prefix is unknown, the file name is looked up in
        # VAL_LABELS and the result is mapped through FOLDERS_TO_NUM.
        labels = []
        for image_path in image_paths:
            base_name = os.path.basename(image_path)
            fallback = FOLDERS_TO_NUM.get(VAL_LABELS.get(base_name))
            labels.append(
                FOLDERS_TO_NUM.get(base_name.split("_")[0], fallback))

        self._df = pd.DataFrame({"path": image_paths, "label": labels})

    def __getitem__(self, index: int):
        """Return ``(image, label)`` for the row labelled ``index``."""
        path, label = self._df.loc[index, :]

        if not self._transform:
            pil_image = Image.open(path).convert("RGB")
            return self._auto_transform(pil_image), label

        # cv2 loads BGR; convert to RGB before handing to the transform.
        bgr_image = cv2.imread(path)
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        augmented = self._transform(image=rgb_image)
        return augmented['image'], label

    def __len__(self) -> int:
        """Return the number of indexed images."""
        return len(self._df)
def multi_scale_transforms(image_size,
                           images,
                           mean=(0.485, 0.456, 0.406),
                           std=(0.229, 0.224, 0.225),
                           auto_aug=False):
    """Resize and normalise a batch of images to ``image_size``.

    Args:
        image_size: target size, indexed as ``image_size[0]`` and
            ``image_size[1]`` for the last two output dimensions and also
            passed to ``T.Resize``.
        images: batch object providing ``.numpy()``; each entry along the
            first axis is handed to ``Image.fromarray``.
            NOTE(review): presumably uint8 height x width x channel data --
            confirm against the caller.
        mean: per-channel mean given to ``T.Normalize``.
        std: per-channel std given to ``T.Normalize``.
        auto_aug: when truthy, ``ImageNetPolicy`` is inserted after the
            resize step.

    Returns:
        A float tensor of shape
        ``(len(images), 3, image_size[0], image_size[1])``.
    """
    steps = [T.Resize(image_size, interpolation=3)]
    if auto_aug:
        steps.append(ImageNetPolicy())
    steps += [T.ToTensor(), T.Normalize(mean, std)]
    pipeline = T.Compose(steps)

    batch = images.numpy()
    resized = torch.zeros(batch.shape[0], 3, image_size[0], image_size[1])
    for slot, frame in enumerate(batch):
        resized[slot] = pipeline(Image.fromarray(frame))
    return resized
def load_data_transformers(resize_reso=512, crop_reso=448, swap_num=(7, 7)):
    """Build the dictionary of named transform pipelines.

    Args:
        resize_reso: currently unused; kept for interface compatibility
            (the Resize step it fed is disabled in ``common_aug``).
        crop_reso: side length used for the random crop in ``common_aug``
            and for the resize in ``test_totensor``.
        swap_num: two-element sequence passed to ``transforms.Randomswap``
            as ``(swap_num[0], swap_num[1])``. The default is an immutable
            tuple so it cannot be mutated across calls.

    Returns:
        dict mapping the names ``'swap'``, ``'common_aug'``,
        ``'train_totensor'``, ``'val_totensor'`` and ``'test_totensor'`` to
        ``transforms.Compose`` pipelines, plus ``'None'`` mapped to ``None``.
    """

    def _normalize():
        # Channel means are listed in BGR order (the RGB order would be
        # [0.485, 0.456, 0.406]); std is left at 1 so only the mean is
        # subtracted. A fresh instance is built for every pipeline.
        return transforms.Normalize([0.406, 0.456, 0.485], [1, 1, 1])

    return {
        'swap': transforms.Compose([
            transforms.Randomswap((swap_num[0], swap_num[1])),
        ]),
        'common_aug': transforms.Compose([
            transforms.RandomRotation(degrees=15),
            transforms.RandomCrop((crop_reso, crop_reso)),
            transforms.RandomHorizontalFlip(),
        ]),
        'train_totensor': transforms.Compose([
            ImageNetPolicy(),
            transforms.ToTensor(),
            _normalize(),
        ]),
        'val_totensor': transforms.Compose([
            transforms.ToTensor(),
            _normalize(),
        ]),
        'test_totensor': transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            transforms.ToTensor(),
            _normalize(),
        ]),
        'None': None,
    }