def __call__(self, img_group):
    if random.random() < self.p:
        img_group = [F.hflip(img) for img in img_group]
    return img_group
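# A minimal, self-contained sketch (the class name and import aliases are assumptions, not the
# original file's) of how a grouped flip like the __call__ above is usually wrapped: one coin
# flip is drawn for the whole clip so every frame stays temporally consistent.
import random
from typing import List
from PIL import Image
import torchvision.transforms.functional as F

class GroupRandomHorizontalFlip:
    def __init__(self, p: float = 0.5):
        self.p = p

    def __call__(self, img_group: List[Image.Image]) -> List[Image.Image]:
        if random.random() < self.p:
            img_group = [F.hflip(img) for img in img_group]
        return img_group

# usage sketch:
# clip = [Image.open(path) for path in frame_paths]
# clip = GroupRandomHorizontalFlip(p=0.5)(clip)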
def apply_transform(self, img, mask, current_transform=None): if current_transform is None: current_transform = self.transform if isinstance(current_transform, (transforms.Compose)): for transform in current_transform.transforms: img, mask = self.apply_transform(img, mask, transform) elif isinstance(current_transform, (transforms.RandomApply)): if current_transform.p >= random.random(): img, mask = self.apply_transform(img, mask, current_transform.transforms) elif isinstance(current_transform, (transforms.RandomChoice)): t = random.choice(current_transform.transforms) img, mask = self.apply_transform(img, mask, t) elif isinstance(current_transform, (transforms.RandomOrder)): order = list(range(len(current_transform.transforms))) random.shuffle(order) for i in order: img, mask = self.apply_transform( img, mask, current_transform.transforms[i]) elif isinstance( current_transform, ( transforms.CenterCrop, transforms.FiveCrop, transforms.TenCrop, transforms.ToTensor, transforms.Grayscale, transforms.Resize, ), ): img = current_transform(img) mask = current_transform(mask) elif isinstance( current_transform, (transforms.Normalize, transforms.Lambda, transforms.Pad)): img = current_transform(img) # mask = current_transform(mask) # apply on input only elif isinstance(current_transform, (transforms.ColorJitter)): transform = current_transform.get_params( current_transform.brightness, current_transform.contrast, current_transform.saturation, current_transform.hue, ) for lambda_transform in transform.transforms: img = lambda_transform(img) elif isinstance(current_transform, (transforms.RandomAffine)): ret = current_transform.get_params( current_transform.degrees, current_transform.translate, current_transform.scale, current_transform.shear, img.size, ) img = F.affine( img, *ret, resample=current_transform.resample, fillcolor=current_transform.fillcolor, ) mask = F.affine( mask, *ret, resample=current_transform.resample, fillcolor=current_transform.fillcolor, ) elif isinstance(current_transform, (transforms.RandomCrop)): i, j, h, w = current_transform.get_params(img, current_transform.size) img = F.crop(img, i, j, h, w) mask = F.crop(mask, i, j, h, w) elif isinstance(current_transform, (transforms.RandomHorizontalFlip)): if random.random() < current_transform.p: img = F.hflip(img) mask = F.hflip(mask) elif isinstance(current_transform, (transforms.RandomVerticalFlip)): if random.random() < current_transform.p: img = F.vflip(img) mask = F.vflip(mask) elif isinstance(current_transform, (transforms.RandomPerspective)): if random.random() < current_transform.p: width, height = img.size startpoints, endpoints = current_transform.get_params( width, height, current_transform.distortion_scale) img = F.perspective(img, startpoints, endpoints, current_transform.interpolation) mask = F.perspective(mask, startpoints, endpoints, current_transform.interpolation) elif isinstance(current_transform, (transforms.RandomResizedCrop)): ret = current_transform.get_params(img, current_transform.scale, current_transform.ratio) img = F.resized_crop(img, *ret, current_transform.size, current_transform.interpolation) mask = F.resized_crop(mask, *ret, current_transform.size, current_transform.interpolation) elif isinstance(current_transform, (transforms.RandomRotation)): angle = current_transform.get_params(current_transform.degrees) img = F.rotate( img, angle, current_transform.resample, current_transform.expand, current_transform.center, ) mask = F.rotate( mask, angle, current_transform.resample, current_transform.expand, 
current_transform.center, ) elif isinstance(current_transform, (transforms.RandomErasing)): if random.uniform(0, 1) < current_transform.p: x, y, h, w, v = current_transform.get_params( img, scale=current_transform.scale, ratio=current_transform.ratio, value=current_transform.value, ) img = F.erase(img, x, y, h, w, v, current_transform.inplace) # mask = F.erase(mask, x, y, h, w, v, current_transform.inplace) else: raise NotImplementedError( f'Transform "{current_transform}" not implemented yet') return img, mask
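# Hedged usage sketch (all names below are assumptions, not from the original repo): the
# apply_transform dispatcher above lets a single torchvision pipeline drive an (image, mask)
# pair with the same random draw per transform, which a naive Compose applied twice to image
# and mask separately would not guarantee.
import random
from torchvision import transforms

class JointTransform:  # hypothetical owner of the apply_transform method defined above
    def __init__(self, transform):
        self.transform = transform

    # apply_transform(self, img, mask, current_transform=None) as defined above

joint = JointTransform(transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(64),
]))
# img, mask = joint.apply_transform(pil_image, pil_mask)  # identical flip/crop for both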
def hflip(in_dict, cfg):
    if np.random.random() < 0.5:
        in_dict['im'] = F.hflip(in_dict['im'])
        if 'ps_label' in in_dict:
            in_dict['ps_label'] = F.hflip(in_dict['ps_label'])
def __call__(self, image, target):
    if random.random() < self.flip_prob:
        image = functional.hflip(image)
        target = functional.hflip(target)
    return image, target
def transform(self, image): hr_image = image # downscale to obtain low-resolution image resize = transforms.Resize(size=self.lr_shape, interpolation=self.downgrade) lr_image = resize(hr_image) # print(np.array(hr_image).max(), np.array(hr_image).min()) # apply blur if self.include_blur: lr_image = lr_image.filter(ImageFilter.GaussianBlur(radius=self.blur_radius)) # apply random transforms if self.random_flips: horiz_random, vert_random = self.randomGenerator() # random horizontal flip if horiz_random > 0.5: hr_image = tvF.hflip(hr_image) lr_image = tvF.hflip(lr_image) # random vertical flip if vert_random > 0.5: hr_image = tvF.vflip(hr_image) lr_image = tvF.vflip(lr_image) # apply noise lr_image = np.array(lr_image) hr_image = np.array(hr_image) if self.include_noise: lr_image = np.array(np.clip((lr_image + np.random.normal(self.noise_mean, self.noise_sigma, lr_image.shape)), a_min=0, a_max=255).astype("uint8")) # desired channel number should be checked if self.channel_number == 3 and lr_image.shape[-1] == 1: lr_image = np.stack([lr_image[np.newaxis, ...]] * 3, axis=0) hr_image = np.stack([hr_image[np.newaxis, ...]] * 3, axis=0) # Transform to tensor hr_image = tvF.to_tensor(Image.fromarray(hr_image)) lr_image = tvF.to_tensor(Image.fromarray(lr_image)) # # apply normalization # if self.normalize == "zeroMean": # # todo Mean & STD of the dataset should be given or It can be calculated in a method # hr_means = [hr_image.mean() for i in range(hr_image.shape[0])] # lr_means = [lr_image.mean() for i in range(lr_image.shape[0])] # hr_stds = [hr_image.std() for i in range(hr_image.shape[0])] # lr_stds = [lr_image.std() for i in range(lr_image.shape[0])] # hr_image = tvF.normalize(hr_image, hr_means, hr_stds) # lr_image = tvF.normalize(lr_image, lr_means, lr_stds) # elif self.normalize == "between01": # hr_mins = [hr_image.min() for i in range(hr_image.shape[0])] # lr_mins = [lr_image.min() for i in range(lr_image.shape[0])] # hr_ranges = [hr_image.max() - hr_image.min() for i in range(hr_image.shape[0])] # lr_ranges = [lr_image.max() - lr_image.min() for i in range(lr_image.shape[0])] # hr_image = tvF.normalize(hr_image, hr_mins, hr_ranges) # lr_image = tvF.normalize(lr_image, lr_mins, lr_ranges) # apply normalization if self.normalize == "zeroMean": # todo Mean & STD of the dataset should be given or It can be calculated in a method hr_means = [hr_image.mean() for i in range(hr_image.shape[0])] lr_means = [lr_image.mean() for i in range(lr_image.shape[0])] hr_stds = [hr_image.std() for i in range(hr_image.shape[0])] lr_stds = [lr_image.std() for i in range(lr_image.shape[0])] if hr_stds[0].item() == 0 or lr_stds[0].item() == 0: hr_image = tvF.normalize(hr_image, hr_means, [1, ]) lr_image = tvF.normalize(lr_image, lr_means, [1, ]) else: hr_image = tvF.normalize(hr_image, hr_means, hr_stds) lr_image = tvF.normalize(lr_image, lr_means, lr_stds) elif self.normalize == "between01": hr_mins = [hr_image.min() for i in range(hr_image.shape[0])] lr_mins = [lr_image.min() for i in range(lr_image.shape[0])] hr_ranges = [hr_image.max() - hr_image.min() for i in range(hr_image.shape[0])] lr_ranges = [lr_image.max() - lr_image.min() for i in range(lr_image.shape[0])] if not (hr_ranges[0].item() == 0 or lr_ranges[0].item() == 0): hr_image = tvF.normalize(hr_image, hr_mins, hr_ranges) lr_image = tvF.normalize(lr_image, lr_mins, lr_ranges) # hr_image = tvF.normalize(hr_image, [0.5,], [0.5,]) # lr_image = tvF.normalize(lr_image, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) else: hr_image = tvF.normalize(hr_image, hr_mins, 
[1, ]) lr_image = tvF.normalize(lr_image, lr_mins, [1, ]) elif self.normalize == "divideBy255": hr_image = tvF.normalize(hr_image, [0, ], [1, ]) lr_image = tvF.normalize(lr_image, [0, ], [1, ]) else: hr_image = hr_image * 2 - 1 lr_image = lr_image * 2 - 1 # if self.channel_number == 3 & hr_image.size[-1] == 1: # hr_image = hr_image # lr_image = lr_image # print(lr_image.cpu().detach().numpy().max(), hr_image.cpu().detach().numpy().min()) return lr_image, hr_image #Testing purposes # from options import options # CONFIG_FILE_NAME = "../configs/encoderDecoderFusionv2ADAS_HSVsingleChannel.ini" # args = options(CONFIG_FILE_NAME) # kaist = KaistDataset(args.argsDataset) # kaist.hr_shape = [256, 256] # kaist.lr_shape = [256, 256] # print(len(kaist)) # data = kaist.__getitem__(random.randint(0, len(kaist))) # # print(data['gts'][1].numpy().transpose((1, 2, 0)).squeeze().max(), data['gts'][1].numpy().transpose((1, 2, 0)).squeeze().min()) # import matplotlib.pyplot as plt # plt.ion() # # tmp = data['inputs'][1].numpy().transpose((1, 2, 0)).squeeze() # plt.imshow(data['inputs'][1].numpy().transpose((1, 2, 0)).squeeze(), cmap='gray') # plt.waitforbuttonpress() # plt.figure() # plt.imshow(data['gts'][0].numpy().transpose((1, 2, 0)).squeeze(), cmap='gray') # plt.waitforbuttonpress() #tmp = 0
def transform(self, img, depth, region, segments): """transform :param img: :param depth: """ img = img[:, :, :] img = img.astype(np.float32) / 255 #print(img.shape) # Resize scales images from 0 to 255, thus we need # to divide by 255.0 #img = torch.from_numpy(img).float() depth = torch.from_numpy(depth).float().unsqueeze(0).unsqueeze(0) #print(d) segments = torch.from_numpy(segments).float().unsqueeze(0) region = torch.from_numpy(region).float().unsqueeze(0) #img = img.astype(float) / 255.0 # NHWC -> NCHW #img = img.transpose(1,2,0) topil = transforms.ToPILImage() totensor = transforms.ToTensor() normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) img = totensor(img) image = img.unsqueeze(0) + 0 #image=image/torch.max(image) #print(img.shape,depth.shape) #depth=depth[0,:,:] #depth = depth.astype(float)/32 #depth = np.round(depth) #depth = m.imresize(depth, (self.img_size[0], self.img_size[1]), 'nearest', mode='F') #depth = depth.astype(int) #depth=np.reshape(depth,[1,depth.shape[0],depth.shape[1]]) #classes = np.unique(depth) #print(classes) #depth = depth.transpose(2,0,1) #if not np.all(classes == np.unique(depth)): # print("WARN: resizing segmentss yielded fewer classes") #if not np.all(classes < self.n_classes): # raise ValueError("Segmentation map contained invalid class values") # img=F.interpolate(img,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze()[:,6:-6,8:-8] # image=F.interpolate(image,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze()[:,6:-6,8:-8] # #print(depth.shape) # depth=F.interpolate(depth,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze()[6:-6,8:-8] # region=F.interpolate(region,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze()[6:-6,8:-8] # segments=F.interpolate(segments,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze()[6:-6,8:-8] if self.split == 'train': one = torch.ones(1) scale = random.uniform(1, 1.3) mask = (depth > alpha) & (depth < beta) mask = mask.float() #print(torch.sum(mask)) h = int(240 * scale) w = int(320 * scale) md = torch.max(depth) #print(md) mr = torch.max(region) ms = torch.max(segments) #print(torch.max(image)) img = tf.resize(topil(img.squeeze(0)), [h, w]) image = tf.resize(topil(image.squeeze(0)), [h, w]) #print(torch.max(totensor(image))) depth = tf.resize(topil(depth.squeeze(0) / md), [h, w]) mask = tf.resize(topil(mask.squeeze(0)), [h, w]) #print(segments.shape) segments = tf.resize(topil(segments.squeeze(0) / ms), [h, w]) region = tf.resize(topil(region.squeeze(0) / mr), [h, w]) i, j, h, w = transforms.RandomCrop.get_params( img, output_size=[228, 304]) r = random.uniform(-5, 5) sigma = random.uniform(0, 0.04) img = tf.rotate(img, r) image = tf.rotate(image, r) depth = tf.rotate(depth, r) mask = tf.rotate(mask, r) segments = tf.rotate(segments, r) region = tf.rotate(region, r) img = tf.crop(img, i, j, h, w) image = tf.crop(image, i, j, h, w) depth = tf.crop(depth, i, j, h, w) mask = tf.crop(mask, i, j, h, w) segments = tf.crop(segments, i, j, h, w) region = tf.crop(region, i, j, h, w) if random.uniform(0, 1) > 0.5: img = tf.hflip(img) image = tf.hflip(image) depth = tf.hflip(depth) mask = tf.hflip(mask) segments = tf.hflip(segments) region = tf.hflip(region) brightness = random.uniform(0, 0.2) contrast = random.uniform(0, 0.2) saturation = random.uniform(0, 0.2) hue = random.uniform(0, 0.2) color = transforms.ColorJitter(brightness, contrast, saturation, hue) img = color(img) gamma = random.uniform(0.7, 1.5) img = tf.adjust_gamma(img, 
gamma) img = totensor(img) r = random.uniform(0.8, 1.2) g = random.uniform(0.8, 1.2) b = random.uniform(0.8, 1.2) img[:, :, 0] *= r img[:, :, 1] *= g img[:, :, 2] *= b gaussian = torch.zeros_like(img).normal_() * sigma img = img + gaussian img = img.clamp(min=0, max=1) image = img + 0 img = normalize(img) #image=totensor(image) #print(torch.max(image)) #print(torch.max(depth),scale) depth = totensor(depth) * md / scale mask = totensor(mask) #print(torch.sum(mask)) depth = torch.where(mask > 0, depth, torch.zeros(1).float()) #print(torch.max(depth)) #print(torch.max(depth),scale) region = totensor(region) * mr segments = totensor(segments) * ms depth = torch.where(depth > beta, beta * one, depth) depth = torch.where(depth < alpha, alpha * one, depth) #exit() else: one = torch.ones(1) mask = (depth > alpha) & (depth < beta) mask = mask.float() img = img.unsqueeze(0) img = F.interpolate(img, scale_factor=1 / 2, mode='bilinear', align_corners=False).squeeze() image = F.interpolate(image, scale_factor=1 / 2, mode='bilinear', align_corners=False).squeeze() #print(depth.shape) #depth=F.interpolate(depth,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze() #mask=F.interpolate(mask,scale_factor=1/2,mode='bilinear',align_corners=False).squeeze() #print(torch.sum(mask)) #depth=torch.where(mask>=1,depth,torch.zeros(1).float()) depth = torch.where(depth > beta, beta * one, depth) depth = torch.where(depth < alpha, alpha * one, depth) #depth=depth.squeeze() region = F.interpolate(region, scale_factor=1 / 2, mode='bilinear', align_corners=False).squeeze() segments = F.interpolate(segments, scale_factor=1 / 2, mode='bilinear', align_corners=False).squeeze() image = img[..., 6:-6, 8:-8] + 0 img = normalize(img)[..., 6:-6, 8:-8] #exit() # img=img.squeeze() # #print(depth.shape) # depth=depth.squeeze() # region=region.squeeze() # segments=segments.squeeze() #print(img.shape,image.shape,region.shape,segments.shape) return img, depth, region, segments, image
def __getitem__(self, idx):
    HE_path = self.HE_list[idx]
    CK_path = self.CK_list[idx]
    file_name = HE_path.split('/')[-1].split('.')[0]
    # paired CK and HE files must share the same base name
    assert CK_path.split('/')[-1].replace('CK_', '') == \
        HE_path.split('/')[-1].replace('HE_', ''), 'mismatched CK/HE file names'
    HE_image = cv2.imread(HE_path)[:, :, [2, 1, 0]]  # BGR -> RGB
    HE_image = cv2.resize(HE_image, None, fx=0.5, fy=0.5)
    HE_image = transforms.ToPILImage()(HE_image)
    CK_image = cv2.imread(CK_path)[:, :, [2, 1, 0]]  # BGR -> RGB
    CK_image = cv2.resize(CK_image, None, fx=0.5, fy=0.5)
    CK_image = transforms.ToPILImage()(CK_image)
    if self.transform:
        # random resized crop with mild scale/aspect jitter, shared by both stains
        if self.crop:
            crop = transforms.RandomResizedCrop(256)
            i, j, h, w = crop.get_params(CK_image, scale=(0.9, 1.0), ratio=(0.9, 1.1))
            HE_image = TF.crop(HE_image, i, j, h, w)
            CK_image = TF.crop(CK_image, i, j, h, w)
        # random flips, applied identically to both images
        if random.random() > 0.5:
            HE_image = TF.hflip(HE_image)
            CK_image = TF.hflip(CK_image)
        if random.random() > 0.5:
            HE_image = TF.vflip(HE_image)
            CK_image = TF.vflip(CK_image)
        # color jitter on the HE input only
        HE_image = transforms.ColorJitter(brightness=0.25,
                                          contrast=0.75,
                                          saturation=0.25,
                                          hue=0.1)(HE_image)
    HE_image = transforms.ToTensor()(HE_image)
    CK_image = transforms.ToTensor()(CK_image)
    staining_data = {
        'filname': file_name,
        'CK_image': CK_image,
        'HE_image': HE_image
    }
    return staining_data
def __call__(self, image, target):
    if random.random() < self.prob:
        image = F.hflip(image)
        target = target.transpose(0)
    return image, target
def torchvision_transform(self, img):
    return torchvision.hflip(img)
def __init__( self, seed=None, optimizer=Adam, optimizer_kwargs={}, learning_rate_init=0.04, gamma=0.995, # learning rate decay factor considered_groups=list( range(12)), ## group layers to be considered from start sample_variance_threshold=0.002, weight_loss_sample_variance=0, # 10. evaluation_steps=250, # number of batches between loss tracking N_batches_test=1, # number of batches considered for evaluation ): super(ImageClassifier, self).__init__(considered_groups=considered_groups) if seed is not None: torch.manual_seed(seed) #''' resnet = models.resnet18(pretrained=False) self.conv = Sequential( *(list(resnet.children())[:-1]), Flatten(), ) ''' architecture used by Dielemann et al 2015 self.conv = Sequential( # Conv2dUntiedBias(41, 41, 3, 32, kernel_size=6), Conv2d(3,32, kernel_size=6), ReLU(), MaxPool2d(2), # Conv2dUntiedBias(16, 16, 32, 64, kernel_size=5), Conv2d(32, 64, kernel_size=5), ReLU(), MaxPool2d(2), # Conv2dUntiedBias(6, 6, 64, 128, kernel_size=3), Conv2d(64, 128, kernel_size=3), ReLU(), # Conv2dUntiedBias(4, 4, 128, 128, kernel_size=3), #weight_std=0.1), Conv2d(128, 128, kernel_size=3), ReLU(), MaxPool2d(2), Flatten(), ) #''' self.dense1 = MaxOut(8192, 2048, bias=0.01) self.dense2 = MaxOut(2048, 2048, bias=0.01) self.dense3 = Sequential( MaxOut(2048, 37, bias=0.1), # LeakyReLU(negative_slope=1e-7), ALReLU(negative_slope=1e-2), ) self.dropout = Dropout(p=0.5) self.augment = Compose([ Lambda(lambda img: torch.cat([img, hflip(img)], 0)), Lambda(lambda img: torch.cat([img, rotate(img, 45)], 0)), FiveCrop(45), Lambda(lambda crops: torch.cat([ rotate(crop, ang) for crop, ang in zip(crops, (0, 90, 270, 180)) ], 0)), ]) self.N_augmentations = 16 self.N_conv_outputs = 512 self.set_optimizer(optimizer, lr=learning_rate_init, **optimizer_kwargs) # self.scheduler = ExponentialLR(self.optimizer, gamma=gamma) self.scheduler = MultiStepLR(self.optimizer, milestones=[292, 373], gamma=gamma) self.make_labels_hierarchical = False # if True, output probabilities are renormalized to fit the hierarchical label structure self.N_batches_test = N_batches_test self.evaluation_steps = evaluation_steps # number of batches between loss tracking self.weight_loss_sample_variance = weight_loss_sample_variance self.sample_variance_threshold = sample_variance_threshold self.iteration = 0 self.epoch = 0 self.losses_train = Losses("loss", "train") self.losses_valid = Losses("loss", "valid") self.sample_variances_train = Losses("sample variance", "train") self.sample_variances_valid = Losses("sample variance", "valid") for g in range(1, 12): setattr(self, f"accuracies_Q{g}_train", Accuracies("accuracy train", f"Q{g}")) setattr(self, f"accuracies_Q{g}_valid", Accuracies("accuracy valid", f"Q{g}")) self.losses_regression = Losses("loss", "regression") self.losses_variance = Losses("loss", "sample variance") ## return to random seed if seed is not None: sd = np.random.random() * 10000 torch.manual_seed(sd)
def __call__(self, image, *args):
    if random.random() < self.flip_prob:
        image = F.hflip(image)
        args = tuple(
            np.array([image.width - point[0], point[1]], dtype=point.dtype)
            for point in args)
    return (image,) + args
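# Worked micro-example (hypothetical values) of the keypoint mapping used above: flipping an
# image of width W horizontally sends a point's x-coordinate to W - x and leaves y unchanged.
import numpy as np
point = np.array([10, 25], dtype=np.float32)   # (x, y) in a 100-pixel-wide image
width = 100
flipped = np.array([width - point[0], point[1]], dtype=point.dtype)
assert flipped.tolist() == [90.0, 25.0]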
def data_augmentation(input_image, output_mask, img_input_size=(256, 448), img_output_size=(256, 448), aug=True): image = TF.resize(input_image, size=img_input_size) mask = TF.resize(output_mask, size=img_input_size) if output_mask is not None else None if aug: # Random horizontal flipping if random.random() > 0.5: image = TF.hflip(image) mask = TF.hflip(mask) # Random vertical flipping if random.random() > 0.5: image = TF.vflip(image) mask = TF.vflip(mask) # Random rotation if random.random() > 0.5 and img_input_size[0] == img_input_size[1]: augmented = RandomRotate90(p=1)(image=np.array(image), mask=np.array(mask)) image = Image.fromarray(augmented['image']) mask = Image.fromarray(augmented['mask']) # Random transpose if random.random() > 0.5 and img_input_size[0] == img_input_size[1]: augmented = Transpose(p=1)(image=np.array(image), mask=np.array(mask)) image = Image.fromarray(augmented['image']) mask = Image.fromarray(augmented['mask']) # Random elastic transformation if random.random() > 0.5: alpha = random.randint(100, 200) augmented = ElasticTransform(p=1, alpha=alpha, sigma=alpha * 0.05, alpha_affine=alpha * 0.03)( image=np.array(image), mask=np.array(mask)) image = Image.fromarray(augmented['image']) mask = Image.fromarray(augmented['mask']) # Random GridDistortion if random.random() > 0.5: augmented = GridDistortion(p=1)(image=np.array(image), mask=np.array(mask)) image = Image.fromarray(augmented['image']) mask = Image.fromarray(augmented['mask']) # Random OpticalDistortion if random.random() > 0.5: augmented = OpticalDistortion(p=1, distort_limit=1, shift_limit=0.5)( image=np.array(image), mask=np.array(mask)) image = Image.fromarray(augmented['image']) mask = Image.fromarray(augmented['mask']) # Transform to grayscale (1 channel) # image = TF.to_grayscale(image, num_output_channels=1) mask = TF.to_grayscale(mask, num_output_channels=1) if mask is not None else None # Crop the mask to the desired output size # mask = transforms.CenterCrop(img_output_size)(mask) if mask is not None else None # Transform to pytorch tensor and binarize the mask image = TF.to_tensor(image).float() # mask = binarize(TF.to_tensor(mask)).long() if mask is not None else torch.zeros(img_output_size, img_output_size) # mask = binarize(TF.to_tensor(mask)).float() if mask is not None else torch.zeros(img_output_size) mask = TF.to_tensor(np_to_pil(hysteresis_threshold( np_img=pil_to_np(mask)))).squeeze( 0).float() if mask is not None else torch.zeros(img_output_size) return image, mask
def transform(self, image): if self.channel_number == 1: image = image.convert('L') # Resize input image if its dimensions smaller than desired dimensions resize = transforms.Resize(size=self.hr_shape, interpolation=self.downgrade) if not (image.width > self.hr_shape[0] and image.height > self.hr_shape[1]): image = resize(image) # random crop crop = transforms.RandomCrop(size=self.hr_shape) hr_image = crop(image) # downscale to obtain low-resolution image resize = transforms.Resize(size=self.lr_shape, interpolation=self.downgrade) lr_image = resize(hr_image) # apply blur if self.include_blur: lr_image = lr_image.filter( ImageFilter.GaussianBlur(radius=self.blur_radius)) # apply random transforms if self.random_flips: horiz_random, vert_random = self.randomGenerator() # random horizontal flip if horiz_random > 0.5: hr_image = tvF.hflip(hr_image) lr_image = tvF.hflip(lr_image) # random vertical flip if vert_random > 0.5: hr_image = tvF.vflip(hr_image) lr_image = tvF.vflip(lr_image) # apply noise lr_image = np.array(lr_image) hr_image = np.array(hr_image) # if (hr_image.max() - hr_image.min()) == 0 or (lr_image.max() - lr_image.min()) == 0: # print('zero image') if self.include_noise: lr_image = np.array( np.clip((lr_image + np.random.normal( self.noise_mean, self.noise_sigma, lr_image.shape)), a_min=0, a_max=255).astype("uint8")) self.check_nan(lr_image) self.check_nan(hr_image) # desired channel number should be checked if self.channel_number == 3 and lr_image.shape[-1] == 1: lr_image = np.stack([lr_image[np.newaxis, ...]] * 3, axis=0) hr_image = np.stack([hr_image[np.newaxis, ...]] * 3, axis=0) # Transform to tensor hr_image = tvF.to_tensor(Image.fromarray(hr_image)) lr_image = tvF.to_tensor(Image.fromarray(lr_image)) # apply normalization if self.normalize == "zeroMean": # todo Mean & STD of the dataset should be given or It can be calculated in a method hr_means = [hr_image.mean() for i in range(hr_image.shape[0])] lr_means = [lr_image.mean() for i in range(lr_image.shape[0])] hr_stds = [hr_image.std() for i in range(hr_image.shape[0])] lr_stds = [lr_image.std() for i in range(lr_image.shape[0])] if hr_stds[0].item() == 0 or lr_stds[0].item() == 0: hr_image = tvF.normalize(hr_image, hr_means, [1, 1, 1]) lr_image = tvF.normalize(lr_image, lr_means, [1, 1, 1]) else: hr_image = tvF.normalize(hr_image, hr_means, hr_stds) lr_image = tvF.normalize(lr_image, lr_means, lr_stds) elif self.normalize == "between01": hr_mins = [hr_image.min() for i in range(hr_image.shape[0])] lr_mins = [lr_image.min() for i in range(lr_image.shape[0])] hr_ranges = [ hr_image.max() - hr_image.min() for i in range(hr_image.shape[0]) ] lr_ranges = [ lr_image.max() - lr_image.min() for i in range(lr_image.shape[0]) ] if not (hr_ranges[0].item() == 0 or lr_ranges[0].item() == 0): hr_image = tvF.normalize(hr_image, hr_mins, hr_ranges) lr_image = tvF.normalize(lr_image, lr_mins, lr_ranges) # hr_image = tvF.normalize(hr_image, [0.5,], [0.5,]) #lr_image = tvF.normalize(lr_image, [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) else: hr_image = tvF.normalize(hr_image, hr_mins, [1, 1, 1]) lr_image = tvF.normalize(lr_image, lr_mins, [1, 1, 1]) # print('image not correct') elif self.normalize == "divideBy255": hr_image = tvF.normalize(hr_image, [ 0, ], [ 1, ]) lr_image = tvF.normalize(lr_image, [ 0, ], [ 1, ]) else: hr_image = hr_image * 2 - 1 lr_image = lr_image * 2 - 1 # if self.channel_number == 3 & hr_image.size[-1] == 1: # hr_image = hr_image # lr_image = lr_image self.check_nan(lr_image) self.check_nan(hr_image) return lr_image, hr_image
def __call__(self, image, *args):
    if random.random() < self.flip_prob:
        image = F.hflip(image)
    return (image,) + args
def __getitem__(self, idx): """Returns a pair of images with the given identifier. This is lazy loading of data into memory. Only those image pairs needed for the current batch are loaded. :param idx: image pair identifier :returns: dictionary containing input and output images and their identifier :rtype: dictionary """ while True: if (self.is_inference) or (self.is_valid): input_img = util.ImageProcessing.load_image( self.data_dict[idx]['input_img'], normaliser=self.normaliser) output_img = util.ImageProcessing.load_image( self.data_dict[idx]['output_img'], normaliser=self.normaliser) if self.normaliser == 1: input_img = input_img.astype(np.uint8) output_img = output_img.astype(np.uint8) input_img = TF.to_pil_image(input_img) input_img = TF.to_tensor(input_img) output_img = TF.to_pil_image(output_img) output_img = TF.to_tensor(output_img) if input_img.shape[1] == output_img.shape[2]: output_img = output_img.permute(0, 2, 1) return { 'input_img': input_img, 'output_img': output_img, 'name': self.data_dict[idx]['input_img'].split("/")[-1] } else: output_img = util.ImageProcessing.load_image( self.data_dict[idx]['output_img'], normaliser=self.normaliser) input_img = util.ImageProcessing.load_image( self.data_dict[idx]['input_img'], normaliser=self.normaliser) if self.normaliser == 1: input_img = input_img.astype(np.uint8) output_img = output_img.astype(np.uint8) input_img = TF.to_pil_image(input_img) output_img = TF.to_pil_image(output_img) if not self.is_valid: # Random horizontal flipping if random.random() > 0.5: input_img = TF.hflip(input_img) output_img = TF.hflip(output_img) # Random vertical flipping if random.random() > 0.5: input_img = TF.vflip(input_img) output_img = TF.vflip(output_img) # Random rotation +90 if random.random() > 0.5: input_img = TF.rotate(input_img, 90, expand=True) output_img = TF.rotate(output_img, 90, expand=True) #input_img.save("./"+self.data_dict[idx]['input_img'].split("/")[-1]+"1.png") #output_img.save("./"+self.data_dict[idx]['output_img'].split("/")[-1]+"2.png") # Random rotation -90 if random.random() > 0.5: input_img = TF.rotate(input_img, -90, expand=True) output_img = TF.rotate(output_img, -90, expand=True) #output_img.save("./"+self.data_dict[idx]['output_img'].split("/")[-1]+"2.png") # Transform to tensor #print(output_img.shape) #plt.imsave("./"+self.data_dict[idx]['input_img'].split("/")[-1]+".png", output_img,format='png') input_img = TF.to_tensor(input_img) output_img = TF.to_tensor(output_img) return { 'input_img': input_img, 'output_img': output_img, 'name': self.data_dict[idx]['input_img'].split("/")[-1] }
def flipImages(self, imgs):
    for idx in range(len(imgs)):
        imgs[idx] = F.hflip(imgs[idx])
    return imgs
def makeAgumentedTest(test, test_labels):
    num_image = test.shape[0]
    translatedImages = []
    brightenedImages = []
    darkenedImages = []
    hContrasted = []
    lContrasted = []
    flippedImages = []
    invertedImages = []
    for i in range(num_image):
        img = test[i, :, :, :]
        img = Image.fromarray(img)
        # Translation
        translatedIMG = transforms.affine(img, angle=0, translate=(50, 50), scale=1, shear=0)
        # Brightened
        brightenedIMG = transforms.adjust_brightness(img, 1.5)
        # Darkened
        darkenedIMG = transforms.adjust_brightness(img, 0.75)
        # High contrast
        highContrastIMG = transforms.adjust_contrast(img, 1.5)
        # Low contrast
        lowContrastIMG = transforms.adjust_contrast(img, 0.75)
        # Horizontally flipped (left-right mirror)
        flippedIMG = transforms.hflip(img)
        # Colors inverted
        invertedIMG = ImageOps.invert(img)
        translatedImages.append(np.expand_dims(np.array(translatedIMG), axis=0))
        brightenedImages.append(np.expand_dims(np.array(brightenedIMG), axis=0))
        darkenedImages.append(np.expand_dims(np.array(darkenedIMG), axis=0))
        hContrasted.append(np.expand_dims(np.array(highContrastIMG), axis=0))
        lContrasted.append(np.expand_dims(np.array(lowContrastIMG), axis=0))
        flippedImages.append(np.expand_dims(np.array(flippedIMG), axis=0))
        invertedImages.append(np.expand_dims(np.array(invertedIMG), axis=0))
    translatedImages = np.vstack(translatedImages)
    brightenedImages = np.vstack(brightenedImages)
    darkenedImages = np.vstack(darkenedImages)
    hContrasted = np.vstack(hContrasted)
    lContrasted = np.vstack(lContrasted)
    flippedImages = np.vstack(flippedImages)
    invertedImages = np.vstack(invertedImages)
    augmentedTest = {
        'translated': translatedImages,
        'brightened': brightenedImages,
        'darkened': darkenedImages,
        'high_contrast': hContrasted,
        'low_contrast': lContrasted,
        'flipped': flippedImages,
        'inverted': invertedImages,
        'labels': test_labels
    }
    with open('augmented_test.pkl', 'wb') as outfile:
        pickle.dump(augmentedTest, outfile)
    print('Done!')
def random_horizontal_flip(images, probability=0.5):
    if random.random() < probability:
        images = [tr.hflip(img) for img in images]
    return images
def __getitem__(self, idx): ## load images self.img_gt = Image.open(os.path.join( self.op_dir, self.files_op[idx])).convert('RGB') self.img = Image.open(os.path.join(self.ip_dir, self.files_ip[idx])).convert('RGB') ## patch generation width, height = self.img.size w = width // 15 h = height // 15 #we ignore the border by not considering `ignore' numbers of rows and columns in the beginning and end ignore = 0 th = (h // 2) * 2 - ignore tw = (w // 2) * 2 - ignore i = ignore j = ignore n_views = self.view_end - self.view_beg + 1 # We do not perform any data augmentation if random.randint(0, 100) > 50: flip_flag = False else: flip_flag = False if random.randint(0, 100) < 20: v_flag = False else: v_flag = False if random.randint(0, 100) > 50: color_jitter_flag = False jitter_order = np.random.permutation(3) else: color_jitter_flag = False for ii in range(self.view_beg, self.view_end + 1): for jj in range(self.view_beg, self.view_end + 1): img_ip_small_c = Ft.crop(self.img, (i + (h * (jj - 1))), (j + (w * (ii - 1))), th, tw) img_ip_small_n = Ft.crop(self.img, (i + (h * (jj))), (j + (w * (ii - 1))), th, tw) img_ip_small_e = Ft.crop(self.img, (i + (h * (jj - 2))), (j + (w * (ii - 1))), th, tw) img_ip_small_s = Ft.crop(self.img, (i + (h * (jj - 1))), (j + (w * (ii))), th, tw) img_ip_small_w = Ft.crop(self.img, (i + (h * (jj - 1))), (j + (w * (ii - 2))), th, tw) img_op_small = Ft.crop(self.img_gt, (i + (h * (jj - 1))), (j + (w * (ii - 1))), th, tw) if flip_flag: img_ip_small_c = Ft.hflip(img_ip_small_c) img_ip_small_n = Ft.hflip(img_ip_small_n) img_ip_small_e = Ft.hflip(img_ip_small_e) img_ip_small_s = Ft.hflip(img_ip_small_s) img_ip_small_w = Ft.hflip(img_ip_small_w) img_op_small = Ft.hflip(img_op_small) if v_flag: img_ip_small_c = Ft.vflip(img_ip_small_c) img_ip_small_n = Ft.vflip(img_ip_small_n) img_ip_small_e = Ft.vflip(img_ip_small_e) img_ip_small_s = Ft.vflip(img_ip_small_s) img_ip_small_w = Ft.vflip(img_ip_small_w) img_op_small = Ft.vflip(img_op_small) img_ip_small_c = self.transforms(img_ip_small_c) img_ip_small_w = self.transforms(img_ip_small_w) img_ip_small_s = self.transforms(img_ip_small_s) img_ip_small_e = self.transforms(img_ip_small_e) img_ip_small_n = self.transforms(img_ip_small_n) img_op_small = self.transforms(img_op_small) if color_jitter_flag: img_ip_small_c = img_ip_small_c[jitter_order, ...] img_ip_small_w = img_ip_small_w[jitter_order, ...] img_ip_small_s = img_ip_small_s[jitter_order, ...] img_ip_small_e = img_ip_small_e[jitter_order, ...] img_ip_small_n = img_ip_small_n[jitter_order, ...] img_op_small = img_op_small[jitter_order, ...] img_ip_small_concat = torch.cat([ img_ip_small_c, img_ip_small_e, img_ip_small_n, img_ip_small_s, img_ip_small_w ], dim=0) img_ip_small_concat = torch.unsqueeze(img_ip_small_concat, 0) img_op_small = torch.unsqueeze(img_op_small, 0) if ii == self.view_beg and jj == self.view_beg: imgs_ip_indv = img_ip_small_concat imgs_ip_hid = img_ip_small_c imgs_op = img_op_small else: imgs_ip_indv = torch.cat( (imgs_ip_indv, img_ip_small_concat), 0) imgs_ip_hid = torch.cat((imgs_ip_hid, img_ip_small_c), 0) imgs_op = torch.cat((imgs_op, img_op_small), 0) ##### permute = [] imgs_ip_hid = torch.unsqueeze(imgs_ip_hid, 0) # LFs are very huge and may take lot RAM. Thus to save computation for your system only 2 SAIs are restored. Remove the below `choice' variable for restoring all 64 SAIs choice = np.asarray([12, 35]) return { 'imgs_op': imgs_op[choice, ...], 'imgs_ip_indv': imgs_ip_indv[choice, ...], 'imgs_ip_hid': imgs_ip_hid }
def __call__(self, sample):
    sample['image'] = TF.hflip(sample['image'])
    for i in range(len(sample['label'])):
        sample['label'][i] = TF.hflip(sample['label'][i])
    return sample
def __call__(self, tensors: List[Tensor]) -> List[Tensor]:
    if torch.rand(1) < self.p:
        return [TF.hflip(x) for x in tensors]
    return tensors
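# Small sanity sketch (toy tensor, assumes a torchvision version whose functional ops accept
# tensors, i.e. >= 0.8): TF.hflip on a CxHxW tensor mirrors the last (width) dimension, so
# applying it to every tensor in the list keeps image/mask pairs aligned.
import torch
import torchvision.transforms.functional as TF

img = torch.arange(6.).reshape(1, 2, 3)   # 1x2x3 toy "image"
flipped = TF.hflip(img)
assert torch.equal(flipped, img.flip(-1))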
def test_eval():
    id_net.eval()
    fnames = []
    ids = []
    ids_list = list(range(2874))
    im_name_list = []
    root = "./../face_a/train"
    encoder = DataEncoder()
    list_file = "./../face_a/train.csv"
    file_list = csv.reader(open(list_file, 'r'))
    file_list = list(file_list)  # 2874 entries
    for content_counter in range(len(file_list)):
        fnames.append(os.path.join(root, file_list[content_counter][0]))
        ids.append(int(file_list[content_counter][1]))
    # keep one gallery image per identity; the remainder forms the train/valid splits
    for id_counter in range(2874):
        seq_num = ids.index(id_counter)
        im_name_list.append(fnames[seq_num])
        del ids[seq_num]
        del fnames[seq_num]
    im_name_valid = fnames[:400]
    im_name_train = fnames[400:] + im_name_list
    ids_valid = ids[:400]
    ids_train = ids[400:] + ids_list
    # build gallery features: original and horizontally flipped crops, concatenated
    eval_list_feature = torch.zeros(len(ids_list), 1024)
    for i in range(len(ids_list)):
        name = im_name_list[i]
        img = Image.open(name).convert('RGB')
        img = alignment(img)
        img, img_ = transform(img), transform(F.hflip(img))
        img = Variable(img.unsqueeze(0).cuda(), volatile=True)
        img_ = Variable(img_.unsqueeze(0).cuda(), volatile=True)
        print(i)
        face_feature = torch.cat((id_net(img), id_net(img_)), 1).data.cpu()[0]
        eval_list_feature[i, :] = face_feature
    # match each validation image to the closest gallery identity by cosine similarity
    id_ = []
    for i in range(len(ids_valid)):
        name = im_name_valid[i]
        img = Image.open(name).convert('RGB')
        img = alignment(img)
        img, img_ = transform(img), transform(F.hflip(img))
        img = Variable(img.unsqueeze(0).cuda(), volatile=True)
        img_ = Variable(img_.unsqueeze(0).cuda(), volatile=True)
        face_feature = torch.cat((id_net(img), id_net(img_)), 1).data.cpu()[0]
        dis = []
        for gallery_counter in range(eval_list_feature.size(0)):
            f1 = eval_list_feature[gallery_counter, :]
            f2 = face_feature
            cos_dis = f1.dot(f2) / (f1.norm() * f2.norm() + 1e-5)
            dis.append(float(cos_dis))
        id_num = dis.index(max(dis))
        id_.append(ids_list[id_num])  # keep the integer id so it is comparable to ids_valid
    acc_counter = 0
    for id_counter in range(len(id_)):
        if id_[id_counter] == ids_valid[id_counter]:
            acc_counter += 1
    print(acc_counter / 400.0)
def transform(self, img1, img2): # resize image and covert to tensor img1 = TF.to_pil_image(img1) img1 = TF.resize(img1, [self.img_size, self.img_size], interpolation=3) img2 = TF.to_pil_image(img2) img2 = TF.resize(img2, [self.img_size, self.img_size], interpolation=3) if self.with_random_hflip and random.random() > 0.5: img1 = TF.hflip(img1) img2 = TF.hflip(img2) if self.with_random_vflip and random.random() > 0.5: img1 = TF.vflip(img1) img2 = TF.vflip(img2) if self.with_random_rot90 and random.random() > 0.5: img1 = TF.rotate(img1, 90) img2 = TF.rotate(img2, 90) if self.with_random_rot180 and random.random() > 0.5: img1 = TF.rotate(img1, 180) img2 = TF.rotate(img2, 180) if self.with_random_rot270 and random.random() > 0.5: img1 = TF.rotate(img1, 270) img2 = TF.rotate(img2, 270) if self.with_random_crop and random.random() > 0.5: i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \ get_params(img=img1, scale=(0.5, 1.0), ratio=(0.9, 1.1)) img1 = TF.resized_crop(img1, i, j, h, w, size=(self.img_size, self.img_size)) img2 = TF.resized_crop(img2, i, j, h, w, size=(self.img_size, self.img_size)) if self.with_random_patch: i, j, h, w = transforms.RandomResizedCrop(size=self.img_size). \ get_params(img=img1, scale=(1/16.0, 1/9.0), ratio=(0.9, 1.1)) img1 = TF.resized_crop(img1, i, j, h, w, size=(self.img_size, self.img_size)) img2 = TF.resized_crop(img2, i, j, h, w, size=(self.img_size, self.img_size)) # to tensor img1 = TF.to_tensor(img1) img2 = TF.to_tensor(img2) return img1, img2
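# Hedged side note on the resize calls above: the bare interpolation=3 is PIL's BICUBIC
# constant, so the pair is resized with bicubic filtering. Minimal standalone illustration
# (toy image, not the dataset's; newer torchvision also accepts InterpolationMode.BICUBIC):
from PIL import Image
import torchvision.transforms.functional as TF

toy = Image.new("RGB", (32, 48))
resized = TF.resize(toy, [256, 256], interpolation=Image.BICUBIC)
assert resized.size == (256, 256)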
def __getitem__(self, idx): """ Function to get a sample from the dataset. First both RGB and Semantic images are read in PIL format. Then transformations are applied from PIL to Numpy arrays to Tensors. For regular usage: - Images should be outputed with dimensions (3, W, H) - Semantic Images should be outputed with dimensions (1, W, H) In the case that 10-crops are used: - Images should be outputed with dimensions (10, 3, W, H) - Semantic Images should be outputed with dimensions (10, 1, W, H) :param idx: Index :return: Dictionary containing {RGB image, semantic segmentation mask, scene category index} """ # Get RGB image path and load it img_name = os.path.join(self.image_dir, "images", self.set, (self.filenames[idx] + ".jpg")) img = Image.open(img_name) # Convert it to RGB if gray-scale if img.mode is not "RGB": img = img.convert("RGB") # Load semantic segmentation ground-truth # semGT_name = os.path.join(self.image_dir, "annotations", self.set, (self.filenames[idx] + ".png")) # semGT = Image.open(semGT_name) # Load semantic segmentation mask sem_name = os.path.join(self.image_dir, ("noisy_annotations" + self.RGB), self.set, (self.filenames[idx] + ".png")) sem = Image.open(sem_name) # Load semantic segmentation scores sem_score_name = os.path.join(self.image_dir, ("noisy_scores" + self.RGB), self.set, (self.filenames[idx] + ".png")) semScore = Image.open(sem_score_name) # Apply transformations depending on the set (train, val) if self.set is "training": # Define Random crop. If image is smaller resize first. bilinearResize_trans = transforms.Resize(self.resizeSize) nearestResize_trans = transforms.Resize( self.resizeSize, interpolation=Image.NEAREST) img = bilinearResize_trans(img) # semGT = nearestResize_trans(semGT) sem = nearestResize_trans(sem) semScore = bilinearResize_trans(semScore) # Extract Random Crop parameters i, j, h, w = transforms.RandomCrop.get_params( img, output_size=(self.outputSize, self.outputSize)) # Apply Random Crop parameters img = TF.crop(img, i, j, h, w) # semGT = TF.crop(semGT, i, j, h, w) sem = TF.crop(sem, i, j, h, w) semScore = TF.crop(semScore, i, j, h, w) # Random horizontal flipping if random.random() > 0.5: img = TF.hflip(img) # semGT = TF.hflip(semGT) sem = TF.hflip(sem) semScore = TF.hflip(semScore) # Apply transformations from ImgAug library img = np.asarray(img) # semGT = np.asarray(semGT) sem = np.asarray(sem) semScore = np.asarray(semScore) img = np.squeeze( self.seq.augment_images(np.expand_dims(img, axis=0))) # semGT = np.squeeze(self.seq_sem.augment_images(np.expand_dims(np.expand_dims(semGT, 0), 3))) if self.SemRGB: sem = np.squeeze( self.seq_sem.augment_images(np.expand_dims(sem, 0))) semScore = np.squeeze( self.seq_sem.augment_images(np.expand_dims(semScore, 0))) else: sem = np.squeeze( self.seq_sem.augment_images( np.expand_dims(np.expand_dims(sem, 0), 3))) semScore = np.squeeze( self.seq_sem.augment_images( np.expand_dims(np.expand_dims(semScore, 0), 3))) # Apply not random transforms. To tensor and normalization for RGB. To tensor for semantic segmentation. 
img = self.train_transforms_img(img) # semGT = self.train_transforms_sem(semGT) sem = self.train_transforms_sem(sem) semScore = self.train_transforms_scores(semScore) else: img = self.val_transforms_img(img) # semGT = self.val_transforms_sem(semGT) sem = self.val_transforms_sem(sem) semScore = self.val_transforms_scores(semScore) if not self.TenCrop: if not self.SemRGB: assert img.shape[0] == 3 and img.shape[ 1] == self.outputSize and img.shape[2] == self.outputSize # assert semGT.shape[0] == 1 and semGT.shape[1] == self.outputSize and semGT.shape[2] == self.outputSize assert sem.shape[0] == 1 and sem.shape[ 1] == self.outputSize and sem.shape[2] == self.outputSize assert semScore.shape[0] == 1 and semScore.shape[ 1] == self.outputSize and semScore.shape[ 2] == self.outputSize else: assert img.shape[0] == 3 and img.shape[ 1] == self.outputSize and img.shape[2] == self.outputSize assert sem.shape[0] == 3 and sem.shape[ 1] == self.outputSize and sem.shape[2] == self.outputSize assert semScore.shape[0] == 3 and semScore.shape[ 1] == self.outputSize and semScore.shape[ 2] == self.outputSize else: if not self.SemRGB: assert img.shape[0] == 10 and img.shape[ 2] == self.outputSize and img.shape[3] == self.outputSize # assert semGT.shape[0] == 10 and semGT.shape[2] == self.outputSize and semGT.shape[3] == self.outputSize assert sem.shape[0] == 10 and sem.shape[ 2] == self.outputSize and sem.shape[3] == self.outputSize assert semScore.shape[0] == 10 and semScore.shape[ 2] == self.outputSize and semScore.shape[ 3] == self.outputSize else: assert img.shape[0] == 10 and img.shape[ 2] == self.outputSize and img.shape[3] == self.outputSize assert sem.shape[0] == 10 and sem.shape[ 2] == self.outputSize and sem.shape[3] == self.outputSize assert semScore.shape[0] == 10 and semScore.shape[ 2] == self.outputSize and semScore.shape[ 3] == self.outputSize # Create dictionary self.sample = { 'Image': img, 'Semantic': sem, 'Semantic Scores': semScore, 'Scene Index': self.classes.index(self.labels[idx]) } return self.sample
def transform(self, image): hr_image = image # downscale to obtain low-resolution image resize = transforms.Resize(size=self.lr_shape, interpolation=self.downgrade) lr_image = resize(hr_image) # apply blur if self.include_blur: lr_image = lr_image.filter( ImageFilter.GaussianBlur(radius=self.blur_radius)) # apply random transforms if self.random_flips: horiz_random, vert_random = self.randomGenerator() # random horizontal flip if horiz_random > 0.5: hr_image = tvF.hflip(hr_image) lr_image = tvF.hflip(lr_image) # random vertical flip if vert_random > 0.5: hr_image = tvF.vflip(hr_image) lr_image = tvF.vflip(lr_image) # apply noise lr_image = np.array(lr_image) hr_image = np.array(hr_image) if self.include_noise: lr_image = np.array( np.clip((lr_image + np.random.normal( self.noise_mean, self.noise_sigma, lr_image.shape)), a_min=0, a_max=255).astype("uint8")) # desired channel number should be checked if self.channel_number == 3 and lr_image.shape[-1] == 1: lr_image = np.stack([lr_image[np.newaxis, ...]] * 3, axis=0) hr_image = np.stack([hr_image[np.newaxis, ...]] * 3, axis=0) # Transform to tensor hr_image = tvF.to_tensor(Image.fromarray(hr_image)) lr_image = tvF.to_tensor(Image.fromarray(lr_image)) # apply normalization if self.normalize == "zeroMean": # todo Mean & STD of the dataset should be given or It can be calculated in a method hr_means = [hr_image.mean() for i in range(hr_image.shape[0])] lr_means = [lr_image.mean() for i in range(lr_image.shape[0])] hr_stds = [hr_image.std() for i in range(hr_image.shape[0])] lr_stds = [lr_image.std() for i in range(lr_image.shape[0])] if hr_stds[0].item() == 0 or lr_stds[0].item() == 0: hr_image = tvF.normalize(hr_image, hr_means, [ 1, ]) lr_image = tvF.normalize(lr_image, lr_means, [ 1, ]) else: hr_image = tvF.normalize(hr_image, hr_means, hr_stds) lr_image = tvF.normalize(lr_image, lr_means, lr_stds) elif self.normalize == "between01": hr_mins = [hr_image.min() for i in range(hr_image.shape[0])] lr_mins = [lr_image.min() for i in range(lr_image.shape[0])] hr_ranges = [ hr_image.max() - hr_image.min() for i in range(hr_image.shape[0]) ] lr_ranges = [ lr_image.max() - lr_image.min() for i in range(lr_image.shape[0]) ] if not (hr_ranges[0].item() == 0 or lr_ranges[0].item() == 0): hr_image = tvF.normalize(hr_image, hr_mins, hr_ranges) lr_image = tvF.normalize(lr_image, lr_mins, lr_ranges) else: hr_image = tvF.normalize(hr_image, hr_mins, [ 1, ]) lr_image = tvF.normalize(lr_image, lr_mins, [ 1, ]) elif self.normalize == "divideBy255": hr_image = tvF.normalize(hr_image, [ 0, ], [ 1, ]) lr_image = tvF.normalize(lr_image, [ 0, ], [ 1, ]) else: hr_image = hr_image * 2 - 1 lr_image = lr_image * 2 - 1 # print(lr_image.cpu().detach().numpy().max(), hr_image.cpu().detach().numpy().min()) return lr_image, hr_image
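# Hedged worked example of the "between01" branch above (toy tensor): tvF.normalize(t, mean,
# std) computes (t - mean) / std channel-wise, so passing the per-channel minimum as "mean"
# and (max - min) as "std" is plain min-max scaling to [0, 1].
import torch
import torchvision.transforms.functional as tvF

t = torch.tensor([[[0.2, 0.4], [0.6, 1.0]]])   # 1x2x2 single-channel tensor
scaled = tvF.normalize(t, [t.min().item()], [(t.max() - t.min()).item()])
assert torch.allclose(scaled, (t - 0.2) / 0.8)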
def __call__(self, img, lbl):
    if random.random() < self.p:
        img = F.hflip(img)
        lbl = F.hflip(lbl)
    return img, lbl
def __call__(self, image, bboxes):
    if random.random() < self.p:
        return F.hflip(image), bboxes.hflip()
    else:
        return image, bboxes
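# The bboxes.hflip() call above relies on a project-specific box container. A minimal sketch of
# what such a method typically does (assumed xyxy format and a stored image width; the class
# below is a hypothetical stand-in, not the original): mirror the x-range, i.e.
# new_xmin = W - xmax and new_xmax = W - xmin, leaving y untouched.
import torch

class Boxes:
    def __init__(self, xyxy: torch.Tensor, image_width: int):
        self.xyxy = xyxy
        self.image_width = image_width

    def hflip(self) -> "Boxes":
        x_min, y_min, x_max, y_max = self.xyxy.unbind(-1)
        flipped = torch.stack(
            [self.image_width - x_max, y_min, self.image_width - x_min, y_max], dim=-1)
        return Boxes(flipped, self.image_width)

# Boxes(torch.tensor([[10., 5., 30., 25.]]), image_width=100).hflip().xyxy
# -> tensor([[70.,  5., 90., 25.]])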
def __call__(self, x):
    if self.horiz:
        return F.hflip(x)
    return F.vflip(x)
def __getitem__(self, index): # get day files from index rgb_day_file = self.rgb_day_files[index] ir_day_file = self.ir_day_files[index] label_day_file = self.label_day_files[index] sun_altitude_day = self.sun_altitudes_day[index] # get night files randomly rand_idx = random.randint(0, len(self.fl_rgb_night_files) - 1) rgb_night_file = self.rgb_night_files[rand_idx] ir_night_file = self.ir_night_files[rand_idx] sun_altitude_night = self.sun_altitudes_night[rand_idx] # READ rgb_day = cv2.imread(rgb_day_file) ir_day = cv2.imread(ir_day_file, cv2.IMREAD_ANYDEPTH) rgb_night = cv2.imread(rgb_night_file) ir_night = cv2.imread(ir_night_file, cv2.IMREAD_ANYDEPTH) label_day = cv2.imread(label_day_file, cv2.IMREAD_GRAYSCALE) # COLORS rgb_day = cv2.cvtColor(rgb_day, cv2.COLOR_BGR2RGB) rgb_night = cv2.cvtColor(rgb_night, cv2.COLOR_BGR2RGB) # resizing res = (960, 320) rgb_day = cv2.resize(rgb_day, res, interpolation=cv2.INTER_LINEAR) ir_day = cv2.resize(ir_day, res, interpolation=cv2.INTER_LINEAR) rgb_night = cv2.resize(rgb_night, res, interpolation=cv2.INTER_LINEAR) ir_night = cv2.resize(ir_night, res, interpolation=cv2.INTER_LINEAR) label_day = cv2.resize(label_day, res, interpolation=cv2.INTER_NEAREST) if self.contrast_enhancement: applyClaheCV(self.clahe, rgb_day) applyClaheCV(self.clahe, rgb_night) # Crop results in 320 * 700 ir_day = ir_day[:, 150:850] ir_night = ir_night[:, 150:850] label_day = label_day[:, 150:850] rgb_day = rgb_day[:, 150:850, :] rgb_night = rgb_night[:, 150:850, :] i, j, h, w = transforms.RandomCrop.get_params( Image.fromarray(rgb_day), (self.height, self.width)) ir_day = ir_day[i:(i + h), j:(j + w)] ir_night = ir_night[i:(i + h), j:(j + w)] label_day = label_day[i:(i + h), j:(j + w)] rgb_day = rgb_day[i:(i + h), j:(j + w), :] rgb_night = rgb_night[i:(i + h), j:(j + w), :] # normalize IR data (is in range 0, 2**16 --> crop to relevant range(20800, 27000)) minval = 21800 maxval = 25000 ir_day[ir_day < minval] = minval ir_day[ir_day > maxval] = maxval ir_night[ir_night < minval] = minval ir_night[ir_night > maxval] = maxval ir_day = (ir_day - minval) / (maxval - minval) ir_night = (ir_night - minval) / (maxval - minval) # Modality block dropping (i_d, j_d, h_d, w_d) drop_lenght_h = int(random.uniform(100, 300)) drop_lenght_w = int(random.uniform(100, 500)) i_d, j_d, h_d, w_d = transforms.RandomCrop.get_params( Image.fromarray(rgb_day), (drop_lenght_h, drop_lenght_w)) mod_drop_params = torch.Tensor([i_d, j_d, h_d, w_d]) ''' Perform other data augmentations (random crop already implemented): - FlipLR - Normalize to [-1, 1] - Rotate ''' # convert to PIL images ir_day = ir_day.astype(np.float32) ir_night = ir_night.astype(np.float32) ir_day = Image.fromarray(ir_day) ir_night = Image.fromarray(ir_night) rgb_day = Image.fromarray(rgb_day) rgb_night = Image.fromarray(rgb_night) label_day = Image.fromarray(label_day, mode='L') # Random horizontal flipping if random.random() > 0.5: ir_day = F.hflip(ir_day) rgb_day = F.hflip(rgb_day) label_day = F.hflip(label_day) if random.random() > 0.5: ir_night = F.hflip(ir_night) rgb_night = F.hflip(rgb_night) # random rotation if random.random() > 0.5: angle = (random.random() - 0.5) * 40 # random angle in [-20, 20] ir_day = F.rotate(ir_day, angle, resample=Image.BILINEAR) rgb_day = F.rotate(rgb_day, angle, resample=Image.BILINEAR) # label_day = F.rotate(label_day, angle, resample=Image.NEAREST, fill=12) label_day = F.rotate(label_day, angle, resample=Image.NEAREST) if random.random() > 0.5: angle = (random.random() - 0.5) * 40 # random angle in 
[-20, 20] ir_night = F.rotate(ir_night, angle, resample=Image.BILINEAR) rgb_night = F.rotate(rgb_night, angle, resample=Image.BILINEAR) # To Tensor label_day = np.array(label_day).astype(np.uint8) rgb_day = F.to_tensor(rgb_day) rgb_night = F.to_tensor(rgb_night) label_day = torch.from_numpy(label_day) ir_day = F.to_tensor(ir_day) ir_night = F.to_tensor(ir_night) # Normalization if self.db_stats: rgb_day = F.normalize(rgb_day, **self.db_stats) rgb_night = F.normalize(rgb_night, **self.db_stats) else: rgb_day = F.normalize(rgb_day, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) rgb_night = F.normalize(rgb_night, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)) ir_day = F.normalize(ir_day, mean=[0.5], std=[0.5]) ir_night = F.normalize(ir_night, mean=[0.5], std=[0.5]) out_dict = {} out_dict['rgb_day'] = rgb_day out_dict['label_day'] = label_day out_dict['rgb_night'] = rgb_night out_dict['ir_day'] = ir_day out_dict['ir_night'] = ir_night out_dict['sun_altitude_day'] = sun_altitude_day out_dict['sun_altitude_night'] = sun_altitude_night out_dict['mod_drop_params'] = mod_drop_params return out_dict
def torchvision(self, img):
    return torchvision.hflip(img)
def __getitem__(self, index):
    """Read an image and its ground-truth mask, apply preprocessing/augmentation, and return them."""
    image_path = self.image_paths[index]
    filename = image_path.split('_')[-1][:-len(".jpg")]
    GT_path = self.GT_paths + 'ISIC_' + filename + '_segmentation.png'
    image = Image.open(image_path)
    GT = Image.open(GT_path)
    # aspect ratio (height / width)
    aspect_ratio = image.size[1] / image.size[0]
    Transform = []
    ResizeRange = random.randint(300, 320)
    Transform.append(T.Resize((int(ResizeRange * aspect_ratio), ResizeRange)))
    p_transform = random.random()
    if (self.mode == 'train') and p_transform <= self.augmentation_prob:
        RotationDegree = random.randint(0, 3)
        RotationDegree = self.RotationDegree[RotationDegree]
        if (RotationDegree == 90) or (RotationDegree == 270):
            aspect_ratio = 1 / aspect_ratio
        Transform.append(T.RandomRotation((RotationDegree, RotationDegree)))
        RotationRange = random.randint(-10, 10)
        Transform.append(T.RandomRotation((RotationRange, RotationRange)))
        CropRange = random.randint(250, 270)
        Transform.append(T.CenterCrop((int(CropRange * aspect_ratio), CropRange)))
        Transform = T.Compose(Transform)
        image = Transform(image)
        GT = Transform(GT)
        # random shift via cropping a few pixels off each border
        ShiftRange_left = random.randint(0, 20)
        ShiftRange_upper = random.randint(0, 20)
        ShiftRange_right = image.size[0] - random.randint(0, 20)
        ShiftRange_lower = image.size[1] - random.randint(0, 20)
        image = image.crop(box=(ShiftRange_left, ShiftRange_upper,
                                ShiftRange_right, ShiftRange_lower))
        GT = GT.crop(box=(ShiftRange_left, ShiftRange_upper,
                          ShiftRange_right, ShiftRange_lower))
        if random.random() < 0.5:
            image = F.hflip(image)
            GT = F.hflip(GT)
        if random.random() < 0.5:
            image = F.vflip(image)
            GT = F.vflip(GT)
        Transform = T.ColorJitter(brightness=0.2, contrast=0.2, hue=0.02)
        image = Transform(image)
        Transform = []
    Transform.append(
        T.Resize((int(256 * aspect_ratio) - int(256 * aspect_ratio) % 16, 256)))
    Transform.append(T.ToTensor())
    Transform = T.Compose(Transform)
    image = Transform(image)
    GT = Transform(GT)
    Norm_ = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    image = Norm_(image)
    return image, GT
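# Worked example (illustrative numbers) of the final resize height used above:
# int(256 * aspect_ratio) - int(256 * aspect_ratio) % 16 rounds the scaled height down to a
# multiple of 16, presumably to stay compatible with encoder-decoder nets that halve the
# resolution several times.
aspect_ratio = 0.7                 # illustrative value
h = int(256 * aspect_ratio)        # 179
h_rounded = h - h % 16             # 176, the nearest multiple of 16 below
assert h_rounded % 16 == 0 and h_rounded == 176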