def perspective(img, mask):
    # Sample one set of corner points (assumes 512x512 inputs) and apply the
    # same warp to both image and mask so they stay aligned.
    startpoints, endpoints = transforms.RandomPerspective.get_params(512, 512, 0.5)
    img = TF.perspective(img, startpoints=startpoints, endpoints=endpoints)
    mask = TF.perspective(mask, startpoints=startpoints, endpoints=endpoints)
    return img, mask
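# A minimal usage sketch for the helper above (an assumption, not part of the
# original snippet; it presumes `from torchvision import transforms` and
# `import torchvision.transforms.functional as TF`):
from PIL import Image

img = Image.new("RGB", (512, 512))
mask = Image.new("L", (512, 512))
img, mask = perspective(img, mask)  # identical warp applied to both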
def test_perspective_interpolation_warning(tester):
    # assert changed type warning
    spoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    epoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    tensor = torch.randint(0, 256, (3, 26, 26))
    with tester.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"):
        res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2)
        res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR)
        tester.assertTrue(res1.equal(res2))
def __call__(self, img, mask):
    if not F._is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
    if random.random() < self.p:
        width, height = img.size
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        return F.perspective(img, startpoints, endpoints, self.interpolation), \
            F.perspective(mask, startpoints, endpoints, Image.NEAREST)
    return img, mask
def forward(self, image, mask):
    if torch.rand(1) < self.p:
        width, height = F._get_image_size(image)
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        return F.perspective(image, startpoints, endpoints, self.interpolation, self.fill), \
            F.perspective(mask, startpoints, endpoints, self.interpolation, self.fill)
    return image, mask
def __call__(self, sample):
    image, mask = sample['image'], sample['mask']
    if np.random.rand() < self.p:
        width, height = image.size
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        image = F.perspective(image, startpoints, endpoints, self.interpolation)
        mask = F.perspective(mask, startpoints, endpoints, self.interpolation)
    return {'image': image, 'mask': mask}
def __call__(self, image, target):
    if random.random() < self.p:
        width, height = image.size
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        image = F.perspective(image, startpoints, endpoints, self.interpolation, self.fill)
        target = F.perspective(target, startpoints, endpoints, self.interpolation, self.fill)
    return image, target
def __call__(self, sample):
    """
    Args:
        sample (dict): Contains 'image' (PIL Image or Tensor) to be perspectively
            transformed and its corresponding 'target'.

    Returns:
        dict: Randomly transformed 'image' and 'target'.
    """
    img, target = sample['image'], sample['target']
    if torch.rand(1) < self.p:
        width, height = F._get_image_size(img)
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        # Warp the target (not the image a second time), with nearest
        # interpolation so label values are preserved.
        return {'image': F.perspective(img, startpoints, endpoints, self.interpolation, self.fill),
                'target': F.perspective(target, startpoints, endpoints, Image.NEAREST, self.ignore_label)}
    return {'image': img, 'target': target}
def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn, tester):
    if dt == torch.float16 and device == "cpu":
        # skip float16 on CPU case
        return

    data_dims, (spoints, epoints) = dims_and_points

    tensor, pil_img = tester._create_data(*data_dims, device=device)

    if dt is not None:
        tensor = tensor.to(dtype=dt)

    interpolation = NEAREST
    fill_pil = int(fill[0]) if fill is not None and len(fill) == 1 else fill
    out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints,
                                interpolation=interpolation, fill=fill_pil)
    out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))
    out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints,
                    interpolation=interpolation, fill=fill).cpu()

    if out_tensor.dtype != torch.uint8:
        out_tensor = out_tensor.to(torch.uint8)

    num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
    ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
    # Tolerance : less than 5% of different pixels
    assert ratio_diff_pixels < 0.05
def normal_perspective(trainData, distortion_scale=0.5, nstd=1 / 3):
    s = trainData.size()
    width = s[-1]
    height = s[-2]
    half_height = height // 2
    half_width = width // 2
    startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
    # Sample corner offsets from a half-normal distribution, clip to [0, 1],
    # then scale into the corner regions allowed by distortion_scale.
    r = torch.normal(0, nstd, (len(trainData), 4, 2)).abs()
    r[r > 1] = 1
    r[:, :, 0] *= distortion_scale * half_width
    r[:, :, 1] *= distortion_scale * half_height
    # Reflect the offsets for the top-right (1), bottom-right (2) and
    # bottom-left (3) corners so each corner moves inward from its edge.
    r[:, 1, 0] = width - r[:, 1, 0]
    r[:, 2, 0] = width - r[:, 2, 0]
    r[:, 2, 1] = height - r[:, 2, 1]
    r[:, 3, 1] = height - r[:, 3, 1]
    for i in range(len(trainData)):
        endpoints = r[i].tolist()
        trainData[i] = F.perspective(trainData[i], startpoints=startpoints, endpoints=endpoints)
def uniform_perspective(trainData, distortion_scale=0.5, p=0.5):
    s = trainData.size()
    width = s[-1]
    height = s[-2]
    half_height = height // 2
    half_width = width // 2
    startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]]
    for i in range(len(trainData)):
        r = torch.rand(size=(1,), dtype=torch.float32).item()
        if r > p:
            continue
        rw = torch.randint(0, int(distortion_scale * half_width), size=[4]).tolist()
        rh = torch.randint(0, int(distortion_scale * half_height), size=[4]).tolist()
        topleft = [rw[0], rh[0]]
        topright = [width - rw[1], rh[1]]
        botright = [width - rw[2], height - rh[2]]
        botleft = [rw[3], height - rh[3]]
        endpoints = [topleft, topright, botright, botleft]
        trainData[i] = F.perspective(trainData[i], startpoints=startpoints, endpoints=endpoints)
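# A minimal usage sketch for the two batch augmenters above (an assumption,
# not part of the original snippets): both expect an NCHW image batch and
# modify it in place; normal_perspective warps every sample, while
# uniform_perspective warps each sample independently with probability p.
batch = torch.rand(8, 3, 64, 64)
normal_perspective(batch, distortion_scale=0.5, nstd=1 / 3)
uniform_perspective(batch, distortion_scale=0.5, p=0.5)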
def _test_perspective(self, tensor, pil_img, scripted_transform, test_configs):
    dt = tensor.dtype
    for r in [NEAREST, ]:
        for spoints, epoints in test_configs:
            out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r)
            out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

            for fn in [F.perspective, scripted_transform]:
                out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r).cpu()

                if out_tensor.dtype != torch.uint8:
                    out_tensor = out_tensor.to(torch.uint8)

                num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
                ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
                # Tolerance : less than 5% of different pixels
                self.assertLess(
                    ratio_diff_pixels,
                    0.05,
                    msg="{}: {}\n{} vs \n{}".format(
                        (r, dt, spoints, epoints),
                        ratio_diff_pixels,
                        out_tensor[0, :7, :7],
                        out_pil_tensor[0, :7, :7]
                    )
                )
def test_perspective(self):
    from torchvision.transforms import RandomPerspective

    data = [self._create_data(26, 34, device=self.device), self._create_data(26, 26, device=self.device)]
    scripted_transform = torch.jit.script(F.perspective)

    for tensor, pil_img in data:
        test_configs = [
            [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
        ]
        n = 10
        test_configs += [
            RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n) for i in range(n)
        ]

        for dt in [None, torch.float32, torch.float64, torch.float16]:
            if dt == torch.float16 and torch.device(self.device).type == "cpu":
                # skip float16 on CPU case
                continue

            if dt is not None:
                tensor = tensor.to(dtype=dt)

            self._test_perspective(tensor, pil_img, scripted_transform, test_configs)

            batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device)
            if dt is not None:
                batch_tensors = batch_tensors.to(dtype=dt)

            for spoints, epoints in test_configs:
                self._test_fn_on_batch(
                    batch_tensors, F.perspective, startpoints=spoints, endpoints=epoints, interpolation=NEAREST
                )

    # assert changed type warning
    spoints = [[0, 0], [33, 0], [33, 25], [0, 25]]
    epoints = [[3, 2], [32, 3], [30, 24], [2, 25]]
    with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationModes"):
        res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2)
        res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR)
        self.assertTrue(res1.equal(res2))
def test_perspective(self):
    from torchvision.transforms import RandomPerspective

    data = [
        self._create_data(26, 34, device=self.device),
        self._create_data(26, 26, device=self.device)
    ]
    for tensor, pil_img in data:
        scripted_transform = torch.jit.script(F.perspective)

        test_configs = [
            [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
            [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
        ]
        n = 10
        test_configs += [
            RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n) for i in range(n)
        ]

        for r in [0, ]:  # 0 == PIL.Image.NEAREST
            for spoints, epoints in test_configs:
                out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r)
                out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1)))

                for fn in [F.perspective, scripted_transform]:
                    out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r).cpu()

                    num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0
                    ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2]
                    # Tolerance : less than 5% of different pixels
                    self.assertLess(
                        ratio_diff_pixels,
                        0.05,
                        msg="{}: {}\n{} vs \n{}".format(
                            (r, spoints, epoints),
                            ratio_diff_pixels,
                            out_tensor[0, :7, :7],
                            out_pil_tensor[0, :7, :7]
                        )
                    )
def random_affine(self, image, label):
    # Random affine augmentation (random translation, rotation, scaling, etc. combined)
    if random.random() > 0.5:
        # Perspective transform (RandomPerspective)
        width, height = image.size
        startpoints, endpoints = transforms.RandomPerspective.get_params(width, height, 0.5)
        # Zero-filled; output keeps the original size, so a resize is still needed
        image = tf.perspective(image, startpoints, endpoints,
                               interpolation=Image.BICUBIC, fill=self.image_fill)
        label = tf.perspective(label, startpoints, endpoints,
                               interpolation=Image.NEAREST, fill=self.label_fill)
    elif random.random() < 0.5:
        # TODO: expose degrees and the other parameters as user-settable options
        # Random rotation / translation / scaling / shear (4 affine transforms);
        # the pytorch implementation keeps the center fixed, no shear used here
        ret = transforms.RandomAffine.get_params(degrees=(-180, 180), translate=(0.3, 0.3),
                                                 scale_ranges=(0.3, 3), shears=None,
                                                 img_size=image.size)
        # angle, translations, scale, shear = ret
        # Zero-filled; output keeps the original size, so a resize is still needed
        image = tf.affine(image, *ret, resample=0, fillcolor=self.image_fill)  # PIL.Image.NEAREST
        label = tf.affine(label, *ret, resample=0, fillcolor=self.label_fill)

    # Resize the image to the required size
    image = tf.resize(image, self.input_hw, interpolation=Image.BILINEAR)
    label = tf.resize(label, self.input_hw, interpolation=Image.NEAREST)
    return image, label
def four_point_transform(image, pts, imshape):
    # obtain a consistent order of the points and unpack them individually
    rect = order_points(pts)
    original = np.array([
        [0, 0],
        [imshape[1] - 1, 0],
        [imshape[1] - 1, imshape[0] - 1],
        [0, imshape[0] - 1]], dtype="float32")
    warped = TF.perspective(image, original, rect)
    return np.asarray(warped)
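# The order_points helper above is not shown. A common sketch (an assumption,
# not the author's code) orders the four corners top-left, top-right,
# bottom-right, bottom-left, matching the corner order expected here:
import numpy as np

def order_points(pts):
    pts = np.asarray(pts, dtype="float32")
    s = pts.sum(axis=1)       # x + y: smallest at top-left, largest at bottom-right
    d = np.diff(pts, axis=1)  # y - x: smallest at top-right, largest at bottom-left
    return np.array([pts[np.argmin(s)], pts[np.argmin(d)],
                     pts[np.argmax(s)], pts[np.argmax(d)]], dtype="float32")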
def __call__(self, img, target):
    """
    Args:
        img (PIL Image): Image to be perspectively transformed.

    Returns:
        PIL Image: Randomly perspectively transformed image.
    """
    if not F._is_pil_image(img):
        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

    if self.custom is not None:
        target = self.custom(target)

    if random.random() < self.p:
        width, height = img.size
        startpoints, endpoints = self.get_params(width, height, self.distortion_scale)
        # Image.Image is the class; bare Image is the PIL module, which would
        # make isinstance() raise a TypeError.
        if isinstance(target, Image.Image) and target.size == img.size:
            target = F.perspective(target, startpoints, endpoints, self.interpolation)
        return F.perspective(img, startpoints, endpoints, self.interpolation), target
    return img, target
def transform(self, index):
    input, target = map(lambda im: Image.open(im), (self.images[index], self.labels[index]))
    tfm_input, tfm_target = (tf.resize(input, (360, 480)),
                             tf.resize(target, (360, 480), interpolation=Image.NEAREST))

    if self.is_train and False:  # augmentation branch intentionally disabled
        # Affine transformations
        max_dx = 0.1 * tfm_input.size[0]
        max_dy = 0.1 * tfm_input.size[1]
        translations = (np.round(random.uniform(-max_dx, max_dx)),
                        np.round(random.uniform(-max_dy, max_dy)))
        rotation = random.uniform(0, 15)
        tfm_input, tfm_target = tf.affine(tfm_input, rotation, translations, 1, 0), \
            tf.affine(tfm_target, rotation, translations, 1, 0)

        if random.random() < 0.5:
            width, height = input.size
            startpoints, endpoints = self.get_params(width, height, 0.5)
            tfm_input, tfm_target = tf.perspective(tfm_input, startpoints, endpoints), \
                tf.perspective(tfm_target, startpoints, endpoints)

    tfm_input, tfm_target = map(tf.to_tensor, (tfm_input, tfm_target))
    torch.clamp((255 * tfm_target), 0, 32, out=tfm_target)
    return tf.normalize(tfm_input, (0.5, ), (0.5, )), tfm_target.long()
def modify(self, image, seed):
    random.seed(seed)
    rand_n = random.uniform(0, 1)
    image = TF.to_pil_image(image)

    if rand_n > 0.5 and self.transforms:
        image = TF.hflip(image)
        angle = transforms.RandomRotation.get_params((-20, 20))
        image = TF.rotate(image, angle)

    if rand_n > 0.3:
        w, h = image.size
        start, end = transforms.RandomPerspective.get_params(w, h, 0.2)
        image = TF.perspective(image, start, end, interpolation=Image.BICUBIC)

    image = TF.to_tensor(image)
    mean = [image[i, :, :].mean() / 255 for i in range(3)]
    image = TF.normalize(image, mean=mean, std=[1, 1, 1])
    return image
def __call__(self, img):
    return F.perspective(img, self.startpoints, self.endpoints, self.interpolation)
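# A minimal sketch of the deterministic transform this __call__ suggests; the
# class name `FixedPerspective` and the 224x224 size are assumptions, not part
# of the original snippet:
class FixedPerspective:
    def __init__(self, startpoints, endpoints, interpolation=Image.BILINEAR):
        self.startpoints = startpoints  # four [x, y] source corners
        self.endpoints = endpoints      # four [x, y] destination corners
        self.interpolation = interpolation

    def __call__(self, img):
        return F.perspective(img, self.startpoints, self.endpoints, self.interpolation)

# Sampling the corner points once makes the warp deterministic, so the same
# transform can be applied identically to an image and its paired mask.
spoints, epoints = transforms.RandomPerspective.get_params(224, 224, 0.5)
warp = FixedPerspective(spoints, epoints)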
def get_perspective(distortion_scale, p=0.5):
    # Note: the coin flip happens once, when get_perspective() is called, not
    # per image; the returned Lambda either always warps (with fresh corner
    # points each call) or never does.
    distort = random.random() < p
    return transforms.Lambda(
        lambda x: TF.perspective(x, *transforms.RandomPerspective.get_params(*x.size, distortion_scale))
        if distort else x)
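# A usage sketch (an assumption about intended use, not original code): because
# of the construction-time coin flip noted above, rebuild the transform each
# epoch if the warp decision should be re-randomized.
tform = transforms.Compose([
    get_perspective(distortion_scale=0.5, p=0.5),
    transforms.ToTensor(),
])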
def apply_transform(self, img, mask, current_transform=None):
    if current_transform is None:
        current_transform = self.transform

    if isinstance(current_transform, (transforms.Compose)):
        for transform in current_transform.transforms:
            img, mask = self.apply_transform(img, mask, transform)

    elif isinstance(current_transform, (transforms.RandomApply)):
        if current_transform.p >= random.random():
            img, mask = self.apply_transform(img, mask, current_transform.transforms)

    elif isinstance(current_transform, (transforms.RandomChoice)):
        t = random.choice(current_transform.transforms)
        img, mask = self.apply_transform(img, mask, t)

    elif isinstance(current_transform, (transforms.RandomOrder)):
        order = list(range(len(current_transform.transforms)))
        random.shuffle(order)
        for i in order:
            img, mask = self.apply_transform(img, mask, current_transform.transforms[i])

    elif isinstance(
        current_transform,
        (
            transforms.CenterCrop,
            transforms.FiveCrop,
            transforms.TenCrop,
            transforms.ToTensor,
            transforms.Grayscale,
            transforms.Resize,
        ),
    ):
        img = current_transform(img)
        mask = current_transform(mask)

    elif isinstance(current_transform, (transforms.Normalize, transforms.Lambda, transforms.Pad)):
        img = current_transform(img)
        # mask = current_transform(mask)  # apply on input only

    elif isinstance(current_transform, (transforms.ColorJitter)):
        transform = current_transform.get_params(
            current_transform.brightness,
            current_transform.contrast,
            current_transform.saturation,
            current_transform.hue,
        )
        for lambda_transform in transform.transforms:
            img = lambda_transform(img)

    elif isinstance(current_transform, (transforms.RandomAffine)):
        ret = current_transform.get_params(
            current_transform.degrees,
            current_transform.translate,
            current_transform.scale,
            current_transform.shear,
            img.size,
        )
        img = F.affine(img, *ret, resample=current_transform.resample,
                       fillcolor=current_transform.fillcolor)
        mask = F.affine(mask, *ret, resample=current_transform.resample,
                        fillcolor=current_transform.fillcolor)

    elif isinstance(current_transform, (transforms.RandomCrop)):
        i, j, h, w = current_transform.get_params(img, current_transform.size)
        img = F.crop(img, i, j, h, w)
        mask = F.crop(mask, i, j, h, w)

    elif isinstance(current_transform, (transforms.RandomHorizontalFlip)):
        if random.random() < current_transform.p:
            img = F.hflip(img)
            mask = F.hflip(mask)

    elif isinstance(current_transform, (transforms.RandomVerticalFlip)):
        if random.random() < current_transform.p:
            img = F.vflip(img)
            mask = F.vflip(mask)

    elif isinstance(current_transform, (transforms.RandomPerspective)):
        if random.random() < current_transform.p:
            width, height = img.size
            startpoints, endpoints = current_transform.get_params(
                width, height, current_transform.distortion_scale)
            img = F.perspective(img, startpoints, endpoints, current_transform.interpolation)
            mask = F.perspective(mask, startpoints, endpoints, current_transform.interpolation)

    elif isinstance(current_transform, (transforms.RandomResizedCrop)):
        ret = current_transform.get_params(img, current_transform.scale, current_transform.ratio)
        img = F.resized_crop(img, *ret, current_transform.size, current_transform.interpolation)
        mask = F.resized_crop(mask, *ret, current_transform.size, current_transform.interpolation)

    elif isinstance(current_transform, (transforms.RandomRotation)):
        angle = current_transform.get_params(current_transform.degrees)
        img = F.rotate(img, angle, current_transform.resample,
                       current_transform.expand, current_transform.center)
        mask = F.rotate(mask, angle, current_transform.resample,
                        current_transform.expand, current_transform.center)

    elif isinstance(current_transform, (transforms.RandomErasing)):
        if random.uniform(0, 1) < current_transform.p:
            x, y, h, w, v = current_transform.get_params(
                img,
                scale=current_transform.scale,
                ratio=current_transform.ratio,
                value=current_transform.value,
            )
            img = F.erase(img, x, y, h, w, v, current_transform.inplace)
            # mask = F.erase(mask, x, y, h, w, v, current_transform.inplace)

    else:
        raise NotImplementedError(f'Transform "{current_transform}" not implemented yet')

    return img, mask
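# A usage sketch for the dispatcher above (the pipeline contents are an
# assumption): parameters are sampled once per transform, so image and mask
# stay geometrically aligned through every random op.
pipeline = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.RandomHorizontalFlip(),
])
# inside a Dataset with self.transform = pipeline:
# img, mask = self.apply_transform(img, mask)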