def test_pad_with_non_constant_padding_modes(self):
    """Check the 'edge', 'reflect' and 'symmetric' padding modes of ``F.pad``."""

    def leading_pixels(pil_img):
        # First six values of channel 0 on row 17 (a row in the middle of
        # the image), read from the left border inwards.
        return np.asarray(pil_img).transpose(2, 0, 1)[0][17][:6]

    to_tensor = transforms.ToTensor()

    base = torch.zeros(3, 27, 27).byte()
    base[:, :, 0] = 1  # constant value written into the leftmost column
    base = transforms.ToPILImage()(base)
    # Wrap the image in a 1-pixel constant border of value 200.
    base = F.pad(base, 1, (200, 200, 200))

    # Edge mode, 3 pixels on every side. Expected order from the left:
    # edge_pad x3, constant border, marked leftmost column, 0.
    edge_padded = F.pad(base, 3, padding_mode='edge')
    expected_edge = np.asarray([200, 200, 200, 200, 1, 0])
    assert np.all(leading_pixels(edge_padded) == expected_edge)
    assert to_tensor(edge_padded).size() == (3, 35, 35)

    # Reflect mode, 3 pixels left/right and 2 pixels top/bottom. Expected:
    # reflect_pad x3, constant border, marked leftmost column, 0.
    reflect_padded = F.pad(base, (3, 2), padding_mode='reflect')
    expected_reflect = np.asarray([0, 0, 1, 200, 1, 0])
    assert np.all(leading_pixels(reflect_padded) == expected_reflect)
    assert to_tensor(reflect_padded).size() == (3, 33, 35)

    # Symmetric mode, 3 left, 2 top, 2 right, 1 bottom. Expected:
    # sym_pad x3, constant border, marked leftmost column, 0.
    symmetric_padded = F.pad(base, (3, 2, 2, 1), padding_mode='symmetric')
    expected_symmetric = np.asarray([0, 1, 200, 200, 1, 0])
    assert np.all(leading_pixels(symmetric_padded) == expected_symmetric)
    assert to_tensor(symmetric_padded).size() == (3, 32, 34)
def __call__(self, img, mask):
    """Shift ``img`` and ``mask`` by the same random offset.

    The offset is drawn uniformly from ``[-self.offset[k], self.offset[k])``
    per axis and truncated to an int. The image is cropped on one side and
    reflect-padded on the opposite side so its size is unchanged; the mask
    is translated with ``tf.affine`` and uncovered pixels are filled with 250.

    Returns:
        tuple: ``(shifted_img, shifted_mask)``.
    """
    assert img.size == mask.size

    # Horizontal shift is drawn first, then the vertical one.
    dx = int(2 * (random.random() - 0.5) * self.offset[0])
    dy = int(2 * (random.random() - 0.5) * self.offset[1])

    # Positive shifts cut pixels from the left/top, negative shifts from
    # the right/bottom (the crop then starts at 0).
    cropped = tf.crop(
        img,
        max(dy, 0),
        max(dx, 0),
        img.size[1] - abs(dy),
        img.size[0] - abs(dx),
    )

    # (left, top, right, bottom): restore the removed width/height on the
    # side opposite to the one that was cropped.
    padding = (max(-dx, 0), max(-dy, 0), max(dx, 0), max(dy, 0))

    shifted_img = tf.pad(cropped, padding, padding_mode="reflect")
    shifted_mask = tf.affine(
        mask,
        translate=(-dx, -dy),
        scale=1.0,
        angle=0.0,
        shear=0.0,
        fillcolor=250,
    )
    return shifted_img, shifted_mask
def pad_with_mask(self, image, target, param):
    """Pad ``image`` to ``self.target_size`` and record the padded area.

    Args:
        image: image accepted by ``F.pad``; its padded ``.size`` is read as
            ``(width, height)`` (PIL convention).
        target: annotation dict or ``None``. ``"boxes"`` is required when a
            target is given; ``"masks"`` is optional.
        param: 4-sequence ``(left, top, right, bottom)`` of pad amounts.

    Returns:
        tuple: ``(padded_image, target)``. ``target`` is a shallow copy with
        ``"nest_mask"`` (bool tensor of shape ``(height, width)``, True on
        padded pixels), ``"size"`` (``[height, width]``), shifted ``"boxes"``
        and, if present, padded ``"masks"``. ``None`` is returned as target
        when the input target is ``None``.
    """
    padded_image = F.pad(image, param)
    assert padded_image.size == self.target_size
    if target is None:
        return padded_image, None

    target = target.copy()

    # ``size`` is (width, height); the mask below is indexed (row, column),
    # so it must be allocated as (height, width).
    w, h = padded_image.size
    mask = torch.ones(size=(h, w), dtype=torch.bool)
    # Clear the region occupied by the original, un-padded image.
    mask[param[1]:h - param[3], param[0]:w - param[2]] = 0

    # should we do something wrt the original size?
    target.update({"nest_mask": mask})
    target["size"] = torch.tensor(padded_image.size[::-1])
    # Boxes are xyxy: shift x by the left pad and y by the top pad.
    target["boxes"] = target["boxes"] + torch.tensor(
        [param[0], param[1], param[0], param[1]])
    if "masks" in target:
        # torch.nn.functional.pad wants (left, right, top, bottom).
        _param = (param[0], param[2], param[1], param[3])
        target['masks'] = torch.nn.functional.pad(target['masks'], _param)
    return padded_image, target
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    """Place the image at a random position on a larger filled canvas.

    With probability ``self.p`` the image is padded so that the canvas is
    ``r`` times the original size, ``r`` being drawn uniformly from
    ``self.side_range``. ``target["boxes"]`` is shifted in place by the
    left/top padding.

    Raises:
        ValueError: if ``image`` is a tensor that is not 2- or 3-dimensional.
    """
    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    # Leave the sample untouched for this draw.
    if torch.rand(1) >= self.p:
        return image, target

    _, orig_h, orig_w = F.get_dimensions(image)

    # Random canvas scale within side_range.
    ratio = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
    canvas_width = int(orig_w * ratio)
    canvas_height = int(orig_h * ratio)

    # Random placement of the original image inside the canvas.
    placement = torch.rand(2)
    left = int((canvas_width - orig_w) * placement[0])
    top = int((canvas_height - orig_h) * placement[1])
    right = canvas_width - (left + orig_w)
    bottom = canvas_height - (top + orig_h)

    if torch.jit.is_scripting():
        fill = 0
    else:
        fill = self._get_fill_value(F._is_pil_image(image))

    image = F.pad(image, [left, top, right, bottom], fill=fill)

    if isinstance(image, torch.Tensor):
        # PyTorch's pad supports only integers on fill, so the four border
        # strips are overwritten with the requested colour afterwards.
        v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
        image[..., :top, :] = v
        image[..., :, :left] = v
        image[..., (top + orig_h):, :] = v
        image[..., :, (left + orig_w):] = v

    if target is not None:
        target["boxes"][:, 0::2] += left
        target["boxes"][:, 1::2] += top

    return image, target
def __call__(self, image):
    """Pad ``image`` with white so that it becomes square.

    The shorter axis is padded on both sides; when the difference is odd
    the extra pixel goes to the bottom/right. The image is converted to
    RGB before padding.
    """
    width, height = image.size
    left_pad = top_pad = right_pad = bottom_pad = 0

    gap = abs(width - height)
    if width > height:
        # Landscape: add rows above and below.
        top_pad = gap // 2
        bottom_pad = gap - top_pad
    elif width < height:
        # Portrait: add columns left and right.
        left_pad = gap // 2
        right_pad = gap - left_pad

    rgb_image = image.convert('RGB')
    return pad(rgb_image, (left_pad, top_pad, right_pad, bottom_pad),
               fill=(255, 255, 255))
def pad(
    img: torch.Tensor,
    new_size: Union[int, Tuple[int, int]],
) -> torch.Tensor:
    """torchscript-compatible implementation of pad.

    The image is padded (never cropped) with edge replication; an axis that
    is already at least as large as requested is left unchanged. When the
    deficit is odd the extra pixel goes to the right/bottom.

    Args:
        img (torch.Tensor): image with shape [..., height, width] to pad
        new_size (Union[int, Tuple[int, int]]): (height, width) to pad to.
            If int, pads to a square image of that size.

    Returns:
        torch.Tensor: padded image of size [..., size[0], size[1]] or
        [..., size, size] if size is int.
    """
    new_size = to_tuple(new_size)
    old_size = img.shape[-2:]
    # Half of the missing size per axis, in (height, width) order.
    pad_size = (torch.tensor(new_size) - torch.tensor(old_size)) / 2
    # F.pad takes [left, top, right, bottom], i.e. the width amount comes
    # before the height amount, so reverse to (width, height) first.
    pad_size = torch.flip(pad_size, [0])
    padding = torch.cat((torch.floor(pad_size), torch.ceil(pad_size)))
    # Never pad by a negative amount when the image is already large enough.
    padding[padding < 0] = 0
    padding = [int(x) for x in padding]
    return F.pad(img, padding=padding, padding_mode="edge")
def __call__(self, img, target):
    """Resize ``img`` to ``self.size`` and rescale ``target['boxes']``.

    When ``self.size`` is a pair and ``self.scale_with_padding`` is set, the
    image is first padded symmetrically so that its aspect ratio matches
    the requested one. ``target['boxes']`` is updated in place: columns
    0 and 2 follow the horizontal changes, columns 1 and 3 the vertical
    ones, and the scaled values are floored.

    Returns:
        tuple: ``(resized_img, target)``.
    """
    w, h = img.size
    boxes = target['boxes']

    if isinstance(self.size, int):
        w_ratio = h_ratio = self.size / min(w, h)
    else:
        if w / h != self.size[1] / self.size[0] and self.scale_with_padding:
            if w / h < self.size[1] / self.size[0]:
                # Too narrow: pad left and right.
                pad_xy = (int((h * self.size[1] / self.size[0] - w) / 2), 0)
            else:
                # Too flat: pad top and bottom.
                pad_xy = (0, int((w * self.size[0] / self.size[1] - h) / 2))
            img = F.pad(img, pad_xy)
            boxes[:, (0, 2)] = boxes[:, (0, 2)] + pad_xy[0]
            boxes[:, (1, 3)] = boxes[:, (1, 3)] + pad_xy[1]
        # Ratios are taken from the (possibly padded) image size.
        w_ratio = self.size[1] / img.size[0]
        h_ratio = self.size[0] / img.size[1]

    img = F.resize(img, self.size)
    boxes[:, (0, 2)] = (boxes[:, (0, 2)] * w_ratio).floor()
    boxes[:, (1, 3)] = (boxes[:, (1, 3)] * h_ratio).floor()
    return img, target
def __call__(self, img):
    """Pad ``img`` to a square and resize it to ``self.size``.

    The shorter axis is padded on both sides using ``self.fill`` and
    ``self.padding_mode``; for an odd difference the extra pixel goes to
    the top/left.
    """
    width, height = img.size
    left = top = right = bottom = 0

    half, extra = divmod(abs(width - height), 2)
    if width > height:
        top, bottom = half + extra, half
    elif height > width:
        left, right = half + extra, half

    squared = pad(img, (left, top, right, bottom),
                  fill=self.fill, padding_mode=self.padding_mode)
    return resize(squared, size=self.size, interpolation=self.interpolation)
def undo_transform(self, sample):
    """Pad every entry of ``sample['input']`` back using the stored params.

    For entry ``i``, ``self.get_params(sample)[i]`` is unpacked as
    ``(fh, fw, w, h)`` and, together with ``self.size == (th, tw)``, turned
    into a (left, top, right, bottom) padding.
    NOTE(review): presumably ``fh``/``fw`` are the crop offsets and
    ``w``/``h`` the pre-crop size -- confirm against ``get_params``.

    The list in ``sample['input']`` is modified in place and ``sample`` is
    returned.
    """
    entries = sample['input']
    params = self.get_params(sample)
    crop_h, crop_w = self.size

    for idx in range(len(entries)):
        off_top, off_left, full_w, full_h = params[idx]
        padding = (
            off_left,
            off_top,
            full_w - off_left - crop_w,
            full_h - off_top - crop_h,
        )
        entries[idx] = F.pad(entries[idx], padding)

    sample.update({'input': entries})
    return sample
def __call__(self, input_image, target_image):
    """Apply the same random crop to ``input_image`` and ``target_image``.

    Both images receive identical padding (the fixed ``self.padding`` and,
    when ``self.pad_if_needed`` is set, extra padding up to ``self.size``)
    before one set of crop parameters is drawn from the input image and
    applied to both.

    Returns:
        tuple: ``(cropped_input, cropped_target)``.
    """
    if self.padding is not None:
        input_image = F.pad(input_image, self.padding, self.fill, self.padding_mode)
        target_image = F.pad(target_image, self.padding, self.fill, self.padding_mode)

    # pad the width if needed
    if self.pad_if_needed and input_image.size[0] < self.size[1]:
        input_image = F.pad(
            input_image, (self.size[1] - input_image.size[0], 0),
            self.fill, self.padding_mode)
        # The target must be padded from its own size, not from an
        # unrelated name, to stay aligned with the input.
        target_image = F.pad(
            target_image, (self.size[1] - target_image.size[0], 0),
            self.fill, self.padding_mode)

    # pad the height if needed
    if self.pad_if_needed and input_image.size[1] < self.size[0]:
        input_image = F.pad(
            input_image, (0, self.size[0] - input_image.size[1]),
            self.fill, self.padding_mode)
        target_image = F.pad(
            target_image, (0, self.size[0] - target_image.size[1]),
            self.fill, self.padding_mode)

    i, j, h, w = self.get_params(input_image, self.size)
    return F.crop(input_image, i, j, h, w), F.crop(target_image, i, j, h, w)
def __getitem__(self, index):
    """Load the image/label pair at ``index`` and apply the transforms.

    When neither ``self.input_transform`` nor ``self.target_transform`` is
    configured, a default pipeline is built for this sample only: image and
    label are padded to at least 256x256, a random 256x256 window is
    cropped from both, the image is normalised and the label relabelled.

    Returns:
        tuple: ``(image, label)`` after transformation.
    """
    filename = self.filenames[index]

    with open(image_path(self.images_root, filename, '.jpg'), 'rb') as f:
        image = load_image(f).convert('RGB')
    with open(image_path(self.labels_root, filename, '.png'), 'rb') as f:
        label = load_image(f).convert('P')

    # Keep the defaults local: storing them on ``self`` would freeze the
    # first sample's crop offsets and skip padding for every later sample.
    input_transform = self.input_transform
    target_transform = self.target_transform

    if input_transform is None and target_transform is None:
        tw, th = 256, 256

        # Pad image and label identically so they stay pixel-aligned.
        # NOTE(review): the label is padded with 0, the value Relabel(255, 0)
        # also maps the 255 label to -- confirm this is the intended class.
        padding = (max(0, tw - image.size[0]), max(0, th - image.size[1]))
        image = F.pad(image, padding)
        label = F.pad(label, padding)

        iw, ih = image.size[0], image.size[1]
        # randint is inclusive, so an exact fit yields offset 0.
        bi = random.randint(0, ih - th)
        bj = random.randint(0, iw - tw)

        input_transform = Compose([
            Crop(bi, bj, th, tw),
            ToTensor(),
            Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        target_transform = Compose([
            Crop(bi, bj, th, tw),
            ToLabel(),
            Relabel(255, 0),
        ])

    if input_transform is not None:
        image = input_transform(image)
    if target_transform is not None:
        label = target_transform(label)

    return image, label
def __getitem__(self, index):
    """Return a face image and its keypoint image under one random rotation.

    Both images are resized to 224x224 and rotated by the same random angle
    in [-5, 5] degrees. The face image is edge-padded by 62 pixels before
    the rotation and centre-cropped back to 224x224 afterwards; the
    keypoint image is rotated without padding. ``self.transform`` is
    applied to both results.
    """
    if self.mode == 'train':
        dataset = self.train_dataset
    else:
        dataset = self.test_dataset
    facefile, keypointfile = dataset[index]

    faceimage = Image.open(os.path.join(self.face_dir, facefile))
    keypointimage = Image.open(os.path.join(self.keypoints_dir, keypointfile))

    angle = random.randint(-5, 5)
    out_size = (224, 224)

    # Face: pad with edge pixels so the rotation does not pull in empty
    # corners, then cut the original extent back out.
    face = TF.resize(faceimage, out_size)
    face = TF.pad(face, padding=(62, 62), padding_mode='edge')
    face = TF.rotate(face, angle)
    face = TF.center_crop(face, out_size)

    # Keypoints: same resize and angle, no padding.
    keypoints = TF.rotate(TF.resize(keypointimage, out_size), angle)

    return self.transform(face), self.transform(keypoints)
def pad(image, target, padding):
    """Pad ``image`` and update the annotations in ``target`` to match.

    Args:
        image: image accepted by ``F.pad``; its padded ``.size`` is read as
            ``(width, height)``.
        target: annotation dict or ``None``.
        padding: 4-sequence ``(pad_left, pad_top, pad_right, pad_bottom)``.

    Returns:
        tuple: ``(padded_image, target)`` where ``target`` is a shallow copy
        with shifted ``"boxes"``, updated ``"size"`` (``[height, width]``)
        and padded ``"masks"`` when those keys are present, or ``None`` if
        no target was given. The caller's tensors are not modified.
    """
    padded_image = F.pad(image, padding)
    if target is None:
        return padded_image, None

    target = target.copy()
    # should we do something wrt the original size?
    w, h = padded_image.size

    if "boxes" in target:
        # Shift xyxy boxes by the left/top padding. Build a new tensor:
        # the copy above is shallow, so an in-place add would also change
        # the boxes held by the caller's dict.
        target["boxes"] = target["boxes"] + torch.tensor(
            [padding[0], padding[1], padding[0], padding[1]])

    target["size"] = torch.tensor([h, w])

    if "masks" in target:
        # torch.nn.functional.pad order: left, right, top, bottom.
        target['masks'] = torch.nn.functional.pad(
            target['masks'],
            (padding[0], padding[2], padding[1], padding[3]))

    return padded_image, target
def __call__(self, x):
    """Return a ``self.size`` x ``self.size`` patch of ``x``.

    An axis shorter than ``self.size`` is zero-padded up to exactly
    ``self.size`` (the extra pixel of an odd deficit goes to the left/top);
    an axis longer than ``self.size`` is cropped at a random offset.

    NOTE(review): ``randint`` is assumed to be ``random.randint`` (inclusive
    upper bound), since it is called with ``(0, 0)`` whenever an axis needs
    no cropping -- confirm against the file's imports.
    """
    w, h = x.size

    # Missing pixels per axis; zero when the image is already large enough.
    missing_w = max(0, self.size - w)
    missing_h = max(0, self.size - h)

    # (left, top, right, bottom); the two sides of an axis always sum to
    # the full deficit so the padded axis has exactly ``self.size`` pixels.
    padding = (
        missing_w - missing_w // 2,
        missing_h - missing_h // 2,
        missing_w // 2,
        missing_h // 2,
    )
    res = tvF.pad(x, padding, 0, 'constant')

    # Horizontal slack bounds the left offset, vertical slack the top one.
    left = randint(0, max(0, w - self.size))
    top = randint(0, max(0, h - self.size))
    return tvF.crop(res, top, left, self.size, self.size)
def __call__(self, img, expand=False, pad=False):
    """Rotate ``img`` by ``self.angle`` degrees.

    Args:
        img: image to rotate.
        expand: forwarded to ``F.rotate`` on the non-padding path.
        pad: when true and the angle is not a multiple of 90, reflect-pad
            the image before rotating (the original comment notes that
            ``pad=False`` is used for CIFAR).

    Returns:
        The rotated image. On the padding path the result keeps the padded
        size; no centre crop is applied afterwards.
    """
    if not (pad and self.angle % 90 != 0):
        return F.rotate(img, self.angle, expand=expand, fill=(0, ))

    w, h = img.size
    rad_angle = np.deg2rad(self.angle)
    # Border width per axis: |ceil(side * cos(a) * sin(a))|.
    stretch = np.cos(rad_angle) * np.sin(rad_angle)
    dw = np.abs(np.ceil(w * stretch)).astype(int)
    dh = np.abs(np.ceil(h * stretch)).astype(int)

    padded = F.pad(img, padding=(dw, dh), padding_mode='reflect')
    # actual rotation
    return F.rotate(padded, self.angle, fill=(0, ))
def __getitem__(self, index):
    """Load, resize/pad and transform the image at ``index``.

    Returns:
        tuple: ``(image, label, image_path)`` where ``image`` is the output
        of ``self.trans``.
    """
    image_path = self.images[index]
    image = Image.open(image_path)
    if not self.useRGB:
        # The three RGB channels hold identical values, so keep only one.
        image = Image.fromarray(np.asarray(image)[:, :, 0])

    if self.padding:
        # Scale the longer side to 224 (logic adapted from
        # torchvision.transforms.functional.resize).
        size = 224
        w, h = image.size
        if max(w, h) == size:
            ow, oh = w, h
        elif w < h:
            ow, oh = int(size * w / h), size
            image = image.resize((ow, oh), resample=Image.BILINEAR)
        else:
            ow, oh = size, int(size * h / w)
            image = image.resize((ow, oh), resample=Image.BILINEAR)

        # Zero-pad the shorter side up to 224, splitting the gap between
        # both sides (the extra pixel goes right/bottom).
        gap_w = size - ow
        gap_h = size - oh
        image = functional.pad(
            image,
            padding=(gap_w // 2, gap_h // 2,
                     gap_w - gap_w // 2, gap_h - gap_h // 2),
            fill=0,
            padding_mode='constant',
        )
    else:
        # Plain resize to 224x224.
        image = functional.resize(image, (224, 224))

    image = self.trans(image)
    label = self.labels[index]
    return image, label, image_path
def __call__(self, sample):
    """Rotate the image by the angle of the polygon's longer leading edge.

    The angle is taken from whichever of the edges ``p0 -> p1`` and
    ``p1 -> p2`` is longer. The image is edge-padded when the object is
    close to the image width, then rotated; the polygon vertices are
    shifted by the padding and rotated about the image centre. The polygon
    array from ``sample`` is modified in place.

    :param sample: dict with keys ``"image"``, ``"polygon"`` and ``"labels"``
    :return: dict with keys ``'image'``, ``'polygon'`` and ``'labels'``
    """
    image = sample["image"]
    polygon = sample["polygon"]
    labels = sample["labels"]

    w, h = image.size

    # Edge vectors p0 -> p1 and p1 -> p2.
    y1, x1 = polygon[1, 1] - polygon[0, 1], polygon[1, 0] - polygon[0, 0]
    y2, x2 = polygon[2, 1] - polygon[1, 1], polygon[2, 0] - polygon[1, 0]
    object_w = max((np.sqrt(x1 * x1 + y1 * y1)), (np.sqrt(x2 * x2 + y2 * y2)))

    first_is_longer = (x1 * x1 + y1 * y1) > (x2 * x2 + y2 * y2)
    if first_is_longer:
        angle = np.arctan2(y1, x1) * 180 / np.pi
    else:
        angle = np.arctan2(y2, x2) * 180 / np.pi

    # Ensure roughly 10 pixels of margin on each side of the object.
    pad = max(0, int(10 - (w - object_w) / 2))
    image = functional.pad(img=image, padding=[pad], padding_mode='edge')

    n_points = polygon.shape[0]
    for i in range(n_points):
        polygon[i][0] = polygon[i][0] + pad
        polygon[i][1] = polygon[i][1] + pad

    w, h = image.size
    image = functional.rotate(img=image, angle=angle)

    # Rotate the vertices about the image centre; y is negated so that the
    # rotation is done in a y-up coordinate frame.
    angle_rad = np.pi * angle / 180
    w_r, h_r = w / 2, -h / 2
    for i in range(n_points):
        x0, y0 = polygon[i][0], -polygon[i][1]
        polygon[i][0] = (x0 - w_r) * np.cos(angle_rad) - (
            y0 - h_r) * np.sin(angle_rad) + w_r
        polygon[i][1] = -((x0 - w_r) * np.sin(angle_rad) +
                          (y0 - h_r) * np.cos(angle_rad) + h_r)

    return {'image': image, 'polygon': polygon, 'labels': labels}
def __call__(self, sample):
    """Randomly crop the image and adjust the landmarks accordingly.

    Args:
        sample (dict): ``'image'`` (PIL Image) to be cropped and
            ``'landmarks'`` whose columns 0 and 1 are x and y expressed as
            fractions of the original image width and height. The
            landmarks array is modified in place.

    Returns:
        dict: ``{'image': cropped image, 'landmarks': landmarks}`` with the
        landmarks re-expressed relative to the crop.
    """
    image, landmarks = sample['image'], sample['landmarks']
    orig_w, orig_h = image.size

    # Normalise the padding spec to (left, top, right, bottom) so that both
    # scalar and sequence paddings can be tested and applied.
    padding = self.padding
    if not isinstance(padding, (tuple, list)):
        sides = (padding, padding, padding, padding)
    elif len(padding) == 2:
        # (left/right, top/bottom)
        sides = (padding[0], padding[1], padding[0], padding[1])
    else:
        # Length 4 is used as is; any other length is rejected by F.pad.
        sides = tuple(padding)

    if any(side > 0 for side in sides):
        image = F.pad(image, padding)
        left, top = sides[0], sides[1]
        # Landmarks are fractions of the original size, so the pixel shift
        # is converted to the same units.
        landmarks[:, 0] += left / orig_w
        landmarks[:, 1] += top / orig_h

    # i: upper pixel coordinate
    # j: left pixel coordinate
    i, j, h, w = self.get_params(image, self.size)
    landmarks -= [j / orig_w, i / orig_h]
    landmarks *= [orig_w / w, orig_h / h]
    image = F.crop(image, i, j, h, w)

    return {'image': image, 'landmarks': landmarks}
def __call__(self, img):
    """Randomly resize and 0-pad the given PIL image.

    Parameters
    ----------
    img PIL.Image : input image.

    Returns
    -------
    img PIL.Image : transformed image of size (self.size, self.size).
    """
    out_size = self.size

    # Pick a new edge length between the current width and the output size.
    new_edge = random.randint(img.width, out_size)
    resized_img = F.resize(img, new_edge)

    # Zero-pad by the remaining gap on all four sides.
    border = out_size - new_edge
    padded_img = F.pad(resized_img, border, fill=0)

    # Cut an (out_size, out_size) window at a random offset within the
    # border, top offset drawn before left offset.
    pos_top = random.randint(0, border)
    pos_left = random.randint(0, border)
    return F.crop(padded_img, pos_top, pos_left, out_size, out_size)
def __call__(self, img):
    """Fit ``img`` into a ``self.size`` square using symmetric padding.

    The longer side is resized to ``self.size`` keeping the aspect ratio,
    then the shorter side is padded on both ends in 'symmetric' mode (the
    extra pixel of an odd gap goes to the right/bottom).
    """
    side = self.size
    w, h = img.size

    if h > w:
        r = side / float(h)
        resized_w, resized_h = int(w * r), side
    else:
        r = side / float(w)
        resized_w, resized_h = side, int(h * r)
    img = img.resize((resized_w, resized_h))

    # Only one of the two gaps is non-zero: the axis set to ``side`` above
    # has nothing left to fill.
    gap_x = side - resized_w
    gap_y = side - resized_h
    pad_x_left = int(gap_x / 2.)
    pad_y_top = int(gap_y / 2.)

    padding = (pad_x_left, pad_y_top, gap_x - pad_x_left, gap_y - pad_y_top)
    return TF.pad(img, padding=padding, padding_mode='symmetric')
def __call__(self, img, target=None, mask=None):
    """Apply one random crop to the image and its optional companions.

    Args:
        img (PIL Image): Image to be cropped.
        target (PIL Image): (optional) Target to be cropped.
        mask (PIL Image): (optional) Mask to be cropped; only returned when
            ``target`` is also given.

    Returns:
        PIL Images: the cropped image alone, ``(image, target)`` or
        ``(image, target, mask)``, all cut with the same window.
    """
    if self.padding > 0:
        img = F.pad(img, self.padding)
        if target is not None:
            target = F.pad(target, self.padding)
        # The mask needs the same border to stay aligned with the image.
        if mask is not None:
            mask = F.pad(mask, self.padding)

    # pad the width if needed
    if self.pad_if_needed and img.size[0] < self.size[1]:
        img = F.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0))
        if target is not None:
            target = F.pad(target, (int((1 + self.size[1] - target.size[0]) / 2), 0))
        if mask is not None:
            # Must update the mask itself, not overwrite the target.
            mask = F.pad(mask, (int((1 + self.size[1] - mask.size[0]) / 2), 0))

    # pad the height if needed
    if self.pad_if_needed and img.size[1] < self.size[0]:
        img = F.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)))
        if target is not None:
            target = F.pad(target, (0, int((1 + self.size[0] - target.size[1]) / 2)))
        if mask is not None:
            mask = F.pad(mask, (0, int((1 + self.size[0] - mask.size[1]) / 2)))

    i, j, h, w = self.get_params(img, self.size)

    if target is not None and mask is None:
        return F.crop(img, i, j, h, w), F.crop(target, i, j, h, w)
    if target is not None and mask is not None:
        return (F.crop(img, i, j, h, w), F.crop(target, i, j, h, w),
                F.crop(mask, i, j, h, w))
    return F.crop(img, i, j, h, w)
def __call__(self, img, lab):
    """Apply the same random crop to an image and its label.

    Args:
        img: PIL image to crop.
        lab: label as a PIL image or as a ``numpy.ndarray`` laid out as
            (height, width, channels).

    Returns:
        tuple: ``(cropped_img, cropped_lab)``.

    NOTE(review): ndarray labels are padded with zeros, while PIL labels
    use ``self.label_fill`` -- confirm whether that difference is intended.
    """
    lab_is_array = isinstance(lab, np.ndarray)

    if self.padding is not None:
        img = F.pad(img, self.padding, self.fill, self.padding_mode)
        if lab_is_array:
            # Expand the padding spec to (left, top, right, bottom) so int
            # and short-sequence paddings work for arrays as well.
            spec = self.padding
            if isinstance(spec, int):
                spec = (spec, spec, spec, spec)
            elif len(spec) == 1:
                spec = (spec[0], spec[0], spec[0], spec[0])
            elif len(spec) == 2:
                spec = (spec[0], spec[1], spec[0], spec[1])
            lab = np.pad(lab,
                         ((spec[1], spec[3]), (spec[0], spec[2]), (0, 0)),
                         mode='constant')
        else:
            lab = F.pad(lab, self.padding, self.label_fill, self.padding_mode)

    # pad the width if needed
    if self.pad_if_needed and img.size[0] < self.size[1]:
        # Measure the deficit before the image is padded; afterwards the
        # difference would be negative and np.pad would reject it.
        missing_w = self.size[1] - img.size[0]
        img = F.pad(img, (missing_w, 0), self.fill, self.padding_mode)
        if lab_is_array:
            lab = np.pad(lab, ((0, 0), (missing_w, missing_w), (0, 0)),
                         mode='constant')
        else:
            lab = F.pad(lab, (self.size[1] - lab.size[0], 0),
                        self.label_fill, self.padding_mode)

    # pad the height if needed
    if self.pad_if_needed and img.size[1] < self.size[0]:
        missing_h = self.size[0] - img.size[1]
        img = F.pad(img, (0, missing_h), self.fill, self.padding_mode)
        if lab_is_array:
            lab = np.pad(lab, ((missing_h, missing_h), (0, 0), (0, 0)),
                         mode='constant')
        else:
            lab = F.pad(lab, (0, self.size[0] - lab.size[1]),
                        self.label_fill, self.padding_mode)

    i, j, h, w = self.get_params(img, self.size)
    img = F.crop(img, i, j, h, w)
    if lab_is_array:
        # assert the shape of label is in the order of (h, w, c)
        lab = lab[i:i + h, j:j + w, :]
    else:
        lab = F.crop(lab, i, j, h, w)
    return img, lab
def resize_image(image, desired_width=768, desired_height=384, random_pad=False):
    """Resize an image to fit the desired size, keeping the aspect ratio.

    The image is scaled by the smaller of the width and height ratios and
    then padded up to ``desired_width`` x ``desired_height``.

    Args:
        image: PIL image.
        desired_width: output width in pixels.
        desired_height: output height in pixels.
        random_pad: if true, the image is placed at a random position
            inside the canvas; otherwise it is centred.

    Returns:
        image: the resized and padded image
        window: [x1, y1, x2, y2] coordinates of the image part inside the
            padded result. The x2, y2 pixels are not included.
        scale: the scale factor used to resize the image
        padding: padding added to the image (left, top, right, bottom)
    """
    w, h = image.size
    scale = min(desired_width / w, desired_height / h)

    # Resize only when the size actually changes.
    if scale != 1:
        image = functional.resize(image, (round(h * scale), round(w * scale)))

    w, h = image.size
    # One of the two gaps is zero, since the scale was chosen so that one
    # axis already fills the canvas.
    y_pad = desired_height - h
    x_pad = desired_width - w

    if random_pad:
        top_pad = random.randint(0, y_pad)
        left_pad = random.randint(0, x_pad)
    else:
        top_pad = y_pad // 2
        left_pad = x_pad // 2

    # Padding order reference:
    # https://blog.csdn.net/Rocky6688/article/details/104475911
    padding = (left_pad, top_pad, x_pad - left_pad, y_pad - top_pad)
    assert all(amount >= 0 for amount in padding)

    image = functional.pad(image, padding)
    window = [left_pad, top_pad, w + left_pad, h + top_pad]
    return image, window, scale, padding
def __call__(self, image, target):
    """Randomly crop ``image``/``target`` to at most ``self.crop_size``.

    Crop windows are redrawn until the cropped target is non-empty. If the
    image is smaller than ``self.crop_size`` on an axis, the result is
    padded on the right/bottom up to ``self.crop_size``.

    Returns:
        tuple: ``(image, target)`` after cropping and optional padding.
    """
    w, h = image.size
    crop_h = min(h, self.crop_size)
    crop_w = min(w, self.crop_size)

    while True:
        top = np.random.randint(0, h - crop_h + 1)
        left = np.random.randint(0, w - crop_w + 1)
        window = (left, top, left + crop_w, top + crop_h)
        # target.crop is expected to return a new object rather than
        # modify the target itself.
        candidate = target.crop(window, remove_empty=True)
        # Empty targets are never accepted: the loop only ends once the
        # window contains at least one annotation.
        if len(candidate) > 0:
            target = candidate
            break

    image = F.crop(image, top, left, crop_h, crop_w)

    if crop_h < self.crop_size or crop_w < self.crop_size:
        padding = (0, 0, (self.crop_size - crop_w), (self.crop_size - crop_h))
        image = F.pad(image, padding=padding)
        target = target.pad(padding)

    return image, target
def __getitem__(self, idx):
    """Load one image with its two label maps and apply the augmentations.

    Optional random scaling, horizontal mirroring and random cropping are
    applied identically to the image and both labels. Label borders added
    before cropping are filled with ``self.ignore_label``.

    Returns:
        tuple: ``(image, label_1.long(), label_2.float())``.
    """
    image = Image.open(self.image_list[idx])
    label_1 = Image.open(self.label_list_1[idx])
    label_2 = Image.open(self.label_list_2[idx])

    w, h = image.size

    if self.random_scale:
        # New shorter-edge length: 0.5x to 1.5x of the current one.
        scale = int(min(w, h) * (np.random.uniform() + 0.5))
        resize_bl = transforms.Resize(size=scale, interpolation=PIL.Image.BILINEAR)
        resize_nn = transforms.Resize(size=scale, interpolation=PIL.Image.NEAREST)
        image = resize_bl(image)
        # Nearest-neighbour keeps label values intact.
        label_1 = resize_nn(label_1)
        label_2 = resize_nn(label_2)

    if self.random_mirror and np.random.uniform() < 0.5:
        image = TF.hflip(image)
        label_1 = TF.hflip(label_1)
        label_2 = TF.hflip(label_2)

    if self.random_crop:
        out_h, out_w = self.output_size[0], self.output_size[1]

        # pad the width if needed
        if image.size[0] < out_w:
            image = TF.pad(image, (out_w - image.size[0], 0))
            label_1 = TF.pad(label_1, (out_w - label_1.size[0], 0),
                             self.ignore_label, 'constant')
            label_2 = TF.pad(label_2, (out_w - label_2.size[0], 0),
                             tuple([self.ignore_label] * 3), 'constant')

        # pad the height if needed
        if image.size[1] < out_h:
            image = TF.pad(image, (0, out_h - image.size[1]))
            label_1 = TF.pad(label_1, (0, out_h - label_1.size[1]),
                             self.ignore_label, 'constant')
            label_2 = TF.pad(label_2, (0, out_h - label_2.size[1]),
                             tuple([self.ignore_label] * 3), 'constant')

        top, left, crop_h, crop_w = transforms.RandomCrop.get_params(
            image, output_size=self.output_size)
        image = TF.crop(image, top, left, crop_h, crop_w)
        label_1 = TF.crop(label_1, top, left, crop_h, crop_w)
        label_2 = TF.crop(label_2, top, left, crop_h, crop_w)

    # Values are shifted by -255 before to_tensor and by +255 afterwards.
    # NOTE(review): presumably this keeps to_tensor from rescaling the
    # values to [0, 1] -- confirm.
    image = self.normalize(self.to_tensor(np.array(image) - 255.).float() + 255.)
    label_1 = self.to_tensor(np.array(label_1) - 255.) + 255.
    label_2 = self.to_tensor(np.array(label_2) - 255.) + 255.

    return image, label_1.long(), label_2.float()
def __call__(self, x):
    """Resize ``x`` to fit a ``self._size`` square and pad the remainder.

    Args:
        x: image tensor whose last two dimensions are (height, width).

    Returns:
        The image with its longer side resized to ``self._size`` (aspect
        ratio preserved) and the shorter side padded on both ends to
        ``self._size``; the extra pixel of an odd gap goes right/bottom.
    """
    height, width = x.shape[-2:]
    aspect_ratio = width / height

    if width > height:
        new_width = self._size
        new_height = round(new_width / aspect_ratio)
    else:
        new_height = self._size
        new_width = round(aspect_ratio * new_height)

    # Resize through the same functional module that provides ``pad``.
    resized = F.resize(x, [new_height, new_width],
                       interpolation=self._interpolation)

    pad_width = self._size - new_width
    pad_left = pad_width // 2
    pad_right = pad_width - pad_left

    pad_height = self._size - new_height
    pad_top = pad_height // 2
    pad_bottom = pad_height - pad_top

    return F.pad(resized, [pad_left, pad_top, pad_right, pad_bottom])
def __call__(self, img):
    """Pad the image, either by a border width or up to a target size.

    A scalar ``self.padding`` greater than 100 is interpreted as the target
    side length: the image is centred on a square canvas of that size (the
    extra pixel of an odd gap goes to the right/bottom). Any other value,
    including a sequence, is passed to ``F.pad`` unchanged as the border.

    NOTE(review): when the image is larger than the target size the
    computed padding is negative and is forwarded as is.

    Args:
        img (PIL Image): Image to be padded.

    Returns:
        PIL Image: Padded image.
    """
    padding = self.padding
    # Sequences are always explicit border widths; comparing them with an
    # int would raise TypeError.
    is_target_size = not isinstance(padding, (tuple, list)) and padding > 100

    if is_target_size:
        sz = img.size
        delta_w = padding - sz[0]
        delta_h = padding - sz[1]
        t, b = delta_h // 2, delta_h - (delta_h // 2)
        l, r = delta_w // 2, delta_w - (delta_w // 2)
        pad = (l, t, r, b)
    else:
        pad = padding

    return F.pad(img, pad, self.fill, self.padding_mode)
def image_to_tensor(image,
                    resolution=None,
                    paddingval=None,
                    padding_mode='constant',
                    do_imagenet_norm=True,
                    do_padding=True):
    """Convert an image (or an image path) into a tensor.

    Args:
        image: PIL image, or a path string that is opened and converted
            to RGB.
        resolution: if given, passed to ``tr.resize`` after padding.
        paddingval: border passed to ``tr.pad``; padding is skipped when
            this is ``None``.
        padding_mode: padding mode passed to ``tr.pad``.
        do_imagenet_norm: apply ``imagenet_norm`` to the tensor.
        do_padding: enable the padding step.

    Returns:
        The image tensor produced by ``tr.to_tensor`` (optionally
        normalised).
    """
    if isinstance(image, str):
        # convert() returns a fully loaded copy, so the file can be closed
        # right away instead of leaking the handle.
        with Image.open(image) as opened:
            image = opened.convert('RGB')

    if image.width != image.height:
        # if not square image, crop the long side's edges to make it square
        r = min(image.width, image.height)
        image = tr.center_crop(image, (r, r))

    # Pad only when a padding value was actually supplied; the default
    # arguments (do_padding=True, paddingval=None) would otherwise hand
    # ``None`` to tr.pad.
    if do_padding and paddingval is not None:
        image = tr.pad(image, padding=paddingval, padding_mode=padding_mode, fill=0)

    if resolution is not None:
        # if size is an int, smaller edge of the image will be matched to
        # this number
        image = tr.resize(image, resolution)

    image = tr.to_tensor(image)
    if do_imagenet_norm:
        image = imagenet_norm(image)
    return image
def _transform_image(self, x):
    """Apply ``self.affine_transform`` to tensor ``x`` via a PIL round trip.

    ``x`` is shifted by its minimum and divided by its maximum, converted
    to a PIL image, padded by ``self.padding``, transformed, cropped back
    to ``(self.bank_height, self.bank_width)`` and mapped back to the
    original value range.
    """
    # PIL images must be in format float 0-1 gray scale.
    min_x = torch.min(x)
    max_x = torch.max(x)
    x_transformed = x - min_x
    x_transformed /= max_x

    # Value to use to fill so that when reconverted to tensor, fill value
    # is 0.
    fillcolor = int(-min_x / max_x * 255)
    self.affine_transform.fillcolor = fillcolor

    # Conversion to PIL image loses quality because it is converted to
    # 0-255 gray scale.
    x_pil = tf.to_pil_image(x_transformed)
    padded = tf.pad(x_pil, self.padding, fill=fillcolor)
    warped = self.affine_transform(padded)
    x_pil = tf.crop(warped, self.padding, self.padding,
                    self.bank_height, self.bank_width)

    # Undo the scaling and the shift applied above.
    restored = tf.to_tensor(x_pil)
    restored *= max_x
    restored += min_x
    return restored
def extend_mnist(Xtr, Ytr, N=1000, degrees=15, scale=(.85, 1.11), shear=15):
    """Append ``N`` randomly affine-transformed digits to a training set.

    Each new sample is a randomly chosen image from ``Xtr``, padded by 3
    pixels, passed through ``RandomAffine`` and cropped back to 28x28.

    Args:
        Xtr: array that can be reshaped to (-1, 1, 28, 28).
        Ytr: labels matching ``Xtr``.
        N: number of augmented samples to generate.
        degrees, scale, shear: forwarded to ``RandomAffine``.

    Returns:
        tuple: ``(Xtr, Ytr)`` each concatenated with the augmented samples.
    """
    Xtr_torch = torch.from_numpy(Xtr).reshape((-1, 1, 28, 28))
    augment = RandomAffine(degrees=degrees, scale=scale, shear=shear)

    ex_Xtr = np.zeros((N, 28, 28), dtype=Xtr.dtype)
    ex_Ytr = np.zeros((N, ), dtype=Ytr.dtype)
    border = 3

    for i in range(N):
        idx = np.random.randint(Xtr.shape[0])
        # Pad first so the transform has a margin to work with, then cut
        # the original 28x28 extent back out.
        padded = tf.pad(tf.to_pil_image(Xtr_torch[idx]), border)
        warped = augment(padded)
        digit = tf.to_tensor(tf.crop(warped, border, border, 28, 28))
        digit = digit.numpy().reshape(28, 28)
        if Xtr.dtype == np.uint8:
            # to_tensor yields values in [0, 1]; restore the 0-255 range.
            digit *= 255
        ex_Xtr[i] = digit
        ex_Ytr[i] = Ytr[idx]

    return np.concatenate((Xtr, ex_Xtr)), np.concatenate((Ytr, ex_Ytr))
def __call__(self, img):
    """Resize ``img`` to fit ``self.size`` and pad it to that exact size.

    Args:
        img (PIL Image): Image to be scaled.

    Returns:
        PIL Image: image scaled with its aspect ratio preserved and padded
        on the remaining axis (extra pixel of an odd gap goes to the
        bottom/right).
    """
    img_w, img_h = img.size
    out_h, out_w = self.size

    if img_w / img_h > out_w / out_h:
        # Relatively wider than the output: fill the width, pad vertically.
        resized_w = out_w
        resized_h = int(img_h / img_w * resized_w)
        leading = int((out_h - resized_h) / 2)
        padding = (0, leading, 0, out_h - resized_h - leading)
    else:
        # Relatively taller (or equal): fill the height, pad horizontally.
        resized_h = out_h
        resized_w = int(img_w / img_h * resized_h)
        leading = int((out_w - resized_w) / 2)
        padding = (leading, 0, out_w - resized_w - leading, 0)

    resized_image = F.resize(img, (resized_h, resized_w), self.interpolation)
    return F.pad(resized_image, padding)
def torchvision(self, img):
    """Reflect-pad ``img`` so that each side is at least about 512 pixels.

    A side shorter than 512 is padded on both ends by half of the gap,
    rounded up; sides that are already large enough are left unchanged.
    """
    min_side = 512

    if img.size[0] < min_side:
        pad_x = int((1 + min_side - img.size[0]) / 2)
        img = torchvision.pad(img, (pad_x, 0), padding_mode='reflect')

    if img.size[1] < min_side:
        pad_y = int((1 + min_side - img.size[1]) / 2)
        img = torchvision.pad(img, (0, pad_y), padding_mode='reflect')

    return img