import math
from typing import Dict, Optional, Tuple

import torch
import torchvision
from torch import Tensor
from torchvision.transforms import InterpolationMode
from torchvision.transforms import functional as F


def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    _, orig_height, orig_width = F.get_dimensions(image)

    # Pick one of the candidate shorter-side sizes at random.
    min_size = self.min_size[torch.randint(len(self.min_size), (1,)).item()]
    # Scale the shorter side to min_size without letting the longer side exceed max_size.
    r = min(min_size / min(orig_height, orig_width), self.max_size / max(orig_height, orig_width))

    new_width = int(orig_width * r)
    new_height = int(orig_height * r)

    image = F.resize(image, [new_height, new_width], interpolation=self.interpolation)

    if target is not None:
        # Rescale xyxy boxes by the same factors as the image.
        target["boxes"][:, 0::2] *= new_width / orig_width
        target["boxes"][:, 1::2] *= new_height / orig_height
        if "masks" in target:
            target["masks"] = F.resize(
                target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST
            )

    return image, target
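# A minimal sketch (not from the source) of the resize-ratio rule above, pulled
# out as a free function; `shortest_size_ratio` is an illustrative name:
def shortest_size_ratio(orig_h: int, orig_w: int, min_size: int, max_size: int) -> float:
    # Scale the shorter side to min_size, capped so the longer side stays <= max_size.
    return min(min_size / min(orig_h, orig_w), max_size / max(orig_h, orig_w))


_r = shortest_size_ratio(480, 640, 800, 1333)
print(int(480 * _r), int(640 * _r))  # -> 800 1066: the shorter side hits min_size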
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    _, orig_height, orig_width = F.get_dimensions(image)

    scale = self.scale_range[0] + torch.rand(1) * (self.scale_range[1] - self.scale_range[0])
    r = min(self.target_size[1] / orig_height, self.target_size[0] / orig_width) * scale
    new_width = int(orig_width * r)
    new_height = int(orig_height * r)

    image = F.resize(image, [new_height, new_width], interpolation=self.interpolation)

    if target is not None:
        target["boxes"][:, 0::2] *= new_width / orig_width
        target["boxes"][:, 1::2] *= new_height / orig_height
        if "masks" in target:
            target["masks"] = F.resize(
                target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST
            )

    return image, target
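# Illustrative sketch (values made up) of the jittered resize ratio above; a
# square target_size sidesteps the (height, width) index pairing of the snippet:
_lo, _hi = 0.1, 2.0                                # scale_range
_scale = float(_lo + torch.rand(1) * (_hi - _lo))  # uniform in [lo, hi)
_target_h, _target_w = 1024, 1024                  # target_size
_orig_h, _orig_w = 480, 640
_r = min(_target_w / _orig_h, _target_h / _orig_w) * _scale
print(int(_orig_w * _r), int(_orig_h * _r))        # jittered output (w, h)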
def preprocessing(img, target):
    img = trans(img)
    size = F.get_dimensions(img)[1:]
    target = F.resize(target, size, interpolation=InterpolationMode.NEAREST)
    return img, F.pil_to_tensor(target)
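# Hedged usage sketch with dummy data: NEAREST keeps resized mask values as valid
# integer class ids, where bilinear interpolation would blend neighbouring labels.
from PIL import Image

_mask = Image.new("L", (640, 480))  # PIL size is (width, height)
_mask = F.resize(_mask, [240, 320], interpolation=InterpolationMode.NEAREST)
_mask_t = F.pil_to_tensor(_mask)    # uint8 tensor of shape (1, 240, 320)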
def forward(self, batch: Tensor, target: Tensor) -> Tuple[Tensor, Tensor]:
    """
    Args:
        batch (Tensor): Float tensor of size (B, C, H, W)
        target (Tensor): Integer tensor of size (B, )

    Returns:
        Tuple[Tensor, Tensor]: Randomly transformed batch and target.
    """
    if batch.ndim != 4:
        raise ValueError(f"Batch ndim should be 4. Got {batch.ndim}")
    if target.ndim != 1:
        raise ValueError(f"Target ndim should be 1. Got {target.ndim}")
    if not batch.is_floating_point():
        raise TypeError(f"Batch dtype should be a float tensor. Got {batch.dtype}.")
    if target.dtype != torch.int64:
        raise TypeError(f"Target dtype should be torch.int64. Got {target.dtype}")

    if not self.inplace:
        batch = batch.clone()
        target = target.clone()

    if target.ndim == 1:
        target = torch.nn.functional.one_hot(target, num_classes=self.num_classes).to(dtype=batch.dtype)

    if torch.rand(1).item() >= self.p:
        return batch, target

    # It's faster to roll the batch by one instead of shuffling it to create image pairs
    batch_rolled = batch.roll(1, 0)
    target_rolled = target.roll(1, 0)

    # Implemented as on cutmix paper, page 12 (with minor corrections on typos).
    lambda_param = float(torch._sample_dirichlet(torch.tensor([self.alpha, self.alpha]))[0])
    _, H, W = F.get_dimensions(batch)

    r_x = torch.randint(W, (1,))
    r_y = torch.randint(H, (1,))

    r = 0.5 * math.sqrt(1.0 - lambda_param)
    r_w_half = int(r * W)
    r_h_half = int(r * H)

    x1 = int(torch.clamp(r_x - r_w_half, min=0))
    y1 = int(torch.clamp(r_y - r_h_half, min=0))
    x2 = int(torch.clamp(r_x + r_w_half, max=W))
    y2 = int(torch.clamp(r_y + r_h_half, max=H))

    batch[:, :, y1:y2, x1:x2] = batch_rolled[:, :, y1:y2, x1:x2]
    # Recompute lambda from the clipped box area so the label mix matches the pixel mix.
    lambda_param = float(1.0 - (x2 - x1) * (y2 - y1) / (W * H))

    target_rolled.mul_(1.0 - lambda_param)
    target.mul_(lambda_param).add_(target_rolled)

    return batch, target
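# Sketch (made-up lambda) of the CutMix box geometry used above: the box side is
# proportional to sqrt(1 - lambda) so its area matches the mixing weight.
_H, _W, _lam = 224, 224, 0.3
_half = 0.5 * math.sqrt(1.0 - _lam)
_rx, _ry = int(torch.randint(_W, (1,))), int(torch.randint(_H, (1,)))
_x1, _x2 = max(_rx - int(_half * _W), 0), min(_rx + int(_half * _W), _W)
_y1, _y2 = max(_ry - int(_half * _H), 0), min(_ry + int(_half * _H), _H)
# The box may be clipped at the border, so lambda is recomputed from the true area:
_lam_adjusted = 1.0 - (_x2 - _x1) * (_y2 - _y1) / (_W * _H)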
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if torch.rand(1) < self.p:
        image = F.hflip(image)
        if target is not None:
            _, _, width = F.get_dimensions(image)
            target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
            if "masks" in target:
                target["masks"] = target["masks"].flip(-1)
            if "keypoints" in target:
                keypoints = target["keypoints"]
                keypoints = _flip_coco_person_keypoints(keypoints, width)
                target["keypoints"] = keypoints
    return image, target
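# Sketch with a dummy box: a horizontal flip of xyxy boxes mirrors the x
# coordinates and swaps x1/x2 so the box stays well-formed.
_width = 640
_boxes = torch.tensor([[10.0, 20.0, 110.0, 120.0]])
_boxes[:, [0, 2]] = _width - _boxes[:, [2, 0]]
print(_boxes)  # tensor([[530., 20., 630., 120.]])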
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    if torch.rand(1) >= self.p:
        return image, target

    _, orig_h, orig_w = F.get_dimensions(image)

    # Sample the zoom-out canvas size, then the image's position inside it.
    r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
    canvas_width = int(orig_w * r)
    canvas_height = int(orig_h * r)

    r = torch.rand(2)
    left = int((canvas_width - orig_w) * r[0])
    top = int((canvas_height - orig_h) * r[1])
    right = canvas_width - (left + orig_w)
    bottom = canvas_height - (top + orig_h)

    if torch.jit.is_scripting():
        fill = 0
    else:
        fill = self._get_fill_value(F._is_pil_image(image))

    image = F.pad(image, [left, top, right, bottom], fill=fill)
    if isinstance(image, torch.Tensor):
        # PyTorch's pad supports only integers on fill. So we need to overwrite the colour
        v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
        image[..., :top, :] = v
        image[..., :, :left] = v
        image[..., (top + orig_h):, :] = v
        image[..., :, (left + orig_w):] = v

    if target is not None:
        target["boxes"][:, 0::2] += left
        target["boxes"][:, 1::2] += top

    return image, target
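# Sketch with dummy values: after padding the canvas by (left, top), xyxy boxes
# only translate; their width and height are unchanged.
_left, _top = 100, 50
_boxes = torch.tensor([[10.0, 20.0, 110.0, 120.0]])
_boxes[:, 0::2] += _left  # x1 and x2
_boxes[:, 1::2] += _top   # y1 and y2
print(_boxes)  # tensor([[110., 70., 210., 170.]])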
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    r = torch.rand(7)

    if r[0] < self.p:
        image = self._brightness(image)

    # Apply contrast either before or after saturation/hue, chosen at random.
    contrast_before = r[1] < 0.5
    if contrast_before:
        if r[2] < self.p:
            image = self._contrast(image)

    if r[3] < self.p:
        image = self._saturation(image)

    if r[4] < self.p:
        image = self._hue(image)

    if not contrast_before:
        if r[5] < self.p:
            image = self._contrast(image)

    if r[6] < self.p:
        # Randomly permute the colour channels.
        channels, _, _ = F.get_dimensions(image)
        permutation = torch.randperm(channels)

        is_pil = F._is_pil_image(image)
        if is_pil:
            image = F.pil_to_tensor(image)
            image = F.convert_image_dtype(image)
        image = image[..., permutation, :, :]
        if is_pil:
            image = F.to_pil_image(image)

    return image, target
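# Sketch of the final channel shuffle on a dummy float CHW tensor: only the
# channel dimension is permuted, pixel positions stay fixed.
_img = torch.rand(3, 4, 4)
_perm = torch.randperm(3)
_img = _img[..., _perm, :, :]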
def forward(self, img, target=None):
    _, height, width = F.get_dimensions(img)
    new_height = min(height, self.crop_height)
    new_width = min(width, self.crop_width)

    if new_height != height or new_width != width:
        offset_height = max(height - self.crop_height, 0)
        offset_width = max(width - self.crop_width, 0)

        # A single random draw positions both the vertical and horizontal crop offsets.
        r = torch.rand(1)
        top = int(offset_height * r)
        left = int(offset_width * r)

        img, target = self._crop(img, target, top, left, new_height, new_width)

    pad_bottom = max(self.crop_height - new_height, 0)
    pad_right = max(self.crop_width - new_width, 0)
    if pad_bottom != 0 or pad_right != 0:
        img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom])

    return img, target
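# Sketch (made-up sizes) of the crop-then-pad arithmetic: dimensions larger than
# the target are cropped, smaller ones are padded on the bottom/right.
_crop_h, _crop_w = 512, 512
_h, _w = 480, 600
_new_h, _new_w = min(_h, _crop_h), min(_w, _crop_w)
_pad_bottom, _pad_right = max(_crop_h - _new_h, 0), max(_crop_w - _new_w, 0)
print(_new_h, _new_w, _pad_bottom, _pad_right)  # 480 512 32 0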
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if target is None:
        raise ValueError("The targets can't be None for this transform.")

    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    _, orig_h, orig_w = F.get_dimensions(image)

    while True:
        # sample an option
        idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
        min_jaccard_overlap = self.options[idx]
        if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave as-is option
            return image, target

        for _ in range(self.trials):
            # check the aspect ratio limitations
            r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
            new_w = int(orig_w * r[0])
            new_h = int(orig_h * r[1])
            aspect_ratio = new_w / new_h
            if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
                continue

            # check for 0 area crops
            r = torch.rand(2)
            left = int((orig_w - new_w) * r[0])
            top = int((orig_h - new_h) * r[1])
            right = left + new_w
            bottom = top + new_h
            if left == right or top == bottom:
                continue

            # check for any valid boxes with centers within the crop area
            cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
            cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
            is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
            if not is_within_crop_area.any():
                continue

            # check at least 1 box with jaccard limitations
            boxes = target["boxes"][is_within_crop_area]
            ious = torchvision.ops.boxes.box_iou(
                boxes, torch.tensor([[left, top, right, bottom]], dtype=boxes.dtype, device=boxes.device)
            )
            if ious.max() < min_jaccard_overlap:
                continue

            # keep only valid boxes and perform cropping
            target["boxes"] = boxes
            target["labels"] = target["labels"][is_within_crop_area]
            target["boxes"][:, 0::2] -= left
            target["boxes"][:, 1::2] -= top
            target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
            target["boxes"][:, 1::2].clamp_(min=0, max=new_h)

            image = F.crop(image, top, left, new_h, new_w)

            return image, target
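# Sketch of the jaccard gate above on dummy boxes, via torchvision's box_iou:
_boxes = torch.tensor([[30.0, 30.0, 100.0, 100.0]])
_crop_box = torch.tensor([[0.0, 0.0, 120.0, 120.0]])
_ious = torchvision.ops.boxes.box_iou(_boxes, _crop_box)  # pairwise IoU, shape (1, 1)
_keep_crop = bool(_ious.max() >= 0.3)  # True here: IoU is roughly 0.34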