import numpy as np
import torch
from torch.autograd import Variable

from detectron2.layers import ROIAlign, ROIAlignRotated


def _simple_roialign_with_grad(self, img, box, resolution, device):
    if isinstance(resolution, int):
        resolution = (resolution, resolution)
    op = ROIAlign(resolution, 1.0, 0, aligned=True)
    input = torch.from_numpy(img[None, None, :, :].astype("float32"))

    # ROIAlign expects rois as (batch_index, x1, y1, x2, y2)
    rois = [0] + list(box)
    rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
    input = input.to(device=device)
    rois = rois.to(device=device)
    input.requires_grad = True
    output = op.forward(input, rois)
    return input, output
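# A minimal sketch (not part of the original tests) of how the helper above
# could be used: backprop a scalar through the pooled output and confirm the
# gradient reaches the input. The image, box, and device are made-up values,
# and `test_case` stands in for the TestCase instance owning the method.
def _grad_flow_sketch(test_case, device="cpu"):
    img = np.random.rand(10, 10)
    input, output = test_case._simple_roialign_with_grad(img, [1.0, 1.0, 7.0, 7.0], 5, device)
    output.sum().backward()  # ROIAlign's backward populates input.grad
    test_case.assertEqual(input.grad.shape, input.shape)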
def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
    """
    Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
    This can be used to prepare training targets for Mask R-CNN.
    It has less reconstruction error compared to rasterization with polygons.
    In practice, however, we observe no difference in accuracy,
    while BitMasks requires more memory to store all the masks.

    Args:
        boxes (Tensor): Nx4 tensor storing the boxes for each mask
        mask_size (int): the size of the rasterized mask.

    Returns:
        Tensor:
            A bool tensor of shape (N, mask_size, mask_size), where
            N is the number of predicted boxes for this image.
    """
    assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
    device = self.tensor.device

    # Pair mask i with box i: rois are (batch_index, x1, y1, x2, y2)
    batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None]
    rois = torch.cat([batch_inds, boxes], dim=1)  # Nx5

    bit_masks = self.tensor.to(dtype=torch.float32)
    rois = rois.to(device=device)
    output = (
        ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True)
        .forward(bit_masks[:, None, :, :], rois)
        .squeeze(1)
    )
    output = output >= 0.5
    return output
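# A minimal usage sketch (not part of the original file), assuming this method
# lives on detectron2.structures.BitMasks. The random masks and boxes are
# made-up values; the call rasterizes one boolean 14x14 training target per box.
def _crop_and_resize_usage_sketch():
    from detectron2.structures import BitMasks

    masks = BitMasks(torch.rand(2, 28, 28) > 0.5)  # two full-image bitmasks
    boxes = torch.tensor([[2.0, 2.0, 20.0, 20.0], [5.0, 5.0, 25.0, 25.0]])  # Nx4, XYXY
    targets = masks.crop_and_resize(boxes, mask_size=14)
    assert targets.shape == (2, 14, 14) and targets.dtype == torch.bool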
def _simple_roialign(self, img, box, resolution, aligned=True):
    """
    RoIAlign with spatial scale 1.0 and sampling ratio 0 (adaptive).
    """
    if isinstance(resolution, int):
        resolution = (resolution, resolution)
    op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
    input = torch.from_numpy(img[None, None, :, :].astype("float32"))

    rois = [0] + list(box)
    rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
    output = op.forward(input, rois)
    # The CUDA kernel should agree with the CPU implementation.
    if torch.cuda.is_available():
        output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
        self.assertTrue(torch.allclose(output, output_cuda))
    return output[0, 0]
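# An illustrative sketch (not from the original tests) of what `aligned`
# changes: with aligned=True, detectron2 shifts the box coordinates by half a
# pixel so sampling is pixel-center accurate, so the two outputs generally
# differ. The ramp image and box below are arbitrary assumptions.
def _aligned_vs_unaligned_sketch():
    img = np.arange(100, dtype="float32").reshape(10, 10)
    input = torch.from_numpy(img[None, None, :, :])
    rois = torch.tensor([[0.0, 1.0, 1.0, 6.0, 6.0]])  # (batch_index, x1, y1, x2, y2)
    out_aligned = ROIAlign((3, 3), 1.0, 0, aligned=True)(input, rois)
    out_legacy = ROIAlign((3, 3), 1.0, 0, aligned=False)(input, rois)
    assert not torch.allclose(out_aligned, out_legacy)  # half-pixel shift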
def test_roi_align_rotated_gradient_cuda(self):
    """
    Compute gradients for ROIAlignRotated with multiple bounding boxes
    on the GPU, and compare the result with ROIAlign.
    """
    # torch.manual_seed(123)
    dtype = torch.float64
    device = torch.device("cuda")
    pool_h, pool_w = (5, 5)

    roi_align = ROIAlign(
        output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2
    ).to(device=device)
    roi_align_rotated = ROIAlignRotated(
        output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2
    ).to(device=device)

    x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True)
    # x_rotated = x.clone() won't work (it would lead to grad_fn=CloneBackward)!
    x_rotated = Variable(x.data.clone(), requires_grad=True)

    # rotated roi format is (batch index, x_center, y_center, width, height, angle)
    rois_rotated = torch.tensor(
        [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
        dtype=dtype,
        device=device,
    )

    y_rotated = roi_align_rotated(x_rotated, rois_rotated)
    s_rotated = y_rotated.sum()
    s_rotated.backward()

    # roi format is (batch index, x1, y1, x2, y2)
    rois = torch.tensor(
        [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]],
        dtype=dtype,
        device=device,
    )

    y = roi_align(x, rois)
    s = y.sum()
    s.backward()

    assert torch.allclose(
        x.grad, x_rotated.grad
    ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA"
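# A small sketch (illustrative, not from the original tests) of the box
# conversion the test above relies on: an axis-aligned (x1, y1, x2, y2) box is
# the zero-angle rotated box (cx, cy, w, h, 0). For instance, (0, 0, 9, 9)
# maps to center (4.5, 4.5) with size 9x9, matching rois_rotated above.
def _xyxy_to_rotated_sketch(box):
    x1, y1, x2, y2 = box
    cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
    w, h = x2 - x1, y2 - y1
    return [cx, cy, w, h, 0.0]  # angle 0 => same region as the XYXY box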
def test_empty_batch(self):
    input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
    rois = torch.zeros(0, 5, dtype=torch.float32)
    op = ROIAlign((7, 7), 1.0, 0, aligned=True)
    output = op.forward(input, rois)
    self.assertTrue(output.shape == (0, 3, 7, 7))