示例#1
0
    def _simple_roialign_with_grad(self, img, box, resolution, device):
        """
        Run aligned ROIAlign (scale 1.0, sampling ratio 0) for a single box and
        keep the input tensor differentiable.

        Args:
            img: numpy array; two leading axes of size 1 are added before it is
                converted to a float32 tensor.
            box: iterable of box coordinates; a leading 0 (the batch index) is
                prepended to form the single RoI row.
            resolution: output size; an int is expanded to ``(int, int)``,
                anything else is handed to ROIAlign unchanged.
            device: device the input and RoI tensors are moved to.

        Returns:
            tuple: ``(input, output)`` where ``input`` is the tensor fed to
            ROIAlign (with ``requires_grad`` enabled) and ``output`` is the
            pooled result computed from it.
        """
        output_size = (
            (resolution, resolution) if isinstance(resolution, int) else resolution
        )
        pooler = ROIAlign(output_size, 1.0, 0, aligned=True)

        features = torch.from_numpy(img[None, None, :, :].astype("float32"))

        # One RoI row: batch index 0 followed by the box coordinates.
        roi_row = np.asarray([0] + list(box))[None, :].astype("float32")
        rois = torch.from_numpy(roi_row)

        features = features.to(device=device)
        rois = rois.to(device=device)
        # Enabled only after the device transfer so the returned tensor is the
        # one that actually takes part in the forward pass.
        features.requires_grad = True

        pooled = pooler.forward(features, rois)
        return features, pooled
示例#2
0
    def crop_and_resize(self, boxes: torch.Tensor,
                        mask_size: int) -> torch.Tensor:
        """
        Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
        This can be used to prepare training targets for Mask R-CNN.
        It has less reconstruction error compared to rasterization with polygons.
        However we observe no difference in accuracy,
        but BitMasks requires more memory to store all the masks.

        The i-th box is applied to the i-th mask. The boxes may live on any
        device; they are moved to the device of the masks before use.

        Args:
            boxes (Tensor): Nx4 tensor storing the boxes for each mask
            mask_size (int): the size of the rasterized mask.

        Returns:
            Tensor:
                A bool tensor of shape (N, mask_size, mask_size), where
                N is the number of boxes (and masks) for this image.

        Raises:
            AssertionError: if the number of boxes differs from the number of
                masks.
        """
        assert len(boxes) == len(self), "{} != {}".format(
            len(boxes), len(self))
        device = self.tensor.device

        # Move the boxes to the mask device *before* concatenating: the batch
        # indices are created on `device`, and torch.cat rejects tensors that
        # live on different devices, so transferring afterwards is too late.
        boxes = boxes.to(device=device)

        # Mask i is treated as image i of the batch, hence batch index i.
        batch_inds = torch.arange(len(boxes),
                                  device=device).to(dtype=boxes.dtype)[:, None]
        rois = torch.cat([batch_inds, boxes], dim=1)  # Nx5

        # ROIAlign interpolates, so the masks are converted to float first.
        bit_masks = self.tensor.to(dtype=torch.float32)
        output = (ROIAlign((mask_size, mask_size), 1.0, 0,
                           aligned=True).forward(bit_masks[:, None, :, :],
                                                 rois).squeeze(1))
        # Threshold the interpolated values back to a boolean mask.
        output = output >= 0.5
        return output
示例#3
0
    def _simple_roialign(self, img, box, resolution, aligned=True):
        """
        Apply ROIAlign (scale 1.0, sampling ratio 0) to one image and one box.

        When CUDA is available the same op is also run on the GPU and the
        result is asserted to be close to the CPU result.

        Args:
            img: numpy array; two leading axes of size 1 are added before it is
                converted to a float32 tensor.
            box: iterable of box coordinates; a leading 0 (the batch index) is
                prepended to form the single RoI row.
            resolution: output size; an int is expanded to ``(int, int)``.
            aligned: forwarded to ROIAlign's ``aligned`` argument.

        Returns:
            The CPU output with the leading two axes indexed away
            (``output[0, 0]``).
        """
        output_size = (
            (resolution, resolution) if isinstance(resolution, int) else resolution
        )
        pooler = ROIAlign(output_size, 1.0, 0, aligned=aligned)

        features = torch.from_numpy(img[None, None, :, :].astype("float32"))
        roi_row = np.asarray([0] + list(box))[None, :].astype("float32")
        rois = torch.from_numpy(roi_row)

        cpu_result = pooler.forward(features, rois)
        if torch.cuda.is_available():
            # Cross-check the GPU kernel against the CPU implementation.
            cuda_result = pooler.forward(features.cuda(), rois.cuda()).cpu()
            self.assertTrue(torch.allclose(cpu_result, cuda_result))
        return cpu_result[0, 0]
示例#4
0
    def test_roi_align_rotated_gradient_cuda(self):
        """
        Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU,
        and compare the result with ROIAlign.

        The rotated boxes all use angle 0 and describe the same regions as the
        axis-aligned boxes, so the gradients of the summed outputs with
        respect to the (identical) inputs are expected to match.
        """
        # torch.manual_seed(123)
        dtype = torch.float64
        device = torch.device("cuda")
        pool_h, pool_w = (5, 5)

        roi_align = ROIAlign(output_size=(pool_h, pool_w),
                             spatial_scale=1,
                             sampling_ratio=2).to(device=device)

        roi_align_rotated = ROIAlignRotated(output_size=(pool_h, pool_w),
                                            spatial_scale=1,
                                            sampling_ratio=2).to(device=device)

        x = torch.rand(1,
                       1,
                       10,
                       10,
                       dtype=dtype,
                       device=device,
                       requires_grad=True)
        # A plain x.clone() stays attached to x's graph (grad_fn=CloneBackward)
        # and would not be a leaf with its own .grad. Detach first so that
        # x_rotated is an independent leaf holding the same values.
        x_rotated = x.detach().clone().requires_grad_(True)

        # roi_rotated format is (batch index, x_center, y_center, width, height, angle)
        rois_rotated = torch.tensor(
            [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]],
            dtype=dtype,
            device=device,
        )

        y_rotated = roi_align_rotated(x_rotated, rois_rotated)
        s_rotated = y_rotated.sum()
        s_rotated.backward()

        # roi format is (batch index, x1, y1, x2, y2)
        rois = torch.tensor(
            [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]],
            dtype=dtype,
            device=device)

        y = roi_align(x, rois)
        s = y.sum()
        s.backward()

        assert torch.allclose(
            x.grad, x_rotated.grad
        ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA"
示例#5
0
 def test_empty_batch(self):
     """Check that ROIAlign on an empty batch with no RoIs returns a (0, 3, 7, 7) output."""
     expected_shape = (0, 3, 7, 7)
     # Zero images of 3 channels at 10x10, and zero RoI rows of width 5.
     no_images = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
     no_rois = torch.zeros(0, 5, dtype=torch.float32)
     pooler = ROIAlign((7, 7), 1.0, 0, aligned=True)
     pooled = pooler.forward(no_images, no_rois)
     self.assertTrue(pooled.shape == expected_shape)