import time
import unittest

import numpy as np
import torch

# Assumed import: an ROIAlign with an (output_size, spatial_scale,
# sampling_ratio, aligned) constructor, as in detectron2.layers.ROIAlign.
from detectron2.layers import ROIAlign


# The test-case class name is illustrative; the helpers below are methods of a
# unittest.TestCase in the original code.
class ROIAlignTest(unittest.TestCase):
    def _simple_roialign(self, img, box, resolution, aligned=True):
        """
        Run ROIAlign with spatial scale 1.0 and sampling ratio 0 on one image
        and one box, checking that CPU and CUDA results agree when CUDA is
        available. Returns the pooled (h, w) output.
        """
        if isinstance(resolution, int):
            resolution = (resolution, resolution)
        op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        output = op.forward(input, rois)
        if torch.cuda.is_available():
            output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
            self.assertTrue(torch.allclose(output, output_cuda))
        return output[0, 0]
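
    # A minimal usage sketch (not from the original source): exercising
    # _simple_roialign on a small ramp image. The box values, the method name,
    # and the expected output shape are illustrative assumptions.
    def test_simple_roialign_shape_sketch(self):
        img = np.arange(100, dtype=np.float32).reshape(10, 10)
        box = [1.0, 1.0, 5.0, 5.0]  # xyxy, in input-pixel coordinates
        out = self._simple_roialign(img, box, resolution=2)
        self.assertEqual(tuple(out.shape), (2, 2))
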
    def _simple_roialign_with_grad(self, img, box, resolution, device):
        """
        Like _simple_roialign, but returns the (requires_grad) input together
        with the full ROIAlign output, for use in gradient checks.
        """
        if isinstance(resolution, int):
            resolution = (resolution, resolution)

        op = ROIAlign(resolution, 1.0, 0, aligned=True)
        input = torch.from_numpy(img[None, None, :, :].astype("float32"))

        rois = [0] + list(box)
        rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
        input = input.to(device=device)
        rois = rois.to(device=device)
        input.requires_grad = True
        output = op.forward(input, rois)
        return input, output
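
    # A hedged sketch (assumption, not from the source): use the helper above
    # to confirm that gradients flow back to the input on CPU.
    def test_simple_roialign_grad_sketch(self):
        img = np.random.rand(10, 10)
        box = [1.0, 1.0, 5.0, 5.0]
        input, output = self._simple_roialign_with_grad(img, box, 7, device="cpu")
        output.sum().backward()
        self.assertEqual(input.grad.shape, input.shape)
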
    def test_empty_batch(self):
        input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
        rois = torch.zeros(0, 5, dtype=torch.float32)
        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
        output = op.forward(input, rois)
        self.assertTrue(output.shape == (0, 3, 7, 7))
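
    # A related sketch (assumption: zero boxes with a non-empty input batch are
    # supported, as in torchvision's roi_align): the pooled output is still
    # empty along the box dimension.
    def test_empty_rois_sketch(self):
        input = torch.rand(2, 3, 10, 10, dtype=torch.float32)
        rois = torch.zeros(0, 5, dtype=torch.float32)
        op = ROIAlign((7, 7), 1.0, 0, aligned=True)
        output = op.forward(input, rois)
        self.assertTrue(output.shape == (0, 3, 7, 7))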


# Number of timed iterations per measurement. The value is an assumption; the
# original snippet uses ITERATIONS without defining it.
ITERATIONS = 100


def measure_roialign_perf(input_shape, roi_shape, output_size, spatial_scale,
                          sampling_ratio=0, aligned=True):
    """
    Measure average forward and backward time of ROIAlign on the GPU.

    Args:
        input_shape (tuple): NCHW shape of the randomly generated input images.
        roi_shape (tuple): (B, 5) shape of the RoI tensor. The first column is
            the index into N; the other 4 columns are xyxy box coordinates.
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of input samples to take for each output
            sample. 0 to take samples densely.
        aligned (bool): if False, use the legacy implementation in Detectron.
            If True, align the results more perfectly.

    Returns:
        (fwd_time, bwd_time): average milliseconds per forward / backward pass.
    """
    assert roi_shape[1] == 5, "ERROR: ROI shape expected to be of form (m,5)"
    
    # Preparing Inputs
    n = input_shape[0]
    b = roi_shape[0]
    inputbatch = torch.randn(input_shape, dtype=torch.float, requires_grad=True)
    # Creating the RoI tensor, shape (b, 5).
    # Columns [:, 1:] contain the bounding-box coordinates in xyxy order.
    # The (100, 1200) range is chosen based on the COCO maximum image size.
    bboxes = torch.FloatTensor(roi_shape[0], 4).uniform_(100, 1200)
    # The first column maps each bounding box to an image in the batch. In
    # practice the boxes are ordered by image index: all boxes for the first
    # image come first, then those for the second image, and so on.
    boxToNMapping = torch.tensor(
        np.expand_dims(np.array([i * n // b for i in range(b)]), axis=1),
        dtype=torch.float)
    roi = torch.cat((boxToNMapping, bboxes), dim=1)
    roi.requires_grad = True

    # Defining Op
    roi_align = ROIAlign(output_size, spatial_scale, sampling_ratio, aligned)
    
    roi_align.cuda()    
    inputbatch = inputbatch.cuda()
    roi = roi.cuda()

    # Forward Pass
    # warmup - 2 iters
    roi_align.forward(inputbatch, roi)
    roi_align.forward(inputbatch, roi)
    
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(ITERATIONS):
        output = roi_align.forward(inputbatch, roi)
    torch.cuda.synchronize()
    end = time.time()       
    fwd_time = (end - start) * 1000 / ITERATIONS

    # Backward Pass
    # Reduce the output to a scalar so backward() can be called without an
    # explicit gradient argument.
    output_sum = output.sum()
    # warmup
    output_sum.backward(retain_graph=True)
    output_sum.backward(retain_graph=True)

    torch.cuda.synchronize()
    bwd_start = time.time()
    for _ in range(ITERATIONS):
        output_sum.backward(retain_graph=True)
    torch.cuda.synchronize()
    bwd_end = time.time()       
    bwd_time = (bwd_end - bwd_start) * 1000 / ITERATIONS
    
    return fwd_time, bwd_time
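

# A hedged usage sketch (the shapes, RoI count, and printed format are
# assumptions, not from the original source): benchmark a mid-sized feature
# map with 512 RoIs pooled to 7x7 at 1/16 scale. Requires a CUDA device,
# since measure_roialign_perf moves everything to the GPU.
if __name__ == "__main__":
    fwd_ms, bwd_ms = measure_roialign_perf(
        input_shape=(2, 256, 100, 152),  # NCHW feature map (assumed sizes)
        roi_shape=(512, 5),              # 512 boxes: (batch_idx, x1, y1, x2, y2)
        output_size=(7, 7),
        spatial_scale=1.0 / 16,
        sampling_ratio=0,
        aligned=True,
    )
    print("ROIAlign forward: %.3f ms/iter, backward: %.3f ms/iter"
          % (fwd_ms, bwd_ms))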