def _simple_roialign(self, img, box, resolution, aligned=True):
    """
    RoiAlign with scale 1.0 and 0 sample ratio.
    """
    if isinstance(resolution, int):
        resolution = (resolution, resolution)
    op = ROIAlign(resolution, 1.0, 0, aligned=aligned)
    input = torch.from_numpy(img[None, None, :, :].astype("float32"))

    rois = [0] + list(box)
    rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
    output = op.forward(input, rois)
    if torch.cuda.is_available():
        output_cuda = op.forward(input.cuda(), rois.cuda()).cpu()
        self.assertTrue(torch.allclose(output, output_cuda))
    return output[0, 0]
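# A minimal sketch of how the helper above might be exercised from a test
# method. The 10x10 ramp image, the box coordinates, and the test name are
# illustrative assumptions, not taken from the original test suite; only the
# output shape is checked, since exact values depend on interpolation.
def test_roialign_helper_sketch(self):
    img = np.arange(100, dtype="float32").reshape(10, 10)
    box = [1, 1, 3, 3]  # xyxy, hypothetical
    crop = self._simple_roialign(img, box, resolution=4, aligned=True)
    self.assertEqual(tuple(crop.shape), (4, 4))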
def _simple_roialign_with_grad(self, img, box, resolution, device):
    if isinstance(resolution, int):
        resolution = (resolution, resolution)

    op = ROIAlign(resolution, 1.0, 0, aligned=True)
    input = torch.from_numpy(img[None, None, :, :].astype("float32"))

    rois = [0] + list(box)
    rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32"))
    input = input.to(device=device)
    rois = rois.to(device=device)
    input.requires_grad = True
    output = op.forward(input, rois)
    return input, output
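# A minimal sketch (assuming it sits inside the same TestCase) of how the
# gradient helper above could be used: run a backward pass through the pooled
# output and confirm gradients reach the input image. The image, box, and
# test name are illustrative assumptions.
def test_roialign_grad_sketch(self):
    img = np.random.rand(10, 10).astype("float32")
    box = [0, 0, 5, 5]  # xyxy, hypothetical
    input, output = self._simple_roialign_with_grad(img, box, 7, device="cpu")
    output.sum().backward()
    # Every pooled sample contributes to the scalar loss, so the input
    # gradient should be populated and non-zero somewhere inside the box.
    self.assertIsNotNone(input.grad)
    self.assertGreater(input.grad.abs().sum().item(), 0)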
def test_empty_batch(self):
    input = torch.zeros(0, 3, 10, 10, dtype=torch.float32)
    rois = torch.zeros(0, 5, dtype=torch.float32)
    op = ROIAlign((7, 7), 1.0, 0, aligned=True)
    output = op.forward(input, rois)
    self.assertTrue(output.shape == (0, 3, 7, 7))
def measure_roialign_perf(
    input_shape, roi_shape, output_size, spatial_scale, sampling_ratio=0, aligned=True
):
    """
    Args:
        input_shape: NCHW shape of the input images.
        roi_shape: (B, 5) shape of the boxes. The first column is the index
            into N. The other 4 columns are xyxy.
        output_size (tuple): h, w
        spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of input samples to take for each output
            sample. 0 to take samples densely.
        aligned (bool): if False, use the legacy implementation in Detectron.
            If True, align the results more perfectly.
    """
    assert roi_shape[1] == 5, "ERROR: ROI shape expected to be of form (m, 5)"

    # Prepare inputs.
    n = input_shape[0]
    b = roi_shape[0]
    inputbatch = torch.randn(input_shape, dtype=torch.float, requires_grad=True)

    # Create the RoI tensor of shape (b, 5).
    # Columns 1:5 hold the bounding box coordinates in xyxy format.
    # The (100, 1200) range is chosen based on the COCO max image size.
    bboxes = torch.FloatTensor(roi_shape[0], 4).uniform_(100, 1200)
    # The first column maps each box to an image in the batch. Boxes are
    # ordered by image index: all boxes of the first image come first, then
    # those of the second image, and so on.
    boxToNMapping = torch.tensor(
        np.expand_dims(np.array([i * n // b for i in range(b)]), axis=1),
        dtype=torch.float,
    )
    roi = torch.cat((boxToNMapping, bboxes), dim=1)
    roi.requires_grad = True

    # Define the op and move everything to GPU.
    roi_align = ROIAlign(output_size, spatial_scale, sampling_ratio, aligned)
    roi_align.cuda()
    inputbatch = inputbatch.cuda()
    roi = roi.cuda()

    # Forward pass: 2 warmup iterations, then time ITERATIONS runs.
    roi_align.forward(inputbatch, roi)
    roi_align.forward(inputbatch, roi)
    torch.cuda.synchronize()
    start = time.time()
    for _ in range(ITERATIONS):
        output = roi_align.forward(inputbatch, roi)
    torch.cuda.synchronize()
    end = time.time()
    fwd_time = (end - start) * 1000 / ITERATIONS

    # Backward pass: reduce the output to a scalar so backward() can be
    # called, warm up, then time ITERATIONS runs.
    output_sum = output.sum()
    output_sum.backward(retain_graph=True)
    output_sum.backward(retain_graph=True)
    torch.cuda.synchronize()
    bwd_start = time.time()
    for _ in range(ITERATIONS):
        output_sum.backward(retain_graph=True)
    torch.cuda.synchronize()
    bwd_end = time.time()
    bwd_time = (bwd_end - bwd_start) * 1000 / ITERATIONS

    return fwd_time, bwd_time
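# A minimal usage sketch for the benchmark above. The shapes and scale are
# illustrative values only; ITERATIONS is assumed to be a module-level
# constant, `time` is assumed to be imported, and a CUDA device is assumed
# to be available.
if __name__ == "__main__":
    fwd_ms, bwd_ms = measure_roialign_perf(
        input_shape=(2, 256, 200, 304),
        roi_shape=(512, 5),
        output_size=(7, 7),
        spatial_scale=0.25,
        sampling_ratio=0,
        aligned=True,
    )
    print(f"forward: {fwd_ms:.3f} ms/iter, backward: {bwd_ms:.3f} ms/iter")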