Example #1
 def postprocess(self, outputs, images, image_ids, to_cpu):
     frames = []
     for instances, image, image_id in zip(outputs, images, image_ids):
         height, width = image.shape[:2]
         instances = detector_postprocess(instances, height, width)
         type_valid = [
             self.model_meta.thing_classes[pred_class] in TYPE_MAPPING
             for pred_class in instances.pred_classes]
         instances = instances[type_valid]
         instances.pred_classes = torch.as_tensor([
             TYPE_MAPPING[self.model_meta.thing_classes[pred_class]]
             for pred_class in instances.pred_classes])
         if len(instances) > 0:
             nms_mapping = torch.as_tensor([
                 NMS_MAPPING[pred_class.item()]
                 for pred_class in instances.pred_classes],
                 dtype=torch.int, device=self.device)
             nms_types = nms_mapping[:, 0]
             nms_scores = instances.scores + nms_mapping[:, 1]
             keep_indices = batched_nms(
                 instances.pred_boxes.tensor, nms_scores, nms_types,
                 self.nms_threshold)
             instances = instances[keep_indices]
         features = instances.roi_features.mean(dim=(2, 3))
         features = features / features.norm(dim=1, keepdim=True)
         instances.roi_features = features
         if to_cpu:
             instances = instances.to('cpu')
         frame = Frame(image_id, image, instances)
         frames.append(frame)
     return frames
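
The NMS_MAPPING lookup above remaps detector classes into suppression groups and adds a per-group score bias before batched_nms. A minimal sketch of the same idea, assuming torchvision's batched_nms; the boxes, groups, and offsets here are made up:

import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
# Two detector classes share suppression group 0; the second row gets a
# score bonus so it outranks the first inside its group.
group_and_offset = torch.tensor([[0, 0.0], [0, 0.2], [1, 0.0]])
keep = batched_nms(boxes, scores + group_and_offset[:, 1],
                   group_and_offset[:, 0].long(), 0.5)
print(keep)  # tensor([1, 2]): box 0 is suppressed by its group-mate box 1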
Example #2
def ml_nms(boxlist,
           nms_thresh,
           max_proposals=-1,
           score_field="scores",
           label_field="labels"):
    """
    Performs non-maximum suppression on a boxlist, with scores specified
    in a boxlist field via score_field.
    
    Args:
        boxlist (detectron2.structures.Boxes): 
        nms_thresh (float): 
        max_proposals (int): if > 0, then only the top max_proposals are kept
            after non-maximum suppression
        score_field (str): 
    """
    if nms_thresh <= 0:
        return boxlist
    boxes = boxlist.pred_boxes.tensor
    scores = boxlist.scores
    labels = boxlist.pred_classes
    keep = batched_nms(boxes, scores, labels, nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]
    boxlist = boxlist[keep]
    return boxlist
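
A hedged usage sketch for ml_nms above, assuming detectron2 is available; the toy Instances carries only the fields the function reads:

import torch
from detectron2.structures import Boxes, Instances

inst = Instances((100, 100))  # (height, width)
inst.pred_boxes = Boxes(torch.tensor([[0., 0., 50., 50.],
                                      [5., 5., 55., 55.],
                                      [60., 60., 90., 90.]]))
inst.scores = torch.tensor([0.9, 0.8, 0.7])
inst.pred_classes = torch.tensor([0, 0, 1])

kept = ml_nms(inst, nms_thresh=0.5)
print(len(kept))  # 2: the two overlapping class-0 boxes collapse to one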
Example #3
    def inference_single_image(self, anchors, box_cls, box_delta, image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors in that feature level.
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors):
            # (HxWxAxK,)
            box_cls_i = box_cls_i.flatten().sigmoid_()

            # Keep top k top scoring indices only.
            num_topk = min(self.topk_candidates, box_reg_i.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs]
            anchors_i = anchors_i[anchor_idxs]
            # predict boxes
            predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold)
        keep = keep[: self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        return result
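
The flatten-then-divide arithmetic above (anchor_idxs = topk_idxs // K, classes_idxs = topk_idxs % K) recovers row and column from a flattened (num_anchors, K) score map. A tiny sketch with made-up sizes:

import torch

K = 3                                   # number of classes
scores = torch.rand(4, K)               # 4 anchors x K classes
flat = scores.flatten()                 # row-major: index = anchor * K + class
topk_idxs = flat.topk(2).indices
anchor_idxs = topk_idxs // K            # which anchor (row)
class_idxs = topk_idxs % K              # which class (column)
assert torch.equal(flat[topk_idxs], scores[anchor_idxs, class_idxs])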
Example #4
    def _get_class_predictions(self, boxes, scores, image_shape):

        num_bbox_reg_classes = boxes.shape[1] // 4

        # Convert to Boxes to use the `clip` function ...
        boxes = Boxes(boxes.reshape(-1, 4))
        boxes.clip(image_shape)
        boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

        # Filter results based on detection scores
        filter_mask = scores > self.class_score_thresh_test

        # R' x 2. First column contains indices of the R predictions;
        # Second column contains indices of classes.
        class_inds = filter_mask.nonzero()
        if num_bbox_reg_classes == 1:
            boxes = boxes[class_inds[:, 0], 0]
        else:
            boxes = boxes[filter_mask]
        scores = scores[filter_mask]

        # Apply per-class NMS
        keep_class = batched_nms(boxes, scores, class_inds[:, 1],
                                 self.class_nms_thresh_test)
        if self.topk_per_image_test >= 0:
            keep_class = keep_class[:self.topk_per_image_test]

        boxes, scores, class_inds = boxes[keep_class], scores[
            keep_class], class_inds[keep_class]

        return boxes, scores, class_inds
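
The filter_mask.nonzero() call above is what produces the R' x 2 index pairs referenced in the comment; a two-line illustration:

import torch

scores = torch.tensor([[0.9, 0.1],
                       [0.2, 0.7]])
mask = scores > 0.5       # R x K boolean mask
print(mask.nonzero())     # tensor([[0, 0], [1, 1]]): (prediction idx, class idx)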
Example #5
 def test_batched_nms_rotated_0_degree_cuda(self):
     N = 2000
     num_classes = 50
     boxes, scores = self._create_tensors(N)
     idxs = torch.randint(0, num_classes, (N, ))
     rotated_boxes = torch.zeros(N, 5)
     rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
     rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
     rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
     rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
     err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
     for iou in [0.2, 0.5, 0.8]:
         backup = boxes.clone()
         keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou)
         self.assertTrue(torch.allclose(boxes, backup),
                         "boxes modified by batched_nms")
         backup = rotated_boxes.clone()
         keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(),
                                    idxs, iou)
         self.assertTrue(
             torch.allclose(rotated_boxes, backup),
             "rotated_boxes modified by batched_nms_rotated",
         )
         self.assertLessEqual(nms_edit_distance(keep, keep_ref), 2,
                              err_msg.format(iou))
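
The four assignments above convert XYXY boxes to the rotated (cx, cy, w, h, angle=0) layout that batched_nms_rotated expects. As a standalone helper (a sketch mirroring the test's arithmetic):

import torch

def xyxy_to_rotated(boxes: torch.Tensor) -> torch.Tensor:
    """Convert N x 4 XYXY boxes to N x 5 (cx, cy, w, h, angle=0) boxes."""
    rotated = boxes.new_zeros(boxes.shape[0], 5)
    rotated[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0  # center x
    rotated[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0  # center y
    rotated[:, 2] = boxes[:, 2] - boxes[:, 0]          # width
    rotated[:, 3] = boxes[:, 3] - boxes[:, 1]          # height
    return rotated                                     # angle column stays 0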
Example #6
    def inference_single_image(
        self,
        anchors: List[Boxes],
        box_cls: List[Tensor],
        box_delta: List[Tensor],
        image_size: Tuple[int, int],
    ):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors in that feature level.
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        pred = self._decode_multi_level_predictions(
            anchors,
            box_cls,
            box_delta,
            self.test_score_thresh,
            self.test_topk_candidates,
            image_size,
        )
        keep = batched_nms(  # per-class NMS
            pred.pred_boxes.tensor, pred.scores, pred.pred_classes,
            self.test_nms_thresh)
        return pred[keep[:self.max_detections_per_image]]
Example #7
def ml_nms(boxlist,
           nms_thresh,
           max_proposals=-1,
           score_field="scores",
           label_field="labels"):
    """
    Performs non-maximum suppression on a boxlist, with scores specified
    in a boxlist field via score_field.
    Arguments:
        boxlist(BoxList)
        nms_thresh (float)
        max_proposals (int): if > 0, then only the top max_proposals are kept
            after non-maximum suppression
        score_field (str)
    """
    if nms_thresh <= 0:
        return boxlist
    if boxlist.has('pred_boxes'):
        boxes = boxlist.pred_boxes.tensor
        labels = boxlist.pred_classes
    else:
        boxes = boxlist.proposal_boxes.tensor
        labels = boxlist.proposal_boxes.tensor.new_zeros(
            len(boxlist.proposal_boxes.tensor))
    scores = boxlist.scores

    keep = batched_nms(boxes, scores, labels, nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]
    boxlist = boxlist[keep]
    return boxlist
Example #8
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     light=None):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    if isinstance(light, torch.Tensor):
        light = Boxes(light.reshape(-1, 4))
        # light.clip(image_shape)
        light = light.tensor.view(-1, num_bbox_reg_classes, 4)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
        if isinstance(light, torch.Tensor):
            light = light[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
        if isinstance(light, torch.Tensor):
            light = light[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    if isinstance(light, torch.Tensor):
        light = light[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    if isinstance(light, torch.Tensor):
        result.pred_light = Boxes(light)
    return result, filter_inds[:, 0]
Example #9
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    Tscores = scores  # keep the full per-class score matrix for later
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    # Class-agnostic NMS: one shared class id makes all boxes compete.
    uniclass = torch.zeros(len(filter_inds), device=boxes.device)
    # keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)  # per-class variant
    keep = batched_nms(boxes, scores, uniclass, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    # Report the full per-class score rows rather than the single kept score.
    result.scores = Tscores[filter_inds[:, 0]]
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
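
Passing a zero tensor as the class ids, as uniclass does above, turns batched_nms into class-agnostic NMS; a minimal illustration with made-up boxes:

import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.]])
scores = torch.tensor([0.9, 0.8])
classes = torch.tensor([0, 1])  # two different classes

print(batched_nms(boxes, scores, classes, 0.5))
# tensor([0, 1]): per-class NMS never suppresses across classes
print(batched_nms(boxes, scores, torch.zeros_like(classes), 0.5))
# tensor([0]): class-agnostic NMS suppresses the overlapping box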
Example #10
def fast_rcnn_inference_single_image(boxes, scores, attr_scores, image_shape,
                                     score_thresh, nms_thresh, topk_per_image):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # Make sure boxes and scores don't contain infinite or Nan
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) \
                                                  & torch.isfinite(attr_scores).all(dim=1)

    # Get scores from finite boxes and scores
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]
        attr_scores = attr_scores[valid_mask]

    scores = scores[:, :-1]  # drop the background-class score column
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # If using Attributes class:
    # attributes = Attributes(attributes.reshape(-1, 295))
    # attributes = attributes.tensor.view(-1, num_bbox_reg_classes, 295)

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()

    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, attr_scores, filter_inds, = boxes[keep], scores[
        keep], attr_scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.attr_scores = attr_scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #11
    def inference_single_image(self, conf_pred_per_image, loc_pred_per_image,
                               default_boxes, image_size):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Args:
            conf_pred_per_image (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size [Hi x Wi x D, C].
            loc_pred_per_image (list[Tensor]): same shape as 'conf_pred_per_image' except
                that C becomes 4.
            default_boxes (list['Boxes']):  a list of 'Boxes' elements.
                The Boxes contains default boxes of one image on the specific feature level.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        # predict confidence
        conf_pred = torch.cat(conf_pred_per_image, dim=0)  # [R, C]
        conf_pred = conf_pred.softmax(dim=1)

        # predict boxes
        loc_pred = torch.cat(loc_pred_per_image, dim=0)  # [R, 4]
        default_boxes = Boxes.cat(default_boxes)  # [R, 4]
        boxes_pred = self.box2box_transform.apply_deltas(
            loc_pred, default_boxes.tensor)

        num_boxes, num_classes = conf_pred.shape
        boxes_pred = boxes_pred.view(num_boxes, 1,
                                     4).expand(num_boxes, num_classes,
                                               4)  # [R, C, 4]
        labels = torch.arange(num_classes, device=self.device)  # [0, ..., C-1]
        labels = labels.view(1, num_classes).expand_as(conf_pred)  # [R, C]

        # remove predictions with the background label
        boxes_pred = boxes_pred[:, :-1]
        conf_pred = conf_pred[:, :-1]
        labels = labels[:, :-1]

        # batch everything, by making every class prediction be a separate instance
        boxes_pred = boxes_pred.reshape(-1, 4)
        conf_pred = conf_pred.reshape(-1)
        labels = labels.reshape(-1)

        # remove low scoring boxes
        indices = torch.nonzero(conf_pred > self.score_threshold).squeeze(1)
        boxes_pred, conf_pred, labels = boxes_pred[indices], conf_pred[
            indices], labels[indices]

        keep = batched_nms(boxes_pred, conf_pred, labels, self.nms_threshold)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_pred[keep])
        result.scores = conf_pred[keep]
        result.pred_classes = labels[keep]
        return result
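
The expand/reshape steps above are the standard SSD trick of turning R boxes x C classes into R*C independent NMS candidates; a compact sketch with made-up sizes:

import torch

R, C = 2, 3
boxes = torch.rand(R, 4)
conf = torch.rand(R, C)

boxes_rc = boxes.view(R, 1, 4).expand(R, C, 4).reshape(-1, 4)  # (R*C, 4)
labels = torch.arange(C).view(1, C).expand(R, C).reshape(-1)   # (R*C,)
conf_flat = conf.reshape(-1)                                   # (R*C,)
# Row i of boxes_rc pairs box i // C with class i % C; each pair is an
# independent candidate for batched_nms.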
Example #12
def fast_rcnn_inference_single_image(
    boxes,
    scores,
    image_shape: Tuple[int, int],
    score_thresh: float,
    nms_thresh: float,
    topk_per_image: int,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)

    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    if len(category_disabler.global_cat_mask) > 0:
        print('<<<<<< category disabler activated >>>>>>')
        scores *= torch.tensor(category_disabler.global_cat_mask).cuda()
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #13
    def inference_single_image(self, locations, box_cls, box_reg, image_size):
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, locs_i in zip(box_cls, box_reg, locations):
            # (HxW, C)
            box_cls_i = box_cls_i.sigmoid_()
            keep_idxs = box_cls_i > self.pre_nms_thresh

            box_cls_i = box_cls_i[keep_idxs]
            keep_idxs_nonzero_i = keep_idxs.nonzero()

            box_loc_i = keep_idxs_nonzero_i[:, 0]
            class_i = keep_idxs_nonzero_i[:, 1]

            box_reg_i = box_reg_i[box_loc_i]
            locs_i = locs_i[box_loc_i]

            per_pre_nms_top_n = keep_idxs.sum().clamp(max=self.pre_nms_top_n)
            if keep_idxs.sum().item() > per_pre_nms_top_n.item():
                box_cls_i, topk_idxs = box_cls_i.topk(per_pre_nms_top_n,
                                                      sorted=False)

                class_i = class_i[topk_idxs]
                box_reg_i = box_reg_i[topk_idxs]
                locs_i = locs_i[topk_idxs]

            # predict boxes from per-location (l, t, r, b) distances
            predicted_boxes = torch.stack([
                locs_i[:, 0] - box_reg_i[:, 0],
                locs_i[:, 1] - box_reg_i[:, 1],
                locs_i[:, 0] + box_reg_i[:, 2],
                locs_i[:, 1] + box_reg_i[:, 3],
            ], dim=1)
            box_cls_i = torch.sqrt(box_cls_i)

            boxes_all.append(predicted_boxes)
            scores_all.append(box_cls_i)
            class_idxs_all.append(class_i)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]

        # Apply per-class nms for each image
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_thresh)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]

        return result
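
The torch.stack call above decodes FCOS-style (l, t, r, b) distances into XYXY boxes; the same decoding as a standalone helper (a sketch):

import torch

def decode_ltrb(locations: torch.Tensor, ltrb: torch.Tensor) -> torch.Tensor:
    """locations: N x 2 center points (x, y); ltrb: N x 4 side distances."""
    return torch.stack([
        locations[:, 0] - ltrb[:, 0],  # x1 = x - left
        locations[:, 1] - ltrb[:, 1],  # y1 = y - top
        locations[:, 0] + ltrb[:, 2],  # x2 = x + right
        locations[:, 1] + ltrb[:, 3],  # y2 = y + bottom
    ], dim=1)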
Example #14
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # 1. Filter results based on detection scores. It can make NMS more efficient
    #    by filtering out low-confidence detections.
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]


    # 2. Apply NMS for each class independently.
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    # DIOU NMS commented for now
    # keep = batched_diou_nms(boxes, scores, filter_inds[:, 1], nms_thresh) \
    #        if global_cfg.MODEL.ROI_BOX_HEAD.NMS_TYPE == "diou_nms" \
    #        else \
    #        batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #15
def fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image, fc_box_features=None,
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    probs = scores.clone().detach()
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]

    # Compact all fc layers into a single tensor to work nicely with Instance class for now
    if fc_box_features is not None:
        fc_box_features = [fc_layer_box_features[filter_inds[:, 0]] for fc_layer_box_features in fc_box_features]
        # will need to know number of layers and dimensions to unpack
        fc_box_features = torch.cat(fc_box_features, dim=1)
        result.fc_box_features = fc_box_features

    probs = probs[filter_inds[:, 0]]
    result.probs = probs

    return result, filter_inds[:, 0]
Example #16
def softmax_fast_rcnn_inference_single_image(
    boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image
):
    """
    Single-image inference. Return bounding-box detection results by thresholding
    on scores and applying non-maximum suppression (NMS).

    Args:
        Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes
        per image.

    Returns:
        Same as `fast_rcnn_inference`, but for only one image.
    """
    # print("boxes:", boxes.shape, boxes[0, :4])
    # print("scores:", scores.shape, torch.sum(scores[0,:]))
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1)
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    # remove the background column from scores
    scores = scores[:, :-1]

    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4
    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    old_scores = scores[filter_inds[:, 0], :]
    scores = scores[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, old_scores, scores, filter_inds = boxes[keep], old_scores[keep], scores[keep], filter_inds[keep]
    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = filter_inds[:, 1]
    result.softmax_scores = old_scores
    return result, filter_inds[:, 0]
Example #17
    def get_pgt_mist(self,
                     prev_pred_boxes,
                     prev_pred_scores,
                     proposals,
                     top_pro=0.15,
                     suffix=""):
        pgt_scores, pgt_boxes, pgt_classes, pgt_weights = self.get_pgt_top_k(
            prev_pred_boxes,
            prev_pred_scores,
            proposals,
            top_k=top_pro,
            # thres=0.05,
            thres=0.0,
            need_instance=False,
            need_weight=True,
            suffix=suffix,
        )

        # NMS
        pgt_idxs = [torch.zeros_like(pgt_class) for pgt_class in pgt_classes]
        keeps = [
            batched_nms(pgt_box, pgt_score, pgt_class,
                        0.2) for pgt_box, pgt_score, pgt_class in zip(
                            pgt_boxes, pgt_scores, pgt_idxs)
        ]
        pgt_scores = [
            pgt_score[keep] for pgt_score, keep in zip(pgt_scores, keeps)
        ]
        pgt_boxes = [pgt_box[keep] for pgt_box, keep in zip(pgt_boxes, keeps)]
        pgt_classes = [
            pgt_class[keep] for pgt_class, keep in zip(pgt_classes, keeps)
        ]
        pgt_weights = [
            pgt_weight[keep] for pgt_weight, keep in zip(pgt_weights, keeps)
        ]

        pgt_boxes = [Boxes(pgt_box) for pgt_box in pgt_boxes]

        targets = [
            Instances(
                proposals[i].image_size,
                gt_boxes=pgt_box,
                gt_classes=pgt_class,
                gt_scores=pgt_score,
                gt_weights=pgt_weight,
            ) for i, (pgt_box, pgt_class, pgt_score, pgt_weight) in enumerate(
                zip(pgt_boxes, pgt_classes, pgt_scores, pgt_weights))
        ]

        self._vis_pgt(targets, "pgt_mist", suffix)

        return targets
Example #18
def measure_nms_perf(boxes_shape, scores_shape, levels_shape, threshold):
    """
    Args:
    """
    assert len(boxes_shape) == 2
    assert len(scores_shape) == 1
    assert len(levels_shape) == 1
    assert boxes_shape[0] == scores_shape[0]
    assert boxes_shape[0] == levels_shape[0]

    # Preparing Inputs
    # (0,1100) range chosen based on boxes observed in runs of detectron.
    boxes = torch.FloatTensor(boxes_shape[0], boxes_shape[1]).uniform_(0, 1100)

    # creating a random distribution between [-0.8, 0.8)
    scores_per_img = 1.6 * torch.rand(scores_shape, dtype=torch.float) - 0.8

    if levels_shape[0] > 8000:
        # Level id increases every 2000 entries: the first 2000 entries are 0,
        # the next 2000 are 1, and so on (max value 4 for N <= 10000).
        lvl = torch.tensor(np.array([i // 2000
                                     for i in range(levels_shape[0])]),
                           dtype=torch.long)
    else:
        # overdoing simple things
        lower_bound = levels_shape[0] // 5
        upper_bound = levels_shape[0] // 4
        np_lvl = []
        count = 0
        for lvl in range(4):
            tmp_shape = random.randint(lower_bound, upper_bound)
            tmp = np.full(tmp_shape, lvl, dtype=int)
            np_lvl.append(tmp)
            count += len(tmp)

        # Assign level 4 to whatever entries remain.
        np_lvl.append(np.full(levels_shape[0] - count, 4, dtype=int))
        lvl = torch.tensor(np.concatenate(np_lvl), dtype=torch.long)

    assert lvl.shape == levels_shape, "ensure lvl shape is correct"

    boxes = boxes.cuda()
    scores_per_img = scores_per_img.cuda()
    lvl = lvl.cuda()

    # Forward Pass
    # warmup - 2 iters
    batched_nms(boxes, scores_per_img, lvl, threshold)
    batched_nms(boxes, scores_per_img, lvl, threshold)

    torch.cuda.synchronize()
    start = time.time()
    for _ in range(ITERATIONS):
        batched_nms(boxes, scores_per_img, lvl, threshold)
    torch.cuda.synchronize()
    end = time.time()
    fwd_time = (end - start) * 1000 / ITERATIONS

    return fwd_time
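
A hedged invocation of the benchmark above, assuming a CUDA device and an ITERATIONS global are in scope; the shapes are arbitrary:

# Hypothetical call: 10k boxes over 5 levels at IoU threshold 0.5.
fwd_ms = measure_nms_perf((10000, 4), (10000,), (10000,), 0.5)
print(f"batched_nms forward: {fwd_ms:.3f} ms")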
Example #19
    def test_nms_scriptability(self):
        N = 2000
        num_classes = 50
        boxes, scores = self._create_tensors(N)
        idxs = torch.randint(0, num_classes, (N,))
        scripted_batched_nms = torch.jit.script(batched_nms)
        err_msg = "NMS is incompatible with jit-scripted NMS for IoU={}"

        for iou in [0.2, 0.5, 0.8]:
            keep_ref = batched_nms(boxes, scores, idxs, iou)
            backup = boxes.clone()
            scripted_keep = scripted_batched_nms(boxes, scores, idxs, iou)
            assert torch.allclose(boxes, backup), "boxes modified by jit-scripted batched_nms"
            self.assertTrue(torch.equal(keep_ref, scripted_keep), err_msg.format(iou))
Example #20
def fast_rcnn_inference_single_image(image_shape,
                                     boxes,
                                     scores,
                                     classes=None,
                                     score_thresh=0.05,
                                     nms_thresh=0.5,
                                     topk_per_image=1000):
    valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(
        dim=1)
    replace_cls = classes is not None
    if not valid_mask.all():
        boxes = boxes[valid_mask]
        scores = scores[valid_mask]

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Filter results based on detection scores
    filter_mask = scores > score_thresh  # R x K
    # R' x 2. First column contains indices of the R predictions;
    # Second column contains indices of classes.
    filter_inds = filter_mask.nonzero()
    if num_bbox_reg_classes == 1:
        boxes = boxes[filter_inds[:, 0], 0]
    else:
        boxes = boxes[filter_mask]
    scores = scores[filter_mask]

    if replace_cls:
        classes = classes[filter_mask]

    # Apply per-class NMS
    keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    if replace_cls:
        result.pred_classes = classes[keep]
    else:
        result.pred_classes = filter_inds[:, 1]
    return result, filter_inds[:, 0]
Example #21
    def inference(self, _box_cls, _box_pred, image_sizes):
        """
        Arguments:
            box_cls (Tensor): tensor of shape (batch_size, K, H, W).
            box_pred (Tensor): tensor of shape (batch_size, 4, H, W).
            image_sizes (List[torch.Size]): the input image sizes

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        box_cls = _box_cls.flatten(2)
        box_pred = _box_pred.flatten(2)
        
        assert len(box_cls) == len(image_sizes)
        results = []
        
        scores = torch.sigmoid(box_cls)

        for i, (scores_per_image, box_pred_per_image, image_size) in enumerate(zip(
                scores, box_pred, image_sizes
        )):
            result = Instances(image_size)
            
            # refer to https://github.com/FateScript/CenterNet-better
            topk_score_cat, topk_inds_cat = torch.topk(scores_per_image, k=self.num_boxes)
            topk_score, topk_inds = torch.topk(topk_score_cat.reshape(-1), k=self.num_boxes)
            topk_clses = topk_inds // self.num_boxes
            scores_per_image = topk_score
            labels_per_image = topk_clses
            
            topk_box_cat = box_pred_per_image[:, topk_inds_cat.reshape(-1)]
            topk_box = topk_box_cat[:, topk_inds]
            box_pred_per_image = topk_box.transpose(0, 1)
            
            if self.nms:
                keep = batched_nms(box_pred_per_image, 
                                   scores_per_image, 
                                   labels_per_image, 
                                   0.5)
                box_pred_per_image = box_pred_per_image[keep]
                scores_per_image = scores_per_image[keep]
                labels_per_image = labels_per_image[keep]

            result.pred_boxes = Boxes(box_pred_per_image)
            result.scores = scores_per_image
            result.pred_classes = labels_per_image
            results.append(result)

        return results
Example #22
    def inference_single_image(self, logits, init_boxes, refine_boxes,
                               image_size):
        boxes_all = []
        init_boxes_all = []
        class_idxs_all = []
        scores_all = []
        for logit, init_box, refine_box in zip(logits, init_boxes,
                                               refine_boxes):
            scores, cls = logit.sigmoid().max(0)
            cls = cls.view(-1)
            scores = scores.view(-1)
            init_box = init_box.view(4, -1).permute(1, 0)
            refine_box = refine_box.view(4, -1).permute(1, 0)

            predicted_prob, topk_idxs = scores.sort(descending=True)
            num_topk = min(self.topk_candidates, cls.size(0))
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]
            init_box_topk = init_box[topk_idxs]
            refine_box_topk = refine_box[topk_idxs]
            cls_topk = cls[topk_idxs]
            score_topk = scores[topk_idxs]

            boxes_all.append(refine_box_topk)
            init_boxes_all.append(init_box_topk)
            class_idxs_all.append(cls_topk)
            scores_all.append(score_topk)

        boxes_all, scores_all, class_idxs_all, init_boxes_all = [
            cat(x)
            for x in [boxes_all, scores_all, class_idxs_all, init_boxes_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_threshold)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        result.init_boxes = init_boxes_all[keep]
        return result
Example #23
    def postprocess_single_image(self, box_cls, box_delta):
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta,
                                                   self.anchors):
            box_cls_i = box_cls_i.flatten()  # (HxWxAxK,)

            # Keep top k top scoring indices only.
            num_topk = min(self.topk_candidates, box_reg_i.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, topk_idxs = box_cls_i.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[:num_topk]

            # filter out the proposals with low confidence score
            keep_idxs = predicted_prob > self.score_threshold
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = topk_idxs[keep_idxs]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs]
            anchors_i = anchors_i[anchor_idxs]
            predicted_boxes = self.box2box_transform.apply_deltas(
                box_reg_i, anchors_i)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.nms_threshold)
        keep = keep[:self.max_detections_per_image]

        result = Instances(self.image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        return result
Example #24
 def inference_single_image(
     self,
     anchors: List[Boxes],
     box_cls: List[torch.Tensor],
     box_delta: List[torch.Tensor],
     image_size: Tuple[int, int],
 ):
     """
     Identical to :meth:`RetinaNet.inference_single_image.
     """
     pred = self._decode_multi_level_predictions(
         anchors,
         box_cls,
         box_delta,
         self.test_score_thresh,
         self.test_topk_candidates,
         image_size,
     )
     keep = batched_nms(pred.pred_boxes.tensor, pred.scores,
                        pred.pred_classes, self.test_nms_thresh)
     return pred[keep[:self.max_detections_per_image]]
Example #25
    def select_over_all_levels(self, instances, image_sizes):
        results = []
        for instance in instances:
            # multiclass nms
            keep = batched_nms(instance.proposal_boxes.tensor,
                               instance.objectness_logits,
                               instance.labels.float(), self.nms_thresh)
            instance = instance[keep]
            cls_scores = instance.objectness_logits
            number_of_detections = len(cls_scores)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1
                )
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                instance = instance[keep]
            instance.remove("labels")
            results.append(instance)
        return results
Example #26
def general_standard_nms_postprocessing(input_im,
                                        outputs,
                                        nms_threshold=0.5,
                                        max_detections_per_image=100):
    """

    Args:
        input_im (list): an input im list generated from dataset handler.
        outputs (list): output list form model specific inference function
        nms_threshold (float): non-maximum suppression threshold
        max_detections_per_image (int): maximum allowed number of detections per image.

    Returns:
        result (Instances): final results after nms

    """
    predicted_boxes, predicted_boxes_covariance, predicted_prob, classes_idxs, predicted_prob_vectors = outputs

    # Perform nms
    keep = batched_nms(predicted_boxes, predicted_prob, classes_idxs,
                       nms_threshold)
    keep = keep[:max_detections_per_image]

    # Keep highest scoring results
    result = Instances(
        (input_im[0]['image'].shape[1], input_im[0]['image'].shape[2]))
    result.pred_boxes = Boxes(predicted_boxes[keep])
    result.scores = predicted_prob[keep]
    result.pred_classes = classes_idxs[keep]
    result.pred_cls_probs = predicted_prob_vectors[keep]

    # Handle case where there is no covariance matrix such as classical
    # inference.
    if isinstance(predicted_boxes_covariance, torch.Tensor):
        result.pred_boxes_covariance = predicted_boxes_covariance[keep]
    else:
        result.pred_boxes_covariance = torch.zeros(
            predicted_boxes[keep].shape + (4, ),
            device=predicted_boxes.device)
    return result
Example #27
 def test_batched_nms_rotated_0_degree_cpu(self, device="cpu"):
     N = 2000
     num_classes = 50
     boxes, scores = self._create_tensors(N, device=device)
     idxs = torch.randint(0, num_classes, (N,))
     rotated_boxes = torch.zeros(N, 5, device=device)
     rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
     rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
     rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
     rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
     err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
     for iou in [0.2, 0.5, 0.8]:
         backup = boxes.clone()
         keep_ref = batched_nms(boxes, scores, idxs, iou)
         assert torch.allclose(boxes, backup), "boxes modified by batched_nms"
         backup = rotated_boxes.clone()
         keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
         assert torch.allclose(
             rotated_boxes, backup
         ), "rotated_boxes modified by batched_nms_rotated"
         # Occasionally the gap can be large if there are many IOU on the threshold boundary
         self.assertLessEqual(nms_edit_distance(keep, keep_ref), 5, err_msg.format(iou))
Example #28
    def postprocess(self, instances, batched_inputs, image_sizes):
        """
            Rescale the output instances to the target size.
        """
        # note: private function; subject to changes
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                instances, batched_inputs, image_sizes):
            boxes = results_per_image.pred_boxes.tensor
            scores = results_per_image.scores
            class_idxs = results_per_image.pred_classes

            # Apply per-class nms for each image
            keep = batched_nms(boxes, scores, class_idxs, self.nms_thresh)
            keep = keep[:self.max_detections_per_image]
            results_per_image = results_per_image[keep]

            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})

        return processed_results
Example #29
 def test_batched_nms_rotated_0_degree_cpu(self):
     # torch.manual_seed(0)
     N = 2000
     num_classes = 50
     boxes, scores = self._create_tensors(N)
     idxs = torch.randint(0, num_classes, (N, ))
     rotated_boxes = torch.zeros(N, 5)
     rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0
     rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0
     rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
     rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
     err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}"
     for iou in [0.2, 0.5, 0.8]:
         backup = boxes.clone()
         keep_ref = batched_nms(boxes, scores, idxs, iou)
         assert torch.allclose(boxes,
                               backup), "boxes modified by batched_nms"
         backup = rotated_boxes.clone()
         keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou)
         assert torch.allclose(
             rotated_boxes,
             backup), "rotated_boxes modified by batched_nms_rotated"
         assert torch.equal(keep, keep_ref), err_msg.format(iou)
Example #30
def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image):
    """
    Merge detection results from different branches of TridentNet.
    Return detection results by applying non-maximum suppression (NMS) on bounding boxes,
    keeping the unsuppressed boxes and any other instance fields (e.g. masks).

    Args:
        instances (list[Instances]): A list of N * num_branch instances that store detection
            results. Contain N images and each image has num_branch instances.
        num_branch (int): Number of branches used for merging detection results for each image.
        nms_thresh (float):  The threshold to use for box non-maximum suppression. Value in [0, 1].
        topk_per_image (int): The number of top scoring detections to return. Set < 0 to return
            all detections.

    Returns:
        results: (list[Instances]): A list of N instances, one for each image in the batch,
            that stores the topk most confidence detections after merging results from multiple
            branches.
    """
    if num_branch == 1:
        return instances

    batch_size = len(instances) // num_branch
    results = []
    for i in range(batch_size):
        instance = Instances.cat(
            [instances[i + batch_size * j] for j in range(num_branch)])

        # Apply per-class NMS
        keep = batched_nms(instance.pred_boxes.tensor, instance.scores,
                           instance.pred_classes, nms_thresh)
        keep = keep[:topk_per_image]
        result = instance[keep]

        results.append(result)

    return results
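
A hedged usage sketch for merge_branch_instances, assuming detectron2 Instances with the fields read above; the boxes and scores are illustrative:

import torch
from detectron2.structures import Boxes, Instances

def make_branch(shift):
    inst = Instances((100, 100))
    inst.pred_boxes = Boxes(torch.tensor([[0., 0., 50., 50.]]) + shift)
    inst.scores = torch.tensor([0.9 - 0.1 * shift])
    inst.pred_classes = torch.tensor([0])
    return inst

# One image seen by three TridentNet branches; the list is branch-major.
branches = [make_branch(float(i)) for i in range(3)]
merged = merge_branch_instances(branches, num_branch=3, nms_thresh=0.5,
                                topk_per_image=100)
print(len(merged[0]))  # 1: the three near-identical boxes merge to one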