Example #1
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare some proposals to be used to train the RROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes,
            with a fraction of positives that is no larger than `self.positive_sample_fraction`.

        Args:
            See :meth:`StandardROIHeads.forward`

        Returns:
            list[Instances]: length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:
                - proposal_boxes: the rotated proposal boxes
                - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                   then the ground-truth box is random)
                - gt_classes: the ground-truth classification label for each proposal
        """
        gt_boxes = [x.gt_boxes for x in targets]
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
            )
            matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
            sampled_idxs, gt_classes = self._sample_proposals(
                matched_idxs, matched_labels, targets_per_image.gt_classes
            )

            proposals_per_image = proposals_per_image[sampled_idxs]
            proposals_per_image.gt_classes = gt_classes

            if has_gt:
                sampled_targets = matched_idxs[sampled_idxs]
                proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets]
            else:
                gt_boxes = RotatedBoxes(
                    targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5))
                )
                proposals_per_image.gt_boxes = gt_boxes

            num_bg_samples.append((gt_classes == self.num_classes).sum().item())
            num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

        return proposals_with_gt
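
Every example in this listing feeds two `RotatedBoxes` objects (N x 5 tensors in (x_center, y_center, width, height, angle-in-degrees) format) to `pairwise_iou_rotated` and gets back an N x M IoU matrix. A minimal standalone sketch, assuming `pairwise_iou_rotated` is exported from `detectron2.structures` as in recent detectron2 releases:

import torch
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

# Two proposals and one ground-truth box, all centered at (50, 50).
proposals = RotatedBoxes(torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0],
                                       [50.0, 50.0, 20.0, 10.0, 45.0]]))
gt = RotatedBoxes(torch.tensor([[50.0, 50.0, 20.0, 10.0, 0.0]]))

iou = pairwise_iou_rotated(proposals, gt)  # shape (2, 1)
# iou[0, 0] == 1.0 (identical box); iou[1, 0] < 1.0 (same box rotated by 45 degrees)
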
Example #2
 def compute_iou_dt_gt(self, dt, gt, is_crowd):
     if self.is_rotated(dt) or self.is_rotated(gt):
         # TODO: take is_crowd into consideration
         assert all(c == 0 for c in is_crowd)
         dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
         gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
         return pairwise_iou_rotated(dt, gt)
     else:
         # This is the same as the classical COCO evaluation
         return maskUtils.iou(dt, gt, is_crowd)
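
`boxlist_to_tensor` is not shown in this snippet; it converts a list of 4- or 5-element boxes into an N x 5 tensor so that both rotated and axis-aligned annotations can be compared with `pairwise_iou_rotated`. A rough, hypothetical equivalent that pads axis-aligned (x, y, w, h) boxes with a zero angle:

import torch

def boxlist_to_tensor(boxlist, output_box_dim=5):
    # boxlist: list of [x, y, w, h] or [cx, cy, w, h, angle] boxes
    t = torch.as_tensor(boxlist, dtype=torch.float32)
    if t.numel() == 0:
        return t.reshape(0, output_box_dim)
    if t.shape[1] == 4 and output_box_dim == 5:
        # XYWH (top-left corner) -> XYWHA (center, size, zero angle)
        cx = t[:, 0] + t[:, 2] / 2
        cy = t[:, 1] + t[:, 3] / 2
        t = torch.stack([cx, cy, t[:, 2], t[:, 3], torch.zeros_like(cx)], dim=1)
    return t
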
Example #3
    def _get_ground_truth(self):
        """
        Returns:
            gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
                total number of anchors in image i (i.e., len(anchors[i])). Label values are
                in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
            gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 5).
        """
        gt_objectness_logits = []
        gt_anchor_deltas = []
        # Concatenate anchors from all feature maps into a single RotatedBoxes per image
        anchors = [RotatedBoxes.cat(anchors_i) for anchors_i in self.anchors]
        for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes,
                                                       anchors, self.gt_boxes):
            """
            image_size_i: (h, w) for the i-th image
            anchors_i: anchors for i-th image
            gt_boxes_i: ground-truth boxes for i-th image
            """
            # DEBUG
            #assert torch.all(gt_boxes_i.tensor[:,2] > 1e-5)
            #assert torch.all(gt_boxes_i.tensor[:,3] > 1e-5)
            #assert torch.all(anchors_i.tensor[:,2] > 1e-5)
            #assert torch.all(anchors_i.tensor[:,3] > 1e-5)
            match_quality_matrix = pairwise_iou_rotated(gt_boxes_i, anchors_i)
            matched_idxs, gt_objectness_logits_i = self.anchor_matcher(
                match_quality_matrix)

            if self.boundary_threshold >= 0:
                # Discard anchors that go out of the boundaries of the image
                # NOTE: This is legacy functionality that is turned off by default in Detectron2
                anchors_inside_image = anchors_i.inside_box(
                    image_size_i, self.boundary_threshold)
                gt_objectness_logits_i[~anchors_inside_image] = -1

            if len(gt_boxes_i) == 0:
                # These values won't be used anyway since the anchor is labeled as background
                gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
            else:
                # TODO wasted computation for ignored boxes
                matched_gt_boxes = gt_boxes_i[matched_idxs]
                gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                    anchors_i.tensor, matched_gt_boxes.tensor)

            gt_objectness_logits.append(gt_objectness_logits_i)
            gt_anchor_deltas.append(gt_anchor_deltas_i)

        return gt_objectness_logits, gt_anchor_deltas
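
The `anchor_matcher` only ever sees the (num_gt x num_anchors) quality matrix, so once `pairwise_iou_rotated` has produced it, matching works exactly as for axis-aligned boxes. A small sketch assuming detectron2's generic `Matcher` with RPN-style thresholds (the threshold values below are an assumption, not taken from the snippet):

import torch
from detectron2.modeling.matcher import Matcher
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

anchor_matcher = Matcher(thresholds=[0.3, 0.7], labels=[0, -1, 1],
                         allow_low_quality_matches=True)

gt_boxes = RotatedBoxes(torch.tensor([[30.0, 30.0, 20.0, 10.0, 15.0]]))
anchors = RotatedBoxes(torch.tensor([[30.0, 30.0, 20.0, 10.0, 0.0],
                                     [80.0, 80.0, 20.0, 10.0, 0.0]]))

match_quality_matrix = pairwise_iou_rotated(gt_boxes, anchors)  # shape (1, 2)
matched_idxs, objectness_labels = anchor_matcher(match_quality_matrix)
# matched_idxs[i] is the gt index assigned to anchor i;
# objectness_labels[i] is 1 (positive), 0 (negative) or -1 (ignore).
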
Example #4
    def _match_and_label_boxes(self, proposals, stage, targets):
        """
        Match proposals with groundtruth using the matcher at the given stage.
        Label the proposals as foreground or background based on the match.

        Args:
            proposals (list[Instances]): One Instances for each image, with
                the field "proposal_boxes".
            stage (int): the current stage
            targets (list[Instances]): the ground truth instances

        Returns:
            list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
        """
        num_fg_samples, num_bg_samples = [], []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
            )
            # proposal_labels are 0 or 1
            matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
            if len(targets_per_image) > 0:
                gt_classes = targets_per_image.gt_classes[matched_idxs]
                # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
                gt_classes[proposal_labels == 0] = self.num_classes
                gt_boxes = targets_per_image.gt_boxes[matched_idxs]
            else:
                gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
                gt_boxes = RotatedBoxes(
                    targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 5))
                )
            proposals_per_image.gt_classes = gt_classes
            proposals_per_image.gt_boxes = gt_boxes

            num_fg_samples.append((proposal_labels == 1).sum().item())
            num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

        # Log the number of fg/bg samples in each stage
        storage = get_event_storage()
        storage.put_scalar(
            "stage{}/roi_head/num_fg_samples".format(stage),
            sum(num_fg_samples) / len(num_fg_samples),
        )
        storage.put_scalar(
            "stage{}/roi_head/num_bg_samples".format(stage),
            sum(num_bg_samples) / len(num_bg_samples),
        )
        return proposals
Example #5
 def computeIoU(self, imgId, catId):
     p = self.params
     if p.useCats:
         gt = self._gts[imgId, catId]
         dt = self._dts[imgId, catId]
     else:
         gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
         dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
     if len(gt) == 0 and len(dt) == 0:
         return []
     inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
     dt = [dt[i] for i in inds]
     if len(dt) > p.maxDets[-1]:
         dt = dt[0:p.maxDets[-1]]
     ious = np.zeros((len(dt), len(gt)))
     for j, g in enumerate(gt):
         for i, d in enumerate(dt):
             # compute the rotated IoU between this detection and this ground-truth box
             gt_rotated_box = RotatedBoxes(
                 torch.tensor(g['bbox'], dtype=torch.float).view(-1, 5))
             dt_rotated_box = RotatedBoxes(
                 torch.tensor(d['bbox'], dtype=torch.float).view(-1, 5))
             ious[i, j] = pairwise_iou_rotated(gt_rotated_box,
                                               dt_rotated_box)
             del gt_rotated_box, dt_rotated_box
     # if p.iouType == 'segm':
     #     g = [g['segmentation'] for g in gt]
     #     d = [d['segmentation'] for d in dt]
     # elif p.iouType == 'bbox':
     #     g = [g['bbox'] for g in gt]
     #     d = [d['bbox'] for d in dt]
     # else:
     #     raise Exception('unknown iouType for iou computation')
     #
     # # compute iou between each dt and gt region
     # iscrowd = [int(o['iscrowd']) for o in gt]
     # ious = maskUtils.iou(d,g,iscrowd)
     del gt, dt
     return ious
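
Since `pairwise_iou_rotated` already computes all pairwise overlaps in a single call, the nested Python loop above can be collapsed. A possible vectorized rewrite of the IoU computation (same (len(dt), len(gt)) layout; a sketch with a hypothetical helper name, not the author's code):

import torch
from detectron2.structures import RotatedBoxes, pairwise_iou_rotated

def rotated_ious(dt, gt):
    # dt, gt: lists of COCO-style dicts whose 'bbox' is a 5-element rotated box
    dt_boxes = RotatedBoxes(torch.tensor([d['bbox'] for d in dt], dtype=torch.float).view(-1, 5))
    gt_boxes = RotatedBoxes(torch.tensor([g['bbox'] for g in gt], dtype=torch.float).view(-1, 5))
    # one kernel call instead of len(dt) * len(gt) separate calls
    return pairwise_iou_rotated(dt_boxes, gt_boxes).numpy()  # shape (len(dt), len(gt))
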
Example #6
    def label_and_sample_proposals(self, proposals, targets):
        """
        Prepare some proposals to be used to train the RROI heads.
        It performs box matching between `proposals` and `targets`, and assigns
        training labels to the proposals.
        It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes,
        with a fraction of positives that is no larger than `self.positive_sample_fraction`.

        Args:
            See :meth:`StandardROIHeads.forward`

        Returns:
            list[Instances]: length `N` list of `Instances`s containing the proposals
                sampled for training. Each `Instances` has the following fields:
                - proposal_boxes: the rotated proposal boxes
                - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to
                  (this is only meaningful if the proposal has a label > 0; if label = 0
                   then the ground-truth box is random)
                - other fields such as "gt_classes" and "gt_masks" that are included in `targets`.
        """
        gt_boxes = [x.gt_boxes for x in targets]
        # Augment proposals with ground-truth boxes.
        # In the case of learned proposals (e.g., RPN), in the beginning of training
        # the proposals are of low quality due to random initialization.
        # It's possible that none of these initial
        # proposals have high enough overlap with the gt objects to be used
        # as positive examples for the second stage components (box head,
        # cls head, mask head). Adding the gt boxes to the set of proposals
        # ensures that the second stage components will have some positive
        # examples from the start of training. For RPN, this augmentation improves
        # convergence and empirically improves box AP on COCO by about 0.5
        # points (under one tested configuration).
        proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

        proposals_with_gt = []

        num_fg_samples = []
        num_bg_samples = []
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
            )
            matched_idxs, proposals_labels = self.proposal_matcher(match_quality_matrix)

            # Get the corresponding GT for each proposal
            if has_gt:
                gt_classes = targets_per_image.gt_classes[matched_idxs]
                # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
                gt_classes[proposals_labels == 0] = self.num_classes
                # Label ignore proposals (-1 label)
                gt_classes[proposals_labels == -1] = -1
            else:
                gt_classes = torch.zeros_like(matched_idxs) + self.num_classes

            sampled_fg_inds, sampled_bg_inds = subsample_labels(
                gt_classes,
                self.batch_size_per_image,
                self.positive_sample_fraction,
                self.num_classes,
            )

            sampled_inds = torch.cat([sampled_fg_inds, sampled_bg_inds], dim=0)

            proposals_per_image = proposals_per_image[sampled_inds]
            proposals_per_image.gt_classes = gt_classes[sampled_inds]

            if has_gt:
                sampled_targets = matched_idxs[sampled_inds]
                proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets]
            else:
                gt_boxes = RotatedBoxes(
                    targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_inds), 5))
                )
                proposals_per_image.gt_boxes = gt_boxes

            num_fg_samples.append(sampled_fg_inds.numel())
            num_bg_samples.append(sampled_bg_inds.numel())
            proposals_with_gt.append(proposals_per_image)

        # Log the number of fg/bg samples that are selected for training ROI heads
        storage = get_event_storage()
        storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
        storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

        return proposals_with_gt
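
Compared with Example #1, which hides the sampling inside `self._sample_proposals`, this variant labels every proposal first and then subsamples explicitly. A small sketch of that step, assuming detectron2's `subsample_labels` helper (foreground = any class in [0, num_classes), background = num_classes, ignore = -1):

import torch
from detectron2.modeling.sampling import subsample_labels

num_classes = 3
# per-proposal labels after matching: two foreground, three background, one ignored
gt_classes = torch.tensor([0, 2, 3, 3, 3, -1])

fg_idx, bg_idx = subsample_labels(
    gt_classes,
    num_samples=4,           # batch_size_per_image
    positive_fraction=0.25,  # positive_sample_fraction
    bg_label=num_classes,
)
sampled_inds = torch.cat([fg_idx, bg_idx], dim=0)  # indices into the proposal list
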
Example #7
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP" and "AP50".
        """
        OVTHRES = 0.25 # TODO: make this configurable
        ANGLEMAX = 30

        def load_grasps(path): # TODO: duplicate code, see dataloader
            with open(path) as f:
                for i, line in enumerate(f):
                    # careful: potential mistake in jacquard format description on website, jaw and opening interchanged!
                    xc, yc, a, jaw, opening = [float(v) for v in line[:-1].split(';')]
                    # jaw = h, opening = w according to jacquard paper
                    yield (xc, yc, opening, jaw, -a)

        comm.synchronize()
        if not comm.is_main_process():
            return

        mAP, mPrec, mRec, mAcc = 0, 0, 0, 0
        nTotal = len(self._predictions)
        mTps, mFps = 0,0
        for pred in self._predictions:
            file_name, scores, boxes, classes = pred
            boxes_gt = RotatedBoxes(list(load_grasps(file_name)))

            # init true positives, false positives
            tps, fps = [], []
            # sort by confidence/score
            boxes = boxes[np.argsort(-scores, kind='mergesort')]
            TOP_N = 1
            for j in range(TOP_N):
                box = boxes[j]
                angle = box.tensor.squeeze()[2]
                sector = classes[j]
                ovmax = float('-inf')
                best_angle_gt = None
                for k in range(len(boxes_gt)):
                    box_gt = boxes_gt[k]
                    angle_gt = box_gt.tensor.squeeze()[2]
                    print(sector*10, angle_gt)

                    # compute iou on GPU
                    iou = pairwise_iou_rotated(box, box_gt)  # TODO: assumes len(gts)>len(scores)
                    # keep the best match and remember its angle
                    max_iou = torch.max(iou)
                    if max_iou > ovmax:
                        ovmax = max_iou
                        best_angle_gt = angle_gt
                if ovmax > OVTHRES and abs(angle - best_angle_gt) <= ANGLEMAX:
                    tps.append(1)
                    fps.append(0)
                    mTps += 1
                else:
                    fps.append(1)
                    tps.append(0)
                    mFps += 1

            # compute precision and recall
            fp = np.cumsum(np.array(fps))
            tp = np.cumsum(np.array(tps))
            rec = tp / np.maximum(TOP_N, torch.finfo(torch.float64).eps) # avoid divide by zero
            #brec = tp / np.maximum(len(boxes_gt), torch.finfo(torch.float64).eps) # avoid divide by zero
            prec = tp / np.maximum(tp + fp, torch.finfo(torch.float64).eps) # avoid divide by zero

            # let pascal voc compute ap
            ap = voc_ap(rec, prec)

            mAP += ap / nTotal

        acc = mTps / (mTps+mFps)
        ret = OrderedDict()
        ret["grasp"] = {"mAP": mAP*100, "mAcc:": acc*100}
        # TODO: add segm

        return ret
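
`voc_ap` is not defined in this snippet. A commonly used implementation is the continuous-interpolation PASCAL VOC AP from py-faster-rcnn; it may or may not be the exact function imported here:

import numpy as np

def voc_ap(rec, prec):
    # append sentinel values at both ends
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))
    # make the precision envelope monotonically decreasing
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # sum the areas of the rectangles where recall changes
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
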
Example #8
    def get_ground_truth(self, anchors, targets):
        """
        Args:
            anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
                list of #feature level Boxes. The Boxes contains anchors of
                this image on the specific feature level.
            targets (list[Instances]): a list of N `Instances`s. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.

        Returns:
            gt_classes (Tensor):
                An integer tensor of shape (N, R) storing ground-truth
                labels for each anchor.
                R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels.
                Anchors with an IoU with some target higher than the foreground threshold
                are assigned their corresponding label in the [0, K-1] range.
                Anchors whose IoU are below the background threshold are assigned
                the label "K". Anchors whose IoU are between the foreground and background
                thresholds are assigned a label "-1", i.e. ignore.
            gt_anchors_deltas (Tensor):
                Shape (N, R, 5).
                The last dimension represents ground-truth box2box transform
                targets (dx, dy, dw, dh, da) that map each anchor to its matched ground-truth box.
                The values in the tensor are meaningful only when the corresponding
                anchor is labeled as foreground.
        """
        gt_classes = []
        gt_anchors_deltas = []
        anchors = [RotatedBoxes.cat(anchors_i) for anchors_i in anchors]
        # list[RotatedBoxes] with (R, 5) tensors, one for each image

        for anchors_per_image, targets_per_image in zip(anchors, targets):
            match_quality_matrix = pairwise_iou_rotated(
                targets_per_image.gt_boxes, anchors_per_image)

            # adjust the scores of 'relation' and 'complexes' cases in the matrix

            # gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix, targets_per_image, anchors_per_image)
            gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)
            # ground truth box regression
            matched_gt_boxes = targets_per_image[gt_matched_idxs].gt_boxes

            gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                anchors_per_image.tensor, matched_gt_boxes.tensor)

            # ground truth classes
            has_gt = len(targets_per_image) > 0
            if has_gt:
                gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
                # Anchors with label 0 are treated as background.
                gt_classes_i[anchor_labels == 0] = self.num_classes
                # Anchors with label -1 are ignored.
                gt_classes_i[anchor_labels == -1] = -1
            else:
                gt_classes_i = torch.zeros_like(
                    gt_matched_idxs) + self.num_classes

            gt_classes.append(gt_classes_i)
            gt_anchors_deltas.append(gt_anchors_reg_deltas_i)
        del anchors
        return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
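
With rotated anchors the regression target has five components (dx, dy, dw, dh, da). A short sketch of that step, assuming `box2box_transform` is detectron2's `Box2BoxTransformRotated` (the rotated counterpart of the usual `Box2BoxTransform`; the weights below are placeholders):

import torch
from detectron2.modeling.box_regression import Box2BoxTransformRotated

box2box_transform = Box2BoxTransformRotated(weights=(1.0, 1.0, 1.0, 1.0, 1.0))

anchors = torch.tensor([[30.0, 30.0, 20.0, 10.0, 0.0]])
matched_gt = torch.tensor([[32.0, 31.0, 22.0, 12.0, 15.0]])

deltas = box2box_transform.get_deltas(anchors, matched_gt)  # shape (1, 5)
# box2box_transform.apply_deltas(deltas, anchors) recovers matched_gt up to numerical error
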
Example #9
    def evaluate(self):
        """
        Returns:
            dict: has a key "segm", whose value is a dict of "AP" and "AP50".
        """
        OVTHRES = 0.25  # TODO: make this configurable
        ANGLEMAX = 30

        comm.synchronize()
        if not comm.is_main_process():
            return

        mAP, mPrec, mRec, mAcc = 0, 0, 0, 0
        nTotal = len(self._predictions)
        mTps, mFps, mTns, mFns = 0, 0, 0, 0
        for pred in self._predictions:
            file_name, neg_file_name, scores, boxes, classes = pred
            boxes_gt = None
            neg_boxes_gt = None
            with open(file_name) as f:
                boxes_gt = RotatedBoxes(list(Grasp.load_grasps_plain(f)))
            with open(neg_file_name) as f:
                neg_boxes_gt = RotatedBoxes(list(Grasp.load_grasps_plain(f)))

            # init true positives, false positives, true negatives, false negatives
            tps, fps, tns, fns = [], [], [], []
            # sort by confidence/score
            boxes = boxes[np.argsort(-scores, kind='mergesort')]
            TOP_N = 1
            for j in range(TOP_N):
                box = boxes[j]
                angle = box.tensor.squeeze()[2]
                class_ = classes[j]
                if class_ == 0:  #grasp
                    ovmax = float('-inf')
                    best_angle_gt = None
                    for k in range(len(boxes_gt)):
                        box_gt = boxes_gt[k]
                        angle_gt = box_gt.tensor.squeeze()[2]
                        #print(sector*10, angle_gt)

                        # compute iou on GPU
                        iou = pairwise_iou_rotated(
                            box, box_gt)  # TODO: assumes len(gts)>len(scores)
                        # keep the best match and remember its angle
                        max_iou = torch.max(iou)
                        if max_iou > ovmax:
                            ovmax = max_iou
                            best_angle_gt = angle_gt
                    if ovmax > OVTHRES and abs(angle - best_angle_gt) <= ANGLEMAX:
                        tps.append(1)
                        fps.append(0)
                        tns.append(0)
                        fns.append(0)
                        mTps += 1
                    else:
                        tps.append(0)
                        fps.append(1)
                        tns.append(0)
                        fns.append(0)
                        mFps += 1
                else:
                    ovmax = float('-inf')
                    for k in range(len(neg_boxes_gt)):
                        box_gt = neg_boxes_gt[k]
                        angle_gt = box_gt.tensor.squeeze()[2]
                        #print(sector*10, angle_gt)

                        # compute iou on GPU
                        iou = pairwise_iou_rotated(
                            box, box_gt)  # TODO: assumes len(gts)>len(scores)
                        # get best match
                        max_iou = torch.max(iou)
                        ovmax = max((ovmax, max_iou))
                    if ovmax > OVTHRES:  # and abs(angle-angle_gt) <= ANGLEMAX:
                        tps.append(0)
                        fps.append(0)
                        tns.append(1)
                        fns.append(0)
                        mTns += 1
                    else:
                        tps.append(0)
                        fps.append(0)
                        tns.append(0)
                        fns.append(1)
                        mFns += 1

            # compute precision and recall
            fp = np.cumsum(np.array(fps))
            tp = np.cumsum(np.array(tps))
            fn = np.cumsum(np.array(fns))
            rec = tp / np.maximum(
                tp + fn,
                torch.finfo(torch.float64).eps)  # avoid divide by zero
            #brec = tp / np.maximum(len(boxes_gt), torch.finfo(torch.float64).eps) # avoid divide by zero
            prec = tp / np.maximum(
                tp + fp,
                torch.finfo(torch.float64).eps)  # avoid divide by zero

            # let pascal voc compute ap
            ap = voc_ap(rec, prec)

            mAP += ap / nTotal

        acc = (mTps + mTns) / (mTps + mFps + mTns + mFns)
        what = mTps / (mTps + mFps)  # precision over the positive (grasp) predictions
        ret = OrderedDict()
        ret["grasp"] = {
            "mAP": mAP * 100,
            "mAcc": acc * 100,
            "mWhatever": what * 100
        }
        # TODO: add segm

        return ret
Example #10
def _evaluate_rotated_box_proposals(dataset_predictions,
                                    coco_api,
                                    thresholds=None,
                                    area="all",
                                    limit=None):
    """
    Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for prediction_dict in dataset_predictions:
        predictions = prediction_dict["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = predictions.objectness_logits.sort(descending=True)[1]
        predictions = predictions[inds]

        ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"])
        anno = coco_api.loadAnns(ann_ids)
        gt_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWHA_ABS, BoxMode.XYWHA_ABS)
            for obj in anno if obj["iscrowd"] == 0
        ]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 5)  # guard against no boxes
        gt_boxes = RotatedBoxes(gt_boxes)
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0 or len(predictions) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <=
                                                       area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(predictions) > limit:
            predictions = predictions[:limit]

        overlaps = pairwise_iou_rotated(predictions.proposal_boxes, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(predictions), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = (torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else
                   torch.zeros(0, dtype=torch.float32))
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
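
The inner loop above greedily pairs each ground-truth box with its best remaining proposal by repeatedly taking the global maximum of the IoU matrix and invalidating the used row and column. A self-contained illustration of that matching step on a toy IoU matrix:

import torch

overlaps = torch.tensor([[0.9, 0.2],
                         [0.8, 0.6],
                         [0.1, 0.3]])  # (num_proposals, num_gt)
gt_overlaps = torch.zeros(overlaps.shape[1])
for j in range(min(overlaps.shape)):
    max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # best proposal per gt
    gt_ovr, gt_ind = max_overlaps.max(dim=0)             # best-covered gt
    box_ind = argmax_overlaps[gt_ind]
    gt_overlaps[j] = overlaps[box_ind, gt_ind]
    overlaps[box_ind, :] = -1  # proposal is used up
    overlaps[:, gt_ind] = -1   # gt is covered
# gt_overlaps -> tensor([0.9000, 0.6000]): gt 0 claims proposal 0, gt 1 falls back to proposal 1
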