Python bbox_overlaps示例，model.rpn.bbox_transform.bbox_overlaps Python示例

示例#1

0

显示文件

    def _score_of_edge(self, v1, v2):
        """Score every pairing of a box in ``v1`` with a box in ``v2``.

        Args:
            v1: mapping for the first frame; reads ``'boxes'``, ``'scores'``
                and ``'trackedboxes'``.
            v2: mapping for the following frame; reads ``'boxes'``,
                ``'scores'`` and ``'trackedboxes'`` (the latter is only
                checked for ``None``).

        Returns:
            Tuple ``(score, track_score)`` of two ``(N1, N2)`` CUDA float
            tensors. ``score[i, j]`` is the sum of the two box scores, plus
            1.0 where the pair is linked through the tracked boxes.
            ``track_score`` is 1.0 for linked pairs and 0.0 elsewhere (all
            zeros when tracking information is missing).
        """
        N1 = v1['boxes'].size(0)
        N2 = v2['boxes'].size(0)
        score = torch.cuda.FloatTensor(N1, N2).fill_(np.nan)

        # The second-frame scores do not depend on the row being filled, so
        # reshape them into a (1, N2) row once instead of on every iteration.
        scores2_row = v2['scores'].contiguous().view(-1, 1).t()
        for i1 in range(N1):
            # scores of box i1 in frame i with all boxes in frame i+1
            score[i1, :] = v1['scores'][i1] + scores2_row

        if v1['trackedboxes'] is not None and v2['trackedboxes'] is not None:
            # NOTE(review): both lookups below read v1['trackedboxes'];
            # v2['trackedboxes'] is never indexed. Presumably entry [0] holds
            # the tracked boxes in this frame and [1] those in the next one;
            # confirm against the producer of 'trackedboxes'.
            overlap_ratio_1 = bbox_overlaps(v1['boxes'].contiguous(),
                                            v1['trackedboxes'][0])
            overlap_ratio_2 = bbox_overlaps(v2['boxes'].contiguous(),
                                            v1['trackedboxes'][1])
            # Rounding turns each overlap into 0 or 1 (values >= 0.5 count as
            # a hit); the product is positive when some tracked box is hit by
            # both box i1 and box i2.
            track_score = torch.mm(torch.round(overlap_ratio_1),
                                   torch.round(overlap_ratio_2).t())
            score[track_score > 0.] += 1.0
            track_score = (track_score > 0.).float()
        else:
            track_score = torch.cuda.FloatTensor(N1, N2).zero_()
        return score, track_score

示例#2

0

显示文件

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image proposal boxes.

        Args:
            box_list: sequence with one ``(num_boxes, 4)`` array per image;
                its length must equal ``self.num_images``.
            gt_roidb: optional sequence of ground-truth entries (each with
                ``'boxes'`` and ``'gt_classes'``), or ``None``.

        Returns:
            A list with one dict per image holding ``'boxes'``,
            ``'gt_classes'`` (all zeros), ``'gt_overlaps'`` (sparse
            ``(num_boxes, num_classes)`` matrix), ``'flipped'`` (``False``)
            and ``'seg_areas'`` (all zeros).

        Raises:
            AssertionError: if ``len(box_list) != self.num_images``.
        """
        assert len(box_list) == self.num_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float was only an alias of the builtin float and was
                # removed in NumPy 1.24; np.float64 is the same dtype.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Record, for every box that touches some gt box, its best
                # overlap in the column of that gt box's class.
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb

示例#3

0

显示文件

    def _generate_rel_labels(self, obj_rois, gt_boxes, obj_num, rel_mat):
        """Build the relation label of every object pair in the batch.

        For each entry of ``obj_num`` the corresponding rows of ``obj_rois``
        are matched to their best-overlapping ground-truth box, and every
        unordered pair of rois is labelled with the ``rel_mat`` entry of the
        two matched ground-truth boxes. When ``self._isex`` is true the
        reversed pair gets its own label slot as well.

        Missing (zero) entries of ``rel_mat`` are filled in place from the
        opposite direction, so ``rel_mat`` is modified by this call.

        Returns:
            A long tensor of length ``self.rel_batch_size``; slots that
            belong to skipped pairs keep the value 0.
        """

        obj_pair_rel_label = torch.Tensor(
            self.rel_batch_size).type_as(gt_boxes).zero_().long()
        # generate online data labels
        cur_pair = 0  # index of the next label slot to fill
        for i in range(obj_num.size(0)):
            img_index = i % self.batch_size
            if obj_num[i] <= 1:
                # fewer than two objects: no pair can be formed
                continue
            # rois are stored group after group, so this group starts after
            # all rois of the preceding groups
            begin_ind = torch.sum(obj_num[:i])
            # columns 1:5 of a roi and 0:4 of a gt box are passed as the
            # box coordinates
            overlaps = bbox_overlaps(
                obj_rois[begin_ind:begin_ind + obj_num[i]][:, 1:5],
                gt_boxes[img_index][:, 0:4])
            # index of the best-overlapping gt box for every roi
            max_overlaps, max_inds = torch.max(overlaps, 1)
            for o1ind in range(obj_num[i]):
                for o2ind in range(o1ind + 1, obj_num[i]):
                    o1_gt = int(max_inds[o1ind].item())
                    o2_gt = int(max_inds[o2ind].item())
                    if o1_gt == o2_gt:
                        # skip invalid pairs (both rois match the same gt
                        # box) but still consume their label slot(s)
                        if self._isex:
                            cur_pair += 2
                        else:
                            cur_pair += 1
                        continue
                    # some labels were left out when the data was labelled:
                    # derive a missing (0) entry from the reverse direction
                    if rel_mat[img_index][o1_gt, o2_gt].item() == 0:
                        if rel_mat[img_index][o2_gt, o1_gt].item() == 3:
                            # a reverse label of 3 is copied unchanged
                            rel_mat[img_index][
                                o1_gt, o2_gt] = rel_mat[img_index][o2_gt,
                                                                   o1_gt]
                        else:
                            # any other reverse label r becomes 3 - r
                            rel_mat[img_index][
                                o1_gt,
                                o2_gt] = 3 - rel_mat[img_index][o2_gt, o1_gt]
                    obj_pair_rel_label[cur_pair] = rel_mat[img_index][o1_gt,
                                                                      o2_gt]

                    cur_pair += 1
                    if self._isex:
                        # same completion for the reversed pair (o2 -> o1),
                        # which gets its own label slot
                        if rel_mat[img_index][o2_gt, o1_gt].item() == 0:
                            if rel_mat[img_index][o1_gt, o2_gt].item() == 3:
                                rel_mat[img_index][
                                    o2_gt, o1_gt] = rel_mat[img_index][o1_gt,
                                                                       o2_gt]
                            else:
                                rel_mat[img_index][
                                    o2_gt,
                                    o1_gt] = 3 - rel_mat[img_index][o1_gt,
                                                                    o2_gt]
                        obj_pair_rel_label[cur_pair] = rel_mat[img_index][
                            o2_gt, o1_gt]
                        cur_pair += 1

        return obj_pair_rel_label

示例#4

0

显示文件

文件： net_utils.py 项目： wanght2018/CE7454_2018_Project_Group11

def Precision_Recall(dets, thresh, gt_boxes, num_boxes):
    """Count predictions and true positives among the first detections.

    Only the first 10 rows of ``dets`` are considered. A row is kept as a
    prediction when its last column (the score) exceeds ``thresh``; its first
    four columns are rounded to integers and used as the box. A prediction is
    a true positive when its best overlap with the first ``num_boxes`` boxes
    of ``gt_boxes[0]`` is greater than 0.5.

    Returns:
        Tuple ``(number of predictions, number of true positives)``.
    """
    limit = np.minimum(10, dets.shape[0])
    kept = []
    for row in dets[:limit]:
        rounded = [int(np.round(value)) for value in row[:4]]
        if row[-1] > thresh:
            kept.append(rounded)

    predict_bbox = torch.Tensor(kept)
    target_boxes = gt_boxes[0].cpu()[:num_boxes, :4]
    num_pred = predict_bbox.size(0)

    # Nothing to match when either side is empty.
    if not (target_boxes.size(0) > 0 and num_pred > 0):
        return num_pred, 0

    best_iou, _ = torch.max(bbox_overlaps(predict_bbox, target_boxes), 1)
    return num_pred, torch.sum(best_iou.gt(0.5)).item()

示例#5

0

显示文件

 def _select_pairs(self, obj_rois, obj_num):
     """Keep two randomly chosen rois per image.

     For every image index below ``self.batch_size`` two distinct rois are
     drawn at random. The draw is repeated (at most 5 draws in total) while
     the two boxes overlap by more than 0.7; the last draw is used either way.

     Returns:
         Tuple ``(selected_rois, obj_num)`` where ``selected_rois`` stacks the
         chosen pairs and ``obj_num`` holds 2 for every image. The first
         ``self.batch_size`` entries of the passed ``obj_num`` are zeroed in
         place as a side effect.
     """
     max_draws = 5
     picked = []
     # in each image, only 2 rois are preserved.
     obj_num = obj_num[:self.batch_size].zero_() + 2
     for im_ind in range(self.batch_size):
         rois = obj_rois[obj_rois[:, 0] == im_ind]
         draws = 0
         while True:
             pair_idx = np.random.choice(np.arange(rois.shape[0]),
                                         size=2,
                                         replace=False)
             selected = rois[pair_idx]
             draws += 1
             # retry only while the two boxes are nearly the same box
             iou = bbox_overlaps(selected[0:1][:, 1:5],
                                 selected[1:2][:, 1:5]).item()
             if not (iou > 0.7) or draws == max_draws:
                 break
         picked.append(selected.clone())
     return torch.cat(picked, dim=0), obj_num

示例#6

0

显示文件

    def _generate_rel_labels(self, obj_rois, gt_boxes, obj_num, rel_mat,
                             rel_batch_size):
        """Build the relation label of every object pair in the batch.

        Each group of rois (sized by ``obj_num``) is matched to its
        best-overlapping ground-truth boxes; every unordered roi pair is then
        labelled with the ``rel_mat`` entry of the two matched gt boxes, after
        that entry has been passed through ``self._check_rel_mat``. With
        ``self._isex`` set, the reversed pair is labelled too.

        Returns:
            A long tensor of length ``rel_batch_size``; slots of skipped
            pairs stay 0.
        """
        if self.using_crf:
            rel_mat = RelaTransform(rel_mat)
        labels = torch.Tensor(rel_batch_size).type_as(gt_boxes).zero_().long()

        slot = 0  # next position of ``labels`` to write
        for group in range(obj_num.size(0)):
            img_index = group % self.batch_size
            n_obj = obj_num[group]
            if n_obj <= 1:
                continue

            # rois of this group follow those of all previous groups
            start = torch.sum(obj_num[:group])
            group_rois = obj_rois[start:start + n_obj]
            overlaps = bbox_overlaps(group_rois[:, 1:5],
                                     gt_boxes[img_index][:, 0:4])
            _, best_gt = torch.max(overlaps, 1)

            for first in range(n_obj):
                gt_a = int(best_gt[first].item())
                for second in range(first + 1, n_obj):
                    gt_b = int(best_gt[second].item())
                    if gt_a == gt_b:
                        # invalid pair: leave its slot(s) at zero
                        slot += 2 if self._isex else 1
                        continue

                    # forward direction always, reverse only when _isex;
                    # some labels are neglected when the dataset was labeled,
                    # so every entry goes through _check_rel_mat first
                    for src, dst in ((gt_a, gt_b), (gt_b, gt_a)):
                        rel_mat[img_index] = self._check_rel_mat(
                            rel_mat[img_index], src, dst)
                        labels[slot] = rel_mat[img_index][src, dst]
                        slot += 1
                        if not self._isex:
                            break

        return labels

示例#7

0

显示文件

文件： get_dataset_counts.py 项目： zxyCynthia/graph-rcnn.pytorch

def box_filter(boxes, must_overlap=False):
    """Return the candidate (i, j) box-index pairs for possible relations.

    Args:
        boxes: array with one box per row.
        must_overlap: when true, keep only pairs of distinct boxes whose
            overlap is positive; if no such pair exists, fall back to all
            pairs.

    Returns:
        Integer array of shape ``(K, 2)``; each row is an ordered pair of
        distinct box indices.
    """
    n_cands = boxes.shape[0]

    if must_overlap:
        # np.float / np.bool were aliases of the builtins and were removed in
        # NumPy 1.24; np.float64 / bool are the same dtypes.
        as_float = boxes.astype(np.float64)
        overlaps = bbox_overlaps(as_float, as_float) > 0
        np.fill_diagonal(overlaps, False)  # a box is not paired with itself
        possible_boxes = np.column_stack(np.where(overlaps))
        if possible_boxes.size > 0:
            return possible_boxes

    # Every ordered pair of distinct boxes.
    all_possib = ~np.eye(n_cands, dtype=bool)
    return np.column_stack(np.where(all_possib))

示例#8

0

显示文件

文件： eval.py 项目： gyq716/my_graph_rcnn

def eval_objects_recall(gt_annot, obj_rois, obj_scores, top_Ns):
    """Count ground-truth objects recalled by the top-N scored detections.

    A ground-truth object counts as recalled at a given N when at least one
    of the N highest-scoring rois overlaps it by more than 0.5 and carries
    the same label.

    Args:
        gt_annot: 2-D tensor; columns 0:4 are read as the gt boxes and
            column 4 as the gt label.
        obj_rois: predicted boxes, one per row.
        obj_scores: per-roi class scores. Column 0 is zeroed IN PLACE before
            the best class is picked.
        top_Ns: sequence of cut-offs N to evaluate.

    Returns:
        Tuple ``(obj_cnt, obj_correct_cnt)``: the number of gt objects and an
        int tensor with the number of recalled objects for each entry of
        ``top_Ns``.
    """
    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1).tolist()
    gt_obj_rois = gt_annot[:, :4]

    obj_cnt = gt_obj_rois.size(0)
    obj_correct_cnt = torch.zeros(len(top_Ns)).int()

    obj_scores[:, 0].zero_()
    obj_scores_final, obj_labels_final = torch.max(obj_scores, 1)

    # compute overlaps between pre_obj_rois (rows) and gt_obj_rois (columns)
    overlaps = bbox_overlaps(obj_rois.contiguous(), gt_obj_rois.contiguous())

    # rank the rois by descending score
    _, order = torch.sort(obj_scores_final, 0, True)

    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]
        labels_topN = obj_labels_final[keep_ind].tolist()
        overlaps_topN = overlaps[keep_ind]  # hoisted out of the gt loop

        for k in range(obj_cnt):
            # reshape(-1) yields a 1-D index list for zero, one or many hits;
            # squeeze() would collapse a single hit to a 0-dim tensor.
            hit_rows = (overlaps_topN[:, k] > 0.5).nonzero().reshape(-1)
            if any(labels_topN[j] == gt_obj_labels[k]
                   for j in hit_rows.tolist()):
                obj_correct_cnt[idx] += 1

    # Return only after every N has been evaluated (returning from inside
    # the loop would leave all counts after the first at zero).
    return obj_cnt, obj_correct_cnt

示例#9

0

显示文件

def _select_int_boxes(boxes, mask):
    """Return the rows of *boxes* where *mask* is true as int16, or None if none."""
    idx = np.nonzero(mask)[0]
    if idx.size == 0:
        return None
    return boxes[idx].astype(np.int16)


def _draw_labeled_boxes(image, boxes, label, color, skip=()):
    """Draw every box in *color* with *label* next to its top-left corner.

    Boxes whose position within *boxes* is contained in *skip* are left out.
    """
    for i, e in enumerate(boxes):
        if i in skip:
            continue
        cv2.rectangle(image, (int(e[0]), int(e[1])),
                      (int(e[2]), int(e[3])), color)
        # keep the label at least 2 px inside the image
        cv2.putText(image, label,
                    (int(max(2, e[0] - 10)), int(max(2, e[1] - 10))),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)


def draw(path_img,
         save_path,
         preds,
         gts,
         pred_scores,
         nid,
         self_defined=False):
    """Render predictions against ground truth and save the image.

    Ground-truth boxes are outlined in white; predictions are drawn as "tp"
    (green) when their best overlap with a gt box reaches ``iou_thres`` and
    as "fp" (blue, BGR ``(255, 0, 0)``) otherwise; gt boxes that no
    prediction reaches are drawn as "fn" (red). ``iou_thres`` is not a
    parameter: it is read from the enclosing module scope.

    Args:
        path_img: path of the image to read.
        save_path: directory the result ``<nid>.png`` is written to.
        preds: predicted boxes (rows of x1, y1, x2, y2) or ``None``.
        gts: ground-truth boxes in the same layout, or ``None``.
        pred_scores: kept for interface compatibility; not used.
        nid: identifier used as the output file name.
        self_defined: when true, a missed gt box that contains pixels of at
            least two false positives and is more than half covered by false
            positives is not reported; those false positives are drawn as
            true positives instead.
    """
    image = cv2.imread(path_img, 1)
    height, width, _ = image.shape

    if preds is None:
        if gts is not None:
            # nothing was predicted: every gt box is a miss
            _draw_labeled_boxes(image, gts, "fn", (0, 0, 255))
    elif gts is None:
        # nothing to match against: every prediction is a false positive
        _draw_labeled_boxes(image, preds, "fp", (255, 0, 0))
    else:
        for e in gts:
            cv2.rectangle(image, (int(e[0]), int(e[1])),
                          (int(e[2]), int(e[3])), (255, 255, 255))

        overlap = bbox_overlaps(preds, gts)
        overlap = overlap.numpy()
        if isinstance(preds, torch.Tensor):
            preds = preds.numpy()
        if isinstance(gts, torch.Tensor):
            gts = gts.numpy()

        best_per_pred = overlap.max(axis=1)
        best_per_gt = overlap.max(axis=0)
        tp_boxes = _select_int_boxes(preds, best_per_pred >= iou_thres)
        fp_boxes = _select_int_boxes(preds, best_per_pred < iou_thres)
        fn_boxes = _select_int_boxes(gts, best_per_gt < iou_thres)

        non_fn_id = set()  # positions in fn_boxes explained by fp boxes
        non_fp_id = set()  # positions in fp_boxes promoted to tp
        if self_defined and fn_boxes is not None and fp_boxes is not None:
            # Pixel maps of the false positives: which pixels are covered,
            # and by which fp box (the last one painted wins).
            covered = np.zeros((height, width), dtype=np.int64)
            owner = np.full((height, width), -1, dtype=np.int16)
            for idx, e in enumerate(fp_boxes):
                covered[e[1]:e[3], e[0]:e[2]] = 1
                owner[e[1]:e[3], e[0]:e[2]] = idx

            for idx, e in enumerate(fn_boxes):
                region = owner[e[1]:e[3], e[0]:e[2]]
                nids = [int(x) for x in np.unique(region) if x >= 0]
                if len(nids) <= 1:
                    continue
                # Coordinates are int16; do the arithmetic on Python ints so
                # the area of a large box cannot overflow.
                fn_area = ((int(e[3]) - int(e[1])) *
                           (int(e[2]) - int(e[0])))
                fp_area = covered[e[1]:e[3], e[0]:e[2]].sum()
                if float(fp_area) / fn_area > 0.5:
                    non_fn_id.add(idx)
                    non_fp_id.update(nids)

            if non_fp_id:
                add_tp_boxes = fp_boxes[sorted(non_fp_id)]
                if tp_boxes is None:
                    tp_boxes = add_tp_boxes
                else:
                    tp_boxes = np.concatenate([tp_boxes, add_tp_boxes],
                                              axis=0)

        if tp_boxes is not None:
            _draw_labeled_boxes(image, tp_boxes, "tp", (0, 255, 0))
        if fp_boxes is not None:
            _draw_labeled_boxes(image, fp_boxes, "fp", (255, 0, 0),
                                skip=non_fp_id)
        if fn_boxes is not None:
            _draw_labeled_boxes(image, fn_boxes, "fn", (0, 0, 255),
                                skip=non_fn_id)

    cv2.imwrite(os.path.join(save_path, '{}.png'.format(nid)), image)

示例#10

0

显示文件

文件： eval.py 项目： gyq716/my_graph_rcnn

def eval_attribute_recall(gt_annot, obj_rois, obj_scores, att_scores, top_Ns):
    """Evaluate attribute recall for the top-N (object, attribute) pairs.

    Reads columns 0:4 of ``gt_annot`` as gt boxes, column 4 as the gt object
    label and columns 5:21 as the gt attribute flags. Column 0 of both
    ``obj_scores`` and ``att_scores`` is zeroed IN PLACE.

    NOTE(review): this function looks unfinished. It returns ``None`` from
    inside the ``top_Ns`` loop, and ``obj_correct_cnt`` is never assigned in
    this function, so the increment below fails unless a module-level name
    with that value exists. Confirm the intended contract before relying on
    it.
    """

    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1, 1)
    gt_obj_rois = gt_annot[:, :4]
    gt_atts = gt_annot[:, 5:21]

    # best class and its score for every roi, ignoring column 0
    obj_scores[:, 0].zero_()
    max_obj_scores, max_obj_ind = torch.max(obj_scores, 1)
    obj_labels_final = max_obj_ind

    # attributes of every roi ranked by descending score, ignoring column 0
    att_scores[:, 0].zero_()
    att_scores_sorted, order = torch.sort(att_scores, 1, True)

    # since the maximal number of attributes for each bbox is 16, we trim att_scores_sorted to 16
    att_scores_sorted_trim = att_scores_sorted[:, :16]
    order_trim = order[:, :16]

    # multiply two scores to get the final scores
    # NOTE(review): max_obj_scores is the 1-D result of torch.max(..., 1)
    # while the other operand is 2-D; confirm this broadcasts per roi.
    att_scores_final = max_obj_scores * att_scores_sorted_trim

    # map_yx lists the (row, column) index pair of every entry of
    # att_scores_final in row-major order; it is moved to the GPU
    map_x = np.arange(0, att_scores_final.size(1))
    map_y = np.arange(0, att_scores_final.size(0))
    map_x_g, map_y_g = np.meshgrid(map_x, map_y)
    map_yx = torch.from_numpy(
        np.vstack((map_y_g.ravel(), map_x_g.ravel())).transpose()).cuda()

    overlaps = bbox_overlaps(obj_rois.contiguous(), gt_obj_rois.contiguous())

    att_scores_final_v = att_scores_final.view(-1)
    map_yx_v = map_yx.view(-1, 2)

    # rank all (roi, attribute) entries by descending combined score
    _, order = torch.sort(att_scores_final_v, 0, True)

    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]

        map_yx_v_kept = map_yx_v[keep_ind]

        # NOTE(review): map_yx_v_kept is already restricted to keep_ind, yet
        # it is indexed with keep_ind again here; confirm this is intended.
        obj_kept = map_yx_v_kept[keep_ind, 0]
        att_kept = order_trim[map_yx_v_kept[keep_ind, 1]]

        obj_annot_topN = obj_labels_final[obj_kept]

        for k in range(gt_obj_rois.size(0)):
            gt_obj_label = gt_obj_labels[k]
            gt_box = gt_obj_rois[k]
            gt_att_label = gt_atts[k]

            # kept entries whose roi overlaps gt box k by more than 0.5
            valid_index = (overlaps[obj_kept][:, k] > 0.5).nonzero()

            if len(valid_index.size()) == 0:
                continue

            valid_index = valid_index.squeeze()
            for i in range(valid_index.size(0)):
                obj_label = obj_annot_topN[valid_index[i]]
                att_pos = att_kept[valid_index[i]]
                if gt_obj_label[0] == obj_label[0] and gt_att_label[
                        att_pos] == 1:
                    # NOTE(review): obj_correct_cnt is not defined in this
                    # function.
                    obj_correct_cnt[idx] += 1
                    break

        # NOTE(review): returns during the first iteration of the top_Ns loop
        return None

示例#11

0

显示文件

文件： eval.py 项目： gyq716/my_graph_rcnn

def eval_relations_recall(gt_annot, obj_rois, obj_scores, rel_inds, rel_scores,
                          top_Ns):
    """Count ground-truth relations recalled by the top-N scored triplets.

    A gt relation counts as recalled at a given N when one of the N
    best-scoring predicted triplets has both its subject and its object box
    overlapping the gt boxes by more than 0.5 and carries the same
    (subject label, predicate, object label).

    Args:
        gt_annot: 2-D tensor; columns 0:4 are read as gt boxes, column 4 as
            the gt object label and columns 21: as the relation matrix
            (non-zero entries are predicate labels).
        obj_rois: predicted object boxes, one per row.
        obj_scores: per-roi class scores; column 0 is zeroed IN PLACE.
        rel_inds: one row per candidate relation holding the subject and
            object roi indices in columns 0 and 1.
        rel_scores: per-relation predicate scores; column 0 is zeroed IN
            PLACE.
        top_Ns: sequence of cut-offs N to evaluate.

    Returns:
        Tuple ``(rel_cnt, rel_correct_cnt, gt_rel_rois, gt_rel_labels)``.
        When there is no gt relation the result is
        ``(0, zeros(len(top_Ns)), None, None)``.
    """
    gt_obj_labels = gt_annot[:, 4].contiguous().view(-1, 1)
    gt_obj_rois = gt_annot[:, :4]
    gt_rels = gt_annot[:, 21:]

    gt_rels_ind = gt_rels.nonzero()

    # numel() is 0 for an empty result whatever its number of dimensions,
    # whereas len(size()) is non-zero for an empty (0, 2) index tensor.
    if gt_rels_ind.numel() == 0:
        return 0, torch.zeros(len(top_Ns)).int(), None, None

    gt_rels_view = gt_rels.contiguous().view(-1)

    rel_cnt = gt_rels_ind.size(0)

    rel_correct_cnt = torch.zeros(len(top_Ns)).int()

    # predicate label of every gt relation, in the same row-major order as
    # gt_rels_ind
    gt_pred_labels = gt_rels_view[
        gt_rels_view.nonzero().reshape(-1)].contiguous().view(-1, 1)

    gt_rel_rois = torch.cat(
        (gt_obj_rois[gt_rels_ind[:, 0]], gt_obj_rois[gt_rels_ind[:, 1]]), 1)
    gt_rel_labels = torch.cat(
        (gt_obj_labels[gt_rels_ind[:, 0]], gt_pred_labels,
         gt_obj_labels[gt_rels_ind[:, 1]]), 1)

    obj_scores[:, 0].zero_()
    max_obj_scores, max_obj_ind = torch.max(obj_scores, 1)

    # subject / object roi index of every candidate relation
    sobj_inds = rel_inds[:, 0]
    oobj_inds = rel_inds[:, 1]

    # perform nms on object rois (in descending score order)
    _, order = torch.sort(max_obj_scores, 0, True)
    obj_scores_ordered = max_obj_scores[order]
    obj_rois_ordered = obj_rois[order]
    keep = nms_detections(obj_rois_ordered, obj_scores_ordered)

    # 1 marks rois suppressed by nms, 0 marks kept ones
    notkeep_ind = order.clone().fill_(1)
    notkeep_ind[order[keep.squeeze().long()]] = 0

    # a relation is dropped when either of its rois was suppressed
    notkeep_rels = notkeep_ind[sobj_inds].eq(1) | notkeep_ind[oobj_inds].eq(1)

    rel_scores[:, 0].zero_()
    max_rel_scores, max_rel_ind = torch.max(rel_scores, 1)

    rel_scores_final = max_rel_scores * max_obj_scores[
        sobj_inds] * max_obj_scores[oobj_inds]
    rel_scores_final[notkeep_rels] = 0

    rel_rois_final = torch.cat((obj_rois[sobj_inds], obj_rois[oobj_inds]), 1)

    max_obj_ind = max_obj_ind.contiguous().view(-1, 1)
    max_rel_ind = max_rel_ind.contiguous().view(-1, 1)
    rel_annot_final = torch.cat(
        (max_obj_ind[sobj_inds], max_rel_ind, max_obj_ind[oobj_inds]), 1)

    # compute overlaps between gt_sobj and pred_sobj
    overlap_sobjs = bbox_overlaps(rel_rois_final[:, :4].contiguous(),
                                  gt_rel_rois[:, :4].contiguous())
    # compute overlaps between gt_oobj and pred_oobj
    overlap_oobjs = bbox_overlaps(rel_rois_final[:, 4:].contiguous(),
                                  gt_rel_rois[:, 4:].contiguous())

    # sort triplet_scores
    _, order = torch.sort(rel_scores_final, 0, True)

    for idx, top_N in enumerate(top_Ns):
        keep_ind = order[:top_N]
        rel_annot_topN = rel_annot_final[keep_ind]
        # both boxes must overlap their gt counterpart; computed once per N
        # instead of once per gt relation
        both_hit = ((overlap_sobjs[keep_ind] > 0.5) &
                    (overlap_oobjs[keep_ind] > 0.5))

        for k in range(gt_rel_rois.size(0)):
            gt = gt_rel_labels[k]

            # reshape(-1) gives a 1-D index list for zero, one or many hits;
            # squeeze() would collapse a single hit to a 0-dim tensor.
            valid_index = both_hit[:, k].nonzero().reshape(-1)

            for j in valid_index.tolist():
                rel = rel_annot_topN[j]
                if gt[0] == rel[0] and gt[1] == rel[1] and gt[2] == rel[2]:
                    rel_correct_cnt[idx] += 1
                    break

    return rel_cnt, rel_correct_cnt, gt_rel_rois, gt_rel_labels

示例#12

0

显示文件

    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional sequence with one box array per image;
                when ``None`` the non-ground-truth boxes of ``self.roidb``
                are used.
            thresholds: IoU thresholds to evaluate; defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the gt-area range to restrict the evaluation to
                (``'all'``, ``'small'``, ``'medium'``, ``'large'``,
                ``'96-128'``, ``'128-256'``, ``'256-512'``, ``'512-inf'``).
            limit: if given, only the first ``limit`` boxes of every image
                are used.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps

        Raises:
            AssertionError: if ``area`` is not a known range name.
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float was only an alias of the builtin float and was removed
            # in NumPy 1.24; np.float64 is the same dtype.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            # Greedy one-to-one matching: repeatedly take the best remaining
            # (proposal, gt) pair and retire both.
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }