Example #1
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.num_images, \
            'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(float),
                                            gt_boxes.astype(float))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
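
A note on the shared helper: every example on this page calls a bbox_overlaps(boxes, query_boxes) routine that returns the (N, K) matrix of pairwise IoUs between two box sets; in the source repositories it is usually a compiled Cython/C extension. For reference, a minimal pure-NumPy sketch with the same contract (the name bbox_overlaps_np is ours, not from any of the repos):

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """Pairwise IoU between boxes (N, 4) and query_boxes (K, 4) in (x1, y1, x2, y2).

    Uses the +1 pixel convention of the py-faster-rcnn lineage; drop the +1
    terms for a continuous-coordinate convention.
    """
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    # intersection rectangle corners, broadcast to (N, K)
    ix1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
    inter = np.clip(ix2 - ix1 + 1, 0, None) * np.clip(iy2 - iy1 + 1, 0, None)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    union = areas[:, None] + query_areas[None, :] - inter
    return inter / np.maximum(union, np.finfo(np.float64).eps)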
Example #2
def prepare_targets(annos, anchors, topk=9, device=torch.device("cuda:0")):
    anchor_nums = [anchor.shape[0] for anchor in anchors]
    anchors = torch.cat(anchors).to(device)
    total_anchors_num = anchors.shape[0]

    gt_boxes = annos['bboxes'].float().to(device)
    obj_nums = annos['obj_num'].to(device)
    obj_clses = annos['cls'].to(device)
    batch_size = gt_boxes.shape[0]

    anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2.0
    anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2.0
    anchor_points = torch.stack((anchors_cx, anchors_cy), dim=1)

    cls_targets = []
    reg_targets = []
    ctness_targets = []
    for i in range(batch_size):
        num_gt = obj_nums[i]
        if num_gt == 0:
            cls_targets.append(
                torch.full((anchors.shape[0], ), -1,
                           dtype=torch.int).to(device))
            reg_targets.append(torch.zeros_like(anchors).to(device))
            ctness_targets.append(
                torch.zeros((anchors.shape[0], ),
                            dtype=torch.float).to(device))
            continue

        bboxes_per_img = gt_boxes[i][:num_gt]
        labels_per_im = obj_clses[i][:num_gt]

        gt_cx = (bboxes_per_img[:, 2] + bboxes_per_img[:, 0]) / 2.0
        gt_cy = (bboxes_per_img[:, 3] + bboxes_per_img[:, 1]) / 2.0
        gt_points = torch.stack((gt_cx, gt_cy), dim=1)
        distances = (anchor_points[:, None, :] -
                     gt_points[None, :, :]).pow(2).sum(-1).sqrt()

        candidate_idxs = []
        start_idx = 0
        for anchor_num_per_level in anchor_nums:
            end_idx = start_idx + anchor_num_per_level
            distances_per_level = distances[start_idx:end_idx, :]
            _, topk_idxs_per_level = distances_per_level.topk(topk,
                                                              dim=0,
                                                              largest=False)
            candidate_idxs.append(topk_idxs_per_level + start_idx)
            start_idx = end_idx
        candidate_idxs = torch.cat(candidate_idxs, dim=0)

        ious = bbox_overlaps(anchors.float(), bboxes_per_img.float())
        candidate_ious = ious[candidate_idxs, torch.arange(num_gt, device=device)]

        iou_mean_per_gt = candidate_ious.mean(0)
        iou_std_per_gt = candidate_ious.std(0)
        iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt
        is_pos = candidate_ious >= iou_thresh_per_gt[None, :]

        # Limit the final positive samples' centers to lie inside the gt box
        for ng in range(num_gt):
            candidate_idxs[:, ng] += ng * total_anchors_num
        e_anchors_cx = anchors_cx.view(1, -1).expand(
            num_gt, total_anchors_num).contiguous().view(-1)
        e_anchors_cy = anchors_cy.view(1, -1).expand(
            num_gt, total_anchors_num).contiguous().view(-1)

        candidate_idxs = candidate_idxs.view(-1)
        l = e_anchors_cx[candidate_idxs].view(-1, num_gt) - bboxes_per_img[:, 0]
        t = e_anchors_cy[candidate_idxs].view(-1, num_gt) - bboxes_per_img[:, 1]
        r = bboxes_per_img[:, 2] - e_anchors_cx[candidate_idxs].view(-1, num_gt)
        b = bboxes_per_img[:, 3] - e_anchors_cy[candidate_idxs].view(-1, num_gt)
        is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
        is_pos = is_pos & is_in_gts

        # if an anchor box is assigned to multiple gts, the one with the highest IoU will be selected.
        ious_inf = torch.full_like(ious, -INF).t().contiguous().view(-1)
        index = candidate_idxs.view(-1)[is_pos.view(-1)]
        ious_inf[index] = ious.t().contiguous().view(-1)[index]
        ious_inf = ious_inf.view(num_gt, -1).t()

        anchors_to_gt_values, anchors_to_gt_indices = ious_inf.max(dim=1)
        cls_labels_per_im = labels_per_im[anchors_to_gt_indices]
        cls_labels_per_im[anchors_to_gt_values == -INF] = -1
        matched_gts = bboxes_per_img[anchors_to_gt_indices]

        reg_targets_per_im = box_encode(matched_gts, anchors)
        cls_targets.append(cls_labels_per_im)
        reg_targets.append(reg_targets_per_im)

        # centerness
        l = anchors_cx - matched_gts[:, 0]
        t = anchors_cy - matched_gts[:, 1]
        r = matched_gts[:, 2] - anchors_cx
        b = matched_gts[:, 3] - anchors_cy
        left_right = torch.stack([l, r], dim=1).abs()
        top_bottom = torch.stack([t, b], dim=1).abs()
        centerness = torch.sqrt(
            (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
            (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))
        assert not torch.isnan(centerness).any()
        ctness_targets.append(centerness)

    cls_targets = torch.stack(cls_targets, dim=0)
    reg_targets = torch.stack(reg_targets, dim=0)
    ctness_targets = torch.stack(ctness_targets, dim=0)

    return cls_targets, reg_targets, ctness_targets
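
The heart of the assignment above is the adaptive cut-off: for each gt, the IoU threshold is the mean plus the standard deviation of the IoUs of its distance-top-k candidates (the iou_mean_per_gt / iou_std_per_gt lines), so only clearly above-average candidates become positives. A standalone toy sketch of just that step; the IoU values below are made up for illustration:

import torch

# candidate IoUs for a single gt box, one entry per top-k candidate anchor
candidate_ious = torch.tensor([0.65, 0.60, 0.55, 0.20, 0.15, 0.10])
iou_thresh = candidate_ious.mean() + candidate_ious.std()
is_pos = candidate_ious >= iou_thresh
# only candidates well above the average survive; low-IoU outliers are dropped
print(iou_thresh, is_pos)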
Example #3
def tpfp_imagenet(det_bboxes,
                  gt_bboxes,
                  gt_bboxes_ignore=None,
                  default_iou_thr=0.5,
                  area_ranges=None):
    """Check if detected bboxes are true positive or false positive.
    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,
            of shape (k, 4). Default: None.
        default_iou_thr (float): IoU threshold to be considered as matched for
            medium and large bboxes (small ones have special rules).
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
            in the format [(min1, max1), (min2, max2), ...]. Default: None.
    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    # the docstring allows gt_bboxes_ignore=None; normalize it to an empty array
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = np.empty((0, 4), dtype=np.float32)
    # an indicator of ignored gts
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp
    # of a certain scale.
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp
    ious = bbox_overlaps(det_bboxes, gt_bboxes - 1)
    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0]
    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1]
    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),
                          default_iou_thr)
    # sort all detections by scores in descending order
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_areas = gt_w * gt_h
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            max_iou = -1
            matched_gt = -1
            # find best overlapped available gt
            for j in range(num_gts):
                # different from PASCAL VOC: allow finding other gts if the
                # best overlapped ones are already matched by other det bboxes
                if gt_covered[j]:
                    continue
                elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:
                    max_iou = ious[i, j]
                    matched_gt = j
            # there are 4 cases for a det bbox:
            # 1. it matches a gt, tp = 1, fp = 0
            # 2. it matches an ignored gt, tp = 0, fp = 0
            # 3. it matches no gt and within area range, tp = 0, fp = 1
            # 4. it matches no gt but is beyond area range, tp = 0, fp = 0
            if matched_gt >= 0:
                gt_covered[matched_gt] = 1
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    tp[k, i] = 1
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
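
The distinguishing detail of tpfp_imagenet is the size-adaptive threshold min(w*h / ((w + 10) * (h + 10)), default_iou_thr), which relaxes the match criterion for small ground-truth boxes. A quick check of the effect:

import numpy as np

for w, h in [(10, 10), (30, 30), (100, 100)]:
    thr = np.minimum((w * h) / ((w + 10.0) * (h + 10.0)), 0.5)
    print('%dx%d gt box -> IoU threshold %.3f' % (w, h, thr))
# 10x10 -> 0.250; 30x30 and 100x100 are capped at the default 0.500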
Example #4
def tpfp_default(det_bboxes,
                 gt_bboxes,
                 gt_bboxes_ignore=None,
                 iou_thr=0.5,
                 area_ranges=None):
    """Check if detected bboxes are true positive or false positive.
    Args:
        det_bboxes (ndarray): Detected bboxes of this image, of shape (m, 5).
        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).
        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,
            of shape (k, 4). Default: None.
        iou_thr (float): IoU threshold to be considered as matched.
            Default: 0.5.
        area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,
            in the format [(min1, max1), (min2, max2), ...]. Default: None.
    Returns:
        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of
            each array is (num_scales, m).
    """
    # the docstring allows gt_bboxes_ignore=None; normalize it to an empty array
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = np.empty((0, 4), dtype=np.float32)
    # an indicator of ignored gts
    gt_ignore_inds = np.concatenate(
        (np.zeros(gt_bboxes.shape[0], dtype=bool),
         np.ones(gt_bboxes_ignore.shape[0], dtype=bool)))
    # stack gt_bboxes and gt_bboxes_ignore for convenience
    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))

    num_dets = det_bboxes.shape[0]
    num_gts = gt_bboxes.shape[0]
    if area_ranges is None:
        area_ranges = [(None, None)]
    num_scales = len(area_ranges)
    # tp and fp are of shape (num_scales, num_dets), each row is tp or fp of
    # a certain scale
    tp = np.zeros((num_scales, num_dets), dtype=np.float32)
    fp = np.zeros((num_scales, num_dets), dtype=np.float32)

    # if there is no gt bboxes in this image, then all det bboxes
    # within area range are false positives
    if gt_bboxes.shape[0] == 0:
        if area_ranges == [(None, None)]:
            fp[...] = 1
        else:
            det_areas = (det_bboxes[:, 2] - det_bboxes[:, 0]) * (
                det_bboxes[:, 3] - det_bboxes[:, 1])
            for i, (min_area, max_area) in enumerate(area_ranges):
                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1
        return tp, fp

    ious = bbox_overlaps(det_bboxes, gt_bboxes)
    # for each det, the max iou with all gts
    ious_max = ious.max(axis=1)
    # for each det, which gt overlaps most with it
    ious_argmax = ious.argmax(axis=1)
    # sort all dets in descending order by scores
    sort_inds = np.argsort(-det_bboxes[:, -1])
    for k, (min_area, max_area) in enumerate(area_ranges):
        gt_covered = np.zeros(num_gts, dtype=bool)
        # if no area range is specified, gt_area_ignore is all False
        if min_area is None:
            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)
        else:
            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (gt_bboxes[:, 3] -
                                                              gt_bboxes[:, 1])
            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)
        for i in sort_inds:
            if ious_max[i] >= iou_thr:
                matched_gt = ious_argmax[i]
                if not (gt_ignore_inds[matched_gt]
                        or gt_area_ignore[matched_gt]):
                    if not gt_covered[matched_gt]:
                        gt_covered[matched_gt] = True
                        tp[k, i] = 1
                    else:
                        fp[k, i] = 1
                # otherwise ignore this detected bbox, tp = 0, fp = 0
            elif min_area is None:
                fp[k, i] = 1
            else:
                bbox = det_bboxes[i, :4]
                area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                if area >= min_area and area < max_area:
                    fp[k, i] = 1
    return tp, fp
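
A toy call to tpfp_default, assuming a pairwise-IoU bbox_overlaps (such as the NumPy sketch after Example #1) is in scope. One detection overlaps the single gt box well, the other overlaps nothing:

import numpy as np

det_bboxes = np.array([[10., 10., 50., 50., 0.9],       # IoU with the gt is about 0.81
                       [200., 200., 240., 240., 0.8]])  # overlaps no gt
gt_bboxes = np.array([[12., 12., 48., 48.]])
tp, fp = tpfp_default(det_bboxes, gt_bboxes, iou_thr=0.5)
print(tp)  # [[1. 0.]] -> the first det is a true positive
print(fp)  # [[0. 1.]] -> the second det is a false positive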
Example #5
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.
        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacky :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            overlaps = bbox_overlaps(boxes.astype(float),
                                     gt_boxes.astype(float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
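
The recall sweep at the end of evaluate_recall, in isolation: given the vector of best-proposal IoUs (one entry per gt box), recall at threshold t is the fraction of gt boxes covered with IoU >= t, and 'ar' averages that over the 0.5:0.95 range. With made-up numbers:

import numpy as np

gt_overlaps = np.array([0.96, 0.81, 0.66, 0.40])  # best proposal IoU for each of 4 gts
num_pos = len(gt_overlaps)
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).sum() / float(num_pos) for t in thresholds])
print(recalls)         # 0.75 for t <= 0.65, then 0.50, then 0.25
print(recalls.mean())  # average recall = 0.525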
Example #6
def cal_rpn(imgsize, featuresize, scale, gtboxes):
    """
    计算rpn
    :param imgsize: 输入图像尺寸
    :param featuresize: 特征图尺寸,比如VGG16基础网络,此大小为w/16, h/16
    :param scale: 输入图像与特征图尺寸缩小比例,比如16
    :param gtboxes: (x1, y1, x2, y2)
    :return: [labels, bbox_targets], base_anchor
    """

    imgh, imgw = imgsize

    # generate the base anchors, 9 in total
    base_anchor = gen_anchor(featuresize, scale)

    # calculate the IoU between each anchor and each gt box, used to label the anchors;
    # with x anchors and y gt boxes, the result is an (x, y) array
    # overlaps = cal_overlaps(base_anchor, gtboxes)
    overlaps = bbox_overlaps(np.ascontiguousarray(base_anchor, dtype=float),
                             np.ascontiguousarray(gtboxes, dtype=float))

    # init labels -1 don't care  0 is negative  1 is positive
    labels = np.empty(base_anchor.shape[0])
    labels.fill(-1)  # initialize all labels to -1 (don't care)

    # for each gt box, find the anchor with the highest IoU
    gt_argmax_overlaps = overlaps.argmax(axis=0)

    # for each anchor, find the gt box with the highest IoU
    anchor_argmax_overlaps = overlaps.argmax(axis=1)
    anchor_max_overlaps = overlaps[range(overlaps.shape[0]),
                                   anchor_argmax_overlaps]

    # IOU > IOU_POSITIVE
    labels[anchor_max_overlaps > IOU_POSITIVE] = 1
    # IOU < IOU_NEGATIVE
    labels[anchor_max_overlaps < IOU_NEGATIVE] = 0
    # ensure that every GT box has at least one positive RPN region
    labels[gt_argmax_overlaps] = 1

    # only keep anchors inside the image; those crossing the boundary are set to don't-care
    outside_anchor = np.where((base_anchor[:, 0] < 0) | (base_anchor[:, 1] < 0)
                              | (base_anchor[:, 2] >= imgw)
                              | (base_anchor[:, 3] >= imgh))[0]
    labels[outside_anchor] = -1

    # subsample positive labels if there are more than RPN_POSITIVE_NUM (default 128)
    fg_index = np.where(labels == 1)[0]
    if len(fg_index) > RPN_POSITIVE_NUM:
        labels[np.random.choice(fg_index,
                                len(fg_index) - RPN_POSITIVE_NUM,
                                replace=False)] = -1

    # subsample negative labels
    bg_index = np.where(labels == 0)[0]
    num_bg = RPN_TOTAL_NUM - np.sum(labels == 1)
    if len(bg_index) > num_bg:
        labels[np.random.choice(bg_index,
                                len(bg_index) - num_bg,
                                replace=False)] = -1

    # calculate bbox targets
    bbox_targets = bbox_transfrom(base_anchor,
                                  gtboxes[anchor_argmax_overlaps, :])

    return [labels, bbox_targets], base_anchor
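
The labelling scheme above, exercised on toy data. IOU_POSITIVE, IOU_NEGATIVE, RPN_POSITIVE_NUM and RPN_TOTAL_NUM are module-level constants in the original repo; the 0.7 / 0.3 thresholds below are assumed stand-ins, and bbox_overlaps_np is the NumPy sketch after Example #1:

import numpy as np

anchors = np.array([[0, 0, 15, 15], [8, 8, 23, 23], [100, 100, 115, 115]], dtype=np.float64)
gtboxes = np.array([[6, 6, 21, 21]], dtype=np.float64)
overlaps = bbox_overlaps_np(anchors, gtboxes)  # shape (3, 1)

labels = np.full(anchors.shape[0], -1.0)       # -1 = don't care
anchor_max = overlaps.max(axis=1)              # best IoU per anchor
labels[anchor_max > 0.7] = 1                   # confident positives
labels[anchor_max < 0.3] = 0                   # confident negatives
labels[overlaps.argmax(axis=0)] = 1            # best anchor per gt is forced positive
print(labels)  # [0. 1. 0.] -- the middle anchor wins despite an IoU below 0.7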