Example #1
File: imdb.py  Project: khp1993/IoU-Net
  def create_roidb_from_box_list(self, box_list, gt_roidb):
    assert len(box_list) == self.num_images, \
      'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
      boxes = box_list[i]
      num_boxes = boxes.shape[0]
      overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

      if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
        gt_boxes = gt_roidb[i]['boxes']
        gt_classes = gt_roidb[i]['gt_classes']
        gt_overlaps = bbox_overlaps(boxes.astype(np.float),
                                    gt_boxes.astype(np.float))
        argmaxes = gt_overlaps.argmax(axis=1)
        maxes = gt_overlaps.max(axis=1)
        I = np.where(maxes > 0)[0]
        overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

      overlaps = scipy.sparse.csr_matrix(overlaps)
      roidb.append({
        'boxes': boxes,
        'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
        'gt_overlaps': overlaps,
        'flipped': False,
        'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
      })
    return roidb
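Every example on this page feeds two box arrays to bbox_overlaps and reads the result as an (N, K) IoU matrix (rows index the first argument, columns the second). The projects above use a Cython kernel for this; purely as a reference, a minimal NumPy sketch with the same interface, assuming the inclusive +1 pixel convention these codebases use, could look like the following.

import numpy as np

def bbox_overlaps_sketch(boxes, query_boxes):
    """Return an (N, K) matrix of IoU between boxes (N, 4) and query_boxes (K, 4).

    Boxes are (x1, y1, x2, y2); widths/heights use the inclusive +1 pixel
    convention of the original Cython kernel (an assumption, for illustration).
    """
    boxes = np.asarray(boxes, dtype=np.float64)
    query_boxes = np.asarray(query_boxes, dtype=np.float64)
    # Pairwise intersection coordinates via broadcasting.
    ix1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
    iw = np.maximum(ix2 - ix1 + 1.0, 0.0)
    ih = np.maximum(iy2 - iy1 + 1.0, 0.0)
    inter = iw * ih
    areas = (boxes[:, 2] - boxes[:, 0] + 1.0) * (boxes[:, 3] - boxes[:, 1] + 1.0)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1.0) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1.0)
    union = areas[:, None] + query_areas[None, :] - inter
    return inter / union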
Example #2
def score_of_edge(v1, v2, iouth, costtype):
    """
    :param v1: live paths
    :param v2:  frames
    :param iouth:
    :param costtype:
    :return:
    """
    # Number of detections at frame t
    N2 = v2['boxes'].shape[0]
    score = np.zeros((1, N2))
    iou = bbox_overlaps(
        np.ascontiguousarray(v2['boxes'], dtype=np.float),
        np.ascontiguousarray(v1['boxes'][-1].reshape(1, -1), dtype=np.float))
    for i in range(0, N2):
        if iou.item(i) >= iouth:
            scores2 = v2['scores'][i]
            scores1 = v1['scores'][-1]
            # if len(v1['allScores'].shape)<2:
            #    v1['allScores'] = v1['allScores'].reshape(1,-1)
            score_similarity = np.sqrt(
                np.sum(((v1['allScores'][-1, :].reshape(1, -1) -
                         v2['allScores'][i, :].reshape(1, -1))**2)))
            if costtype == 'score':
                score[:, i] = scores2
            elif costtype == 'scrSim':
                score[:, i] = 1.0 - score_similarity
            elif costtype == 'scrMinusSim':
                score[:, i] = scores2 + (1. - score_similarity)
    return score
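A toy call to score_of_edge, assuming the dictionary layout the function reads (a live path carrying 'boxes', 'scores' and per-class 'allScores', and a per-frame detection dict with the same keys); the values are illustrative only and bbox_overlaps is assumed to be importable in the same module.

import numpy as np

# Hypothetical toy inputs (not from the project's data pipeline).
live_path = {
    'boxes': np.array([[10., 10., 50., 60.]]),       # last box of the path
    'scores': np.array([0.9]),
    'allScores': np.array([[0.1, 0.9]]),             # per-class scores, last step
}
frame_dets = {
    'boxes': np.array([[12., 11., 52., 58.],
                       [200., 200., 240., 260.]]),
    'scores': np.array([0.8, 0.7]),
    'allScores': np.array([[0.2, 0.8],
                           [0.3, 0.7]]),
}
edge_scores = score_of_edge(live_path, frame_dets, iouth=0.5, costtype='score')
# edge_scores has shape (1, 2); only the first detection clears the IoU threshold.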
Example #3
def _sample_rois(all_rois, proposals, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(np.ascontiguousarray(all_rois[0], dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    try:
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
    except:
        pdb.set_trace()

    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= 0.5)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(max_overlaps < 0.5)[0]

    labels[bg_inds] = 0
    real_labels = np.zeros((labels.shape[0], 21))
    for i in range(labels.shape[0]):
        real_labels[i, labels[i]] = 1
    rois = all_rois
    return real_labels, rois, cls_loss_weights
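The closing loop builds one-hot labels row by row; an equivalent vectorized form of just that step, keeping the hard-coded 21 classes (Pascal VOC's 20 classes plus background), is sketched below on toy labels.

import numpy as np

# Vectorized one-hot construction equivalent to the loop above (toy class indices).
labels = np.array([0, 3, 20, 0])                       # class index per RoI
real_labels = np.zeros((labels.shape[0], 21), dtype=np.float32)
real_labels[np.arange(labels.shape[0]), labels] = 1.0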
Example #4
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image."""

    # We are sampling relations from fg rois, hence each
    # fg box must be assigned to a gt box
    assert (cfg.TRAIN.FG_THRESH >= cfg.TRAIN.BBOX_THRESH)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]

    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    else:
        # sanity check
        assert (gt_inds[0] == 0)
        for i in range(1, len(gt_inds)):
            assert (gt_inds[i] - gt_inds[i - 1] == 1)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)

    # guarding against the case where a gt box doesn't get assigned to itself
    gt_to_ex_inds = [np.where(ex_inds == g)[0][0] for g in gt_inds]
    for i, g in enumerate(gt_to_ex_inds):
        gt_assignment[g] = gt_inds[i]

    # assign rois
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # record target assignments for all foreground rois
    fg_gt_ind_assignment = {}
    for i, e in enumerate(ex_inds):
        if overlaps[e] >= cfg.TRAIN.FG_THRESH:
            fg_gt_ind_assignment[e] = gt_inds[gt_assignment[i]]

    # check if all gt has been assigned
    for g in gt_inds:
        assert (g in list(fg_gt_ind_assignment.values()))

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    # transfer to center and log
    # targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets, fg_gt_ind_assignment
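The commented-out line above defers to bbox_transform for the center/log-space regression targets. That helper is not shown in this listing; a sketch in the usual py-faster-rcnn form (an assumption about the project's own version) follows.

import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    """Regression targets (dx, dy, dw, dh) from example boxes to their GT boxes.

    Sketch of the standard py-faster-rcnn helper, not the project's own code.
    """
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    targets_dw = np.log(gt_widths / ex_widths)
    targets_dh = np.log(gt_heights / ex_heights)
    return np.vstack((targets_dx, targets_dy, targets_dw, targets_dh)).transpose()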
Example #5
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
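_get_bbox_regression_labels is called here but not shown. In py-faster-rcnn-style code it expands (N, 5) target data, class label in column 0, into class-specific (N, 4 * num_classes) arrays; the sketch below assumes that layout and replaces the cfg.TRAIN.BBOX_INSIDE_WEIGHTS constant with a plain parameter.

import numpy as np

def _get_bbox_regression_labels_sketch(bbox_target_data, num_classes,
                                        inside_weights=(1.0, 1.0, 1.0, 1.0)):
    """Expand (N, 5) targets into class-specific targets and inside weights.

    Sketch only; inside_weights stands in for cfg.TRAIN.BBOX_INSIDE_WEIGHTS.
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    for ind in np.where(clss > 0)[0]:
        cls = int(clss[ind])
        start, end = 4 * cls, 4 * cls + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = inside_weights
    return bbox_targets, bbox_inside_weights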
Example #6
    def forward(self, boxes, im_labels, cls_prob_new, proposals):
        eps = 1e-9
        cls_prob_new = cls_prob_new.clamp(eps, 1 - eps)

        num_images, num_classes = im_labels.shape
        assert num_images == 1, 'batch size should be equal to 1'
        # overlaps: (rois x gt_boxes)
        gt_boxes = proposals['gt_boxes']
        gt_labels = proposals['gt_classes'].astype(np.long)
        gt_scores = proposals['gt_scores']
        overlaps = bbox_overlaps(
            np.ascontiguousarray(boxes, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_labels[gt_assignment, 0]
        cls_loss_weights = gt_scores[gt_assignment, 0]

        # Select background RoIs as those with < FG_THRESH overlap
        bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
        labels[bg_inds] = 0
        gt_assignment[bg_inds] = -1

        ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
        cls_loss_weights[ig_inds] = 0.0

        device_id = cls_prob_new.get_device()
        cls_loss_weights = torch.from_numpy(cls_loss_weights)
        labels = torch.from_numpy(labels)
        gt_assignment = torch.from_numpy(gt_assignment)
        gt_labels = torch.from_numpy(gt_labels)
        gt_scores = torch.from_numpy(gt_scores).cuda(device_id)

        loss = torch.tensor(0.).cuda(device_id)

        for i in range(len(gt_boxes)):
            p_mask = torch.where(
                gt_assignment == i,
                torch.ones_like(gt_assignment, dtype=torch.float),
                torch.zeros_like(gt_assignment,
                                 dtype=torch.float)).cuda(device_id)
            p_count = torch.sum(p_mask)
            if p_count > 0:
                mean_prob = torch.sum(
                    cls_prob_new[:, gt_labels[i, 0]] * p_mask) / p_count
                loss = loss - torch.log(mean_prob) * p_count * gt_scores[i, 0]
        n_mask = torch.where(labels == 0, cls_loss_weights,
                             torch.zeros_like(
                                 labels, dtype=torch.float)).cuda(device_id)
        loss = loss - torch.sum(torch.log(cls_prob_new[:, 0]) * n_mask)
        return loss / cls_prob_new.shape[0]
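The positive part of this loss averages cls_prob_new over each cluster of proposals assigned to the same pseudo-GT box and weights the log term by the cluster size and GT score. A NumPy rendering of just that term on toy numbers (ignoring the background term and the CUDA/device handling) is:

import numpy as np

# Toy values: 3 RoIs, 2 classes, one cluster (cluster 0, class 1, score 0.9).
cls_prob = np.array([[0.1, 0.9], [0.2, 0.8], [0.7, 0.3]])   # (num_rois, num_classes)
gt_assignment = np.array([0, 0, -1])    # first two RoIs belong to cluster 0
gt_labels = np.array([[1]])
gt_scores = np.array([[0.9]])

loss = 0.0
for i in range(gt_labels.shape[0]):
    members = np.where(gt_assignment == i)[0]
    if members.size > 0:
        mean_prob = cls_prob[members, gt_labels[i, 0]].mean()
        loss -= np.log(mean_prob) * members.size * gt_scores[i, 0]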
Example #7
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(np.ascontiguousarray(all_rois, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)

    for i in xrange(gt_boxes.shape[0]):
        po_index = np.where(gt_assignment == i)[0]
        if len(po_index) > 0:
            img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
            pc_labels[i] = gt_labels[i, 0]
            pc_count[i] = len(po_index)
            pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])
        else:
            img_cls_loss_weights[i] = 0
            pc_labels[i] = gt_labels[i, 0]
            pc_count[i] = 0
            pc_probs[i] = 0

    return labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs, pc_count, img_cls_loss_weights
Example #8
    def _sample_rois(self, all_rois, proposals):

        gt_boxes = proposals['gt_boxes']
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[0], dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        try:
            max_overlaps = overlaps.max(axis=1)
        except:
            pdb.set_trace()

        fg_inds = np.where(max_overlaps >= 0.5)[0]
        # gt_index = np.where(max_overlaps == 1.0)[0]
        # fg_inds = np.array(list(set(fg_inds)-set(gt_index)))
        pos_samples = np.empty((0, 4))
        if fg_inds.shape[0] != 0:
            pos_samples = np.vstack((pos_samples, all_rois[0][fg_inds, :]))

        return pos_samples
Example #9
def choose_gt(boxes, cls_prob, im_labels):

    boxes = boxes[..., 1:]
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    im_labels_tmp = im_labels[0, :]
    gt_boxes = np.zeros((0, 5), dtype=np.float32)

    if 21 == cls_prob.shape[2]:
        cls_prob = cls_prob[:, :, 1:]

    for i in range(num_classes):
        if im_labels_tmp[i] == 1:
            gt_boxes_tmp = np.zeros((1, 5), dtype=np.float32)
            cls_prob_tmp = cls_prob[:, :, i].data
            max_index = np.argmax(cls_prob_tmp)
            gt_boxes_tmp[:, 0:4] = boxes[:, max_index, :].reshape(1, -1)
            gt_boxes_tmp[:, 4] = i + 1
            gt_boxes = np.vstack((gt_boxes, gt_boxes_tmp))

    # choose pos samples by gt
    overlaps = bbox_overlaps(np.ascontiguousarray(boxes[0], dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    max_overlaps = overlaps.max(axis=1)

    fg_inds = np.where(max_overlaps >= 0.5)[0]
    pos_samples = np.empty((0, 4), dtype=np.float32)
    if fg_inds.shape[0] != 0:
        pos_samples = np.vstack((pos_samples, boxes[0][fg_inds, :]))
        pos_samples = np.hstack((np.zeros((pos_samples.shape[0], 1),
                                          dtype=np.float32), pos_samples))
    pos_samples = Variable(torch.from_numpy(np.array([pos_samples])).cuda())

    gt_boxes = np.array([gt_boxes])
    gt_boxes = Variable(torch.from_numpy(gt_boxes))
    if torch.cuda.is_available():
        gt_boxes = gt_boxes.cuda()

    return gt_boxes, pos_samples
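Stripped of the batch dimension and the torch plumbing, the selection at the top of choose_gt takes the highest-scoring proposal for every image-level class and promotes it to a pseudo ground-truth box. A toy NumPy version of just that step (hypothetical arrays, not the project's data):

import numpy as np

# 3 proposals, 2 foreground classes, both present in the image.
boxes = np.array([[0., 0., 10., 10.],
                  [5., 5., 20., 20.],
                  [30., 30., 60., 60.]])
cls_prob = np.array([[0.1, 0.7],
                     [0.2, 0.1],
                     [0.6, 0.2]])                    # (num_boxes, num_classes)
im_labels = np.array([1, 1])

gt_boxes = np.zeros((0, 5), dtype=np.float32)
for c in range(cls_prob.shape[1]):
    if im_labels[c] == 1:
        top = cls_prob[:, c].argmax()                # best-scoring proposal
        gt_boxes = np.vstack((gt_boxes, np.hstack((boxes[top], [c + 1]))))
# gt_boxes now holds one pseudo-GT box per present class, labelled c + 1.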
Example #10
File: imdb.py  Project: khp1993/IoU-Net
  def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                      area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0 ** 2, 1e5 ** 2],  # all
                   [0 ** 2, 32 ** 2],  # small
                   [32 ** 2, 96 ** 2],  # medium
                   [96 ** 2, 1e5 ** 2],  # large
                   [96 ** 2, 128 ** 2],  # 96-128
                   [128 ** 2, 256 ** 2],  # 128-256
                   [256 ** 2, 512 ** 2],  # 256-512
                   [512 ** 2, 1e5 ** 2],  # 512-inf
                   ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
      # Checking for max_overlaps == 1 avoids including crowd annotations
      # (...pretty hacky :/)
      max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
      gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                         (max_gt_overlaps == 1))[0]
      gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
      gt_areas = self.roidb[i]['seg_areas'][gt_inds]
      valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                               (gt_areas <= area_range[1]))[0]
      gt_boxes = gt_boxes[valid_gt_inds, :]
      num_pos += len(valid_gt_inds)

      if candidate_boxes is None:
        # If candidate_boxes is not supplied, the default is to use the
        # non-ground-truth boxes from this roidb
        non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
        boxes = self.roidb[i]['boxes'][non_gt_inds, :]
      else:
        boxes = candidate_boxes[i]
      if boxes.shape[0] == 0:
        continue
      if limit is not None and boxes.shape[0] > limit:
        boxes = boxes[:limit, :]

      overlaps = bbox_overlaps(boxes.astype(np.float),
                               gt_boxes.astype(np.float))

      _gt_overlaps = np.zeros((gt_boxes.shape[0]))
      for j in range(gt_boxes.shape[0]):
        # find which proposal box maximally covers each gt box
        argmax_overlaps = overlaps.argmax(axis=0)
        # and get the iou amount of coverage for each gt box
        max_overlaps = overlaps.max(axis=0)
        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ind = max_overlaps.argmax()
        gt_ovr = max_overlaps.max()
        assert (gt_ovr >= 0)
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps[j] = overlaps[box_ind, gt_ind]
        assert (_gt_overlaps[j] == gt_ovr)
        # mark the proposal box and the gt box as used
        overlaps[box_ind, :] = -1
        overlaps[:, gt_ind] = -1
      # append recorded iou coverage level
      gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
      step = 0.05
      thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}
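The tail of evaluate_recall reduces the collected gt_overlaps vector to recall at each IoU threshold and their mean; a self-contained numeric check of that reduction on toy values:

import numpy as np

gt_overlaps = np.array([0.45, 0.55, 0.72, 0.91])      # toy best-IoU per GT box
num_pos = gt_overlaps.size
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).sum() / float(num_pos) for t in thresholds])
ar = recalls.mean()                                   # average recall over thresholds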
Example #11
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        im_info,
                        data,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    #height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
        #print "was %s inds, disabling %s, now %s inds" % (
        #len(bg_inds), len(disable_inds), np.sum(labels == 0))

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    #pdb.set_trace()
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    #assert bbox_inside_weights.shape[2] == height
    #assert bbox_inside_weights.shape[3] == width

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    #assert bbox_outside_weights.shape[2] == height
    #assert bbox_outside_weights.shape[3] == width

    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
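anchor_target_layer leans on an _unmap helper to scatter the per-inside-anchor labels and targets back onto the full anchor set. That helper is not shown here; in py-faster-rcnn-derived code it is essentially the following (an assumption about this project's version).

import numpy as np

def _unmap_sketch(data, count, inds, fill=0):
    """Scatter data defined on a subset of anchors (inds) back to size count.

    Sketch of the usual py-faster-rcnn helper, not this project's own code.
    """
    if data.ndim == 1:
        ret = np.empty((count,), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret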
Example #12
  def evaluate_recall(self, scale, candidate_boxes=None, thresholds=None, limit=None, target='left'):
    """Evaluate detection proposal recall metrics.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    gt_overlaps_left = np.zeros(0)
    max_overlaps_inx_left = np.zeros(0)
    gt_overlaps_right = np.zeros(0)
    max_overlaps_inx_right = np.zeros(0)
    num_pos = 0
    for i in range(len(candidate_boxes)):
      # Checking for max_overlaps == 1 avoids including crowd annotations
      # (...pretty hacky :/)
      gt_inds = np.where((self.roidb[i]['boxes_left'][:,3] - self.roidb[i]['boxes_left'][:,1] >= 25) &
                         (self.roidb[i]['occlusion'][:] <= 1) &
                         (self.roidb[i]['truncation'][:] <= 0.3) &
                         (self.roidb[i]['gt_classes'][:] == 1))[0]
      gt_boxes_left = self.roidb[i]['boxes_left'][gt_inds, :]
      gt_boxes_right = self.roidb[i]['boxes_right'][gt_inds, :]
      
      num_pos += len(gt_inds)

      boxes_left = candidate_boxes[i][:,:4]/scale
      boxes_right = candidate_boxes[i][:,4:]/scale

      if boxes_left.shape[0] == 0:
        continue
      if limit is not None and boxes_left.shape[0] > limit:
        boxes_left = boxes_left[:limit, :]
        boxes_right = boxes_right[:limit, :]

      overlaps_left = bbox_overlaps(boxes_left[:,:4].astype(np.float),
                              gt_boxes_left.astype(np.float))
      overlaps_right = bbox_overlaps(boxes_right[:,:4].astype(np.float),
                               gt_boxes_right.astype(np.float))

      # left
      _gt_overlaps_left = np.zeros((gt_boxes_left.shape[0]))
      _max_overlaps_inx_left = np.zeros((gt_boxes_left.shape[0]), dtype=int)
      for j in range(gt_boxes_left.shape[0]):
        # find which proposal box maximally covers each gt box
        argmax_overlaps_left = overlaps_left.argmax(axis=0)
        # and get the iou amount of coverage for each gt box
        max_overlaps_left = overlaps_left.max(axis=0)
        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ind = max_overlaps_left.argmax()
        gt_ovr = max_overlaps_left.max()
        assert (gt_ovr >= 0)
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps_left[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps_left[j] = overlaps_left[box_ind, gt_ind]
        _max_overlaps_inx_left[j] = box_ind
        assert (_gt_overlaps_left[j] == gt_ovr)
        # mark the proposal box and the gt box as used
        overlaps_left[box_ind, :] = -1
        overlaps_left[:, gt_ind] = -1
      # append recorded iou coverage level
      gt_overlaps_left = np.hstack((gt_overlaps_left, _gt_overlaps_left))
      max_overlaps_inx_left = np.hstack((max_overlaps_inx_left, _max_overlaps_inx_left))

      # right
      _gt_overlaps_right = np.zeros((gt_boxes_right.shape[0]))
      _max_overlaps_inx_right = np.zeros((gt_boxes_right.shape[0]), dtype=int)
      for j in range(gt_boxes_right.shape[0]):
        # find which proposal box maximally covers each gt box
        argmax_overlaps_right = overlaps_right.argmax(axis=0)
        # and get the iou amount of coverage for each gt box
        max_overlaps_right = overlaps_right.max(axis=0)
        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ind = max_overlaps_right.argmax()
        gt_ovr = max_overlaps_right.max()
        assert (gt_ovr >= 0)
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps_right[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps_right[j] = overlaps_right[box_ind, gt_ind]
        _max_overlaps_inx_right[j] = box_ind
        assert (_gt_overlaps_right[j] == gt_ovr)
        # mark the proposal box and the gt box as used
        overlaps_right[box_ind, :] = -1
        overlaps_right[:, gt_ind] = -1
      # append recorded iou coverage level
      gt_overlaps_right = np.hstack((gt_overlaps_right, _gt_overlaps_right))
      max_overlaps_inx_right = np.hstack((max_overlaps_inx_right, _max_overlaps_inx_right))

    #gt_overlaps_left = np.sort(gt_overlaps_left)
    if thresholds is None:
      step = 0.05
      thresholds = np.arange(0.1, 0.95 + 1e-5, step)
    
    recalls_left = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls_left[i] = (gt_overlaps_left >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar_left = recalls_left.mean()

    recalls_right = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls_right[i] = (gt_overlaps_right >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar_right = recalls_right.mean()

    recalls_stereo = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls_stereo[i] = ((gt_overlaps_left >= t)&(gt_overlaps_right >= t)&(max_overlaps_inx_right >= max_overlaps_inx_left)).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar_stereo = recalls_stereo.mean()

    return {'ar_left': ar_left, 'recalls_left': recalls_left,\
            'ar_right': ar_right, 'recalls_right': recalls_right,\
            'ar_stereo': ar_stereo, 'recalls_stereo': recalls_stereo, 'thresholds': thresholds}