Exemplo n.º 1
0
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Every example ROI (overlap >= ``cfg.TRAIN.BBOX_THRESH``) is paired with
    the ground-truth ROI it overlaps most, and ``bbox_transform`` is applied
    to each (example, ground-truth) pair.

    Args:
        rois: 2-D array with one box per row; ground-truth boxes are rows of
            this array as well.
        overlaps: per-ROI overlap values; entries equal to 1 mark the
            ground-truth ROIs.
        labels: per-ROI class labels.

    Returns:
        float32 array of shape ``(rois.shape[0], 5)``. Column 0 holds the
        label and columns 1-4 the output of ``bbox_transform`` for example
        ROIs; all other rows are zero. All zeros when the image has no
        ground-truth ROI.
    """
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI.
    # np.float64 replaces the `np.float` alias, which was removed from NumPy
    # (it was just the builtin float, i.e. float64).
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
Exemplo n.º 2
0
def evaluate_signal_proposal(p_bbox_list, g_bbox_list, thredshold):
    """
    Compute precision and recall of single-class box proposals.

    :param p_bbox_list: predicted boxes, shape: [n,4]
        [left_top_x, left_top_y, right_bottom_x, right_bottom_y]
    :param g_bbox_list: ground-truth boxes, shape: [n,4]
    :param thredshold: minimum overlap for a prediction to count as a hit
    :return: (precision, recall); precision is the fraction of predictions
        whose best overlap reaches the threshold, recall is the fraction of
        ground-truth boxes that are the best match of at least one such
        prediction
    """
    # assumes bbox_overlaps returns a (predictions x ground-truth) matrix,
    # as the axis=1 reductions below imply -- TODO confirm.
    # np.float64 replaces the `np.float` alias removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(p_bbox_list, dtype=np.float64),
        np.ascontiguousarray(g_bbox_list, dtype=np.float64))

    # precision: predictions whose best overlap passes the threshold
    best_overlap = np.max(overlaps, axis=1)
    hit_rows = np.where(best_overlap >= thredshold)[0]
    precision = float(len(hit_rows)) / float(len(p_bbox_list))

    # recall: count each ground-truth box once, even when several passing
    # predictions select it; counting predictions instead (as before) lets
    # recall exceed 1 when predictions overlap the same ground-truth box.
    matched_gt = np.unique(np.argmax(overlaps[hit_rows], axis=1))
    recall = float(matched_gt.shape[0]) / float(len(g_bbox_list))

    return precision, recall
Exemplo n.º 3
0
 def report(self, bbox):
     """Record the overlap between a tracker result and the ground truth.

     Called by the tracker once per reported result; ``bbox`` should be in
     the form of (x, y, w, h). The ground truth used is the entry just
     before the current cursor ``self._cur``.
     """
     frame_gt = self._ground_truth[self._cur - 1]
     # One box is reported per call, so only the first overlap entry matters.
     frame_overlap = bbox_overlaps([frame_gt], [bbox])[0]
     per_video = self._overlaps[self._video_name]
     per_video.append(frame_overlap)
Exemplo n.º 4
0
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    For each example ROI (overlap >= ``cfg.TRAIN.BBOX_THRESH``) find the
    ground-truth ROI with the highest overlap, then let ``bbox_transform``
    compute the regression target between the two.

    Args:
        rois: 2-D array, one box per row (ground-truth boxes included).
        overlaps: per-ROI overlap values; a value of exactly 1 identifies a
            ground-truth ROI.
        labels: per-ROI class labels.

    Returns:
        float32 array of shape ``(rois.shape[0], 5)``: label in column 0 and
        the ``bbox_transform`` output in columns 1-4 for example ROIs, zeros
        elsewhere (and all zeros if there is no ground-truth ROI).
    """
    num_rois = rois.shape[0]

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((num_rois, 5), dtype=np.float32)

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
    ex_rois = rois[ex_inds, :]

    # IoU overlap between each ex ROI and each gt ROI. The removed
    # `np.float` alias meant builtin float, so float64 is the same dtype.
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(ex_rois, dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # The gt ROI with max overlap becomes the ex ROI's regression target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]

    targets = np.zeros((num_rois, 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
Exemplo n.º 5
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image box proposals.

        Args:
            box_list: sequence with one 2-D box array per image; its length
                must equal ``self.num_images``.
            gt_roidb: optional ground-truth roidb (indexable per image, each
                entry providing ``'boxes'`` and ``'gt_classes'``), or None.

        Returns:
            A list with one dict per image holding ``'boxes'``,
            ``'gt_classes'`` (all zeros), ``'gt_overlaps'`` (sparse CSR
            matrix of shape (num_boxes, num_classes)), ``'flipped'`` (False)
            and ``'seg_areas'`` (all zeros). When ground truth is available,
            each box with a positive overlap gets its best overlap stored in
            the column of the best-matching ground-truth box's class.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 replaces the `np.float` alias removed from NumPy
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Only boxes that overlap some ground-truth box are recorded
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes':
                boxes,
                'gt_classes':
                np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps':
                overlaps,
                'flipped':
                False,
                'seg_areas':
                np.zeros((num_boxes, ), dtype=np.float32),
            })
        return roidb
Exemplo n.º 6
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        all_scores: per-RoI scores, sampled with the same indices as the RoIs.
        gt_boxes: ground-truth array; columns :4 are used as box coordinates
            and column 4 as the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground first.

    Raises:
        RuntimeError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the `np.float` alias removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.roi_fg_threshold)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.FLAGS.roi_bg_threshold_high) &
                       (max_overlaps >= cfg.FLAGS.roi_bg_threshold_low))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Guard against the case when an image has fewer than fg_rois_per_image
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when there are too few candidates
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously a leftover pdb.set_trace(); fail loudly instead of
        # blocking on a debugger prompt in unattended runs.
        raise RuntimeError(
            'No foreground or background RoIs found; cannot sample RoIs')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Exemplo n.º 7
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.

  Args:
    all_rois: candidate RoIs tensor; columns 1:5 are used as box coordinates.
    all_scores: per-RoI scores, sampled with the same indices as the RoIs.
    gt_boxes: ground-truth tensor; columns :4 are used as box coordinates and
      column 4 as the class label.
    fg_rois_per_image: maximum number of foreground RoIs to sample.
    rois_per_image: total number of RoIs to sample.
    num_classes: forwarded to ``_get_bbox_regression_labels``.

  Returns:
    Tuple ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``
    for the sampled RoIs, foreground first.

  Raises:
    RuntimeError: if no RoI qualifies as foreground or background.
  """

  def _pick(inds, count, replace):
    # Draw `count` random entries of `inds`; the index tensor is created on
    # the same device as `inds` instead of being forced onto CUDA.
    chosen = npr.choice(np.arange(0, inds.numel()), size=int(count), replace=replace)
    return inds[torch.from_numpy(chosen).long().to(inds.device)]

  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  max_overlaps, gt_assignment = overlaps.max(1)
  labels = gt_boxes[gt_assignment, [4]]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI).
  # Use `&`: comparisons yield bool tensors, and adding two bool tensors
  # never produces 2, so the old `(a + b) == 2` test selected nothing.
  bg_mask = (max_overlaps < cfg.TRAIN.BG_THRESH_HI) & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO)
  bg_inds = bg_mask.nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    # Guard against the case when an image has fewer than fg_rois_per_image
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = _pick(fg_inds, fg_rois_per_image, False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    # Sample with replacement only when there are too few candidates
    bg_inds = _pick(bg_inds, bg_rois_per_image, bg_inds.numel() < bg_rois_per_image)
  elif fg_inds.numel() > 0:
    fg_inds = _pick(fg_inds, rois_per_image, fg_inds.numel() < rois_per_image)
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    bg_inds = _pick(bg_inds, rois_per_image, bg_inds.numel() < rois_per_image)
    fg_rois_per_image = 0
  else:
    # Previously a leftover pdb.set_trace(); fail loudly instead of blocking
    # on a debugger prompt in unattended runs.
    raise RuntimeError(
      'No foreground or background RoIs found; cannot sample RoIs')

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()
  roi_scores = all_scores[keep_inds].contiguous()

  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Exemplo n.º 8
0
def uniform_aspect_sample(im, bbox, params, num, stype):
    """Draw randomly jittered boxes around ``bbox`` and label them by overlap.

    ``num`` candidate boxes are generated by shifting the centre of ``bbox``
    (scaled by ``params[0]`` / ``params[1]``) and rescaling its width and
    height, then clipped to the image. A candidate is kept with label 1 when
    its overlap with ``bbox`` exceeds ``params[3]``, with label 0 when the
    overlap is below ``params[4]``, and dropped otherwise. ``params[2]`` and
    ``stype`` are not used here.

    Args:
        im: image array; ``im.shape[0]`` is the height, ``im.shape[1]`` the width.
        bbox: ground-truth box as (x, y, w, h).
        params: five-element sequence of sampling parameters.
        num: number of candidates to draw (must be > 0).
        stype: unused.

    Returns:
        List of dicts with keys ``'img'``, ``'box'`` (x, y, w, h),
        ``'label'`` and ``'overlap'``.
    """
    assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form."
    assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0."
    assert len(params) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format(len(params))
    assert num > 0, "Number of samples should be larger than 0."

    img_width = im.shape[1]
    img_height = im.shape[0]

    # Centre of the ground-truth box
    center_x = bbox[0] + bbox[2] / 2.
    center_y = bbox[1] + bbox[3] / 2.

    # Random draws; the call order is kept so the RNG stream is unchanged.
    shift_x = params[0] * bbox[2] * (npr.rand(num, 1) * 2 - 1)
    shift_y = params[1] * bbox[3] * (npr.rand(num, 1) * 2 - 1)
    base_w = bbox[2] * (1.05 ** (npr.rand(num, 1) * 4 - 2))
    base_h = bbox[3] * (1.05 ** (npr.rand(num, 1) * 4 - 2))
    widths = base_w * (1.05 ** npr.rand(num, 1))
    heights = base_h * (1.05 ** npr.rand(num, 1))

    samples = []
    for dx, dy, full_w, full_h in zip(shift_x[:, 0], shift_y[:, 0],
                                      widths[:, 0], heights[:, 0]):
        cx = center_x + dx
        cy = center_y + dy
        half_w = full_w / 2.
        half_h = full_h / 2.

        # Clip the candidate to the image bounds
        left = max(0, int(cx - half_w))
        top = max(0, int(cy - half_h))
        right = min(img_width, int(cx + half_w))
        bottom = min(img_height, int(cy + half_h))

        sample = (left, top, right - left, bottom - top)
        if int(sample[2]) <= 0 or int(sample[3]) <= 0:
            # Degenerate after clipping
            continue

        overlap = bbox_overlaps([bbox], [sample])[0]
        if overlap > params[3]:
            label = 1
        elif overlap < params[4]:
            label = 0
        else:
            # Neither clearly positive nor clearly negative: discard
            continue

        samples.append({
            'img': im,
            'box': sample,
            'label': label,
            'overlap': overlap
        })
    return samples
Exemplo n.º 9
0
def fusion_target(rois, gt_labels, gt_boxes, gt_boxes3d):
    """Select foreground / false-positive RoIs and build 3-D box targets.

    The ground-truth boxes are appended to the proposals, every RoI is
    matched to its best-overlapping ground-truth box, and RoIs are kept when
    their best overlap is >= 0.5 (foreground) or < 0.01 (false positives,
    labelled 0). No sub-sampling to a fixed batch size is performed here.

    Args:
        rois: proposals, reshaped to (-1, 5) as (i, x1, y1, x2, y2); only
            batch index 0 is supported.
        gt_labels: per-ground-truth class labels.
        gt_boxes: ground-truth 2-D boxes, one row per box.
        gt_boxes3d: ground-truth 3-D boxes, indexed like ``gt_boxes``.

    Returns:
        Tuple ``(rois, labels, targets)`` for the kept RoIs; ``targets`` is
        the output of ``box3d_transform`` with rows of label-0 RoIs zeroed.
    """
    CFG = EasyDict()
    CFG.TRAIN = EasyDict()
    # Batch size / fg fraction are kept for reference only; they are not
    # consumed by the selection below.
    CFG.TRAIN.RCNN_BATCH_SIZE = 128
    CFG.TRAIN.RCNN_FG_FRACTION = 0.25
    CFG.TRAIN.RCNN_FG_THRESH_LO = 0.5

    # Include "ground-truth" in the set of candidate rois
    rois = rois.reshape(-1, 5)  # Proposal (i, x1, y1, x2, y2) coming from RPN
    num = len(gt_boxes)
    zeros = np.zeros((num, 1), dtype=np.float32)
    extended_rois = np.vstack((rois, np.hstack((zeros, gt_boxes))))
    assert np.all(
        extended_rois[:, 0] == 0), 'Only single image batches are supported'

    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the `np.float` alias removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(extended_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    max_overlaps = overlaps.max(axis=1)
    gt_assignment = overlaps.argmax(axis=1)
    labels = gt_labels[gt_assignment]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= CFG.TRAIN.RCNN_FG_THRESH_LO)[0]

    # Select false positive
    fp_inds = np.where((max_overlaps < 0.01))[0]

    # The indices that we're selecting (both fg and bg)
    keep = np.append(fg_inds, fp_inds)
    rois = extended_rois[keep]
    labels = labels[keep]
    # Everything after the foreground entries is a false positive
    labels[fg_inds.size:] = 0

    gt_boxes3d = gt_boxes3d[gt_assignment[keep]]
    et_boxes = rois[:, 1:5]

    et_boxes3d = top_box_to_box3d(et_boxes)
    targets = box3d_transform(et_boxes3d, gt_boxes3d)
    # Background RoIs carry no regression target
    targets[labels == 0, :, :] = 0

    return rois, labels, targets
Exemplo n.º 10
0
def evaluate_signal_bbox(p_bbox_list, g_bbox_list, thredshold):
    """
    Compute the recall of single-class box predictions.

    :param p_bbox_list: predicted boxes, shape: [n,4]
        [left_top_x, left_top_y, right_bottom_x, right_bottom_y]
    :param g_bbox_list: ground-truth boxes, shape: [n,4]
    :param thredshold: minimum overlap for a ground-truth box to count as found
    :return: fraction of ground-truth boxes whose best overlap with any
        prediction reaches the threshold
    """
    # np.float64 replaces the `np.float` alias removed from NumPy.
    # The meaning of the third argument (1) is defined by bbox_overlaps,
    # which is not visible here.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(p_bbox_list, dtype=np.float64),
        np.ascontiguousarray(g_bbox_list, dtype=np.float64),
        1)
    # Best overlap per ground-truth box (reduction over the first axis)
    max_overlaps = np.max(overlaps, axis=0)
    found = np.where(max_overlaps >= thredshold)[0]

    recall = float(len(found)) / float(len(g_bbox_list))

    return recall
def anchor_target_layer(rpn_cls_score,
                        rpn_cls_prob,
                        im_name,
                        gt_boxes_large,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Assign labels and regression targets to RPN anchors.

    The large ground-truth boxes are first split into fine boxes with
    ``split_frame``; the side-refinement of the paper is implemented through
    the extra ``o_target`` / ``k_index`` outputs.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    rpn_cls_prob: not used in this function
    im_name: image name, only forwarded to ``vis_training_sample`` when DEBUG
    gt_boxes_large: ground-truth boxes before splitting; after ``split_frame``
        the result is used as gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not (not used here)
    dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
        (not used here)
    im_info: a list of [image_height, image_width, scale_ratios]; only the
        first entry ``im_info[0]`` is used
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    :return: ``(labels, v_target, o_target, j_index2, k_index2)``, all mapped
        back to the full anchor set. ``labels`` is 1 / 0 / -1 (positive /
        negative / don't care); ``j_index2`` and ``k_index2`` are int32 0/1
        masks marking the anchors used by the two regression losses.
    """
    gt_boxes = split_frame(gt_boxes_large)
    # Base anchors
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # [image_height, image_width, scale_ratios] of the single image
    im_info = im_info[0]

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W): height and width of the feature map
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    # in W H order: shift_x (H, W), shift_y (H, W)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)

    # K is H x W; shifts holds the offset of every feature-map cell in image
    # coordinates, shape (H*W, 4)
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()
         )).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image; anchors crossing the border are
    # dropped here and mapped back at the end
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors, shape (In, 4)
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # The anchors are ready at this point.
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per inside anchor, one column per gt box.
    # np.float64 replaces the `np.float` alias removed from NumPy.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For each anchor: the gt box with the highest overlap, and that overlap
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # For each gt box: the highest overlap reached by any anchor ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor that reaches it (ties included)
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # Mark anchors whose best overlap is below the negative threshold as
    # negatives.
    # NOTE(review): this only runs when RPN_CLOBBER_POSITIVES is set and it
    # runs before the positive assignment below, so negatives never actually
    # clobber threshold positives and no negatives are assigned when the flag
    # is off -- confirm this is intended.
    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # Added fix: negatives near the very top / bottom of the anchor area
    # contain text fragments and disturb training, so background anchors
    # within 50 pixels of the top-most / bottom-most anchor edge are set to
    # "don't care".
    bg_anchor_index = labels == 0

    y_anchor = anchors[:, 3]
    top_anchor_index = y_anchor < anchors[:, 1].min() + 50
    bottom_anchor_index = y_anchor > anchors[:, 3].max() - 50
    assert top_anchor_index.shape == bottom_anchor_index.shape
    top_bottom_anchor_index = top_anchor_index | bottom_anchor_index
    bg_topbottom_anchor_index = bg_anchor_index & top_bottom_anchor_index

    labels[bg_topbottom_anchor_index] = -1

    if DEBUG:
        print('在过滤数量之前:')
        print('正样本:' + str(len(np.where(labels == 1)[0])))
        print('负样本:' + str(len(np.where(labels == 0)[0])))
        print('忽略样本:' + str(len(np.where(labels == -1)[0])))

    # The first version of the labels is complete; now build the regression
    # targets between each anchor and its best-matching gt box.
    v_target, o_target = _compute_targets(anchors,
                                          gt_boxes[argmax_overlaps, :])

    # The loss additionally needs the j and k index sets, so they are
    # computed here and returned alongside the targets.
    # j index ("effective" anchors): positive anchors, plus non-ignored
    # anchors whose best overlap exceeds 0.5.
    positive_index = np.where(labels == 1)[0]
    keep_index = np.where(labels != -1)[0]
    high_overlap_index = np.where(max_overlaps > 0.5)[0]

    # Keep the high-overlap anchors that are not ignored. The previous loop
    # tested the loop counter against keep_index instead of the anchor index
    # itself, which selected the wrong anchors.
    remove_ignore = np.intersect1d(high_overlap_index, keep_index)
    j_index = np.union1d(positive_index, remove_ignore).astype(np.int32)

    j_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
    j_index1[j_index] = 1

    # k index (side anchors).
    # First find the gt boxes that can be regarded as side boxes: every box
    # whose width (x2 - x1) is not 15, plus the following box when x1 is not
    # a multiple of 16 and the preceding box when x2 + 1 is not.
    ori_gt_box = gt_boxes.astype(np.float32, copy=False)
    num_gt = ori_gt_box.shape[0]
    side_candidates = set()
    for i in range(num_gt):
        box = ori_gt_box[i]
        if box[2] - box[0] == 15:
            continue
        side_candidates.add(i)
        if box[0] % 16 != 0:
            side_candidates.add(i + 1)
        if (box[2] + 1) % 16 != 0:
            side_candidates.add(i - 1)
    # Drop neighbours that fall outside the gt list; sorted and duplicate-free
    gt_side_index = np.array(
        sorted(idx for idx in side_candidates
               if 0 <= idx < num_gt)).astype(np.int32)

    # Anchors with the highest overlap for each side gt box are the ones we
    # care about.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    anchor_side_index = gt_argmax_overlaps[gt_side_index]
    side_overlaps = overlaps[anchor_side_index, gt_side_index]

    # Anchors with zero overlap are not real matches and would blow up the
    # side loss, so they are skipped. Very small overlaps (gt narrower than
    # an anchor) become "don't care"; overlaps above 0.05 become positives.
    is_fg_side = side_overlaps > 0.05
    is_nocare_side = (side_overlaps > 0) & ~is_fg_side
    anchor_fg_side_index = np.unique(
        anchor_side_index[is_fg_side]).astype(np.int32)
    anchor_nocare_side_index = np.unique(
        anchor_side_index[is_nocare_side]).astype(np.int32)
    labels[anchor_fg_side_index] = 1
    labels[anchor_nocare_side_index] = -1

    k_index = anchor_fg_side_index.copy()
    k_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
    k_index1[k_index] = 1

    # map up to original set of anchors: the anchors dropped for crossing
    # the image border are added back with fill values
    labels = _unmap(labels, total_anchors, inds_inside,
                    fill=-1)  # outside anchors are "don't care"
    v_target = _unmap(v_target, total_anchors, inds_inside,
                      fill=0)  # outside anchors have no target
    o_target = _unmap(o_target, total_anchors, inds_inside, fill=0)
    j_index2 = _unmap(j_index1, total_anchors, inds_inside,
                      fill=0).astype(np.int32)
    k_index2 = _unmap(k_index1, total_anchors, inds_inside,
                      fill=0).astype(np.int32)

    if DEBUG:
        # Visualise the finally selected samples to check they are sensible
        vis_training_sample(labels, all_anchors, im_name, gt_boxes)
    if DEBUG or SHOW_SOME:
        print('正样本:' + str(len(np.where(labels == 1)[0])))
        print('负样本:' + str(len(np.where(labels == 0)[0])))
        print('忽略样本:' + str(len(np.where(labels == -1)[0])))
    return labels, v_target, o_target, j_index2, k_index2
Exemplo n.º 12
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Label anchors as fg/bg/ignore and compute their box-regression targets.

    Same as the anchor target layer in the original Fast/er R-CNN.

    Args:
        rpn_cls_score: RPN score map; only ``shape[1:3]`` is read, as the
            feature-map ``(height, width)``.
        gt_boxes: ground-truth boxes, one per row. Passed to
            ``bbox.bbox_overlaps`` and, re-indexed per anchor, to
            ``_compute_targets`` (presumably ``[x1, y1, x2, y2, ...]`` --
            confirm against callers).
        im_info: indexable where ``im_info[0]`` bounds anchor column 3
            (image height) and ``im_info[1]`` bounds anchor column 2 (image
            width); ``im_info[2]`` is only printed as the scale when DEBUG.
        _feat_stride: not used by this implementation.
        all_anchors: ``(total_anchors, 4)`` array holding every anchor; it
            must contain ``height * width * num_anchors`` rows for the final
            reshapes to succeed.
        num_anchors: number of anchors per feature-map cell (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, height, width, A)`` with 1 = positive, 0 = negative and
        -1 = don't care; the other three arrays have shape
        ``(1, height, width, A * 4)``.
    """
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('all anchors size {}'.format(all_anchors.shape))
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors that lie completely inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 is used instead of the np.float alias, which was removed in
    # NumPy 1.24 (it was always just the builtin float, i.e. float64).
    overlaps = bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    if DEBUG:
        print('anchors {} --> \n  {}'.format(anchors.shape, anchors))
        print('gt boxes --> \n {}'.format(gt_boxes))
        print('anchors , gt boxes overlaps {} --> \n {}'.format(
            overlaps.shape, overlaps))

    # For every anchor (row): index of the gt box it overlaps most, and
    # the value of that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    if DEBUG:
        print('arg max over laps axis = 1 --> \n {}'.format(argmax_overlaps))
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # For every gt box (column): index of the anchor that overlaps it most.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    if DEBUG:
        print('gt arg max over laps axis = 0 --> \n {}'.format(
            gt_argmax_overlaps))

    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Keep every anchor that ties for the best overlap with some gt box,
    # not only the first one argmax returned.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if DEBUG:
        print('gt arg max over laps 222 axis = 0 --> \n {}'.format(
            gt_argmax_overlaps))

    if not RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    if DEBUG:
        print('labels --> \n {}'.format(labels))

    # fg label: above threshold IOU
    labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1

    if RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    if DEBUG:
        print('fg_inds -->  {}'.format(fg_inds))
        print('labels --> \n {}'.format(labels))

    # subsample negative labels if we have too many; negatives fill whatever
    # part of the batch the positives did not use
    num_bg = RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
    if DEBUG:
        print('bg_inds -->  {}'.format(bg_inds))
        print('labels --> \n {}'.format(labels))

    # Regression target of each anchor is computed against the gt box it
    # overlaps most.
    cgt_boxes = gt_boxes[argmax_overlaps, :]
    bbox_targets = _compute_targets(anchors, cgt_boxes)

    # inside weights: only the positive anchors have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(RPN_BBOX_INSIDE_WEIGHTS)

    # outside weights
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting: foreground gets 1, background gets 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((RPN_POSITIVE_WEIGHT > 0) & (RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (RPN_POSITIVE_WEIGHT / np.sum(labels == 1))
        negative_weights = ((1.0 - RPN_POSITIVE_WEIGHT) / np.sum(labels == 0))

    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        print('labels 2 --> \n {}'.format(labels))

    # map up to original set of anchors (the ones outside the image get
    # label -1 and zero targets/weights)
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    if DEBUG:
        print('labels 2 --> \n {}'.format(labels))
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # reshape the flat per-anchor arrays back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Exemplo n.º 13
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional sequence indexed by image number giving
                the proposal boxes of each image. When None, the roidb boxes
                whose ``gt_classes`` entry is 0 are used as proposals.
            thresholds: IoU thresholds at which recall is computed. Defaults
                to 0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate; one of
                'all', 'small', 'medium', 'large', '96-128', '128-256',
                '256-512', '512-inf'.
            limit: if given, only the first ``limit`` proposals of each image
                are used.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Inclusive [min_area, max_area] bounds for every supported key.
        area_ranges = {
            'all': [0**2, 1e5**2],
            'small': [0**2, 32**2],
            'medium': [32**2, 96**2],
            'large': [96**2, 1e5**2],
            '96-128': [96**2, 128**2],
            '128-256': [128**2, 256**2],
            '256-512': [256**2, 512**2],
            '512-inf': [512**2, 1e5**2],
        }
        assert area in area_ranges, 'unknown area range: {}'.format(area)
        min_area, max_area = area_ranges[area]

        # Max overlap recorded for each gt box, collected image by image.
        per_image_overlaps = [np.zeros(0)]
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= min_area)
                                     & (gt_areas <= max_area))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            # Counted before the "no proposals" skip below, so gt boxes of
            # images without proposals still count as misses.
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 replaces the np.float alias removed in NumPy 1.24.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Greedy one-to-one matching. Each round consumes one proposal
            # and one gt box, so at most min(#proposals, #gt) rounds are
            # possible; looping further would only see the -1 "used" markers
            # and trip the assertion below. Unmatched gt boxes keep overlap 0.
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            per_image_overlaps.append(_gt_overlaps)

        gt_overlaps = np.sort(np.hstack(per_image_overlaps))
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer; only
                            its shape is read here
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]; column 4 is written
                            into the labels of positive anchors
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not (not used here)
    dontcare_areas: (D, 4), some areas may contains small objs but no
                            labelling. D may be 0 (not used here)
    im_info: a list whose first element is
                            [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (1, H, W, A), for each anchor, 0 denotes bg, >=1 fg (the
                            class of the matched gt box), -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), distances of the anchors to the gt_boxes(may contains some transform)
                            that are the regression objectives
    rpn_bbox_inside_weights: (1, H, W, Ax4) weights of each boxes, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (1, H, W, Ax4) used to balance the fg/bg,
                            because the numbers of bgs and fgs may differ significantly
    """
    # Base anchors for one feature-map cell.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]  # [image_height, image_width, ...]

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # feature-map height and width
    height, width = rpn_cls_score.shape[1:3]

    # 1. Place the base anchors on every feature-map cell: the shift of a
    # cell is its grid position multiplied by the feature stride.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
         shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors that lie completely inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # Anchors are ready; now label them.
    # label: >=1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 is used instead of the np.float alias, which was removed in
    # NumPy 1.24 (it was always just the builtin float, i.e. float64).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # For every anchor: index of the gt box with the largest overlap, and
    # the value of that overlap.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = np.max(overlaps, axis=1)
    # For every gt box: index of the anchor with the largest overlap.
    gt_argmax_overlaps = overlaps.argmax(axis=0)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, the anchor with highest overlap takes the
    # class of that gt box
    labels[gt_argmax_overlaps] = gt_boxes[:, 4]
    # fg label: above threshold IOU, labelled with the class of the best gt
    max_iou_pp = max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP
    labels[max_iou_pp] = gt_boxes[argmax_overlaps[max_iou_pp], 4]

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    # TODO: may need revisiting -- when text fragments are used as targets
    # the number of positives can be very large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many; negatives fill whatever
    # part of the batch the positives did not use
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels >= 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Labels are done; compute the regression target of every anchor
    # against the gt box it overlaps most.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: configured value for foreground anchors, 0 elsewhere
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels >= 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting: foreground gets 1, background gets 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The "+ 1" guards against dividing by zero when a class has no
        # samples, so it belongs inside the denominator. It used to be
        # added to the quotient, which produced weights above 1 (or inf).
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels >= 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels >= 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors: the anchors dropped for leaving the
    # image come back with label -1 (dontcare) and zero targets/weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # reshape the flat per-anchor arrays back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Exemplo n.º 15
0
def mdnet_sample(im, bbox, params, num, stype):
    """Draw `num` labelled boxes scattered around a ground-truth box.

    Each candidate is the ground-truth box shifted by Gaussian offsets and
    rescaled by a Gaussian factor, then clipped to the image. Candidates whose
    overlap with `bbox` is neither above the positive threshold nor below the
    negative threshold are rejected and redrawn.

    :arg
    im: image array; ``im.shape[0]`` is used as height, ``im.shape[1]`` as width
    bbox: ground-truth box (x, y, w, h)
    params: five-tuple; ``params[0]`` / ``params[1]`` scale the std-dev of the
            x / y offsets, ``params[2]`` is the std-dev of the scale factor,
            ``params[3]`` is the positive overlap threshold and ``params[4]``
            the negative overlap threshold
    num: number of samples
    stype: not used by this function
    :return
    bboxes: list of dicts
            {
                'img': im,
                'box': (x, y, w, h),
                'label': 1 for positive, 0 for negative,
                'overlap': overlap with the ground-truth box
            }
    """
    assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form."
    assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0."
    assert len(
        params
    ) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format(
        len(params))
    assert num > 0, "Number of samples should be larger than 0."

    im_h, im_w = im.shape[0], im.shape[1]
    gt_x, gt_y, gt_w, gt_h = bbox[0], bbox[1], bbox[2], bbox[3]

    # Centre of the ground-truth box.
    centerx = gt_x + gt_w / 2
    centery = gt_y + gt_h / 2
    # Mean side length; the offset spread is proportional to it.
    mean_side = (gt_w + gt_h) / 2

    bboxes = []
    while len(bboxes) < num:
        # Random displacement of the centre and a single scale factor that
        # is shared by both axes.
        offsetx = rd.gauss(0, params[0] * mean_side)
        offsety = rd.gauss(0, params[1] * mean_side)
        scale = rd.gauss(1, params[2])

        half_w = gt_w * scale / 2
        half_h = gt_h * scale / 2
        new_cx = centerx + offsetx
        new_cy = centery + offsety

        # Corner form (x1, y1, x2, y2), clipped to the image.
        x1 = max(0, new_cx - half_w)
        y1 = max(0, new_cy - half_h)
        x2 = min(im_w, new_cx + half_w)
        y2 = min(im_h, new_cy + half_h)

        # Back to (x, y, w, h); drop boxes that collapse below one pixel.
        sample = (x1, y1, x2 - x1, y2 - y1)
        if int(sample[2]) <= 0 or int(sample[3]) <= 0:
            continue

        # since there is only one query box, take the first entry of the
        # overlaps result
        overlap = bbox_overlaps([bbox], [sample])[0]
        if overlap > params[3]:
            label = 1
        elif overlap < params[4]:
            label = 0
        else:
            # Ambiguous overlap: neither positive nor negative, redraw.
            continue

        bboxes.append({
            'img': im,
            'box': sample,
            'label': label,
            'overlap': overlap
        })
    return bboxes
Exemplo n.º 16
0
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer;
        only its shape is read here
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not; may be None
    dontcare_areas: (D, 4), some areas may contain small objs but no
        labelling. D may be 0, and the argument may be None
    im_info: sequence whose first entry is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (the default list is never mutated here)
    anchor_scales: the scales passed to generate_anchors (the default list
        is never mutated here)

    Returns
    ----------
    rpn_labels: (1, H, W, A), for each anchor 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression targets computed by
        _compute_targets between each anchor and its best-overlapping gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS
        for fg anchors, 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), weights used to balance fg/bg,
        because the numbers of bgs and fgs may differ significantly
    """
    # Base anchors, one (x1, y1, x2, y2) row per anchor shape.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        # Accumulators for the debug statistics printed further down.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # From here on im_info is the single [height, width, scale] row.
    im_info = im_info[0]

    # Algorithm:
    # for each (H, W) location i
    #   generate A anchor boxes shifted to cell i
    # filter out-of-image anchors
    # measure GT overlap and label the anchors

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # feature-map height and width
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate the shifted anchors covering the whole feature map.
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    # meshgrid returns both grids with shape (H, W)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # K is H x W; each row is the image-space offset (dx, dy, dx, dy) of a cell
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
         shift_y.ravel())).transpose()  # (H*W, 4)
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors, shape (In, 4)
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # The anchors are ready; now label them.
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), one row per inside anchor and one column per gt box.
    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: index of the gt box it overlaps most ...
    argmax_overlaps = overlaps.argmax(axis=1)
    # ... and the value of that overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: index of one anchor that overlaps it most
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    if DEBUG:
        print('获取所有anchor中与gt相交最大的哪几个anchor的索引')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)
    # best overlap value reached for every gt box, shape (G,)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if DEBUG:
        print('这一步是找到那些同样与gt有最大overlap的索引,上一步找到的4个,这一步找到其他重复的')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)
        print('gt_argmax_overlaps', gt_argmax_overlaps)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor(s) with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    if DEBUG:
        print('在过滤数量之前:')
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[
            0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float64))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many:
    # at most RPN_FG_FRACTION * RPN_BATCHSIZE positives are kept
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # randomly demote the surplus positives to dontcare
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many:
    # negatives fill whatever part of the batch the positives left free
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    if DEBUG:
        print("考虑均衡住正负样本以后:")
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # Labels are done; compute the regression targets.
    # --------------------------------------------------------------
    # target of every anchor is computed against its best-overlapping gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: configured value for fg anchors, 0 for everything else
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples: positives weigh 1, negatives 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The "+ 1" belongs to the denominator: it guards against a zero
        # count. Outside the division it would push every weight above 1
        # and still divide by zero when no anchor has that label.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors: the anchors dropped for lying
    # outside the image come back as dontcare (-1) with zero targets/weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape everything back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN

    Labels the pre-computed anchors as fg (1), bg (0) or dontcare (-1) from
    their overlap with the ground-truth boxes and builds the matching
    bounding-box regression targets and weights, using torch tensors.

    Parameters
    ----------
    rpn_cls_score: tensor whose dims 2 and 3 are read as feature-map height
        and width (the code comments describe it as (bs, c, h, w))
    gt_boxes: tensor/Variable; columns 0..3 are used as box coordinates
    im_info: indexable; [0] is compared against anchor y2 (image height) and
        [1] against anchor x2 (image width)
    _feat_stride: not used in this function
    all_anchors: (total_anchors, 4) tensor/Variable of anchors
    num_anchors: number of anchors per feature-map cell (A)

    Returns
    ----------
    rpn_labels: labels viewed as (1, 1, A * height, width)
    rpn_bbox_targets: targets viewed as (1, height, width, A * 4)
    rpn_bbox_inside_weights: viewed as (1, height, width, A * 4)
    rpn_bbox_outside_weights: viewed as (1, height, width, A * 4)
    """
    A = num_anchors
    total_anchors = all_anchors.size()[0]
    # NOTE(review): K is not read anywhere below.
    K = total_anchors / num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # pytorch (bs, c, h, w)
    height, width = rpn_cls_score.size()[2:4]

    # only keep anchors inside the image
    inds_inside = (
        (all_anchors.data[:, 0] >= -_allowed_border) &
        (all_anchors.data[:, 1] >= -_allowed_border) &
        (all_anchors.data[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors.data[:, 3] < im_info[0] + _allowed_border)  # height
    ).nonzero()[:, 0].long()

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', inds_inside.size()[0])

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    if DEBUG:
        print('anchors.shape', anchors.size())

    # label: 1 is positive, 0 is negative, -1 is dont care
    # (allocated via inds_inside.new, so it shares inds_inside's tensor type)
    labels = inds_inside.new(inds_inside.size()[0]).fill_(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt) shape is A x G
    overlaps = bbox_overlaps(anchors.data, gt_boxes[:, :4].data)
    # per anchor: best overlap value and the gt index reaching it
    max_overlaps, argmax_overlaps = torch.max(overlaps, dim=1)
    # per gt box: best overlap value over all anchors
    gt_max_overlaps, gt_argmax_overlaps = torch.max(overlaps, dim=0)
    # row indices of every anchor that ties for the best overlap with some gt
    gt_argmax_overlaps = (overlaps == (
        gt_max_overlaps.unsqueeze(0).expand_as(overlaps))).nonzero()[:, 0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = (labels == 1).nonzero()[:, 0]
    if fg_inds.numel() > num_fg:
        # draw positions into fg_inds (not anchor indices) without
        # replacement, then map them back to anchor indices
        inds = fg_inds.new(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int((len(fg_inds) - num_fg)),
                       replace=False)).long()
        disable_inds = fg_inds[inds]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    # NOTE(review): (labels == 1).sum() is a torch result rather than a plain
    # int, so num_bg and the comparison below depend on the PyTorch version's
    # scalar semantics - confirm.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - (labels == 1).sum()
    bg_inds = (labels == 0).nonzero()[:, 0]
    if bg_inds.numel() > num_bg:
        inds = bg_inds.new(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int((len(bg_inds) - num_bg)),
                       replace=False)).long()
        disable_inds = bg_inds[inds]
        labels[disable_inds] = -1

    # regression target of every anchor against its best-overlapping gt box
    bbox_targets = _compute_targets(anchors.data,
                                    gt_boxes[argmax_overlaps][:, :4].data)
    bbox_inside_weights = bbox_targets.new(inds_inside.size()[0], 4).zero_()
    # only the positive ones have regression targets
    inds = (labels == 1).nonzero().view(-1)
    # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
    # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds)
    # one copy of the configured inside weights per positive anchor
    dim_value = bbox_targets.new(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS).view(
        -1, 4).expand(inds.size(0), 4)
    bbox_inside_weights[inds, :] = dim_value

    bbox_outside_weights = bbox_targets.new(inds_inside.size()[0], 4).zero_()
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        # NOTE(review): a NumPy array is divided by a torch result here;
        # confirm this mixes correctly on the PyTorch version in use.
        num_examples = (labels >= 0).sum()
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # NOTE(review): in this branch the weights are single values rather
        # than (1, 4) arrays, yet they are reshaped with view(-1, 4) below -
        # confirm this branch actually runs.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (labels == 1).sum())
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (labels == 0).sum())

    # outside weights for the positive anchors
    inds = (labels == 1).nonzero().view(-1)
    # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
    # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds)
    dim_value = bbox_targets.new(positive_weights).view(-1, 4).expand(
        inds.size(0), 4)
    bbox_outside_weights[inds, :] = dim_value

    # outside weights for the negative anchors
    inds = (labels == 0).nonzero().view(-1)
    # dim1_inds = inds.unsqueeze(1).expand(inds.size(0), 4)
    # dim2_inds = inds.new((0,1,2,3)).view(-1,4).expand_as(dim1_inds)
    dim_value = bbox_targets.new(negative_weights).view(-1, 4).expand(
        inds.size(0), 4)
    bbox_outside_weights[inds, :] = dim_value

    # map up to original set of anchors
    # (anchors outside the image get label -1 and zero targets/weights)
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.view((1, height, width, A)).permute(0, 3, 1,
                                                        2).contiguous()
    labels = labels.view((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .view((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .view((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .view((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Exemplo n.º 18
0
def gaussian_sample(im, bbox, params, num, stype):
    """Draw candidate boxes around ``bbox`` using clipped Gaussian jitter.

    Up to ``num`` boxes are generated by perturbing the centre and the size
    of ``bbox``; each one is clipped to the image, compared with ``bbox``
    through ``bbox_overlaps`` and labelled positive or negative.

    Parameters
    ----------
    im: image array; ``im.shape[0]`` is used as height and ``im.shape[1]``
        as width. It is stored by reference in every returned sample.
    bbox: ground-truth box in (x, y, w, h) form with positive w and h
    params: five-tuple (x offset factor, y offset factor, scale factor,
        positive overlap threshold, negative overlap threshold)
    num: number of candidates to draw, must be > 0
    stype: 'TRAIN' or 'TEST'; selects how candidate sizes are floored

    Returns
    ----------
    list of dicts with keys 'img', 'box' (x, y, w, h), 'label' (1 when the
    overlap exceeds params[3], 0 when it is below params[4]) and 'overlap'.
    Degenerate candidates and candidates whose overlap lies between the two
    thresholds are dropped, so the list may hold fewer than ``num`` entries.

    Raises
    ----------
    ValueError: if ``stype`` is neither 'TRAIN' nor 'TEST'
    """
    assert len(bbox) == 4, "Invalid ground-truth(x, y, w, h) form."
    assert bbox[2] > 0 and bbox[3] > 0, "Width or height < 0."
    assert len(
        params
    ) == 5, "Invalid {:d}-tuple params(should be five-tuple).".format(
        len(params))
    assert num > 0, "Number of samples should be larger than 0."
    # Fail early and clearly: an unknown stype used to leave the candidate
    # sizes undefined and crash later with an UnboundLocalError.
    if stype not in ('TRAIN', 'TEST'):
        raise ValueError(
            "Unknown stype {!r}; expected 'TRAIN' or 'TEST'.".format(stype))

    im_h = im.shape[0]
    im_w = im.shape[1]

    # Centre of the ground-truth box
    centerx = bbox[0] + bbox[2] / 2
    centery = bbox[1] + bbox[3] / 2

    def _clipped_noise():
        # One draw of 0.5 * N(0, 1) per candidate, clipped to [-1, 1].
        return np.clip(0.5 * randn(num, 1), -1.0, 1.0)[:, 0]

    # Offsets scale with the rounded mean side length of the box. The three
    # noise draws are made in x, y, scale order.
    mean = round((bbox[2] + bbox[3]) / 2.)
    offsetx = params[0] * mean * _clipped_noise()
    offsety = params[1] * mean * _clipped_noise()
    scale = 1.05**(params[2] * _clipped_noise())

    w = bbox[2] * scale
    h = bbox[3] * scale
    if stype == 'TRAIN':
        # NOTE(review): TRAIN shrinks each side by 10 px before applying the
        # 10 px floor; confirm this (rather than a clamp against the image
        # size) is what is intended.
        ws = np.maximum(10, w - 10)
        hs = np.maximum(10, h - 10)
    else:  # 'TEST'
        ws = np.maximum(10, w)
        hs = np.maximum(10, h)

    bboxes = []
    for i in range(num):
        hw = ws[i] / 2
        hh = hs[i] / 2
        # candidate in (x1, y1, x2, y2) form, clipped to the image
        box = (max(0, int(centerx + offsetx[i] - hw)),
               max(0, int(centery + offsety[i] - hh)),
               min(im_w, int(centerx + offsetx[i] + hw)),
               min(im_h, int(centery + offsety[i] + hh)))
        # back to (x, y, w, h)
        sample = (box[0], box[1], box[2] - box[0], box[3] - box[1])
        if int(sample[2]) <= 0 or int(sample[3]) <= 0:
            continue
        # only one query box, so take the first entry of the overlaps
        overlap = bbox_overlaps([bbox], [sample])[0]
        if overlap > params[3]:
            label = 1
        elif overlap < params[4]:
            label = 0
        else:
            # neither clearly positive nor clearly negative: drop it
            continue
        bboxes.append({
            'img': im,
            'box': sample,
            'label': label,
            'overlap': overlap
        })
    return bboxes
Exemplo n.º 19
0
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride = [16,], anchor_scales = [16,]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer;
        only its shape is read here
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not; may be None
    dontcare_areas: (D, 4), some areas may contain small objs but no
        labelling. D may be 0, and the argument may be None
    im_info: sequence whose first entry is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (the default list is never mutated here)
    anchor_scales: the scales passed to generate_anchors (the default list
        is never mutated here)

    Returns
    ----------
    rpn_labels: (1, H, W, A), for each anchor 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression targets computed by
        _compute_targets between each anchor and its best-overlapping gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS
        for fg anchors, 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), weights used to balance fg/bg,
        because the numbers of bgs and fgs may differ significantly
    """
    # Base anchors, one (x1, y1, x2, y2) row per anchor shape.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Accumulators for the debug statistics printed further down.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # From here on im_info is the single [height, width, scale] row.
    im_info = im_info[0]

    # Algorithm:
    # for each (H, W) location i
    #   generate A anchor boxes shifted to cell i
    # filter out-of-image anchors
    # measure GT overlap and label the anchors

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # feature-map height and width
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate the shifted anchors covering the whole feature map.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    # meshgrid returns both grids with shape (H, W)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    # K is H x W; each row is the image-space offset (dx, dy, dx, dy) of a cell
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # The anchors are ready; now label them.
    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), one row per inside anchor and one column per gt box.
    # np.float64 replaces the np.float alias, which NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: index of the gt box it overlaps most ...
    argmax_overlaps = overlaps.argmax(axis=1)
    # ... and the value of that overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: index of one anchor that overlaps it most
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    # best overlap value reached for every gt box, shape (G,)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor(s) with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64), # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64) # A x 4
        )
        intersecs_ = intersecs.sum(axis=0) # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64), # H x 4
                np.ascontiguousarray(anchors, dtype=np.float64)) # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1) # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many:
    # at most RPN_FG_FRACTION * RPN_BATCHSIZE positives are kept
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # randomly demote the surplus positives to dontcare
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many:
    # negatives fill whatever part of the batch the positives left free
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Labels are done; compute the regression targets.
    # --------------------------------------------------------------
    # target of every anchor is computed against its best-overlapping gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: configured value for fg anchors, 0 for everything else
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples: positives weigh 1, negatives 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # The "+ 1" belongs to the denominator: it guards against a zero
        # count. Outside the division it would push every weight above 1
        # and still divide by zero when no anchor has that label.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors: the anchors dropped for lying
    # outside the image come back as dontcare (-1) with zero targets/weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape everything back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Exemplo n.º 20
0
def anchor_target_layer(rpn_cls_score,
                        gt_boxes_large,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Label anchors and build regression targets for side-refinement training.

    The large ground-truth boxes are split into fine-grained boxes with
    ``split_frame``; every anchor that lies inside the image is then labelled
    as foreground (1) or background (0) from its overlap with those boxes, and
    the vertical / side targets are computed with ``_compute_targets``.
    This variant does not subsample positives or negatives.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer; only
        its shape is read (batch size must be 1)
    gt_boxes_large: ground-truth boxes handed to ``split_frame``
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not (unused here)
    dontcare_areas: (D, 4), some areas may contains small objs but no
        labelling. D may be 0 (unused here)
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (the default list is never mutated)
    anchor_scales: the scales passed to ``generate_anchors`` (the default
        list is never mutated)

    Returns
    -------
    labels, v_target, o_target, j_index2, k_index2
        Per-anchor labels, the two targets returned by ``_compute_targets``,
        and two int32 0/1 masks: ``j`` marks positive anchors or non-ignored
        anchors with overlap > 0.5, ``k`` marks the anchors that best match a
        side ground-truth box. All five are passed through ``_unmap`` against
        the full anchor set (presumably padding outside-image anchors with the
        given fill value -- confirm against ``_unmap``).
    """
    gt_boxes = split_frame(gt_boxes_large)
    # Base anchors that get replicated at every feature-map cell.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]  # [image_height, image_width, scale]

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # feature-map height and width
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)

    # 1. Generate all anchors by shifting the base anchors over the grid.
    shift_x = np.arange(0, width) * _feat_stride  # (W)
    shift_y = np.arange(0, height) * _feat_stride  # (H)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (H, W)

    # One (x, y, x, y) offset per feature-map cell: (H*W, 4)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border)  # width
        & (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    anchors = all_anchors[inds_inside, :]  # (In, 4)
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care.
    # Every inside anchor starts as background (0); this variant never
    # assigns -1 to an inside anchor.
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(0)

    # overlaps between the anchors and the gt boxes, shape (In, G).
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Best gt box for every anchor and the corresponding overlap value.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Best anchor for every gt box (first one on ties).
    best_anchor_per_gt = overlaps.argmax(axis=0)
    if DEBUG:
        print('获取所有anchor中与gt相交最大的哪几个anchor的索引')
        print('gt_argmax_overlaps.shape', best_anchor_per_gt.shape)
    gt_max_overlaps = overlaps[best_anchor_per_gt,
                               np.arange(overlaps.shape[1])]
    # Every anchor that ties for the maximum overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if DEBUG:
        print('这一步是找到那些同样与gt有最大overlap的索引,上一步找到的4个,这一步找到其他重复的')
        print('gt_argmax_overlaps.shape', gt_argmax_overlaps.shape)

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    if DEBUG:
        print('在过滤数量之前:')
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # The number of positive / negative samples is not limited here.

    if DEBUG:
        print("考虑均衡住正负样本以后:")
        print('正样本:', len(np.where(labels == 1)[0]))
        print('负样本:', len(np.where(labels == 0)[0]))
        print('忽略样本:', len(np.where(labels == -1)[0]))

    # Labels are done; compute the regression targets of every inside anchor
    # against the gt box it overlaps most.
    v_target, o_target = _compute_targets(anchors,
                                          gt_boxes[argmax_overlaps, :])

    # j index ("effective" anchors): positive anchors, or anchors whose best
    # overlap exceeds 0.5 and that are not ignored. The membership test is on
    # the anchor itself; the previous loop compared its loop counter with the
    # kept indices, which only worked while no label was -1.
    j_mask = (labels == 1) | ((max_overlaps > 0.5) & (labels != -1))
    j_index = np.where(j_mask)[0].astype(np.int32)
    j_index1 = j_mask.astype(np.int32)

    # k index (side anchors): a gt box counts as a side box when its width
    # (x2 - x1) differs from 15.
    # ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False)
    ori_gt_box = gt_boxes.astype(np.float32, copy=False)
    gt_side_index = np.where(
        ori_gt_box[:, 2] - ori_gt_box[:, 0] != 15)[0].astype(np.int32)

    # Anchors with the highest overlap for each side gt box.
    anchor_side_index = best_anchor_per_gt[gt_side_index]
    # Drop anchors whose overlap with their side box is 0: they are not real
    # matches and would make the side loss abnormally large.
    side_overlaps = overlaps[anchor_side_index, gt_side_index]
    # Sorted, de-duplicated anchor indices (relative to the inside anchors).
    k_index = np.unique(
        anchor_side_index[side_overlaps > 0]).astype(np.int32)

    k_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
    k_index1[k_index] = 1

    if DEBUG:
        print('jIndex1:', j_index1.shape)
        print('k_index1:', k_index1.shape)

    # map up to original set of anchors: anchors outside the image were
    # dropped earlier and are added back with the fill value.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    v_target = _unmap(v_target, total_anchors, inds_inside, fill=0)
    o_target = _unmap(o_target, total_anchors, inds_inside, fill=0)
    j_index2 = _unmap(j_index1, total_anchors, inds_inside,
                      fill=0).astype(np.int32)
    k_index2 = _unmap(k_index1, total_anchors, inds_inside,
                      fill=0).astype(np.int32)

    if DEBUG:
        print('loss_1 index:', np.where(labels != -1)[0].shape[0])
        print('j_index:', j_index.shape)
        print('k_index:', k_index.shape)
        print('j_index2:', j_index2.shape)
        print('k_index2:', k_index2.shape)

        print('label shape', labels.shape)

        print('v_target shape', v_target.shape)
        print('o_target shape', o_target.shape)
    return labels, v_target, o_target, j_index2, k_index2
Exemplo n.º 21
0
def anchor_target_layer(rpn_cls_score,
                        rpn_cls_prob,
                        im_name,
                        gt_boxes_large,
                        gt_ishard,
                        dontcare_areas,
                        im_info,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[
                            16,
                        ]):
    """
    Label anchors for side-refinement training, with cached hard negatives.

    When ``im_name`` differs from the module-level ``img_name`` the image is
    treated as new: the large ground-truth boxes are split with
    ``split_frame``, anchors are labelled and subsampled, targets are computed
    and the results are cached in module-level ``tmp_*`` variables. When the
    same ``im_name`` arrives again, the cached results are reused and every
    background anchor whose foreground probability exceeds 0.5 (a hard
    negative) is added to the cached labels as a negative sample.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer; only
        its shape is read (batch size must be 1)
    rpn_cls_prob: class probabilities, reshaped to (-1, 2) here; column 1 is
        used as the foreground score (only read on a repeated image)
    im_name: identifier of the current image, compared with ``img_name``
    gt_boxes_large: ground-truth boxes handed to ``split_frame``
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not (the branch that
        would use it is currently disabled)
    dontcare_areas: (D, 4), some areas may contains small objs but no
        labelling. D may be 0
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (the default list is never mutated)
    anchor_scales: the scales passed to ``generate_anchors`` (the default
        list is never mutated)

    Returns
    -------
    labels, v_target, o_target, j_index2, k_index2
        Per-anchor labels (1 positive, 0 negative, -1 ignored), the two
        targets returned by ``_compute_targets``, and two int32 0/1 masks for
        the effective (j) and side (k) anchors.
    """
    global img_name
    global tmp_labels, tmp_all_bg_index, tmp_v_target, tmp_o_target, \
        tmp_j_index2, tmp_k_index2, tmp_inds_inside

    # A different name than the previous call means the image is new.
    first_time = bool(img_name != im_name)
    img_name = im_name

    if first_time:  # rebuild every cached tmp_* object for this image
        gt_boxes = split_frame(gt_boxes_large)
        # Base anchors that get replicated at every feature-map cell.
        _anchors = generate_anchors(scales=np.array(anchor_scales))
        _num_anchors = _anchors.shape[0]

        if DEBUG:
            print('anchors:')
            print(_anchors)
            print('anchor shapes:')
            print(
                np.hstack((
                    _anchors[:, 2::4] - _anchors[:, 0::4],
                    _anchors[:, 3::4] - _anchors[:, 1::4],
                )))

        # allow boxes to sit over the edge by a small amount
        _allowed_border = 0

        im_info = im_info[0]  # [image_height, image_width, scale]

        assert rpn_cls_score.shape[0] == 1, \
            'Only single item batches are supported'

        # feature-map height and width
        height, width = rpn_cls_score.shape[1:3]

        if DEBUG:
            print('AnchorTargetLayer: height', height, 'width', width)
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape', gt_boxes.shape)

        # 1. Generate all anchors by shifting the base anchors over the grid.
        shift_x = np.arange(0, width) * _feat_stride  # (W)
        shift_y = np.arange(0, height) * _feat_stride  # (H)
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (H, W)

        # One (x, y, x, y) offset per feature-map cell: (H*W, 4)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        all_anchors = (_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -_allowed_border)
            & (all_anchors[:, 1] >= -_allowed_border)
            & (all_anchors[:, 2] < im_info[1] + _allowed_border)  # width
            & (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
        )[0]

        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        anchors = all_anchors[inds_inside, :]  # (In, 4)
        if DEBUG:
            print('anchors.shape', anchors.shape)

        # --------------------------------------------------------------
        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes, shape (In, G).
        # np.float64 replaces the np.float alias that NumPy 1.24 removed.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # Best gt box for every anchor and the corresponding overlap value.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # Best anchor for every gt box (first one on ties).
        best_anchor_per_gt = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[best_anchor_per_gt,
                                   np.arange(overlaps.shape[1])]
        # Every anchor that ties for the maximum overlap with some gt box.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber
            # positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # Remember which anchors were background before any subsampling; the
        # hard-negative pass on later visits selects from this mask.
        before_filter_labels = labels.copy()
        all_bg_index = before_filter_labels == 0
        if DEBUG:
            print('在过滤数量之前:')
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))

        # preclude dontcare areas
        if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
            # intersec shape is D x A
            intersecs = bbox_intersections(
                np.ascontiguousarray(dontcare_areas,
                                     dtype=np.float64),  # D x 4
                np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
            )
            intersecs_ = intersecs.sum(axis=0)  # A x 1
            labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

        # preclude hard samples that are highly occlusioned, truncated or
        # difficult to see. Hard samples are not considered for now: the
        # trailing ``and 0`` keeps this branch disabled.
        if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[
                0] > 0 and 0:
            assert gt_ishard.shape[0] == gt_boxes.shape[0]
            gt_ishard = gt_ishard.astype(int)
            gt_hardboxes = gt_boxes[gt_ishard == 1, :]
            if gt_hardboxes.shape[0] > 0:
                # H x A
                hard_overlaps = bbox_overlaps(
                    np.ascontiguousarray(gt_hardboxes,
                                         dtype=np.float64),  # H x 4
                    np.ascontiguousarray(anchors,
                                         dtype=np.float64))  # A x 4
                hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
                labels[
                    hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
                max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
                labels[max_intersec_label_inds] = -1

        # subsample positive labels if we have too many: randomly turn the
        # surplus into "ignore".
        # TODO: may need revisiting, since character fragments produce many
        # positive samples.
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many: negatives fill
        # whatever part of the batch the positives left free.
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1

        if DEBUG:
            print("考虑均衡住正负样本以后:")
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))

        # Labels for the first visit are done; compute the regression targets
        # of every inside anchor against the gt box it overlaps most.
        v_target, o_target = _compute_targets(anchors,
                                              gt_boxes[argmax_overlaps, :])

        # j index ("effective" anchors): positive anchors, or anchors whose
        # best overlap exceeds 0.5 and that are not ignored. The membership
        # test is on the anchor itself; the previous loop compared its loop
        # counter with the kept indices and so selected the wrong anchors
        # once subsampling had set labels to -1.
        j_mask = (labels == 1) | ((max_overlaps > 0.5) & (labels != -1))
        j_index1 = j_mask.astype(np.int32)

        # k index (side anchors): a gt box counts as a side box when its
        # width (x2 - x1) differs from 15.
        # ori_gt_box = (gt_boxes/im_info[2]).astype(np.int32, copy=False)
        ori_gt_box = gt_boxes.astype(np.float32, copy=False)
        gt_side_index = np.where(
            ori_gt_box[:, 2] - ori_gt_box[:, 0] != 15)[0].astype(np.int32)

        # Anchors with the highest overlap for each side gt box.
        anchor_side_index = best_anchor_per_gt[gt_side_index]
        # Drop anchors whose overlap with their side box is 0: they are not
        # real matches and would make the side loss abnormally large.
        side_overlaps = overlaps[anchor_side_index, gt_side_index]
        # Sorted, de-duplicated anchor indices (relative to inside anchors).
        k_index = np.unique(
            anchor_side_index[side_overlaps > 0]).astype(np.int32)

        k_index1 = np.zeros((len(inds_inside)), dtype=np.int32)
        k_index1[k_index] = 1

        # Keep the inside-only labels for the cache before mapping back.
        in_labels = labels.copy()
        # map up to original set of anchors: anchors outside the image were
        # dropped earlier and are added back with the fill value.
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        v_target = _unmap(v_target, total_anchors, inds_inside, fill=0)
        o_target = _unmap(o_target, total_anchors, inds_inside, fill=0)
        j_index2 = _unmap(j_index1, total_anchors, inds_inside,
                          fill=0).astype(np.int32)
        k_index2 = _unmap(k_index1, total_anchors, inds_inside,
                          fill=0).astype(np.int32)

        # Cache everything the repeated-image branch needs.
        tmp_labels = in_labels
        tmp_all_bg_index = all_bg_index
        tmp_v_target = v_target
        tmp_o_target = o_target
        tmp_j_index2 = j_index2
        tmp_k_index2 = k_index2
        tmp_inds_inside = inds_inside

        if DEBUG or SHOW_SOME:
            print('第一次这张图')
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))
        return labels, v_target, o_target, j_index2, k_index2

    else:  # image seen on the previous call: only add hard negatives
        if DEBUG and SHOW_SOME:
            print('不是第一次')
        # Background mask (before subsampling) and inside-anchor indices
        # cached on the first visit.
        bg_index = tmp_all_bg_index
        inds_inside = tmp_inds_inside
        # Anchors whose foreground probability exceeds the threshold.
        rpn_cls_prob = np.reshape(rpn_cls_prob, [-1, 2])
        rpn_cls_prob = rpn_cls_prob[inds_inside, :]
        fg_score = rpn_cls_prob[:, 1]
        high_score = fg_score > 0.5

        # Hard negatives: background anchors that nevertheless score high.
        assert bg_index.shape == high_score.shape
        hard_neg = bg_index & high_score

        if DEBUG:
            print('负样本的数量:' + str(len(np.where(bg_index == True)[0])))
            print('得分高于0.5的数量:' + str(len(np.where(high_score == True)[0])))
            print('hard negtive 数量' + str(len(np.where(hard_neg == True)[0])))

        # Training samples on a repeated image are the samples drawn on the
        # first visit plus the hard negatives that were not already sampled.
        labels = tmp_labels.copy()
        first_gen_index = labels != -1
        hard_neg_index = hard_neg
        assert first_gen_index.shape == hard_neg_index.shape, 'line 282'
        new_hard_index = hard_neg_index & ~first_gen_index
        assert labels.shape == new_hard_index.shape

        if DEBUG:
            print('加载进来的第一次的labels的负样本的数量:' +
                  str(len(np.where(labels == 0)[0])))
            print('属于难负样本不属于第一次样本的数量:' +
                  str(len(np.where(new_hard_index == True)[0])))
            print('加难样本之前的负样本数量:' + str(len(np.where(labels == 0)[0])))

        labels[new_hard_index] = 0

        if DEBUG or SHOW_SOME:
            print('加难样本之后的负样本数量:' + str(len(np.where(labels == 0)[0])))
            print('正样本:' + str(len(np.where(labels == 1)[0])))
            print('负样本:' + str(len(np.where(labels == 0)[0])))
            print('忽略样本:' + str(len(np.where(labels == -1)[0])))

        # The remaining outputs are unchanged; reuse the cached ones.
        v_target = tmp_v_target
        o_target = tmp_o_target
        j_index2 = tmp_j_index2
        k_index2 = tmp_k_index2

        # NOTE(review): labels here cover only the inside anchors, while the
        # first-visit branch returns labels passed through _unmap -- confirm
        # that the caller expects this difference.
        return labels, v_target, o_target, j_index2, k_index2
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor that lies inside the image as foreground (1),
    background (0) or ignored (-1) from its overlap with the ground-truth
    boxes, subsamples both classes to the configured batch size, and builds
    the box-regression targets and loss weights.

    Args:
        rpn_cls_score: score map in PyTorch layout (bs, c, h, w); only h and
            w are read.
        gt_boxes: ground-truth boxes; passed to ``bbox_overlaps`` and indexed
            by the best-matching box of every anchor.
        im_info: sequence with image height at index 0 and width at index 1.
        _feat_stride: unused in this function.
        all_anchors: (N, 4) array of anchor boxes.
        num_anchors: number of anchors per feature-map location (A).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels are reshaped to
        (1, 1, A * h, w); the other three to (1, h, w, A * 4).
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # pytorch (bs, c, h, w)
    height, width = rpn_cls_score.shape[2:4]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border)  # width
        & (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 replaces the np.float alias that NumPy 1.24 removed.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Best gt box for every anchor and the corresponding overlap value.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Best overlap for every gt box, then every anchor that ties for it.
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression target of every inside anchor against its best gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # Split the total weight between the classes, then spread each share
        # evenly over that class's samples.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels: (1, h, w, A) -> (1, A, h, w) -> (1, 1, A * h, w)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets and both weight arrays share the (1, h, w, A * 4) layout
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score_list, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Label anchors and build RPN regression targets over several feature levels.

    Follows the anchor target layer of the original Fast/er R-CNN, with two
    additions visible in this function: anchors that overlap "ignore" ground
    truth boxes are set to don't-care, and the label vector is re-laid-out
    level by level before being returned.

    Args:
        rpn_cls_score_list: sequence of score maps, one per feature level.
            Only ``shape[2]`` (height) and ``shape[3]`` (width) are read.
        gt_boxes: 2-D array of ground-truth rows. Whole rows are handed to
            ``bbox_overlaps``; column 4 is compared against 1 to select the
            boxes treated as ignore regions.
        im_info: indexable where ``im_info[0]`` bounds the anchor y2 coordinate
            (image height) and ``im_info[1]`` bounds x2 (image width).
        _feat_stride: unused; kept for interface compatibility.
        all_anchors: 2-D array of anchors, one row per anchor with at least
            four columns (x1, y1, x2, y2 as used by the inside-image test).
        num_anchors: iterable giving the number of anchors per location for
            each feature level, in the same order as ``rpn_cls_score_list``.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. ``rpn_labels`` is the concatenation of the
        flattened per-level labels (1 positive, 0 negative, -1 don't care);
        the other three are the outputs of ``_unmap`` over all anchors,
        returned without further reshaping.

    Raises:
        AssertionError: if the per-level sizes do not add up to the number of
            anchors, or if ``cfg.TRAIN.RPN_POSITIVE_WEIGHT`` is non-negative
            but outside the open interval (0, 1).
    """
    anchors_per_level = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # score maps are laid out as (batch, channels, height, width)
    heights = [rpn_cls_score.shape[2] for rpn_cls_score in rpn_cls_score_list]
    widths = [rpn_cls_score.shape[3] for rpn_cls_score in rpn_cls_score_list]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border)  # width
        & (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('inds_inside', len(inds_inside))
        print('total anchors', total_anchors)

    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Re-derive the indices so that every anchor tying for a gt's best
    # overlap is kept, not only the first one argmax reports.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # -------------------- ignore handling --------------------
    ignore_inds = np.where(gt_boxes[:, 4] == 1)[0]
    if len(ignore_inds) > 0:
        ignore_boxes = gt_boxes[ignore_inds, :]
        # overlaps between anchors and ignore regions
        ignore_overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(ignore_boxes, dtype=np.float64))
        best_ignore = ignore_overlaps.argmax(axis=1)
        max_ignore_overlaps = ignore_overlaps[np.arange(len(inds_inside)),
                                              best_ignore]
        # anchors that overlap an ignore region strongly become don't-care
        labels[max_ignore_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
    # -------------------- ignore handling --------------------

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive ones have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # Re-order the labels one feature level at a time: each level's slice is
    # viewed as (1, H, W, A), moved to (1, A, H, W) and flattened.
    end_cnt = 0
    labels_list = []
    for height, width, A in zip(heights, widths, anchors_per_level):
        begin_cnt = end_cnt
        end_cnt += height * width * A
        labels_part = labels[begin_cnt:end_cnt]
        labels_part = labels_part.reshape(
            (1, height, width, A)).transpose(0, 3, 1, 2)
        labels_list.append(labels_part.reshape((-1, )))

    # every anchor must belong to exactly one level
    assert total_anchors == end_cnt
    rpn_labels = np.concatenate(labels_list, axis=0)

    # The regression targets and weights are returned exactly as _unmap
    # produced them (no per-level reshaping).
    rpn_bbox_targets = bbox_targets
    rpn_bbox_inside_weights = bbox_inside_weights
    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Exemplo n.º 24
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Sample foreground and background RoIs and build their training targets.

    Background candidates are chosen from overlaps with *all* ground-truth
    boxes, while foreground candidates and labels are computed only against
    ground-truth boxes whose class column equals 15 (the original author's
    notes call these the pedestrian regions; other boxes act as ignore
    regions for the foreground selection).

    Args:
        all_rois: tensor of RoIs; columns 1-4 are used as box coordinates.
        all_scores: tensor of RoI scores, indexed alongside ``all_rois``.
        gt_boxes: tensor of ground-truth rows; columns 0-3 are box
            coordinates and column 4 is the class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights)``
        for the sampled RoIs, foreground entries first. Labels of the
        background entries are set to 0.

    Raises:
        RuntimeError: if no RoI qualifies as foreground or background.
    """

    def _pick(inds, count, replace):
        # Randomly choose `count` entries of `inds`. The index tensor follows
        # the device of `inds` instead of assuming the current CUDA device.
        chosen = npr.choice(np.arange(0, inds.numel()),
                            size=int(count),
                            replace=replace)
        return inds[torch.from_numpy(chosen).long().to(inds.device)]

    # overlaps: (rois x gt_boxes), against every ground-truth box
    overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data)
    max_overlaps_all, _ = overlaps.max(1)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI).
    # `&` is used rather than summing the two masks: on bool tensors the sum
    # stays boolean and can never equal 2, which left bg_inds always empty.
    bg_inds = ((max_overlaps_all < cfg.TRAIN.BG_THRESH_HI) &
               (max_overlaps_all >= cfg.TRAIN.BG_THRESH_LO)
               ).nonzero().view(-1)

    # ----------------------- ignore handling --------------------
    # Keep only class-15 ground truth for foreground selection and labels.
    keep_gt = (gt_boxes[:, 4] == 15).nonzero().view(-1)
    gt_boxes = gt_boxes[keep_gt]

    overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # ----------------------- ignore handling --------------------

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        # never ask for more foreground RoIs than exist (no replacement)
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = _pick(fg_inds, fg_rois_per_image, False)

        bg_rois_per_image = rois_per_image - fg_rois_per_image
        if bg_inds.numel() < bg_rois_per_image:
            # too few background RoIs: pad by sampling with replacement
            bg_inds = _pick(bg_inds, bg_rois_per_image, True)
        else:
            # enough background RoIs: take the leading ones
            bg_inds = bg_inds[:int(bg_rois_per_image)]
    elif fg_inds.numel() > 0:
        fg_inds = _pick(fg_inds, rois_per_image,
                        fg_inds.numel() < rois_per_image)
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        bg_inds = _pick(bg_inds, rois_per_image,
                        bg_inds.numel() < rois_per_image)
        fg_rois_per_image = 0
    else:
        # Previously this dropped into pdb, which stalls or aborts
        # non-interactive training runs without explaining why.
        raise RuntimeError(
            '_sample_rois: no foreground or background RoIs to sample from')

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data,
        labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Exemplo n.º 25
0
def rpn_target(anchors, inside_inds, gt_labels, gt_boxes, *,
               batch_size=100, fg_fraction=0.5,
               fg_thresh_lo=0.7, bg_thresh_hi=0.3):
    """Pick RPN training anchors and compute their regression targets.

    Args:
        anchors: 2-D array of anchors, one row per anchor (x1, y1, x2, y2 as
            used by the box transform below).
        inside_inds: integer array indexing the rows of ``anchors`` that are
            to be considered.
        gt_labels: unused; kept for interface compatibility.
        gt_boxes: 2-D array of ground-truth boxes; columns 0-3 are read as
            x1, y1, x2, y2 by the box transform.
        batch_size: maximum number of labelled (positive + negative) anchors.
        fg_fraction: fraction of ``batch_size`` that may be positive.
        fg_thresh_lo: anchors whose best IoU is at least this are positive.
        bg_thresh_hi: anchors whose best IoU is below this are negative.

    Returns:
        Tuple ``(pos_neg_inds, pos_inds, labels, targets)``:
        ``pos_neg_inds`` are the entries of ``inside_inds`` that received a
        label, ``pos_inds`` those labelled positive, ``labels`` the 1/0 label
        for each entry of ``pos_neg_inds``, and ``targets`` the (dx, dy, dw,
        dh) deltas for each positive anchor.
    """

    def box_transform(et_boxes, gt_boxes):
        # Encode gt boxes relative to the estimate boxes as centre offsets
        # normalised by size plus log size ratios (pixel-inclusive widths).
        et_ws = et_boxes[:, 2] - et_boxes[:, 0] + 1.0
        et_hs = et_boxes[:, 3] - et_boxes[:, 1] + 1.0
        et_cxs = et_boxes[:, 0] + 0.5 * et_ws
        et_cys = et_boxes[:, 1] + 0.5 * et_hs

        gt_ws = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
        gt_hs = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
        gt_cxs = gt_boxes[:, 0] + 0.5 * gt_ws
        gt_cys = gt_boxes[:, 1] + 0.5 * gt_hs

        dxs = (gt_cxs - et_cxs) / et_ws
        dys = (gt_cys - et_cys) / et_hs
        dws = np.log(gt_ws / et_ws)
        dhs = np.log(gt_hs / et_hs)

        return np.vstack((dxs, dys, dws, dhs)).transpose()

    inside_anchors = anchors[inside_inds, :]
    num_inside = len(inside_inds)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes, shape (anchors, gt).
    # np.float64 replaces the removed np.float alias (same dtype).
    overlaps = bbox_overlaps(
        np.ascontiguousarray(inside_anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # keep every anchor that ties for a gt's best overlap
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Order matters: positives assigned later override the bg label.
    labels[max_overlaps < bg_thresh_hi] = 0  # bg label
    labels[gt_argmax_overlaps] = 1  # fg: best anchor(s) for each gt
    labels[max_overlaps >= fg_thresh_lo] = 1  # fg: above threshold IoU

    # subsample positive labels
    num_fg = int(fg_fraction * batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds,
                                        size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1

    # subsample negative labels
    num_bg = batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    idx_label = np.where(labels != -1)[0]
    idx_target = np.where(labels == 1)[0]

    pos_neg_inds = inside_inds[idx_label]
    labels = labels[idx_label]

    pos_inds = inside_inds[idx_target]
    pos_anchors = inside_anchors[idx_target]
    # each positive anchor regresses towards the gt it overlaps most
    pos_gt_boxes = gt_boxes[argmax_overlaps][idx_target]
    targets = box_transform(pos_anchors, pos_gt_boxes)

    return pos_neg_inds, pos_inds, labels, targets