Example #1
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, sample_type='fpn', k0=4):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    if sample_type == 'fpn':
        #print 0
        w = rois[:, 3] - rois[:, 1]
        h = rois[:, 4] - rois[:, 2]
        s = w * h
        s[s <= 0] = 1e-6
        layer_index = np.floor(k0 + np.log2(np.sqrt(s) / 224))

        layer_index[layer_index < 2] = 2
        layer_index[layer_index > 5] = 5
        #print 1
        return rois, labels, bbox_targets, bbox_inside_weights, layer_index #rois:[512,5]   labels:[512,]
    else:
        return rois, labels, bbox_targets, bbox_inside_weights
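
The layer_index above implements the FPN level-assignment heuristic k = floor(k0 + log2(sqrt(w*h)/224)), clipped here to levels 2..5. A minimal sketch of the same mapping on toy RoIs (assuming the [batch_idx, x1, y1, x2, y2] layout used above):

import numpy as np

def fpn_level(rois, k0=4, k_min=2, k_max=5):
    # rois: (N, 5) array of [batch_idx, x1, y1, x2, y2]
    w = rois[:, 3] - rois[:, 1]
    h = rois[:, 4] - rois[:, 2]
    s = np.maximum(w * h, 1e-6)  # guard against degenerate boxes
    k = np.floor(k0 + np.log2(np.sqrt(s) / 224))
    return np.clip(k, k_min, k_max)

# a 224x224 RoI lands on level k0=4; a 112x112 RoI one level lower
rois = np.array([[0, 0, 0, 224, 224],
                 [0, 0, 0, 112, 112]], dtype=np.float64)
print(fpn_level(rois))  # -> [4. 3.]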
Example #2
    def forward(self, regressions, anchors, annotations, iou_thresh=0.5):
        losses = []
        batch_size = regressions.shape[0]
        for j in range(batch_size):
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
            if bbox_annotation.shape[0] == 0:
                losses.append(torch.tensor(0).float().cuda())
                continue
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1])
            )
            overlaps = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1
            )
            if not torch.is_tensor(overlaps):
                overlaps = torch.from_numpy(overlaps).cuda()
            iou_max, iou_argmax = torch.max(overlaps, dim=1)
            positive_indices = torch.ge(iou_max, iou_thresh)
            assigned_annotations = bbox_annotation[iou_argmax, :]
            if positive_indices.sum() > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                targets = self.box_coder.encode(all_rois, gt_boxes)
                loss = self.criteron(regression[positive_indices, :], targets)
                losses.append(loss)
            else:
                losses.append(torch.tensor(0).float().cuda())
        return torch.stack(losses).mean(dim=0, keepdim=True)
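
self.criteron (the repository's spelling) is defined elsewhere; a common choice for this kind of box-regression loss is smooth L1, sketched below under that assumption:

import torch

def smooth_l1(pred, target, beta=1.0):
    # quadratic below beta, linear above; the usual box-regression loss
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta,
                       0.5 * diff ** 2 / beta,
                       diff - 0.5 * beta)
    return loss.mean()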
Example #3
def _compute_targets(rois, overlaps, labels):
    """
    Compute bounding-box regression targets for an image.
    For each RoI, find the corresponding gt box, then compute the regression offsets.
    """
    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
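
bbox_transform is not shown in this snippet; in the Fast/Faster R-CNN codebases it computes the standard (tx, ty, tw, th) offsets from an example box to its assigned ground-truth box. A NumPy sketch of that parameterization:

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    # center/size of the example boxes (the +1 follows the pixel
    # convention used by bbox_overlaps in these snippets)
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    # center/size of the ground-truth boxes
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    # normalized center offsets and log size ratios
    tx = (gt_cx - ex_cx) / ex_w
    ty = (gt_cy - ex_cy) / ex_h
    tw = np.log(gt_w / ex_w)
    th = np.log(gt_h / ex_h)
    return np.vstack((tx, ty, tw, th)).transpose()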
Example #4
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        assert len(box_list) == self.num_images, \
          'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes': boxes,
                'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps': overlaps,
                'flipped': False,
                'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb
Example #5
  def create_roidb_from_box_list(self, box_list, gt_roidb):
    assert len(box_list) == self.num_images, \
      'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
      boxes = box_list[i]
      num_boxes = boxes.shape[0]
      overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

      if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
        gt_boxes = gt_roidb[i]['boxes']
        gt_classes = gt_roidb[i]['gt_classes']
        gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                    gt_boxes.astype(np.float64))
        argmaxes = gt_overlaps.argmax(axis=1)
        maxes = gt_overlaps.max(axis=1)
        I = np.where(maxes > 0)[0]
        overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

      overlaps = scipy.sparse.csr_matrix(overlaps)
      roidb.append({
        'boxes': boxes,
        'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
        'gt_overlaps': overlaps,
        'flipped': False,
        'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
      })
    return roidb
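
Both roidb builders above rely on bbox_overlaps, usually a compiled Cython kernel. Functionally it is just the pairwise IoU matrix between two box sets; a pure-NumPy reference sketch:

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    # boxes: (N, 4), query_boxes: (K, 4), both [x1, y1, x2, y2]; returns (N, K) IoU
    ix1 = np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    iy1 = np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    ix2 = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2])
    iy2 = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3])
    iw = np.maximum(ix2 - ix1 + 1, 0)  # intersection width (+1 pixel convention)
    ih = np.maximum(iy2 - iy1 + 1, 0)  # intersection height
    inter = iw * ih
    areas = ((boxes[:, 2] - boxes[:, 0] + 1) *
             (boxes[:, 3] - boxes[:, 1] + 1))[:, None]
    query_areas = ((query_boxes[:, 2] - query_boxes[:, 0] + 1) *
                   (query_boxes[:, 3] - query_boxes[:, 1] + 1))[None, :]
    return inter / (areas + query_areas - inter)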
Example #6
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
  gt_assignment = overlaps.argmax(axis=1)
  max_overlaps = overlaps.max(axis=1)
  labels = gt_boxes[gt_assignment, 4]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
  # Guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.size > 0 and bg_inds.size > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
    fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.size < bg_rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
  elif fg_inds.size > 0:
    to_replace = fg_inds.size < rois_per_image
    fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.size > 0:
    to_replace = bg_inds.size < rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  # The indices that we're selecting (both fg and bg)
  keep_inds = np.append(fg_inds, bg_inds)
  # Select sampled values from various arrays:
  labels = labels[keep_inds]
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds]
  roi_scores = all_scores[keep_inds]

  bbox_target_data = _compute_targets(
    rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Example #7
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  npr.seed(cfg.RNG_SEED)
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  max_overlaps, gt_assignment = overlaps.max(1)
  labels = gt_boxes[gt_assignment, [4]]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  # Guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()]
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.numel() < bg_rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()]
  elif fg_inds.numel() > 0:
    to_replace = fg_inds.numel() < rois_per_image
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    to_replace = bg_inds.numel() < rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()
  roi_scores = all_scores[keep_inds].contiguous()

  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)
  
  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Example #8
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  max_overlaps, gt_assignment = overlaps.max(1)
  labels = gt_boxes[gt_assignment, [4]]

  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  # Guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()]
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.numel() < bg_rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()]
  elif fg_inds.numel() > 0:
    to_replace = fg_inds.numel() < rois_per_image
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    to_replace = bg_inds.numel() < rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()
  roi_scores = all_scores[keep_inds].contiguous()

  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Example #9
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    # overlaps (rois x gt_boxes)
    overlaps = bbox_overlaps(np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
                             np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # for each RoI, the index of the gt box with the largest overlap
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # label of that highest-overlap gt box
    labels = gt_boxes[gt_assignment, 4]

    # select foreground RoIs: those at or above the foreground threshold
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # background RoIs lie between the two background thresholds
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # bg_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH_HI)[0]
    # ensure a fixed number of regions is sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        # background RoIs per image = total RoIs minus foreground RoIs
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # if there are fewer backgrounds than needed, set replace=True so the
        # same element may be drawn repeatedly
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # no background RoIs; if foregrounds are fewer than the total, allow repeats
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # no foreground RoIs; if backgrounds are fewer than the total, allow repeats
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # otherwise drop into the debugger
        print(1)
        import pdb
        pdb.set_trace()

    # indices of the foreground and background RoIs just selected
    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]
    # target data: the label plus the four regression targets tx, ty, tw, th
    bbox_target_data = _compute_targets(rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    # per-class regression targets and the inside weights
    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(bbox_target_data, num_classes)
    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
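
In all of these samplers the to_replace flag enables sampling with replacement only when the candidate pool is smaller than the quota, so the output batch always has a fixed size. A toy demonstration of that behavior:

import numpy as np
import numpy.random as npr

bg_inds = np.array([3, 7, 9])         # only 3 background candidates
bg_quota = 8                          # but 8 slots to fill
to_replace = bg_inds.size < bg_quota  # True -> duplicates allowed
sample = npr.choice(bg_inds, size=bg_quota, replace=to_replace)
print(sample)                         # 8 indices drawn from {3, 7, 9}, with repeats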
Example #10
def _get_proposal_clusters(all_rois, proposals, im_labels):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    #gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    # cls_loss_weights = gt_scores[gt_assignment, 0]

    # # Select foreground RoIs as those with >= FG_THRESH overlap
    # fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    #
    # # Select background RoIs as those with < FG_THRESH overlap
    # bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
    #
    # ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]

    # cls_loss_weights[ig_inds] = 0.0
    #
    # labels[bg_inds] = 0
    # gt_assignment[bg_inds] = -1
    #
    # img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    # pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    # pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    # pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    #
    # for i in xrange(gt_boxes.shape[0]):
    #     po_index = np.where(gt_assignment == i)[0]
    #     img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
    #     pc_labels[i] = gt_labels[i, 0]
    #     pc_count[i] = len(po_index)
    #     pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])
    return max_overlaps, labels
Example #11
    def forward(self, classifications, anchors, annotations, iou_thresh=0.5):
        losses = []
        batch_size = classifications.shape[0]

        for j in range(batch_size):
            classification = classifications[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
            if bbox_annotation.shape[0] == 0:
                losses.append(torch.tensor(0).float().cuda())
                continue
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1])
            )
            overlaps = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1
            )
            if not torch.is_tensor(overlaps):
                overlaps = torch.from_numpy(overlaps).cuda()
            iou_max, iou_argmax = torch.max(overlaps, dim=1)
            targets = (torch.ones(classification.shape) * -1).cuda()
            targets[torch.lt(iou_max, 0.4), :] = 0
            positive_indices = torch.ge(iou_max, iou_thresh)
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[iou_argmax, :]
            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1
            alpha_factor = torch.ones(targets.shape).cuda() * self.alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
            bin_cross_entropy = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))
            cls_loss = focal_weight * bin_cross_entropy
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
        return torch.stack(losses).mean(dim=0, keepdim=True)
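
The alpha/focal weighting above is the focal loss FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t): the (1 - p_t)^gamma factor suppresses the loss of well-classified anchors so the rare positives dominate. A scalar sketch with the common alpha=0.25, gamma=2.0:

import torch

alpha, gamma = 0.25, 2.0
p = torch.tensor([0.9, 0.1])       # predicted probs for two positive anchors
target = torch.tensor([1.0, 1.0])

alpha_t = torch.where(target == 1., torch.tensor(alpha), torch.tensor(1. - alpha))
p_t = torch.where(target == 1., p, 1. - p)
fl = -alpha_t * (1. - p_t) ** gamma * torch.log(p_t)
print(fl)  # the confident prediction (p=0.9) contributes ~1000x less loss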
Example #12
def bbox_vote(dets_NMS, dets_all, thresh=0.5):
    dets_voted = np.zeros_like(
        dets_NMS)  # Empty matrix with the same shape and type

    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float64))

    # for each survived box
    for i, det in enumerate(dets_NMS):
        dets_overlapped = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert (len(dets_overlapped) > 0)

        boxes = dets_overlapped[:, 0:4]
        scores = dets_overlapped[:, 4]

        out_box = np.dot(scores, boxes)

        dets_voted[i][0:4] = out_box / sum(scores)  # Weighted bounding boxes
        dets_voted[i][4] = det[4]  # Keep the original score

        # Weighted scores (if enabled)
        if cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE > 1:
            n_agreement = cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE
            w_empty = cfg.TEST.BBOX_VOTE_WEIGHT_EMPTY

            n_detected = len(scores)

            if n_detected >= n_agreement:
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                new_score = np.average(scores) * (
                    n_detected * 1.0 +
                    (n_agreement - n_detected) * w_empty) / n_agreement

            dets_voted[i][4] = min(new_score, dets_voted[i][4])

    return dets_voted
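
The vote itself is a score-weighted average of all overlapping detections: np.dot(scores, boxes) / sum(scores) averages the four corners, weighted by confidence. A toy check:

import numpy as np

boxes = np.array([[10., 10., 50., 50.],
                  [12., 12., 54., 54.]])
scores = np.array([0.9, 0.3])

voted = np.dot(scores, boxes) / scores.sum()
print(voted)  # -> [10.5 10.5 51.  51. ], pulled toward the higher-scored box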
Example #13
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                        im_info, _feat_stride=[16, ], anchor_scales=[16, ]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform)
                            that are the regression objectives
    rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg,
                            beacuse the numbers of bgs and fgs mays significiantly different
    """
    _anchors = generate_anchors(
        scales=np.array(anchor_scales))  # generate the base anchors, 9 in total
    _num_anchors = _anchors.shape[0]  # 9 anchors

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    #height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]  # image height, width and scale

    # locate the anchors on the feature map and add the offsets to get the
    # anchors' real coordinates in the input image
    # Algorithm:
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  # height/width of the feature map

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
         shift_y.ravel())).transpose()  # per-cell offsets from feature-map to image coordinates
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors  # 9 anchors
    K = shifts.shape[0]  # e.g. 50*37, feature-map width times height
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial dimension, then add
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    # anchors crossing the image boundary are dropped
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]  # keep the anchors inside the image
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # at this point the anchors are ready
    #--------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    # (A)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)  # initialize all labels to -1

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), shape is A x G
    # compute anchor/gt-box overlaps, used to assign labels to the anchors
    overlaps = bbox_overlaps(np.ascontiguousarray(
        anchors, dtype=np.float64), np.ascontiguousarray(
            gt_boxes,
            dtype=np.float64))  # with x anchors and y gt_boxes this returns an (x, y) array
    # holding the overlap between every anchor and every gt box
    argmax_overlaps = overlaps.argmax(
        axis=1)  # (A) for each anchor, the gt box with the largest overlap
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(
        axis=0)  # (G) for each gt box, the anchor with the largest overlap
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps <
               cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0  # label background first: overlap below 0.3

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1  # the highest-overlap anchor for each gt box is foreground
    # fg label: above threshold IOU
    labels[max_overlaps >=
           cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1  # anchors with overlap above 0.7 are foreground

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[
            0] > 0:  # for now we do not consider dontcare areas
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        intersecs_ = intersecs.sum(axis=0)  # A x 1
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # for now we do not consider hard samples
    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[
            0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # H x A
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),  # H x 4
                np.ascontiguousarray(anchors, dtype=np.float64))  # A x 4
            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    # cap the number of positives at 128
    # TODO: this may need revisiting later; with character fragments the
    # number of positives can be large.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)  # randomly drop some positives
        labels[disable_inds] = -1  # set them to -1 (dontcare)

    # subsample negative labels if we have too many
    # the total batch is 256 with at most 128 positives;
    # if there are fewer than 128 positives, fill the remainder
    # with negatives to reach 256 samples
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
        #print "was %s inds, disabling %s, now %s inds" % (
        #len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # labels are done; now compute the rpn box regression targets
    #--------------------------------------------------------------
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(
        anchors,
        gt_boxes[argmax_overlaps, :])  # targets are the offsets between each anchor and its gt box

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # uniform weights for now: positives 1, negatives 0
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0) + 1
        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1)) + 1)
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0)) + 1)
    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: foreground 1, background 0
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    # anchors outside the image were dropped at the start; add them back now
    labels = _unmap(labels, total_anchors, inds_inside,
                    fill=-1)  # these anchors get label -1, i.e. dontcare
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside,
                          fill=0)  # these anchors get target 0, i.e. no value
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)  # inside weights filled with 0
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)  # outside weights filled with 0

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    labels = labels.reshape((1, height, width, A))  # reshape the labels
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
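
The meshgrid/broadcast step used above (and again in Examples #14 and #16) is the standard trick for tiling A base anchors over a K = H*W grid: add (K, 1, 4) shifts to (1, A, 4) anchors and flatten to (K*A, 4). A tiny self-contained version with one base anchor on a 2x2 feature map at stride 16:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.]])  # one base anchor, A = 1
stride, height, width = 16, 2, 2

shift_x, shift_y = np.meshgrid(np.arange(width) * stride,
                               np.arange(height) * stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)

A, K = base_anchors.shape[0], shifts.shape[0]
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors)  # four anchors, one centered on each grid cell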
Example #14
    def forward(self, bottom, top):
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        h = []
        w = []
        for i in range(5):
            height, width = bottom[i].data.shape[-2:]
            h.append(height)
            w.append(width)
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[5].data
        # im_info
        im_info = bottom[6].data[0, :]

        all_anchors_list = []
        inds_inside_list = []
        total_anchors = 0

        feat_strides = self._feat_stride
        ratios = self._ratios

        scales = self._scales

        fpn_args = []
        fpn_anchors_fid = np.zeros(0).astype(int)
        fpn_anchors = np.zeros([0, 4])
        fpn_labels = np.zeros(0)
        fpn_inds_inside = []
        for feat_id in range(len(feat_strides)):
            # len(scales.shape) == 1 just for backward compatibility, will remove in the future

            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)

            num_anchors = base_anchors.shape[0]
            feat_height = h[feat_id]
            feat_width = w[feat_id]

            # 1. generate proposals from bbox deltas and shifted anchors
            shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
            shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = num_anchors
            K = shifts.shape[0]
            all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            all_anchors = all_anchors.reshape((K * A, 4))
            total_anchors = int(K * A)

            # only keep anchors inside the image
            inds_inside = np.where(
                (all_anchors[:, 0] >= -self._allowed_border)
                & (all_anchors[:, 1] >= -self._allowed_border)
                & (all_anchors[:, 2] < im_info[1] + self._allowed_border)
                & (all_anchors[:, 3] < im_info[0] + self._allowed_border))[0]

            # keep only inside anchors
            anchors = all_anchors[inds_inside, :]

            # label: 1 is positive, 0 is negative, -1 is dont care
            # for sigmoid classifier, ignore the 'background' class
            labels = np.empty((len(inds_inside), ), dtype=np.float32)
            labels.fill(-1)

            fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
            fpn_anchors = np.vstack((fpn_anchors, anchors))
            fpn_labels = np.hstack((fpn_labels, labels))
            fpn_inds_inside.append(inds_inside)
            fpn_args.append([feat_height, feat_width, A, total_anchors])

        if gt_boxes.size > 0:
            # overlap between the anchors and the gt boxes
            # overlaps (ex, gt)
            overlaps = bbox_overlaps(fpn_anchors.astype(np.float64),
                                     gt_boxes.astype(np.float64))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(len(fpn_anchors)),
                                    argmax_overlaps]
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                       np.arange(overlaps.shape[1])]
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

            if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels first so that positive labels can clobber them
                fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
            # fg label: for each gt, anchor with highest overlap
            fpn_labels[gt_argmax_overlaps] = 1
            # fg label: above threshold IoU
            fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
            if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels last so that negative labels can clobber positives
                fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        else:
            fpn_labels[:] = 0
        # subsample positive labels if we have too many
        num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCHSIZE == -1 else int(
            cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(fpn_labels >= 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            if DEBUG:
                disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
            fpn_labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCHSIZE == -1 \
            else cfg.TRAIN.RPN_BATCHSIZE - np.sum(fpn_labels >= 1)
        bg_inds = np.where(fpn_labels == 0)[0]
        fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

        # if balance_scale_bg:
        #     num_bg_scale = num_bg / len(feat_strides)
        #     for feat_id in range(0, len(feat_strides)):
        #         bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
        #         if len(bg_ind_scale) > num_bg_scale:
        #             disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
        #             fpn_labels[disable_inds] = -1
        # else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

        fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
        if gt_boxes.size > 0:
            fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
                fpn_anchors[fpn_labels >= 1, :],
                gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
            # fpn_bbox_targets[:] = bbox_transform(fpn_anchors, gt_boxes[argmax_overlaps, :4])
        # fpn_bbox_targets = (fpn_bbox_targets - np.array(cfg.TRAIN.BBOX_MEANS)) / np.array(cfg.TRAIN.BBOX_STDS)
        fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

        fpn_bbox_weights[fpn_labels >= 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
        fpn_bbox_outside_weights = np.zeros((len(fpn_anchors), 4),
                                            dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(fpn_labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(fpn_labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(fpn_labels == 0))

        fpn_bbox_outside_weights[fpn_labels == 1, :] = positive_weights
        fpn_bbox_outside_weights[fpn_labels == 0, :] = negative_weights

        label_list = []
        bbox_target_list = []
        bbox_weight_list = []
        bbox_outside_weight_list = []
        for feat_id in range(0, len(feat_strides)):
            feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
            # map up to original set of anchors
            fid_slice = slice(fpn_anchors_fid[feat_id],
                              fpn_anchors_fid[feat_id + 1])
            labels = _unmap(fpn_labels[fid_slice], total_anchors,
                            fpn_inds_inside[feat_id], fill=-1)
            bbox_targets = _unmap(fpn_bbox_targets[fid_slice], total_anchors,
                                  fpn_inds_inside[feat_id], fill=0)
            bbox_weights = _unmap(fpn_bbox_weights[fid_slice], total_anchors,
                                  fpn_inds_inside[feat_id], fill=0)
            bbox_outside_weights = _unmap(fpn_bbox_outside_weights[fid_slice],
                                          total_anchors,
                                          fpn_inds_inside[feat_id], fill=0)

            labels = labels.reshape(
                (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
            labels = labels.reshape((1, A * feat_height * feat_width))

            bbox_targets = bbox_targets.reshape(
                (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
            bbox_targets = bbox_targets.reshape((1, A * 4, -1))
            bbox_weights = bbox_weights.reshape(
                (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
            bbox_weights = bbox_weights.reshape((1, A * 4, -1))

            bbox_outside_weights = bbox_outside_weights.reshape(
                (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
            bbox_outside_weights = bbox_outside_weights.reshape((1, A * 4, -1))

            label_list.append(labels)
            bbox_target_list.append(bbox_targets)
            bbox_weight_list.append(bbox_weights)
            bbox_outside_weight_list.append(bbox_outside_weights)
            # label.update({'label_p' + str(feat_id + feat_id_start): labels,
            #               'bbox_target_p' + str(feat_id + feat_id_start): bbox_targets,
            #               'bbox_weight_p' + str(feat_id + feat_id_start): bbox_weights})

        labels = np.concatenate(label_list, axis=1)
        bbox_targets = np.concatenate(bbox_target_list, axis=2)
        bbox_inside_weights = np.concatenate(bbox_weight_list, axis=2)
        bbox_outside_weights = np.concatenate(bbox_outside_weight_list, axis=2)

        # print bbox_targets.shape
        # print bbox_inside_weights.shape
        # print bbox_outside_weights.shape
        # print labels.shape

        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets

        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights

        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights

        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Example #15
def _sample_rois(all_rois, all_scores, gt_boxes, gt_texts, gt_pair,
                 fg_rois_per_image, rois_per_image, num_classes,
                 gt_rois_per_image):
    """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
    # overlaps: (rois x gt_boxes)

    no_gt_size = all_rois.size(0) - gt_rois_per_image
    #print("all_rois")
    #print(all_rois)
    overlaps = bbox_overlaps(all_rois[:, 1:5].data, gt_boxes[:, :4].data)
    max_overlaps, gt_assignment = overlaps.max(1)
    labels = gt_boxes[gt_assignment, [4]]
    texts = [gt_texts[i] for i in gt_assignment]

    pair = torch.LongTensor([int(gt_pair[i]) for i in gt_assignment]).cuda()
    pair_ = torch.FloatTensor([int(gt_pair[i]) for i in gt_assignment]).cuda()
    #####################################################################
    ##### At this point the GT class and the other per-box info    ######
    ##### have been propagated into the proposal regions.          ######
    #####################################################################
    '''
  print("pair")
  print(pair)
  print("labels")
  print(labels)
  print("all_scores")
  print(all_scores)
  '''
    '''
  bar_inds = (labels == 9).nonzero().view(-1)
  print("bar_inds")
  print(bar_inds)
  '''
    '''
  bar_inds = ((labels == 9)+(pair == 0)==2).nonzero().view(-1)
  #bar_inds = ((pair_ == 0)).nonzero().view(-1)
  print("bar_inds")
  print(bar_inds)
  '''

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps[:no_gt_size] >=
               cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = ((max_overlaps[:no_gt_size] < cfg.TRAIN.BG_THRESH_HI) +
               (max_overlaps[:no_gt_size] >= cfg.TRAIN.BG_THRESH_LO) == 2
               ).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif bg_inds.numel() > 0:
        to_replace = bg_inds.numel() < rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()
    '''
  if (gt_rois_per_image>0):
    gt_inds = torch.arange(no_gt_size,gt_rois_per_image+no_gt_size)
  '''

    gt_inds = torch.arange(no_gt_size,
                           gt_rois_per_image + no_gt_size).long().cuda()
    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat(
        [fg_inds[:fg_rois_per_image - gt_rois_per_image], gt_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    texts = [texts[i] for i in keep_inds]
    pair = torch.LongTensor([int(pair[i]) for i in keep_inds])
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    #print("after clamp")
    #print(labels)
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data,
        labels.data)

    bbox_targets, bbox_inside_weights = \
      _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, texts, pair, rois, roi_scores, bbox_targets, bbox_inside_weights
Example #16
def anchor_target_layer_torch(gt_boxes, gt_boxes_dc, info, all_anchors,
                              num_anchors, height, width, dev):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    #print('num anchors')
    #print(num_anchors)
    #print(im_info[1])
    #print(im_info[0])
    total_anchors = all_anchors.shape[0]
    K = total_anchors // num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = torch.where(
        (all_anchors[:, 0] >= info[0] - _allowed_border) &  # width_min
        (all_anchors[:, 1] >= info[2] - _allowed_border) &  # height_min
        (all_anchors[:, 2] < info[1] + _allowed_border) &  # width_max
        (all_anchors[:, 3] < info[3] + _allowed_border)  # height_max
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    #Subset of anchors within image boundary
    labels = torch.full((len(inds_inside), ), -1,
                        dtype=torch.int64).to(device=dev)
    #labels.fill(-1)
    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    #from utils.bbox import bbox_overlaps
    overlaps = bbox_overlaps(anchors.contiguous(), gt_boxes.contiguous())
    if cfg.TRAIN.IGNORE_DC:
        overlaps_dc = bbox_overlaps(anchors.contiguous(),
                                    gt_boxes_dc.contiguous())
        overlaps_dc_idx = torch.argwhere(overlaps_dc > cfg.TRAIN.DC_THRESH)
        labels[overlaps_dc_idx[:, 0]] = -1
    #overlaps: (N, K) overlap between boxes and query_boxes
    argmax_overlaps = overlaps.argmax(dim=1)
    #grab subset of 2D array to only get [:,max_overlap_index]
    max_overlaps = overlaps[torch.arange(len(inds_inside)).to(device=dev),
                            argmax_overlaps]
    #max_overlaps_2 = torch.index_select(overlaps, 0, argmax_overlaps)
    gt_argmax_overlaps = overlaps.argmax(dim=0)
    #grab same subset of 2D array to get corresponding GT boxes with their max overlap counterpart
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               torch.arange(overlaps.shape[1]).to(device=dev)]
    gt_max_overlaps = torch.clamp(gt_max_overlaps,
                                  torch.finfo(torch.float32).eps, float('inf'))
    gt_argmax_overlaps = torch.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    #gt_argmax_overlaps is an index subset of the anchors that max overlap with a gt box
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    #anything else needs a large overlap as well
    nz_max_overlaps = max_overlaps.nonzero()
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = torch.where(labels == 1)[0]
    #TODO: Really, randomly select indices to disable? Why not worst ones? At least dont do this for the argmax..
    #If too many foreground entries
    if len(fg_inds) > num_fg:
        perm = torch.randperm(fg_inds.numel(), device=dev)[num_fg:]
        fg_inds_subset = fg_inds[perm]
        labels[fg_inds_subset] = -1

    # subsample negative labels if we have too many
    fg_sum = torch.sum(labels == 1)
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - fg_sum
    bg_inds = torch.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        perm = torch.randperm(bg_inds.numel(), device=dev)[num_bg:]
        bg_inds_subset = bg_inds[perm]
        labels[bg_inds_subset] = -1
    #Find target bounding boxes
    #bbox_targets = torch.zeros((len(inds_inside), 4), dtype=torch.float32).to(device=dev)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    #print('GT BOXES')
    #print(bbox_targets.shape)
    bbox_inside_weights = torch.zeros((len(inds_inside), 4),
                                      dtype=torch.float32).to(device=dev)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = torch.from_numpy(
        np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS,
                 dtype=np.float32)).to(device=dev)
    bbox_outside_weights = torch.zeros((len(inds_inside), 4),
                                       dtype=torch.float32).to(device=dev)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling) num_examples is a max of 256 by default
        num_examples = torch.sum(labels >= 0)
        #positive_weights = torch.ones((1, 4)) * 1.0 / num_examples
        #negative_weights = torch.ones((1, 4)) * 1.0 / num_examples
        positive_weights = 1.0 / float(num_examples)
        negative_weights = 1.0 / float(num_examples)
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        #TODO: Broken
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            torch.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            torch.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    #print('bbox weights')
    #print(bbox_outside_weights)
    #print(bbox_inside_weights)
    # map up to original set of anchors
    labels = _unmap(labels.type(dtype=torch.float32),
                    total_anchors,
                    inds_inside,
                    fill=-1,
                    dev=dev)
    bbox_targets = _unmap(bbox_targets,
                          total_anchors,
                          inds_inside,
                          fill=0,
                          dev=dev)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0,
                                 dev=dev)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0,
                                  dev=dev)

    # labels
    labels = labels.reshape((1, height, width, A)).permute(0, 3, 1, 2)
    #labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
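
A note on `_unmap`, which the function above calls four times but which is not shown in this listing: it scatters per-inside-anchor results back onto the full anchor grid. A minimal sketch consistent with the call sites (count, inside indices, fill value, device) follows; the name `_unmap_sketch` and the exact signature are assumptions, not the repository's code.

import torch

def _unmap_sketch(data, count, inds, fill=0, dev='cpu'):
    # Scatter `data`, defined only for the anchors kept by `inds`, back
    # into a tensor covering all `count` anchors; dropped anchors get
    # `fill` (-1 for labels, 0 for targets and weights above).
    out_shape = (count,) + tuple(data.shape[1:])
    ret = torch.full(out_shape, fill, dtype=data.dtype, device=dev)
    ret[inds] = data
    return ret
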
Example #17
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacky :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            overlaps = bbox_overlaps(boxes.astype(float),
                                     gt_boxes.astype(float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
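
The inner loop above is a greedy one-to-one matching: each pass pairs the best-covered remaining gt box with its best proposal, then retires both by writing -1 into that row and column. A toy IoU matrix (values invented for illustration) makes the bookkeeping concrete:

import numpy as np

overlaps = np.array([[0.9, 0.1],
                     [0.6, 0.7],
                     [0.2, 0.8]])  # rows: proposals, cols: gt boxes
matched = np.zeros(overlaps.shape[1])
work = overlaps.copy()
for j in range(work.shape[1]):
    gt_ind = work.max(axis=0).argmax()     # best-covered remaining gt
    box_ind = work.argmax(axis=0)[gt_ind]  # its best proposal
    matched[gt_ind] = work[box_ind, gt_ind]
    work[box_ind, :] = -1                  # proposal used up
    work[:, gt_ind] = -1                   # gt used up
print(matched)  # [0.9 0.8]: gt 0 takes proposal 0, gt 1 takes proposal 2
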
Example #18
0
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    overlaps = bbox_overlaps(boxes.astype(dtype=np.float32, copy=False),
                             boxes.astype(dtype=np.float32, copy=False))
    return (overlaps > iou_threshold).astype(np.float32)
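
Every example on this page delegates to `bbox_overlaps`, which in these repositories is typically a compiled Cython or CUDA kernel returning an (N, K) IoU matrix. For reference, a minimal pure-NumPy equivalent under the same pixel-inclusive (+1) box convention might look like this (a sketch, not the repositories' implementation):

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    # boxes: (N, 4), query_boxes: (K, 4), both as (x1, y1, x2, y2);
    # returns the (N, K) IoU matrix.
    b = boxes[:, None, :]        # (N, 1, 4)
    q = query_boxes[None, :, :]  # (1, K, 4)
    iw = np.clip(np.minimum(b[..., 2], q[..., 2]) -
                 np.maximum(b[..., 0], q[..., 0]) + 1, 0, None)
    ih = np.clip(np.minimum(b[..., 3], q[..., 3]) -
                 np.maximum(b[..., 1], q[..., 1]) + 1, 0, None)
    inter = iw * ih
    area_b = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    area_q = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
             (query_boxes[:, 3] - query_boxes[:, 1] + 1)
    return inter / (area_b[:, None] + area_q[None, :] - inter)
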
Example #19
0
def _sample_rois(all_rois, all_scores, gt_boxes, gt_weights, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  max_overlaps, gt_assignment = overlaps.max(1)
  labels = gt_boxes[gt_assignment, [4]]
  '''
      add weight terms based on pseudo scores
  '''
  gt_weights = gt_weights.detach().data
  gt_weights_tile = gt_weights.view(1,-1).expand_as(overlaps)
  loss_weights = gt_weights_tile[torch.arange(0,overlaps.size(0)).long(), gt_assignment]
  #print((gt_assignment==1).sum())
  #print(loss_weights)
  '''
      end of modification
  '''
  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  # Guard against the case when an image has fewer than fg_rois_per_image
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()]
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.numel() < bg_rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()]
  elif fg_inds.numel() > 0:
    to_replace = fg_inds.numel() < rois_per_image
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    to_replace = bg_inds.numel() < rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()
  roi_scores = all_scores[keep_inds].contiguous()
  
  
  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)

  '''
  modified by jiajie
  '''
  #loss_weights = loss_weights[keep_inds].contiguous() + 1.0
  loss_weights = loss_weights[keep_inds].contiguous()
  loss_weights[int(fg_rois_per_image):] = 1.0
  '''
  end of modification
  '''
  
  #bbox_outside_weights[labels == 1, :] = loss_weights[labels==1].reshape(-1,1) * positive_weights
  #bbox_outside_weights[labels == 0, :] = loss_weights[labels==0].reshape(-1,1) * negative_weights 

  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, loss_weights
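
`_get_bbox_regression_labels` is not shown in this listing; it expands the (N, 5) target rows into the (N, 4K) layout the box head expects, switching inside-weights on only at each RoI's own class slot. A NumPy sketch under the assumption that each row is [cls, dx, dy, dw, dh]; upstream versions read the weight values from cfg.TRAIN.BBOX_INSIDE_WEIGHTS rather than hard-coding 1.0:

import numpy as np

def _get_bbox_regression_labels_np(bbox_target_data, num_classes):
    # bbox_target_data: (N, 5) rows of [cls, dx, dy, dw, dh] (assumed).
    clss = bbox_target_data[:, 0].astype(int)
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:  # background rows stay all-zero
        start = 4 * clss[ind]
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.0
    return bbox_targets, bbox_inside_weights
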
Example #20
0
    def _write_detect_results_file(self, all_boxes, conf_thresh, iou_thresh,
                                   net_name):  #added by yuesongtian
        filename = cfg.ROOT_DIR + '/output/FP_Net_end2end/voc_2007_test/' + str(
            len(self.classes)) + '_' + net_name + '.txt'
        print 'Writing detection results to {}'.format(filename)
        if not os.path.exists(filename):
            os.system(r'touch %s' % filename)
        with open(filename, 'wt') as f:
            for im_ind, index in enumerate(self.image_index):

                f.write('{:s}'.format(index))

                for cls_ind, cls in enumerate(self.classes):
                    if cls == '__background__':
                        continue
                    max_gt_overlaps = self.roidb[im_ind][
                        'gt_overlaps'].toarray().max(axis=1)
                    gt_inds = np.where(
                        (self.roidb[im_ind]['gt_classes'] == cls_ind)
                        & (max_gt_overlaps == 1))[0]
                    gt_boxes = self.roidb[im_ind]['boxes'][gt_inds, :]
                    gt_areas = self.roidb[im_ind]['seg_areas'][gt_inds]
                    valid_gt_inds = np.where((gt_areas >= 0**2)
                                             & (gt_areas <= 1e5**2))[0]
                    gt_boxes = gt_boxes[valid_gt_inds, :]
                    if (len(all_boxes[cls_ind][im_ind]) == 0):
                        continue
                    inds = np.where(
                        all_boxes[cls_ind][im_ind][:, 4] > conf_thresh)[0]
                    dets_ = all_boxes[cls_ind][im_ind][inds, :]
                    if dets_.size == 0 or gt_boxes.shape[0] == 0:
                        continue
                    overlaps = bbox_overlaps(dets_.astype(float),
                                             gt_boxes.astype(float))

                    if overlaps.shape[0] == 1:
                        #print 'overlaps is ', overlaps, overlaps.shape[0], dets_.shape, gt_boxes.shape
                        argmax_overlaps = overlaps.argmax(axis=0)
                        max_overlaps = overlaps.max(axis=0)
                        gt_ind = max_overlaps.argmax()
                        gt_ovr = max_overlaps.max()
                        if gt_ovr >= iou_thresh:
                            box_ind = argmax_overlaps[gt_ind]
                            f.write(
                                '   {:.1f}({:.3f}) {:.1f} {:.1f} {:.1f} {:.1f}'
                                .format(cls_ind, dets_[box_ind, -1],
                                        dets_[box_ind, 0] + 1,
                                        dets_[box_ind, 1] + 1,
                                        dets_[box_ind, 2] + 1,
                                        dets_[box_ind, 3] + 1))
                        if (gt_boxes.shape[0] == 1):
                            f.write('*')
                            f.write('   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1,
                                gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1))
                        else:
                            f.write('*')
                            for gt_index in range(gt_boxes.shape[0]):
                                f.write(
                                    '   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                        gt_boxes[gt_index, 0] + 1,
                                        gt_boxes[gt_index, 1] + 1,
                                        gt_boxes[gt_index, 2] + 1,
                                        gt_boxes[gt_index, 3] + 1))

                    elif overlaps.shape[0] > 1:
                        for j in xrange(gt_boxes.shape[0]):
                            # find which proposal box maximally covers each gt box
                            argmax_overlaps = overlaps.argmax(axis=0)
                            # and get the iou amount of coverage for each gt box
                            max_overlaps = overlaps.max(axis=0)
                            # find which gt box is 'best' covered (i.e. 'best' = most iou)
                            gt_ind = max_overlaps.argmax()
                            gt_ovr = max_overlaps.max()
                            if gt_ovr < 0:
                                break
                            assert (gt_ovr >= 0)
                            box_ind = argmax_overlaps[gt_ind]
                            if gt_ovr < iou_thresh:
                                overlaps[box_ind, :] = -1
                                overlaps[:, gt_ind] = -1
                                continue
                            # write box > iou_thresh to f
                            f.write(
                                '   {:.1f}({:.3f}) {:.1f} {:.1f} {:.1f} {:.1f}'
                                .format(cls_ind, dets_[box_ind, -1],
                                        dets_[box_ind, 0] + 1,
                                        dets_[box_ind, 1] + 1,
                                        dets_[box_ind, 2] + 1,
                                        dets_[box_ind, 3] + 1))
                            # mark the proposal box and the gt box as used
                            overlaps[box_ind, :] = -1
                            overlaps[:, gt_ind] = -1

                        if (gt_boxes.shape[0] == 1):
                            f.write('*')
                            f.write('   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1,
                                gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1))
                        else:
                            f.write('*')
                            for gt_index in range(gt_boxes.shape[0]):
                                f.write(
                                    '   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                        gt_boxes[gt_index, 0] + 1,
                                        gt_boxes[gt_index, 1] + 1,
                                        gt_boxes[gt_index, 2] + 1,
                                        gt_boxes[gt_index, 3] + 1))

                    else:
                        if (gt_boxes.shape[0] == 1):
                            f.write('*')
                            f.write('   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                gt_boxes[0, 0] + 1, gt_boxes[0, 1] + 1,
                                gt_boxes[0, 2] + 1, gt_boxes[0, 3] + 1))
                        else:
                            f.write('*')
                            for gt_index in range(gt_boxes.shape[0]):
                                f.write(
                                    '   {:.1f} {:.1f} {:.1f} {:.1f}'.format(
                                        gt_boxes[gt_index, 0] + 1,
                                        gt_boxes[gt_index, 1] + 1,
                                        gt_boxes[gt_index, 2] + 1,
                                        gt_boxes[gt_index, 3] + 1))
                f.write('\n')
        return filename
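
Examples #17 and #20 inline the same greedy matcher; pulling it into a helper makes the write loop above mostly bookkeeping. This is a refactoring sketch, not code from either repository:

import numpy as np

def greedy_match(overlaps, iou_thresh=0.0):
    # Greedily yield (box_ind, gt_ind, iou): each step takes the
    # best-covered remaining gt and its best proposal, then retires both.
    work = overlaps.astype(np.float64).copy()
    for _ in range(work.shape[1]):
        per_gt_best = work.max(axis=0)
        gt_ind = per_gt_best.argmax()
        iou = per_gt_best[gt_ind]
        if iou < iou_thresh:
            break  # the running maximum only decreases, so nothing else matches
        box_ind = work.argmax(axis=0)[gt_ind]
        work[box_ind, :] = -1
        work[:, gt_ind] = -1
        yield int(box_ind), int(gt_ind), float(iou)
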
Example #21
0
def anchor_target_layer(gt_boxes, gt_boxes_dc, info, _feat_stride, all_anchors,
                        num_anchors, height, width):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    #print('num anchors')
    #print(num_anchors)
    #print(info[1])
    #print(info[0])
    total_anchors = all_anchors.shape[0]
    K = total_anchors // num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the frame
    #TODO: Torchify

    #TODO: Subtract minimum value between GT boxes and anchors as to not get the overlaps issue (maybe also track and see it happen?)

    inds_inside = np.where(
        (all_anchors[:, 0] >= info[0] - _allowed_border) &  #width_max
        (all_anchors[:, 1] >= info[2] - _allowed_border) &  #height_min
        (all_anchors[:, 2] < info[1] + _allowed_border) &  # width_max
        (all_anchors[:, 3] < info[3] + _allowed_border)  # height_max
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    #Subset of anchors within image boundary
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    #from utils.bbox import bbox_overlaps
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=float),
                             np.ascontiguousarray(gt_boxes, dtype=float))
    #np.set_printoptions(threshold=np.inf)
    #print('----------------------------------------------')
    #overlaps_trimmed = overlaps[~np.all(overlaps == 0, axis=1)]
    #print(overlaps_trimmed)
    #print('----------------------------------------------')
    if cfg.TRAIN.IGNORE_DC:
        overlaps_dc = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=float),
            np.ascontiguousarray(gt_boxes_dc, dtype=float))
        overlaps_dc_idx = np.argwhere(overlaps_dc > cfg.TRAIN.DC_THRESH)
        labels[overlaps_dc_idx[:, 0]] = -1
    #overlaps: (N, K) overlap between boxes and query_boxes
    argmax_overlaps = overlaps.argmax(
        axis=1)  #Best fitting GT for each anchor (1,N)
    gt_argmax_overlaps = overlaps.argmax(
        axis=0)  #Best fitting anchor for each GT box (K,1)
    #grab subset of 2D array to only get [:,max_overlap_index]
    #max_overlaps = overlaps.take(argmax_overlaps,axis=1)
    #np.set_printoptions(threshold=np.inf)
    #print(argmax_overlaps)
    #print(overlaps)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    #max_overlaps = overlaps[:, argmax_overlaps]
    #max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    #grab same subset of 2D array to get corresponding GT boxes with their max overlap counterpart
    #gt_max_overlaps = overlaps[gt_argmax_overlaps,
    #                           np.arange(overlaps.shape[1])]
    #TODO: How the f**k does this work
    #gt_max_overlaps = overlaps[gt_argmax_overlaps,:]
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    #gt_argmax_overlaps is an index subset of the anchors that max overlap with a gt box
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    #anything else needs a large overlap as well
    #TODO: Distance based overlap threshold?
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    #TODO: Really, randomly select indices to disable? Why not worst ones? At least dont do this for the argmax..
    #If too many foreground entries
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
    #Find target bounding boxes
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    #print('GT BOXES')
    #print(bbox_targets.shape)
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    #Create a mask where labels == 1
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    #Sample weighting is turned off
    if int(cfg.TRAIN.RPN_POSITIVE_WEIGHT) == -1:
        # uniform weighting of examples (given non-uniform sampling) num_examples is a max of 256 by default
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))

    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    #print('bbox weights')
    #print(bbox_outside_weights)
    #print(bbox_inside_weights)
    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    #labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
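
`_compute_targets` here wraps the standard Faster R-CNN box parameterization: each anchor's regression target is (dx, dy, dw, dh) toward its assigned gt box. A minimal NumPy version of that transform, assuming the usual pixel-inclusive widths and heights:

import numpy as np

def bbox_transform_np(ex_rois, gt_rois):
    # ex_rois, gt_rois: (N, 4) as (x1, y1, x2, y2); returns (N, 4) deltas.
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)
    dh = np.log(gt_h / ex_h)
    return np.stack((dx, dy, dw, dh), axis=1)
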
Example #22
0
  def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                      area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
             '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
    area_ranges = [[0 ** 2, 1e5 ** 2],  # all
                   [0 ** 2, 32 ** 2],  # small
                   [32 ** 2, 96 ** 2],  # medium
                   [96 ** 2, 1e5 ** 2],  # large
                   [96 ** 2, 128 ** 2],  # 96-128
                   [128 ** 2, 256 ** 2],  # 128-256
                   [256 ** 2, 512 ** 2],  # 256-512
                   [512 ** 2, 1e5 ** 2],  # 512-inf
                   ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
      # Checking for max_overlaps == 1 avoids including crowd annotations
      # (...pretty hacky :/)
      max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
      gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                         (max_gt_overlaps == 1))[0]
      gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
      gt_areas = self.roidb[i]['seg_areas'][gt_inds]
      valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                               (gt_areas <= area_range[1]))[0]
      gt_boxes = gt_boxes[valid_gt_inds, :]
      num_pos += len(valid_gt_inds)

      if candidate_boxes is None:
        # If candidate_boxes is not supplied, the default is to use the
        # non-ground-truth boxes from this roidb
        non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
        boxes = self.roidb[i]['boxes'][non_gt_inds, :]
      else:
        boxes = candidate_boxes[i]
      if boxes.shape[0] == 0:
        continue
      if limit is not None and boxes.shape[0] > limit:
        boxes = boxes[:limit, :]

      overlaps = bbox_overlaps(boxes.astype(float),
                               gt_boxes.astype(float))

      _gt_overlaps = np.zeros((gt_boxes.shape[0]))
      for j in range(gt_boxes.shape[0]):
        # find which proposal box maximally covers each gt box
        argmax_overlaps = overlaps.argmax(axis=0)
        # and get the iou amount of coverage for each gt box
        max_overlaps = overlaps.max(axis=0)
        # find which gt box is 'best' covered (i.e. 'best' = most iou)
        gt_ind = max_overlaps.argmax()
        gt_ovr = max_overlaps.max()
        assert (gt_ovr >= 0)
        # find the proposal box that covers the best covered gt box
        box_ind = argmax_overlaps[gt_ind]
        # record the iou coverage of this gt box
        _gt_overlaps[j] = overlaps[box_ind, gt_ind]
        assert (_gt_overlaps[j] == gt_ovr)
        # mark the proposal box and the gt box as used
        overlaps[box_ind, :] = -1
        overlaps[:, gt_ind] = -1
      # append recorded iou coverage level
      gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
      step = 0.05
      thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
      recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}
Example #23
0
 def get_refine_supervision(self, refine_prob, ss_boxes, image_level_label):
     '''
     refine_prob: num_box x 20 or num_box x 21
     ss_boxes: num_box x 4
     image_level_label: 1 dim vector with 20 elements
     '''
     
     cls_prob = refine_prob.data.cpu().numpy()
     #rois = ss_boxes.numpy()
     
     roi_per_image = cfg.TRAIN.MIL_BATCHSIZE
     
     if refine_prob.shape[1] == self._num_classes + 1:
         cls_prob = cls_prob[:, 1:]
     roi_labels = np.zeros([refine_prob.shape[0], self._num_classes + 1], dtype = np.int32)  # num_box x 21
     roi_labels[:,0] = 1                                                                        # the 0th element is the bg
     roi_weights = np.zeros((refine_prob.shape[0], 1), dtype=np.float32)     # num_box x 1 weights of the rois
     
     max_score_box = np.zeros((0, 4), dtype = np.float32)
     max_box_score = np.zeros((0, 1), dtype = np.float32)
     max_box_classes = np.zeros((0, 1), dtype = np.int32)
     
     #print('ss_boxes ', ss_boxes[:5,:])
     for i in range(self._num_classes):
         if image_level_label[0, i] == 1:
             cls_prob_tmp = cls_prob[:, i]
             max_index = np.argmax(cls_prob_tmp)
             
             max_score_box = np.concatenate((max_score_box, ss_boxes[max_index, 1:].reshape(1, -1)), axis=0)
             max_box_classes = np.concatenate((max_box_classes, (i+1)*np.ones((1, 1), dtype=np.int32)), axis=0)
             max_box_score = np.concatenate((max_box_score, cls_prob_tmp[max_index]*np.ones((1, 1), dtype=np.float32)), axis=0)
     #print('image_level_labels ', image_level_label)
     #print('max_box_class ', max_box_classes)
     #print('max_box_score ', max_box_score)
     overlaps = bbox_overlaps(ss_boxes[:,1:], max_score_box)
     gt_assignment = overlaps.argmax(axis=1)
     max_over_laps = overlaps.max(axis=1)
     #print('max_over_laps', max_over_laps.max())
     #print('over laps', overlaps.shape)
     roi_weights[:, 0] = max_box_score[gt_assignment, 0]
     labels = max_box_classes[gt_assignment, 0]
     
     fg_inds = np.where(max_over_laps > cfg.TRAIN.MIL_FG_THRESH)[0]
     
     roi_labels[fg_inds,labels[fg_inds]] = 1
     roi_labels[fg_inds, 0] = 0
     
     bg_inds = (np.array(max_over_laps >= cfg.TRAIN.MIL_BG_THRESH_LO, dtype=np.int32) + \
                np.array(max_over_laps < cfg.TRAIN.MIL_BG_THRESH_HI, dtype=np.int32)==2).nonzero()[0]
     
     if len(fg_inds) > 0 and len(bg_inds) > 0:
         fg_rois_num = min(cfg.TRAIN.MIL_NUM_FG, len(fg_inds))
         fg_inds = fg_inds[np.random.choice(np.arange(0, len(fg_inds)), size=int(fg_rois_num), replace=False)]
         
         bg_rois_num = min(cfg.TRAIN.MIL_NUM_BG, len(bg_inds))
         bg_inds = bg_inds[np.random.choice(np.arange(0, len(bg_inds)), size=int(bg_rois_num), replace=False)]
     
     elif len(fg_inds) > 0:
         fg_rois_num = min(cfg.TRAIN.MIL_NUM_FG, len(fg_inds))
         fg_inds = fg_inds[np.random.choice(np.arange(0, len(fg_inds)), size=int(fg_rois_num), replace=False)]
     elif len(bg_inds) > 0:
         bg_rois_num = min(cfg.TRAIN.MIL_NUM_BG, len(bg_inds))
         bg_inds = bg_inds[np.random.choice(np.arange(0, len(bg_inds)), size=int(bg_rois_num), replace=False)]
     else:
         import pdb
         pdb.set_trace()
     
     # print(len(fg_inds), len(bg_inds))
     keep_inds = np.concatenate([fg_inds, bg_inds])
     
     return roi_labels[keep_inds, :], roi_weights[keep_inds,0].reshape(-1,1), keep_inds
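
The supervision above is built by promoting, for each class flagged in the image-level label, the single highest-scoring proposal to a pseudo ground-truth box; all other proposals are then labelled by overlap with those boxes. A toy run (scores invented) shows the selection rule in isolation:

import numpy as np

cls_prob = np.array([[0.1, 0.7],
                     [0.8, 0.2],
                     [0.3, 0.9]])          # 3 proposals x 2 classes
image_level_label = np.array([[1, 1]])     # both classes present
for i in range(cls_prob.shape[1]):
    if image_level_label[0, i] == 1:
        max_index = np.argmax(cls_prob[:, i])
        print('class', i + 1, '-> proposal', max_index,
              'score', cls_prob[max_index, i])
# class 1 -> proposal 1 score 0.8
# class 2 -> proposal 2 score 0.9
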
Example #24
0
def _sample_rois(all_rois, all_scores, gt_boxes, gt_masks, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background examples.
  Return:
  - labels: (Nkp, )
  - rois  : (Nkp, 5), [0 x1 y1 x2 y2]
  - roi_scores  : (Nkp, )
  - bbox_targets: (Nkp, 4k)
  - bbox_inside_weights: (Nkp, 4k)
  """
    # overlaps: (rois x gt_boxes)
    all_rois_data = all_rois.data
    gt_boxes_data = gt_boxes.data
    overlaps = bbox_overlaps(all_rois_data[:, 1:5], gt_boxes_data[:, :4])
    max_overlaps, gt_assignment = overlaps.max(1)  # cuda tensor
    labels = gt_boxes[gt_assignment, [4]]  # cuda Variable

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = (
        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) +
        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel() > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(fg_rois_per_image),
                       replace=False)).long().cuda()]
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.numel() < bg_rois_per_image
        bg_inds = bg_inds[torch.from_numpy(
            npr.choice(np.arange(0, bg_inds.numel()),
                       size=int(bg_rois_per_image),
                       replace=to_replace)).long().cuda()]
    elif fg_inds.numel() > 0:
        to_replace = fg_inds.numel() < rois_per_image
        fg_inds = fg_inds[torch.from_numpy(
            npr.choice(np.arange(0, fg_inds.numel()),
                       size=int(rois_per_image),
                       replace=to_replace)).long().cuda()]
        fg_rois_per_image = rois_per_image
    elif fg_inds.numel() == 0:
        # we always make fg_inds.numel() > 0
        zeros = Variable(all_rois.data.new(gt_boxes.size(0), 1))
        all_rois = torch.cat((all_rois, torch.cat(
            (zeros, gt_boxes[:, :-1]), 1)), 0)
        # not sure whether appending these is wise, but they are not used anyway
        all_scores = torch.cat((all_scores, zeros), 0)
        return _sample_rois(all_rois, all_scores, gt_boxes, gt_masks,
                            fg_rois_per_image, rois_per_image, num_classes)
    # elif bg_inds.numel() > 0:
    #   to_replace = bg_inds.numel() < rois_per_image
    #   bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    #   fg_rois_per_image = 0
    else:
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds].contiguous()
    roi_scores = all_scores[keep_inds].contiguous()

    bbox_target_data = _compute_targets(
        rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data,
        labels.data)

    bbox_targets, bbox_inside_weights = \
      _get_bbox_regression_labels(bbox_target_data, num_classes)

    # Get masks, float (num_boxes, 14, 14)
    # corresponding to the selected boxes
    mask_targets = torch.FloatTensor(fg_inds.numel(), cfg.MASK_SIZE,
                                     cfg.MASK_SIZE).cuda()
    mix = 0
    for i in fg_inds.cpu().numpy().tolist():
        roi = all_rois_data[i]  # tensor [xyxyc]
        cropped = gt_masks[gt_assignment[i],
                           int(roi[2]):int(roi[4]) + 1,
                           int(roi[1]):int(roi[3]) + 1]  # uint8 {0,1}
        cropped = imresize(cropped, (cfg.MASK_SIZE, cfg.MASK_SIZE),
                           interp='nearest')  # still uint8 {0,1}
        cropped = cropped.astype(np.float32)  # float32, range [0,1]
        mask_targets[mix, :, :] = torch.from_numpy(cropped).cuda()
        mix += 1
    assert mask_targets.max() <= 1.0001

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_targets
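
`scipy.misc.imresize`, used for the mask crop above, was removed in SciPy 1.3. A drop-in nearest-neighbour replacement with OpenCV is sketched below; using cv2 here is an assumption, and any library with nearest-neighbour resizing would do:

import cv2
import numpy as np

def resize_mask(cropped, size):
    # Nearest-neighbour resize of a {0, 1} uint8 mask, matching
    # imresize(..., interp='nearest') up to the old 0-255 rescaling.
    resized = cv2.resize(cropped.astype(np.uint8), (size, size),
                         interpolation=cv2.INTER_NEAREST)
    return resized.astype(np.float32)  # float32 in [0, 1]
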
Example #25
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print ''
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])
            print 'height, width: ({}, {})'.format(height, width)
            print 'rpn: gt_boxes.shape', gt_boxes.shape
            print 'rpn: gt_boxes', gt_boxes

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border)
            & (all_anchors[:, 1] >= -self._allowed_border)
            & (all_anchors[:, 2] < im_info[1] + self._allowed_border)
            &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
        )[0]

        if DEBUG:
            print 'total_anchors', total_anchors
            print 'inds_inside', len(inds_inside)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print 'anchors.shape', anchors.shape

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        if gt_boxes.shape[0] != 0:
            overlaps = bbox_overlaps(
                np.ascontiguousarray(anchors, dtype=float),
                np.ascontiguousarray(gt_boxes, dtype=float))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(len(inds_inside)),
                                    argmax_overlaps]
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                       np.arange(overlaps.shape[1])]
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

            if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels first so that positive labels can clobber them
                labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

            # fg label: for each gt, anchor with highest overlap
            labels[gt_argmax_overlaps] = 1

            # fg label: above threshold IOU
            labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

            if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels last so that negative labels can clobber positives
                labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        else:
            labels.fill(0)

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
            #print "was %s inds, disabling %s, now %s inds" % (
            #len(bg_inds), len(disable_inds), np.sum(labels == 0))

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if gt_boxes.shape[0] != 0:
            bbox_targets = _compute_targets(anchors,
                                            gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                        dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means**2)
            print 'means:'
            print means
            print 'stdevs:'
            print stds

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      fill=0)

        if DEBUG:
            if gt_boxes.shape[0] != 0:
                print 'rpn: max max_overlap', np.max(max_overlaps)
            else:
                print 'rpn: max max_overlap', 0
            print 'rpn: num_positive', np.sum(labels == 1)
            print 'rpn: num_negative', np.sum(labels == 0)
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print 'rpn: num_positive avg', self._fg_sum / self._count
            print 'rpn: num_negative avg', self._bg_sum / self._count

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
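
The shift/anchor arithmetic at the top of `forward` is easy to sanity-check on tiny numbers: with a 2 x 3 feature map, stride 16, and A = 9 base anchors there are K = 6 shifts and K*A = 54 anchors. A self-contained check (base anchors zeroed out for brevity):

import numpy as np

feat_stride, height, width, A = 16, 2, 3, 9
anchors = np.zeros((A, 4))                  # stand-in for self._anchors
shift_x, shift_y = np.meshgrid(np.arange(width) * feat_stride,
                               np.arange(height) * feat_stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(K, all_anchors.shape)                 # 6 (54, 4)
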
Example #26
0
def _sample_rois_manually(gt_boxes_origin, fg_rois_per_image, rois_per_image,
                          num_classes, gt_truncated, im_info):
    """Args:
    gt_boxes_origin: Variable, [gt_num, 5], [x1, y1, x2, y2, class_id]
    fg_rois_per_image: int, 64
    rois_per_image: float, 256.0
    num_classes: int, 21
    gt_truncated: ndarray.bool, [gt_num]
    """
    fg_num = fg_rois_per_image
    rois_per_image = int(rois_per_image)
    gt_boxes_origin = gt_boxes_origin.data.cpu()
    img_width = float(im_info[0])
    img_height = float(im_info[1])
    """Remove truncated gt_boxes"""
    gt_truncated = gt_truncated.astype(int)
    gt_truncated = torch.from_numpy(gt_truncated)
    truncated_idx = (gt_truncated == 0).nonzero().view(-1)

    if len(truncated_idx) != 0:
        gt_boxes = torch.index_select(gt_boxes_origin, 0, truncated_idx)
        untruncted_gt_num = len(gt_boxes)
        """get width and height of every untruncated gt_box"""
        width = gt_boxes[:, 2] - gt_boxes[:, 0]  # x2-x1
        height = gt_boxes[:, 3] - gt_boxes[:, 1]
        """for every untruncated gt_box:"""
        for i in range(untruncted_gt_num):
            # get the number of fg_rois that the ith gt should generate.
            if i == untruncted_gt_num - 1:
                fg_num_per_gt = fg_rois_per_image - (
                    untruncted_gt_num - 1) * int(
                        fg_rois_per_image / untruncted_gt_num)
            else:
                fg_num_per_gt = int(fg_rois_per_image / untruncted_gt_num)

            # get the width and height delta.
            delta = torch.rand(fg_num_per_gt, 4) * 0.2 - 0.1  # [-0.1, 0.1)
            delta = delta * torch.FloatTensor(
                [width[i], height[i], width[i], height[i]])

            if i == 0:
                fg_rois = delta + gt_boxes[i, :-1]
                labels = torch.ones(fg_num_per_gt) * gt_boxes[i, 4]
            else:
                fg_rois = torch.cat((fg_rois, delta + gt_boxes[i, :-1]))
                labels = torch.cat(
                    (labels, torch.ones(fg_num_per_gt) * gt_boxes[i, 4]))
        """manage the boundary"""
        fg_rois[:, 0] = torch.max(torch.FloatTensor([0]), fg_rois[:, 0])
        fg_rois[:, 1] = torch.min(torch.FloatTensor([img_width]), fg_rois[:,
                                                                          1])
        fg_rois[:, 2] = torch.max(torch.FloatTensor([0]), fg_rois[:, 2])
        fg_rois[:, 3] = torch.min(torch.FloatTensor([img_height]), fg_rois[:,
                                                                           3])
    else:
        fg_num = 0
        fg_rois = torch.FloatTensor()
        gt_boxes = torch.FloatTensor()
        labels = torch.FloatTensor()
    """v3.0: generate truncated_rois"""
    if len(gt_boxes) != 0:
        truncated_rois, truncated_label, truncated_rois_num = genarate_truncated_rois(
            gt_boxes, fg_rois_per_image)
    else:
        truncated_rois = torch.FloatTensor()
        truncated_label = torch.FloatTensor()
        truncated_rois_num = 0
    """ generate bg_rois """
    bg_num = rois_per_image - fg_num - truncated_rois_num
    x1_bg = (torch.rand(bg_num * 2) * img_width).type(torch.FloatTensor)
    y1_bg = (torch.rand(bg_num * 2) * img_height).type(torch.FloatTensor)
    if fg_num != 0:
        bg_width = torch.min(width) + torch.rand(
            bg_num * 2) * (torch.max(width) - torch.min(width))
        bg_height = torch.min(height) + torch.rand(
            bg_num * 2) * (torch.max(height) - torch.min(height))
    else:
        width_origin = gt_boxes_origin[:, 2] - gt_boxes_origin[:, 0]  # x2-x1
        height_origin = gt_boxes_origin[:, 3] - gt_boxes_origin[:, 1]
        bg_width = torch.min(width_origin) + torch.rand(
            bg_num * 2) * (torch.max(width_origin) - torch.min(width_origin))
        bg_height = torch.min(height_origin) + torch.rand(
            bg_num * 2) * (torch.max(height_origin) - torch.min(height_origin))
    x2_bg = x1_bg + bg_width
    y2_bg = y1_bg + bg_height
    bg_rois = torch.cat((torch.unsqueeze(x1_bg, 1), torch.unsqueeze(
        y1_bg, 1), torch.unsqueeze(x2_bg, 1), torch.unsqueeze(y2_bg, 1)), 1)
    """cannot overlap with every gt"""
    overlaps = bbox_overlaps(bg_rois, gt_boxes_origin[:, :-1])
    max_overlaps, _ = overlaps.max(1)
    bg_inds = (max_overlaps == 0).nonzero().view(-1)
    if len(bg_inds) != 0:
        bg_rois = bg_rois[bg_inds]
    else:  # Rare case: gt too large, no bg
        bg_rois = torch.unsqueeze(torch.FloatTensor([10, 10, 20, 20]), 0)
    # keep only background boxes that lie fully inside the image
    bg_inds = (bg_rois[:, 0] >= 0).numpy() & (bg_rois[:, 2] <= img_width).numpy() & \
              (bg_rois[:, 1] >= 0).numpy() & (bg_rois[:, 3] <= img_height).numpy()
    if not bg_inds.any():  # Rare case: nothing survives the bound check
        bg_rois = torch.unsqueeze(torch.FloatTensor([10, 10, 20, 20]), 0)
        bg_inds = np.asarray([1])
    bg_inds = torch.FloatTensor(bg_inds.astype(float)).nonzero().view(-1)
    """select 256-64 bg randomly"""
    to_replace = bg_inds.numel() < bg_num
    bg_inds = bg_inds[torch.from_numpy(
        npr.choice(np.arange(0, bg_inds.numel()),
                   size=int(bg_num),
                   replace=to_replace)).long()]
    bg_rois = bg_rois[bg_inds]
    """set return vars"""
    rois = torch.cat((fg_rois, truncated_rois, bg_rois), 0)
    rois = torch.cat((torch.zeros(len(rois), 1), rois),
                     1)  # add 0s at first column.
    rois = Variable(rois.type(torch.cuda.FloatTensor), requires_grad=True)
    labels = torch.cat((labels, truncated_label, torch.zeros(bg_num)))
    labels = Variable(labels.type(torch.cuda.FloatTensor), requires_grad=False)
    roi_scores = Variable(torch.zeros(256, 1).type(torch.cuda.FloatTensor),
                          requires_grad=True)
    bbox_targets = torch.zeros(256,
                               num_classes * 4).type(torch.cuda.FloatTensor)
    bbox_inside_weights = torch.zeros(256, num_classes * 4).type(
        torch.cuda.FloatTensor)

    assert len(rois) == 256, "len"
    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
    """return:
Example #27
0
    def forward(self, bottom, top):

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]
        # side_pos
        side_pos = bottom[3].data

        if DEBUG:
            print ''
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'height, width: ({}, {})'.format(height, width)
            print 'rpn: gt_boxes.shape', gt_boxes.shape
            print 'rpn: gt_boxes'
            print gt_boxes
            print 'rpn: side_pos.shape', side_pos.shape
            print 'rpn: side_pos'
            print side_pos

        A = self._num_anchors
        all_anchors = self.anchor_generator.locate_anchors((height, width),
                                                           self._feat_stride)
        total_anchors = all_anchors.shape[0]

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= 0)
                               & (all_anchors[:, 1] >= 0)
                               & (all_anchors[:, 2] < im_info[1]) &  # width
                               (all_anchors[:, 3] < im_info[0])  # height
                               )[0]
        if DEBUG:
            print 'total_anchors', total_anchors
            print 'inside_anchors', len(inds_inside)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print 'anchors.shape', anchors.shape

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=float),
            np.ascontiguousarray(gt_boxes, dtype=float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        init_gt_argmax_overlaps = gt_argmax_overlaps
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if DEBUG:
            print "overlaps shape", overlaps.shape
            print "argmax_overlaps shape", argmax_overlaps.shape
            print "gt_argmax_overlaps shape", gt_argmax_overlaps.shape
            print "init_gt_argmax_overlaps shape", init_gt_argmax_overlaps.shape
            print "init_gt_argmax_overlaps"
            print init_gt_argmax_overlaps
            print "max overlaps anchors"
            print anchors[init_gt_argmax_overlaps]

        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN_RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN_RPN_POSITIVE_OVERLAP] = 1

        if DEBUG:
            print "before sample"
            print "positive anchor num", np.sum(labels == 1)
            print "negative anchor num", np.sum(labels == 0)

        # sample positive labels if we have too many
        num_fg = int(cfg.TRAIN_RPN_FG_FRACTION * cfg.TRAIN_RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

        # sample negative labels if we have too many
        num_bg = cfg.TRAIN_RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
        if DEBUG:
            print "after sample"
            print "positive anchor num", np.sum(labels == 1)
            print "positive anchor", np.where(labels == 1)[0]
            print "negative anchor num", np.sum(labels == 0)

        bbox_targets = np.zeros((len(inds_inside), 2), dtype=np.float32)
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 2), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array([1, 1])

        bbox_outside_weights = np.zeros((len(inds_inside), 2),
                                        dtype=np.float32)
        bbox_outside_weights[labels == 1, :] = np.array([1, 1])

        if DEBUG:
            print "before map:"
            print "labels.shape", labels.shape
            print "bbox_targets.shape", bbox_targets.shape
            print "bbox_inside_weights.shape", bbox_inside_weights.shape
            print "bbox_outside_weights.shape", bbox_outside_weights.shape

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      fill=0)

        max_anchor_inds = inds_inside[init_gt_argmax_overlaps]
        if DEBUG:
            print "max anchors"
            print all_anchors[max_anchor_inds]

        sr_targets = np.empty((total_anchors, ), dtype=np.float32)
        sr_targets.fill(0)

        sr_anchor_inds = []
        # for each labelled gt side, take the gt's best anchor plus its immediate
        # horizontal neighbours (+/- 10 indices, i.e. one feature-map column here)
        for i in range(len(side_pos)):
            if side_pos[i] < 0:
                continue
            inds = max_anchor_inds[i]
            side = side_pos[i]
            line_num = int(inds) // (10 * width)
            for x in [-10, 0, 10]:
                tmp_inds = inds + x
                tmp_line_num = int(tmp_inds) // (10 * width)
                if tmp_line_num == line_num:
                    center = (all_anchors[tmp_inds][0] +
                              all_anchors[tmp_inds][2]) / 2.0
                    if abs(center - side) > cfg.TRAIN_SIDE_REFINE_MAX:
                        continue
                    sr_anchor_inds.append(tmp_inds)
                    sr_targets[tmp_inds] = (side -
                                            center) / cfg.TEXT_PROPOSALS_WIDTH

        # drop any anchor index proposed for more than one gt side (both copies)
        sr_anchor_inds = [
            x for x in sr_anchor_inds if sr_anchor_inds.count(x) == 1
        ]
        if len(sr_anchor_inds) > cfg.TRAIN_SR_BATCH:
            sr_anchor_inds = npr.choice(sr_anchor_inds,
                                        size=(cfg.TRAIN_SR_BATCH),
                                        replace=False)

        sr_inside_weights = np.empty((total_anchors, ), dtype=np.float32)
        sr_inside_weights.fill(0)
        sr_inside_weights[sr_anchor_inds] = 1
        sr_outside_weights = np.empty((total_anchors, ), dtype=np.float32)
        sr_outside_weights.fill(0)
        sr_outside_weights[sr_anchor_inds] = 1

        if DEBUG:
            print "after map:"
            print "labels.shape", labels.shape
            print "bbox_targets.shape", bbox_targets.shape
            print "bbox_inside_weights.shape", bbox_inside_weights.shape
            print "bbox_outside_weights.shape", bbox_outside_weights.shape
            print "sr_targets.shape", sr_targets.shape
            print "sr_inside_weights.shape", sr_inside_weights.shape
            print "sr_outside_weights.shape", sr_outside_weights.shape
            print "side refinement:"
            print "sr_anchor_inds", sr_anchor_inds
            print "sr_anchor", all_anchors[sr_anchor_inds]
            print "sr_targets", sr_targets[sr_anchor_inds]
            print "sr_inside_weights", sr_inside_weights[sr_anchor_inds]
            print "sr_outside_weights", sr_outside_weights[sr_anchor_inds]

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 2)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights

        # sr_targets
        sr_targets = sr_targets \
            .reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        top[4].reshape(*sr_targets.shape)
        top[4].data[...] = sr_targets

        # sr_inside_weights
        sr_inside_weights = sr_inside_weights \
            .reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        assert sr_inside_weights.shape[2] == height
        assert sr_inside_weights.shape[3] == width
        top[5].reshape(*sr_inside_weights.shape)
        top[5].data[...] = sr_inside_weights

        # sr_outside_weights
        sr_outside_weights = sr_outside_weights \
            .reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        assert sr_outside_weights.shape[2] == height
        assert sr_outside_weights.shape[3] == width
        top[6].reshape(*sr_outside_weights.shape)
        top[6].data[...] = sr_outside_weights
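The layer above leans on a _unmap helper to scatter each per-inside-anchor array back over the full anchor set. Its definition is not part of this excerpt; a minimal sketch of what such a helper typically looks like in these codebases (the name and dtype here are assumptions):

import numpy as np

def _unmap_sketch(data, count, inds, fill=0):
    # Unmap a subset of items (data) back to the original set of items
    # (of size count); positions not listed in inds are set to `fill`.
    if len(data.shape) == 1:
        ret = np.empty((count, ), dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
    else:
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds, :] = data
    return ret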
Example #28
0
def prepare_roidb(imdb):
    """Enrich the imdb's roidb by adding some derived quantities that
    are useful for training. This function precomputes the maximum
    overlap, taken over ground-truth boxes, between each ROI and
    each ground-truth box. The class with maximum overlap is also
    recorded.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = pickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(
            imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in range(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float),
                                     boxes_rescaled.astype(np.float))
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k)
                             & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # scale mapping for RoI pooling
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
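_compute_targets is likewise defined elsewhere; in the Fast/er R-CNN family it wraps the standard (dx, dy, dw, dh) box parameterization. A self-contained sketch under that assumption (the helper name is hypothetical):

import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    # Regression targets from example boxes to matched gt boxes using the
    # usual center-offset / log-size parameterization (inclusive coords).
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()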
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors // num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (
            cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1))
        negative_weights = (
            (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
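anchor_target_layer expects all_anchors already enumerated over the feature map. A sketch of the usual construction, shifting A base anchors across every cell (a standard recipe, not necessarily this repo's exact generator):

import numpy as np

def enumerate_anchors_sketch(base_anchors, height, width, feat_stride):
    # Tile A base anchors over a height x width feature map; the result
    # has K * A rows, which is the `total_anchors` used above.
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = base_anchors.shape[0]
    K = shifts.shape[0]
    all_anchors = (base_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    return all_anchors.reshape((K * A, 4))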
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """
  Same as the anchor target layer in original Fast/er RCNN
  1 筛选anchor
  2 计算IoU
  3 根据IoU标记正负
  4 留下256个标签
  5 做回归计算
  6 反映射到原来19494个anchor中
  """

    A = num_anchors  # 9
    total_anchors = all_anchors.shape[0]  # 19494 anchors in total
    K = total_anchors // num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]  #height=57,width=38

    # only keep anchors inside the image: indices with
    # x1 >= 0, y1 >= 0, x2 < W and y2 < H
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # overlaps[inds_inside,gt]
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)  # per-row argmax, shape [inds_inside]
    # per-row max, shape [inds_inside]: the best IoU of each anchor,
    # i.e. which gt box this anchor belongs to
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # per-column argmax, shape [gt]
    # per-column max, shape [gt]: for each gt, the IoU of its best-matching anchor
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # row indices of every anchor whose overlap ties a per-gt maximum
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives: a max IoU below 0.3 means this anchor
        # overlaps no gt box well enough, so label it background
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, the anchor with the highest overlap is always positive
    labels[gt_argmax_overlaps] = 1

    # fg label: any anchor whose max IoU is above 0.7 is positive as well
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    # (foregrounds are capped at half the batch size)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]  # indices of the foreground anchors
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1  # surplus foregrounds become don't-care

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1  # surplus backgrounds become don't-care
    # exactly RPN_BATCHSIZE labels remain
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # regression targets between the kept anchors and their max-IoU gt boxes

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets, weighted [1.0, 1.0, 1.0, 1.0]
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)  #256
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples

    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))

    # initialise the outside weights used by the loss: [1.0, 1.0, 1.0, 1.0] / num_examples
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors, 映射到原始的anchor
    labels = _unmap(labels, total_anchors, inds_inside,
                    fill=-1)  # labels scattered back over all 19494 anchors; fill is -1
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)
    # everything is now mapped back onto the original 19494 anchors

    # labels
    labels = labels.reshape(
        (1, height, width, A)).transpose(0, 3, 1, 2)  #[1,A,height,width]
    labels = labels.reshape((1, 1, A * height, width))  #[1,1,A * height,width]
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))  # [1, height, width, 9*4]

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))  # [1, height, width, 9*4]

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))  # [1, height, width, 9*4]

    rpn_bbox_outside_weights = bbox_outside_weights

    # returns: labels [1, 1, A*height, width]; regression targets and both
    # weight blobs [1, height, width, 9*4]
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
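A worked example of the sampling budget above, assuming the common defaults RPN_BATCHSIZE = 256 and RPN_FG_FRACTION = 0.5 (the counts are illustrative):

num_fg = int(0.5 * 256)   # at most 128 positive anchors
# suppose only 40 anchors reached RPN_POSITIVE_OVERLAP:
num_bg = 256 - 40         # 216 negatives fill out the minibatch
# every anchor outside these 256 keeps the don't-care label -1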
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    K = total_anchors // num_anchors

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
      .reshape((1, height, width, A * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
      .reshape((1, height, width, A * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
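All of these layers rely on the Cython bbox_overlaps; a pure-NumPy sketch of the same N x K IoU computation (slow but semantically equivalent, included only to make the convention explicit):

import numpy as np

def bbox_overlaps_sketch(boxes, query_boxes):
    # IoU between boxes (N, 4) and query_boxes (K, 4) with inclusive
    # pixel coordinates, mirroring the Cython kernel used above.
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float64)
    for k in range(K):
        q = query_boxes[k]
        q_area = (q[2] - q[0] + 1) * (q[3] - q[1] + 1)
        for n in range(N):
            iw = min(boxes[n, 2], q[2]) - max(boxes[n, 0], q[0]) + 1
            ih = min(boxes[n, 3], q[3]) - max(boxes[n, 1], q[1]) + 1
            if iw > 0 and ih > 0:
                ua = ((boxes[n, 2] - boxes[n, 0] + 1) *
                      (boxes[n, 3] - boxes[n, 1] + 1) + q_area - iw * ih)
                overlaps[n, k] = iw * ih / ua
    return overlaps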
Example #32
0
    def forward(self, classifications, regressions, anchors, annotations, iou_thres=0.5):

        cls_losses = []
        reg_losses = []
        batch_size = classifications.shape[0]
        all_pred_boxes = self.box_coder.decode(anchors, regressions, mode='xywht')
        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
            pred_boxes = all_pred_boxes[j, :, :]
            if bbox_annotation.shape[0] == 0:
                cls_losses.append(torch.tensor(0).float().cuda())
                reg_losses.append(torch.tensor(0).float().cuda())
                continue
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1])
            )
            ious = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1
            )
            if not torch.is_tensor(ious):
                ious = torch.from_numpy(ious).cuda()
            
            iou_max, iou_argmax = torch.max(ious, dim=1)
           
            positive_indices = torch.ge(iou_max, iou_thres)

            # ensure every gt keeps at least its best-matching anchor as a positive
            max_gt, argmax_gt = ious.max(0)
            if (max_gt < iou_thres).any():
                positive_indices[argmax_gt[max_gt < iou_thres]] = 1
              
            # cls loss
            cls_targets = (torch.ones(classification.shape) * -1).cuda()
            cls_targets[torch.lt(iou_max, iou_thres - 0.1), :] = 0
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[iou_argmax, :]
            cls_targets[positive_indices, :] = 0
            cls_targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1
            alpha_factor = torch.ones(cls_targets.shape).cuda() * self.alpha
            alpha_factor = torch.where(torch.eq(cls_targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(cls_targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
            bin_cross_entropy = -(cls_targets * torch.log(classification+1e-6) + (1.0 - cls_targets) * torch.log(1.0 - classification+1e-6))
            cls_loss = focal_weight * bin_cross_entropy 
            cls_loss = torch.where(torch.ne(cls_targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            cls_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
            # reg loss
            if positive_indices.sum() > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                reg_targets = self.box_coder.encode(all_rois, gt_boxes)
                reg_loss = self.criteron(regression[positive_indices, :], reg_targets)
                reg_losses.append(reg_loss)

                if not torch.isfinite(reg_loss):
                    import ipdb; ipdb.set_trace()
            else:
                reg_losses.append(torch.tensor(0).float().cuda())
        loss_cls = torch.stack(cls_losses).mean(dim=0, keepdim=True)
        loss_reg = torch.stack(reg_losses).mean(dim=0, keepdim=True)
        return loss_cls, loss_reg
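Stripped of the box-assignment machinery, the classification branch above is focal loss on already-sigmoided scores. A minimal sketch, assuming the common alpha = 0.25, gamma = 2.0 and targets of 1 (positive), 0 (negative) or -1 (ignore):

import torch

def focal_bce_sketch(p, targets, alpha=0.25, gamma=2.0):
    # p: sigmoid scores; rows with target -1 are masked out of the loss.
    p = torch.clamp(p, 1e-4, 1.0 - 1e-4)
    alpha_w = torch.where(targets == 1.0, torch.full_like(p, alpha),
                          torch.full_like(p, 1.0 - alpha))
    focal_w = alpha_w * torch.where(targets == 1.0, 1.0 - p, p).pow(gamma)
    bce = -(targets * torch.log(p) + (1.0 - targets) * torch.log(1.0 - p))
    loss = focal_w * bce
    return torch.where(targets != -1.0, loss, torch.zeros_like(loss)).sum()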
Example #33
0
def _sample_rois(all_rois, all_scores, all_anchors_3d, gt_boxes, true_gt_boxes,
                 gt_boxes_dc, fg_rois_per_frame, rois_per_frame, num_classes,
                 num_bbox_target_elem):
    """Generate a random sample of RoIs comprising foreground and background
  examples. This will provide the 'best-case scenario' for the proposal layer to act as a target 
  Arguments:
  all_rois -> all roi's generated by the RPN (Nx5) where dim1 = [k,x1,y1,x2,y2]
  all_scores -> all predicted softmax value for winning class, generated by RPN
  gt_boxes -> all gt_boxes (Nx5) where dim1 = [x1,y1,x2,y2,k]
  true_gt_boxes -> all gt boxes in 3d form (Nx8) where dim1 = [xc,yc,zc,l,w,h,ry,k]
  gt_boxes_dc   -> bounding boxes containing dont care areas (Nx4)
  fg_rois_per_frame -> Maximum allowed foreground ROI's to submit to the 2nd stage
  """
    # overlaps: (rois x gt_boxes)
    #print('gt boxes')
    #print(gt_boxes)
    max_overlaps_dc = torch.tensor([])
    #Remove all indices that cover dc areas
    if (cfg.TRAIN.IGNORE_DC and list(gt_boxes_dc.size())[0] > 0):
        overlaps_dc = bbox_overlaps(
            all_rois[:, 1:5].data, gt_boxes_dc[:, :4].data
        )  #NxK Output N= num roi's k = num gt entries on image
        max_overlaps_dc, _ = overlaps_dc.max(
            1
        )  #Returns max value of all input elements along dimension and their index
        dc_inds = (max_overlaps_dc < cfg.TRAIN.DC_THRESH).nonzero().view(-1)
        dc_filtered_rois = all_rois[dc_inds, :]
        dc_filtered_scores = all_scores[dc_inds, :]
        dc_filtered_anchors_3d = all_anchors_3d[dc_inds, :]
    else:
        dc_filtered_rois = all_rois
        dc_filtered_scores = all_scores
        dc_filtered_anchors_3d = all_anchors_3d
    overlaps = bbox_overlaps(
        dc_filtered_rois[:, 1:5].data, gt_boxes[:, :4].data
    )  #NxK Output N= num roi's k = num gt entries on image
    max_overlaps, gt_assignment = overlaps.max(
        1
    )  #Returns max value of all input elements along dimension and their index
    # Advanced indexing: for each roi, pick column 4 (the class) of the gt box
    # it was assigned to
    labels = gt_boxes[gt_assignment, [4]]
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
    # Guard against the case when an image has fewer than fg_rois_per_frame foreground RoIs
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = (
        (max_overlaps < cfg.TRAIN.BG_THRESH_HI) +
        (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(
            -1)  #.nonzero() returns all elements that are non zero in array
    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.numel() > 0 and bg_inds.numel(
    ) > 0:  #numel() returns number of elements in tensor
        fg_rois_per_frame = min(fg_rois_per_frame, fg_inds.numel())
        fg_inds = fg_inds[torch_choice(fg_inds.numel(),
                                       int(fg_rois_per_frame),
                                       gt_boxes.device,
                                       to_replace=False)]
        bg_rois_per_frame = rois_per_frame - fg_rois_per_frame
        to_replace = bg_inds.numel(
        ) < bg_rois_per_frame  #Multiple entries of the same bg inds if too small
        bg_inds = bg_inds[torch_choice(bg_inds.numel(),
                                       int(bg_rois_per_frame),
                                       gt_boxes.device,
                                       to_replace=to_replace)]
    elif fg_inds.numel() > 0:  #Only foreground ROI's were generated
        to_replace = fg_inds.numel() < rois_per_frame
        fg_inds = fg_inds[torch_choice(fg_inds.numel(),
                                       int(rois_per_frame),
                                       gt_boxes.device,
                                       to_replace=to_replace)]
        fg_rois_per_frame = rois_per_frame
    elif bg_inds.numel() > 0:  #Only background ROI's were generated
        to_replace = bg_inds.numel() < rois_per_frame
        bg_inds = bg_inds[torch_choice(bg_inds.numel(),
                                       int(rois_per_frame),
                                       gt_boxes.device,
                                       to_replace=to_replace)]
        fg_rois_per_frame = 0
    else:
        print('importing pdb')
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = torch.cat([fg_inds, bg_inds], 0)
    # Select sampled values from various arrays:
    labels = labels[keep_inds].contiguous()
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_frame):] = 0
    rois = dc_filtered_rois[keep_inds].contiguous()
    roi_scores = dc_filtered_scores[keep_inds].contiguous()
    anchors_3d = dc_filtered_anchors_3d[keep_inds].contiguous()

    #Right here, bbox_target_data is actually the delta.
    if (cfg.NET_TYPE == 'lidar'):
        #TODO: Multiple anchors??
        bbox_target_data = _compute_lidar_targets(
            rois[:, 1:5].data, anchors_3d.data,
            true_gt_boxes[gt_assignment[keep_inds]][:, :-1].data, labels.data)

    elif (cfg.NET_TYPE == 'image'):
        bbox_target_data = _compute_targets(
            rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data,
            labels.data)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes, num_bbox_target_elem)

    return labels, rois, anchors_3d, roi_scores, bbox_targets, bbox_inside_weights
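torch_choice is a repo-local helper not shown in this excerpt; judging from its call sites it draws `size` indices from range(num_elem) on the given device, with or without replacement. A sketch under that assumption:

import torch

def torch_choice_sketch(num_elem, size, device, to_replace=False):
    # On-device index sampling analogous to npr.choice.
    if to_replace:
        return torch.randint(num_elem, (size, ), device=device)
    return torch.randperm(num_elem, device=device)[:size]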
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, parsing_labels=None):
  """Generate a random sample of RoIs comprising foreground and background
  examples.
  """
  # overlaps: (rois x gt_boxes), e.g. (2000, 15)
  # the IoU between every roi and every gt box
  overlaps = bbox_overlaps(
    all_rois[:, 1:5].data,
    gt_boxes[:, :4].data)
  # for each roi, the gt box with the highest IoU becomes its assigned gt
  max_overlaps, gt_assignment = overlaps.max(1)
  # max_overlaps: the IoU between each roi and its assigned gt box
  labels = gt_boxes[gt_assignment, [4]]  # the class assigned to each roi; gt_boxes is (15, 5)
  if cfg.SUB_CATEGORY:
    sub_labels = gt_boxes[gt_assignment, [5]]


  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = (max_overlaps >= cfg.TRAIN.FG_THRESH).nonzero().view(-1)
  #print(fg_inds)
  # Guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  # rois with IoU in [0.1, 0.5) are treated as background
  bg_inds = ((max_overlaps < cfg.TRAIN.BG_THRESH_HI) + (max_overlaps >= cfg.TRAIN.BG_THRESH_LO) == 2).nonzero().view(-1)

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.numel() > 0 and bg_inds.numel() > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.numel())
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(fg_rois_per_image), replace=False)).long().cuda()]
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.numel() < bg_rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(bg_rois_per_image), replace=to_replace)).long().cuda()]
  elif fg_inds.numel() > 0:
    to_replace = fg_inds.numel() < rois_per_image
    fg_inds = fg_inds[torch.from_numpy(npr.choice(np.arange(0, fg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = rois_per_image
  elif bg_inds.numel() > 0:
    to_replace = bg_inds.numel() < rois_per_image
    bg_inds = bg_inds[torch.from_numpy(npr.choice(np.arange(0, bg_inds.numel()), size=int(rois_per_image), replace=to_replace)).long().cuda()]
    fg_rois_per_image = 0
  else:
    import pdb
    pdb.set_trace()

  if cfg.DO_PARSING:
    mask_rois = all_rois[fg_inds]#.contiguous()
    #print(mask_rois.size()) (64,5)
    mask_cls_labels = labels[fg_inds]#.contiguous()
    #print(mask_cls_labels.size())
    assert parsing_labels.size(0) == 1
    # parsing_labels (48, 320, 320) -> (48, 1, 28, 28)
    # TODO : parsing_labels = parsing_labels[0][gt_assignment[fg_inds], :, :]
    # gt_assignment only gives the index of the assigned gt box; the label is
    # that box's fifth column, so box 1's label is not necessarily class 1
    # print (gt_assignment.size(), labels.size())
    # print (type(gt_assignment), type(labels))
    parsing_labels = parsing_labels[0][labels.data.long()[fg_inds], :, :]  # batch, channel, h, w; batch == 1
    #print(parsing_labels.size())

    mask_parsing_labels = gen_mask_parsing_labels(parsing_labels, mask_rois)
    #print(mask_parsing_labels.size())
    mask_unit = {}
    mask_unit['mask_rois'] = mask_rois
    mask_unit['mask_cls_labels'] = mask_cls_labels
    mask_unit['mask_parsing_labels'] = mask_parsing_labels
    #print(mask_unit['mask_parsing_labels'].size())

  # The indices that we're selecting (both fg and bg)
  keep_inds = torch.cat([fg_inds, bg_inds], 0)   # indices of the 256 rois (fg + bg) chosen from the 2000
  # Select sampled values from various arrays:
  labels = labels[keep_inds].contiguous()  # class labels of the selected rois, shape (256,)
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0  # background labels pinned to 0, shape (256,)
  if cfg.SUB_CATEGORY:
    sub_labels = sub_labels[keep_inds].contiguous()
    sub_labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds].contiguous()  # keep only the selected rois
  roi_scores = all_scores[keep_inds].contiguous()  # keep only their scores (the RPN's objectness probability per roi)

  # if cfg.DO_PARSING:
  #   mask_unit = {}
  #   mask_unit['mask_rois'] = rois[:int(fg_rois_per_image),...]
  #   mask_unit['mask_cls_labels'] = labels[:int(fg_rois_per_image)]
  #   mask_unit['mask_parsing_labels'] = parsing_labels[0][labels[:int(fg_rois_per_image)], :, :]


  #  Feed the selected rois plus their assigned gt box coordinates and classes
  #  into _compute_targets:
  #    roi coordinates:        rois[:, 1:5].data                              (256, 4)
  #    matched gt coordinates: gt_boxes[gt_assignment[keep_inds]][:, :4].data (256, 4)
  #    matched classes:        labels.data                                    (256,)
  #  Returns (256, 5): the class plus the 4 regression values.

  bbox_target_data = _compute_targets(
    rois[:, 1:5].data, gt_boxes[gt_assignment[keep_inds]][:, :4].data, labels.data)

  bbox_targets, bbox_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, num_classes)
  if cfg.SUB_CATEGORY:
    if cfg.DO_PARSING:
      return labels, sub_labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_unit
    else:
      return labels, sub_labels, rois, roi_scores, bbox_targets, bbox_inside_weights
  else:
    if cfg.DO_PARSING:
      return labels, rois, roi_scores, bbox_targets, bbox_inside_weights, mask_unit
    else:
      return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
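_get_bbox_regression_labels, shared by these samplers, expands the class-agnostic (N, 5) target rows [class, dx, dy, dw, dh] into per-class columns. A sketch of that expansion (names are assumptions; the real helper typically fills the inside weights from cfg.TRAIN.BBOX_INSIDE_WEIGHTS rather than 1.0):

import torch

def get_bbox_regression_labels_sketch(bbox_target_data, num_classes):
    # Each row's 4 targets land in the 4 columns owned by its class;
    # background rows (class 0) keep all-zero targets and weights.
    clss = bbox_target_data[:, 0]
    bbox_targets = bbox_target_data.new_zeros(clss.numel(), 4 * num_classes)
    bbox_inside_weights = torch.zeros_like(bbox_targets)
    for ind in (clss > 0).nonzero().view(-1):
        start = int(4 * clss[ind].item())
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.0
    return bbox_targets, bbox_inside_weights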