Exemplo n.º 1
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image proposal boxes.

        Parameters
        ----------
        box_list : sequence of ndarray
            One array of boxes per image; its length must equal
            ``self.num_images``.  Only ``shape[0]`` (the number of boxes) and
            ``astype`` are used here.
        gt_roidb : sequence of dict or None
            Optional ground-truth roidb.  Entry ``i`` must provide ``'boxes'``
            and ``'gt_classes'``.  When ``None``, or when an image has no
            ground-truth boxes, that image's overlaps stay all-zero.

        Returns
        -------
        list of dict
            One entry per image with keys ``'boxes'``, ``'gt_classes'`` (all
            zeros), ``'gt_overlaps'`` (CSR matrix of shape
            ``(num_boxes, self.num_classes)``), ``'flipped'`` (``False``) and
            ``'seg_areas'`` (all zeros).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        # `range` works on both Python 2 and 3; `xrange` exists only on 2.
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # `np.float` was only an alias of the builtin float (float64)
                # and no longer exists in NumPy >= 1.24.
                gt_overlaps = cython_bbox.bbox_overlaps(
                    boxes.astype(np.float64), gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # For every box with a positive best overlap, record that
                # overlap in the column of its best-matching gt box's class.
                positive = np.where(maxes > 0)[0]
                overlaps[positive, gt_classes[argmaxes[positive]]] = maxes[positive]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb
Exemplo n.º 2
0
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN outputs into foreground/background rois and mask rois.

    The inputs are first passed through ``sample_rpn_outputs``; the boxes it
    returns are then matched against ``gt_boxes`` and sub-sampled.

    Parameters
    ----------
    boxes, scores : forwarded to ``sample_rpn_outputs``; the returned boxes
        are indexed as ``boxes[:, 0:4]``.
    gt_boxes : ndarray whose first four columns are box coordinates.  The
        debug output divides ``gt_boxes.size`` by 5, i.e. it assumes five
        columns per ground-truth box.
    is_training, only_positive : forwarded to ``sample_rpn_outputs``.

    Returns
    -------
    tuple
        ``(boxes[keep], scores[keep], batch_inds[keep],
        boxes[mask_fg], scores[mask_fg], batch_inds[mask_fg])`` where ``keep``
        is the sampled foreground indices followed by the sampled background
        indices, and ``mask_fg`` are the indices whose best overlap reaches
        ``cfg.FLAGS.mask_threshold`` (empty when there is no ground truth).
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    if gt_boxes.size > 0:
        # presumably a (B, G) box/gt overlap matrix -- TODO confirm against
        # cython_bbox.  np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        if _DEBUG and np.argmax(overlaps[fg_inds], axis=1).size < gt_boxes.size / 5.0:
            print("gt_size")
            print(gt_boxes)
            gt_height = (gt_boxes[:, 2] - gt_boxes[:, 0])
            gt_width = (gt_boxes[:, 3] - gt_boxes[:, 1])
            gt_dim = np.vstack((gt_height, gt_width))
            print(np.transpose(gt_dim))

            print('SAMPLE: %d after overlaps by %s' % (len(fg_inds), cfg.FLAGS.fg_threshold))
            print("detected object no.")
            print(np.argmax(overlaps[fg_inds], axis=1))
            print("total object")
            print(gt_boxes.size / 5.0)

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # Always treat the best-overlapping box of every gt box as foreground.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        # The lines below were indented with a tab/space mix (a TabError on
        # Python 3); they belong to this `if gt_boxes.size > 0` branch.
        fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 8.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # No ground truth: every box is background and there are no mask rois.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 8)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
           boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
Exemplo n.º 3
0
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN outputs into foreground/background rois and mask rois.

    ``sample_rpn_outputs`` is applied first; its boxes are matched against
    ``gt_boxes`` (first four columns) and sub-sampled according to the
    thresholds and budgets in ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes[keep], scores[keep], batch_inds[keep],
        boxes[mask_fg], scores[mask_fg], batch_inds[mask_fg])``.  ``keep`` is
        the sampled foreground indices followed by the sampled background
        indices; ``mask_fg`` holds the indices whose best overlap reaches
        ``cfg.FLAGS.mask_threshold`` and is empty when ``gt_boxes`` is empty.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    if gt_boxes.size == 0:
        # No ground truth: every box is background and there are no mask rois.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 8)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)
    else:
        # presumably a (B, G) box/gt overlap matrix -- TODO confirm against
        # cython_bbox.  np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        # Debug dump when fewer foreground boxes than gt boxes were found
        # (gt_boxes.size / 5.0 assumes five columns per gt box).
        if _DEBUG and np.argmax(overlaps[fg_inds], axis=1).size < gt_boxes.size / 5.0:
            print("gt_size")
            print(gt_boxes)
            gt_height = (gt_boxes[:, 2] - gt_boxes[:, 0])
            gt_width = (gt_boxes[:, 3] - gt_boxes[:, 1])
            gt_dim = np.vstack((gt_height, gt_width))
            print(np.transpose(gt_dim))

            print('SAMPLE: %d after overlaps by %s' % (len(fg_inds), cfg.FLAGS.fg_threshold))
            print("detected object no.")
            print(np.argmax(overlaps[fg_inds], axis=1))
            print("total object")
            print(gt_boxes.size / 5.0)

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # Always treat the best-overlapping box of every gt box as foreground.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        # (this statement used a space+tab indent, a TabError on Python 3)
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 8.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
           boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
Exemplo n.º 4
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into per-roi learning targets.

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is
        [x1, y1, x2, y2, class]
    rois:     bounding boxes of shape (N, 4) (only columns 0:4 are read)
    num_classes: K, size of the last axis of the outputs
    mask_height, mask_width: height and width of the output masks

    Returns
    -------
    labels: class of the best-overlapping gt box for every roi, shape (N,)
    mask_targets: int32 array of shape (N, mask_height, mask_width, K); only
        the sampled rois have their class channel filled with the resized mask
    mask_inside_weights: float32 array of the same shape, 1 on the class
        channel of each sampled roi and 0 elsewhere
    """
    total_masks = rois.shape[0]
    # N x G overlaps; np.float64 replaces the removed `np.float` alias.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N
    labels = gt_boxes[gt_assignment, 4]  # N

    # Sample at most masks_per_image rois whose best overlap reaches the
    # mask threshold.
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
                     %(num_masks, rois.shape[0], gt_masks.shape[0]))

    target_shape = (total_masks, mask_height, mask_width, num_classes)
    mask_targets = np.zeros(target_shape, dtype=np.int32)
    mask_inside_weights = np.zeros(target_shape, dtype=np.float32)

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        # Clamp the crop origin at 0: a negative start would be read by NumPy
        # as an offset from the end of the axis and select the wrong region.
        x1 = max(int(roi[0]), 0)
        y1 = max(int(roi[1]), 0)
        x2 = int(roi[2])
        y2 = int(roi[3])
        cropped = gt_masks[gt_assignment[i], y1:y2 + 1, x1:x2 + 1]
        cropped = cv2.resize(cropped, (mask_width, mask_height),
                             interpolation=cv2.INTER_NEAREST)

        cls = int(labels[i])
        mask_targets[i, :, :, cls] = cropped
        mask_inside_weights[i, :, :, cls] = 1
    return labels, mask_targets, mask_inside_weights
Exemplo n.º 5
0
def encode(gt_boxes, all_anchors):
    """Match anchors against ground-truth boxes and derive anchor labels.

    NOTE(review): the visible body looks truncated -- it ends inside the
    ignored-area handling, calls ``np.ascontiguousarray()`` without
    arguments and never returns.  The return description below is carried
    over from the original docstring and cannot be confirmed from this code.

    :param gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    :param all_anchors: an array of shape (h, w, A, 4)
    :return: labels: (N x 1) array in [-1, num_classes], negative labels are ignored
    bbox_targets: (N x 4) regression targets
    bbox_inside_weights: (N x 4), in {0, 1} indicating to which class is assigned
    """

    # Flatten the anchor grid to one anchor per row.
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    # Per-anchor 0/1 flag, set below for anchors with overlap >= 0.5 or a
    # label >= 1.
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)

    if gt_boxes.size > 0:
        # presumably an (anchors x gt) overlap matrix -- TODO confirm against
        # cython_bbox.
        # NOTE(review): `np.float` no longer exists in NumPy >= 1.24.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))

        # (A) per anchor: index of, and overlap with, its best gt box
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]

        # (G) per gt box: index of, and overlap with, its best anchor
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Row indices of every anchor whose overlap with some gt box equals
        # that gt box's maximum (ties included).
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        # Labels start as the class of each anchor's best gt box;
        # 0 marks background and -1 marks ignored anchors.
        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        # ignore rpn_bg_threshold <= max_overlaps < rpn_fg_threshold
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1

        # The best anchor(s) of every gt box get that gt box's class
        # regardless of the thresholds above.
        labels[gt_argmax_overlaps] = gt_boxes[
            gt_assignment[gt_argmax_overlaps], 4]

        # Optionally let the background rule override the assignment above.
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        if _DEBUG:
            pass

        # Ground-truth rows with a negative last column are treated as
        # ignored areas.
        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # NOTE(review): both ascontiguousarray calls are missing their
            # arguments and would raise TypeError; the snippet ends here.
            intersecs = cython_bbox.bbox_intersections(np.ascontiguousarray(),
                                                       np.ascontiguousarray())
Exemplo n.º 6
0
def sample_rpn_outputs_wrt_gt_boxes(boxes,
                                    scores,
                                    gt_boxes,
                                    is_training=False,
                                    only_positive=False):
    """Sample RPN outputs into foreground/background rois and mask rois.

    ``sample_rpn_outputs`` is applied first; its boxes are matched against
    the first four columns of ``gt_boxes`` and sub-sampled according to the
    thresholds and budgets in ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes[keep], scores[keep], batch_inds[keep],
        boxes[mask_fg], scores[mask_fg], batch_inds[mask_fg])``.  ``keep`` is
        the sampled foreground indices followed by the sampled background
        indices; ``mask_fg`` holds the indices whose best overlap reaches
        ``cfg.FLAGS.mask_threshold`` and is empty when ``gt_boxes`` is empty.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training,
                                                   only_positive)

    if gt_boxes.size > 0:
        # presumably a (B, G) box/gt overlap matrix -- TODO confirm against
        # cython_bbox.  np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B

        # Mask rois: best overlap above the mask threshold, capped per image.
        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(mask_fg_inds,
                                            size=cfg.FLAGS.masks_per_image,
                                            replace=False)

        # Foreground: above the fg threshold, plus the best-overlapping box
        # of every gt box.
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 64.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # No ground truth: every box is background and there are no mask rois.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)),
            64)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
           boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
Exemplo n.º 7
0
def encode(gt_boxes, rois, num_classes):
    """Match rois to ground-truth boxes and build box-regression targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois: an array whose first four columns are [x1, y1, x2, y2]
    num_classes: forwarded to ``_compute_targets``

    Returns
    --------
    labels: one value per roi -- the class of its best-overlapping gt box,
        or 0 where the best overlap is below ``cfg.FLAGS.bg_threshold``
    bbox_targets, bbox_inside_weights: outputs of ``_compute_targets`` for
        the sampled rois, passed through ``_unmap(..., num_rois, keep_inds, 0)``
        (presumably scattered back to all rois with fill value 0 -- TODO
        confirm against ``_unmap``)
    """
    num_rois = rois.shape[0]
    # R x G overlaps; np.float64 replaces the removed `np.float` alias.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # R
    max_overlaps = overlaps[np.arange(num_rois), gt_assignment]  # R
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: best overlap above fg_threshold, capped at the fg budget.
    fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
    fg_rois = int(
        min(fg_inds.size,
            cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
    if fg_inds.size > 0:
        fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

    # Background: best overlap below bg_threshold fills the remaining budget.
    # All of them are labelled 0, including those dropped by the sampling.
    bg_rois = cfg.FLAGS.rois_per_image - fg_rois
    bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
    labels[bg_inds] = 0
    if bg_inds.size > 0 and bg_rois < bg_inds.size:
        bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

    keep_inds = np.append(fg_inds, bg_inds)

    # NOTE(review): `labels` is passed at full length while the boxes are
    # restricted to keep_inds -- confirm that `_compute_targets` expects this
    # (it may need labels[keep_inds]).
    bbox_targets, bbox_inside_weights = _compute_targets(
        rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], labels,
        num_classes)
    bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
    bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)

    return labels, bbox_targets, bbox_inside_weights
Exemplo n.º 8
0
def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, only_positive=False):
    """Sample RPN outputs into foreground/background rois and mask rois.

    ``sample_rpn_outputs`` is applied first; its boxes are matched against
    the first four columns of ``gt_boxes`` and sub-sampled according to the
    thresholds and budgets in ``cfg.FLAGS``.

    Returns
    -------
    tuple
        ``(boxes[keep], scores[keep], batch_inds[keep],
        boxes[mask_fg], scores[mask_fg], batch_inds[mask_fg])``.  ``keep`` is
        the sampled foreground indices followed by the sampled background
        indices; ``mask_fg`` holds the indices whose best overlap reaches
        ``cfg.FLAGS.mask_threshold`` and is empty when ``gt_boxes`` is empty.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes, scores, is_training, only_positive)

    if gt_boxes.size > 0:
        # presumably a (B, G) box/gt overlap matrix -- TODO confirm against
        # cython_bbox.  np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]

        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(mask_fg_inds, size=cfg.FLAGS.masks_per_image, replace=False)

        # Always treat the best-overlapping box of every gt box as foreground.
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, fg_inds)

        # The remaining statements of this branch were indented with a
        # tab/space mix (a TabError on Python 3); spaces only from here on.
        fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # At most 3 backgrounds per foreground, but never fewer than 64.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = np.append(fg_inds, bg_inds)
    else:
        # No ground truth: every box is background and there are no mask rois.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = np.arange(0)

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds],\
           boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
Exemplo n.º 9
0
def sample_rois(boxes,
                image_inds,
                gt_boxes_list,
                fg_overlap_threshold=0.5,
                rois_per_image=512,
                fg_fraction=0.25,
                ignore_threshold=0.2):
    """Label candidate boxes against ground truth and sample fg/bg rois.

    Parameters
    ----------
    boxes : tensor-like with ``.is_cuda`` and ``.data``; converted to a NumPy
        array with one box per row.
    image_inds : tensor-like giving, for every box, the index of the image
        (position in ``gt_boxes_list``) it belongs to.
    gt_boxes_list : list with one ground-truth array per image; columns 0:4
        are box coordinates and column 4 is the class.  Rows with class <= 0
        are treated as ignored areas.
    fg_overlap_threshold : boxes whose best overlap is below this are
        labelled 0 (background).
    rois_per_image : sampling budget per image; the total budget is
        ``rois_per_image * len(gt_boxes_list)``.
    fg_fraction : maximum share of the total budget given to foreground.
    ignore_threshold : forwarded to
        ``cython_bbox.bbox_exclude_ignored_areas``.

    Returns
    -------
    tuple of torch tensors
        ``(sampled_boxes, sampled_labels, sampled_image_inds)``, moved to CUDA
        when ``boxes`` is on CUDA.  If nothing was sampled, the first input
        box is returned with label -1.
    """
    boxes_np = boxes.data.cpu().numpy() if boxes.is_cuda \
        else boxes.data.numpy()
    image_inds_np = image_inds.data.cpu().numpy() if image_inds.is_cuda \
        else image_inds.data.numpy()

    num_boxes = boxes_np.shape[0]
    assert num_boxes == image_inds_np.size
    sampled_boxes = []
    sampled_labels = []
    sampled_image_inds = []
    batch_size = len(gt_boxes_list)

    for i, gt_boxes in enumerate(gt_boxes_list):
        in_image = image_inds_np == i
        boxes_im = boxes_np[in_image]
        image_inds_im = image_inds_np[in_image]

        keep_inds = filter_boxes(boxes_im)
        boxes_im = boxes_im[keep_inds]
        image_inds_im = image_inds_im[keep_inds]

        num_boxes_im = boxes_im.shape[0]
        # Without ground truth every box of this image stays background (0).
        labels = np.zeros((num_boxes_im, ), dtype=np.int64)

        if gt_boxes.size > 0:
            # B x G overlaps; np.float64 replaces the removed `np.float`.
            overlaps = cython_bbox.bbox_overlaps(
                np.ascontiguousarray(boxes_im, dtype=np.float64),
                np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

            gt_assignment = overlaps.argmax(axis=1)  # (B)
            max_overlaps = overlaps[np.arange(num_boxes_im), gt_assignment]

            labels[:] = gt_boxes[gt_assignment, 4]
            labels[max_overlaps < fg_overlap_threshold] = 0

            # Boxes flagged by bbox_exclude_ignored_areas get label -1 and
            # are dropped below.
            ignored_mask = gt_boxes[:, 4] <= 0
            if np.any(ignored_mask):
                ignored_areas = gt_boxes[ignored_mask]
                ignored = cython_bbox.bbox_exclude_ignored_areas(
                    np.ascontiguousarray(boxes_im, dtype=np.float64),
                    np.ascontiguousarray(ignored_areas[:, :4],
                                         dtype=np.float64),
                    ignore_threshold)
                labels[ignored == 1] = -1

            # Append the (jittered) valid ground-truth boxes as extra
            # candidates carrying their own class label.
            valid_inds = np.where(gt_boxes[:, 4] > 0)[0]
            gb = gt_boxes[valid_inds][:, :4].astype(np.float32)
            gb = jitter_boxes(gb)
            cls = gt_boxes[valid_inds][:, 4].astype(np.int64)
            boxes_im = np.concatenate((boxes_im, gb), axis=0)
            labels = np.concatenate((labels, cls), axis=0)
            assert labels.shape[0] == boxes_im.shape[0]

            new_inds = np.zeros((gb.shape[0], ),
                                dtype=image_inds_im.dtype) + i
            image_inds_im = np.concatenate((image_inds_im, new_inds), axis=0)

        not_ignored = labels >= 0
        sampled_boxes.append(boxes_im[not_ignored])
        sampled_labels.append(labels[not_ignored])
        sampled_image_inds.append(image_inds_im[not_ignored])

    sampled_boxes = np.concatenate(sampled_boxes, axis=0)
    sampled_labels = np.concatenate(sampled_labels, axis=0).astype(np.int64)
    sampled_image_inds = np.concatenate(sampled_image_inds,
                                        axis=0).astype(np.int64)

    # Faster R-CNN style sampling over the whole batch: up to fg_fraction of
    # the budget for foreground, the remainder for background.
    bg_inds = np.where(sampled_labels == 0)[0]
    fg_inds = np.where(sampled_labels > 0)[0]
    total_rois = rois_per_image * batch_size
    num_fg = min(fg_inds.size, int(fg_fraction * total_rois))
    if num_fg > 0:
        fg_inds = np.random.choice(fg_inds, num_fg, replace=False)
    num_bg = min(total_rois - num_fg, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = np.random.choice(bg_inds, num_bg, replace=False)
    keep_inds = np.append(fg_inds, bg_inds)

    sampled_labels = sampled_labels[keep_inds]
    sampled_boxes = sampled_boxes[keep_inds]
    sampled_image_inds = sampled_image_inds[keep_inds]

    # Guard against the case of no sampled rois.
    if sampled_labels.size == 0:
        sampled_boxes = boxes_np[:1, :]
        sampled_labels = np.array([-1], dtype=np.int64)
        sampled_image_inds = image_inds_np[:1].astype(np.int64)

    if boxes.is_cuda:
        return torch.from_numpy(sampled_boxes).cuda(), \
               torch.from_numpy(sampled_labels).cuda(), \
               torch.from_numpy(sampled_image_inds).cuda()
    return torch.from_numpy(sampled_boxes), \
           torch.from_numpy(sampled_labels), \
           torch.from_numpy(sampled_image_inds)
Exemplo n.º 10
0
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match anchors to ground truth and build sampled RPN training targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4); flattened to (h*w*A, 4)
    height: height of the feature map (used to reshape the outputs)
    width: width of the feature map (used to reshape the outputs)
    stride: downscale factor w.r.t. the input size; only used in debug logs

    Returns
    --------
    labels: float32 array reshaped to (1, height, width, -1) with values
        1 (foreground), 0 (background) and -1 (ignored / not sampled)
    bbox_targets: regression targets reshaped to (1, height, width, -1);
        all zeros when there is no ground truth
    bbox_inside_weights: reshaped to (1, height, width, -1); 0.1 on the four
        coordinates of every foreground anchor and 0 elsewhere
    """
    # TODO: speedup this module
    # Anchors are not filtered against the image border here: every anchor
    # takes part in the matching.
    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]

    # Everything starts as "ignore" (-1).
    labels = np.empty((anchors.shape[0], ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # A x G overlaps; np.float64 replaces the removed `np.float` alias.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]

        # bg label: best overlap below the background threshold
        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0

        # fg label: for each gt, hard-assign the anchor with the highest
        # overlap regardless of how small that overlap is
        labels[gt_argmax_overlaps] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.FLAGS.bg_threshold:
                LOG('ANCHOREncoder: overlaps: (min %.3f mean:%.3f max:%.3f), stride: %d, shape:(h:%d, w:%d)'
                    % (min_ov, mean_ov, max_ov, stride, height, width))
                worst_idx = np.argmin(gt_max_overlaps)
                worst = gt_boxes[worst_idx]
                anc = anchors[gt_argmax_overlaps[worst_idx], :]
                LOG('ANCHOREncoder: worst case: overlap: %.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3],
                       worst[4], anc[0], anc[1], anc[2], anc[3]))

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # subsample positive labels if there are too many
        num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt, every anchor is background
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many: at most 3 per
    # foreground anchor, but never fewer than 8
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 0.1

    labels = labels.reshape((1, height, width, -1))
    bbox_targets = bbox_targets.reshape((1, height, width, -1))
    bbox_inside_weights = bbox_inside_weights.reshape((1, height, width, -1))

    return labels, bbox_targets, bbox_inside_weights
Exemplo n.º 11
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode multi-channel ground-truth masks into per-roi learning targets.

    Params
    ------
    gt_masks: {0, 1} masks of shape (G, imh, imw, 7); the first seven
        channels are read
    gt_boxes: ground-truth boxes of shape (G, 5), each row is
        [x1, y1, x2, y2, class]
    rois:     bounding boxes of shape (N, 4).  When ground truth is present,
        negative entries are clamped to 0 IN PLACE.
    num_classes: K, size of the last axis of the outputs (must be >= 7 when
        any roi is sampled)
    mask_height, mask_width: height and width of the output masks

    Returns
    -------
    labels: float32 array of shape (N,); class of the matched gt box for
        sampled rois whose best overlap reaches ``cfg.FLAGS.fg_threshold``,
        -1 everywhere else
    mask_targets: int32 array of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of shape
        (N, mask_height, mask_width, K); 1 on the first seven channels of
        every sampled roi, 0 elsewhere
    """
    num_mask_channels = 7  # channels read from gt_masks per roi
    total_masks = rois.shape[0]
    target_shape = (total_masks, mask_height, mask_width, num_classes)

    labels = np.full((total_masks, ), -1, dtype=np.float32)
    mask_targets = np.zeros(target_shape, dtype=np.int32)
    # Fix: the no-ground-truth branch used (mask_height, mask_height) here,
    # giving a weights array whose shape disagreed with mask_targets.
    mask_inside_weights = np.zeros(target_shape, dtype=np.float32)

    if gt_boxes.size == 0:
        # there is no gt: nothing to sample, all labels stay -1
        return labels, mask_targets, mask_inside_weights

    # N x G overlaps; np.float64 replaces the removed `np.float` alias.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N

    # Sample at most masks_per_image rois whose best overlap reaches the
    # mask threshold.
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0 and num_masks < keep_inds.size:
        keep_inds = np.random.choice(keep_inds,
                                     size=num_masks,
                                     replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
                     %(num_masks, rois.shape[0], gt_masks.shape[0]))

    labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

    # Masks are only defined on positive rois: anything below the fg
    # threshold is ignored.
    ignore_inds = np.where(max_overlaps < cfg.FLAGS.fg_threshold)[0]
    labels[ignore_inds] = -1

    # Clamp negative coordinates so they cannot wrap around when used as
    # slice bounds below (this modifies the caller's array).
    rois[rois < 0] = 0

    # TODO: speed bottleneck?
    for i in keep_inds:
        roi = rois[i, :4]
        rows = slice(int(roi[1]), int(roi[3]) + 1)
        cols = slice(int(roi[0]), int(roi[2]) + 1)
        for channel in range(num_mask_channels):
            cropped = gt_masks[gt_assignment[i], rows, cols, channel]
            cropped = cv2.resize(cropped, (mask_width, mask_height),
                                 interpolation=cv2.INTER_NEAREST)
            mask_targets[i, :, :, channel] = cropped
            mask_inside_weights[i, :, :, channel] = 1
    return labels, mask_targets, mask_inside_weights
Exemplo n.º 12
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
    """Encode ground-truth masks into per-RoI, per-class learning targets.

    Every RoI is matched to the ground-truth box it overlaps most (overlap as
    computed by ``cython_bbox.bbox_overlaps``). RoIs whose best overlap reaches
    ``cfg.FLAGS.mask_threshold`` are kept as positives and randomly subsampled
    to at most ``cfg.FLAGS.masks_per_image``. For each kept RoI the matched
    mask is cropped to the RoI, resized to ``(mask_height, mask_width)`` with
    nearest-neighbour interpolation and written into the channel of its class.

    Params
    ------
    gt_masks: ground-truth masks, indexed as (G, imh, imw)
    gt_boxes: ground-truth boxes of shape (G, 5), each row is
              [x1, y1, x2, y2, class]
    rois:     the bounding boxes; only the first four columns are used.
              NOTE: negative entries of ``rois`` are clamped to 0 in place.
    num_classes: K
    mask_height, mask_width: height and width of output masks

    Returns
    -------
    labels: float32 array of shape (N,), the class id for sampled positive
            RoIs and -1 for every other RoI
    mask_targets: int32 array of shape (N, mask_height, mask_width, K)
    mask_inside_weights: float32 array of the same shape, set to 1 on the
            class channel that received a target and 0 elsewhere
    """
    total_masks = rois.shape[0]
    target_shape = (total_masks, mask_height, mask_width, num_classes)

    labels = np.full((total_masks, ), -1, dtype=np.float32)
    mask_targets = np.zeros(target_shape, dtype=np.int32)
    # Same shape as mask_targets in every branch (the no-gt branch used to
    # allocate (mask_height, mask_height) by mistake).
    mask_inside_weights = np.zeros(target_shape, dtype=np.float32)

    if gt_boxes.size == 0:
        # there is no gt: everything stays ignored / zero
        return labels, mask_targets, mask_inside_weights

    # N x G overlap matrix. np.float64 is what the removed alias np.float
    # resolved to.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # N
    max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment]  # N

    # sample positive rois whose overlap reaches the mask threshold
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0 and num_masks < keep_inds.size:
        keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
        LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'
            % (num_masks, rois.shape[0], gt_masks.shape[0]))

    labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

    # masks are only defined on positive rois: anything below the foreground
    # threshold goes back to "ignore"
    labels[max_overlaps < cfg.FLAGS.fg_threshold] = -1

    # clamp negative coordinates so they can be used as slice bounds
    rois[rois < 0] = 0

    # TODO: speed bottleneck?
    for i in keep_inds:
        class_id = int(labels[i])
        if class_id < 0:
            # Ignored roi (reset above, or matched to a gt row with a negative
            # class). Indexing with -1 would silently write into the last
            # class channel, so skip it.
            continue
        x1, y1, x2, y2 = (int(v) for v in rois[i, :4])
        cropped = gt_masks[gt_assignment[i], y1:y2 + 1, x1:x2 + 1]
        cropped = cv2.resize(cropped, (mask_width, mask_height),
                             interpolation=cv2.INTER_NEAREST)

        mask_targets[i, :, :, class_id] = cropped
        mask_inside_weights[i, :, :, class_id] = 1

    return labels, mask_targets, mask_inside_weights
Exemplo n.º 13
0
def encode(gt_boxes, all_anchors):
    """Match anchors to ground truth and build single-shot training targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]; rows with a
        negative class are treated as ignored regions
    all_anchors: an array of shape (h, w, A, 4), flattened to (N, 4) here
    Returns
    --------
    labels: float32 array of N labels; 0 is background, values >= 1 are
        foreground classes, negative labels are ignored
    label_weights: float32 array of N weights; only filled by the 'simple'
        sampling strategy, all zeros otherwise
    bbox_targets: N x (4) float32 regression targets
    bbox_inside_weights: N x (4) float32, ``cfg.bbweights`` on the anchors
        selected for box regression and 0 elsewhere
    Raises
    ------
    ValueError: if ``cfg.rpn_sample_strategy`` is not 'traditional', 'simple'
        or 'advanced'
    """

    all_anchors = all_anchors.reshape([-1, 4])
    anchors = all_anchors
    total_anchors = all_anchors.shape[0]
    bbox_flags_ = np.zeros([total_anchors], dtype=np.int32)
    has_gt = gt_boxes.size > 0

    if has_gt:
        # np.float64 is what the removed alias np.float resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        # One best anchor per gt; kept separately because the debug report
        # below needs a per-gt index.
        best_anchor_per_gt = overlaps.argmax(axis=0)  # (G)
        gt_max_overlaps = overlaps[best_anchor_per_gt,
                                   np.arange(overlaps.shape[1])]
        # Every anchor that ties with the best overlap of some gt. This is a
        # list of row indices, NOT indexed by gt.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        labels = gt_boxes[gt_assignment, 4]
        labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        labels[np.logical_and(max_overlaps < cfg.rpn_fg_threshold,
                              max_overlaps >= cfg.rpn_bg_threshold)] = -1
        bbox_flags_[max_overlaps >= 0.5] = 1

        # fg label: for each gt, hard-assign anchor with highest overlap despite its overlaps
        labels[gt_argmax_overlaps] = gt_boxes[
            gt_assignment[gt_argmax_overlaps], 4]

        # if clobber positive: there may exist some positive objs (jaccard overlap < bg_th) that are not assigned to any anchors
        if cfg.rpn_clobber_positives:
            labels[max_overlaps < cfg.rpn_bg_threshold] = 0
        bbox_flags_[labels >= 1] = 1

        if _DEBUG:
            min_ov = np.min(gt_max_overlaps)
            max_ov = np.max(gt_max_overlaps)
            mean_ov = np.mean(gt_max_overlaps)
            if min_ov < cfg.rpn_bg_threshold:
                LOG('ANCHORSS: overlaps: (min %.3f mean:%.3f max:%.3f)' %
                    (min_ov, mean_ov, max_ov))
                worst_gt = np.argmin(gt_max_overlaps)
                worst = gt_boxes[worst_gt]
                # Index the per-gt argmax, not the tie list, so the reported
                # anchor really is the best anchor of the worst gt.
                anc = anchors[best_anchor_per_gt[worst_gt], :]
                LOG('ANCHORSS: worst overlap:%.3f, box:(%.1f, %.1f, %.1f, %.1f %d), anchor:(%.1f, %.1f, %.1f, %.1f)'
                    % (min_ov, worst[0], worst[1], worst[2], worst[3],
                       worst[4], anc[0], anc[1], anc[2], anc[3]))

        ## handle ignored regions (the gt_class of crowd boxes is set to -1)
        ignored_inds = np.where(gt_boxes[:, -1] < 0)[0]
        if ignored_inds.size > 0:
            ignored_areas = gt_boxes[ignored_inds, :]
            # intersec shape is D x A
            intersecs = cython_bbox.bbox_intersections(
                np.ascontiguousarray(ignored_areas, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            intersecs_ = intersecs.sum(axis=0)  # A x 1
            in_ignored = intersecs_ > cfg.ignored_area_intersection_fraction
            labels[in_ignored] = -1
            bbox_flags_[in_ignored] = 0

    else:
        # if there is no gt
        labels = np.zeros([total_anchors], dtype=np.float32)

    label_weights = np.zeros((total_anchors, ), dtype=np.float32)

    if cfg.rpn_sample_strategy == 'traditional':
        # subsample positive labels if there are too many, inherited from fastrcnn
        num_fg = int(cfg.rpn_fg_fraction * cfg.rpn_batch_size)
        fg_inds = np.where(labels >= 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - num_fg),
                                            replace=False)
            labels[disable_inds] = -1
        else:
            num_fg = len(fg_inds)
        # subsample negative labels if there are too many
        num_bg = max(min(cfg.rpn_batch_size - num_fg, num_fg * 5), 128)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = np.random.choice(bg_inds,
                                            size=(len(bg_inds) - num_bg),
                                            replace=False)
            labels[disable_inds] = -1

    elif cfg.rpn_sample_strategy == 'simple':
        # using label_weights to balance example losses
        fg_inds = np.where(labels >= 1)[0]
        num_fg = len(fg_inds)
        label_weights[fg_inds] = 1.0
        bg_inds = np.where(labels == 0)[0]
        num_bg = len(bg_inds)
        label_weights[bg_inds] = 3 * max(num_fg, 1.0) / max(
            max(num_bg, num_fg), 1.0)

    elif cfg.rpn_sample_strategy == 'advanced':
        # not implemented yet: labels and label_weights are left untouched
        pass
    else:
        raise ValueError(
            'RPN sample strategy %s has not been implemented yet' %
            cfg.rpn_sample_strategy)

    if has_gt:
        bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    else:
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[bbox_flags_ == 1, :] = np.asarray(cfg.bbweights,
                                                          dtype=np.float32)

    labels = labels.reshape((-1, ))
    bbox_targets = bbox_targets.reshape((-1, 4))
    bbox_inside_weights = bbox_inside_weights.reshape((-1, 4))

    return labels.astype(np.float32), label_weights, bbox_targets.astype(
        np.float32), bbox_inside_weights.astype(np.float32)
Exemplo n.º 14
0
    # NOTE: this excerpt starts mid-function; `rois` and `roi_batch_inds` are
    # defined before the visible part.
    # Six hand-written ground-truth rows. Presumably [x1, y1, x2, y2, class],
    # with the -1 rows standing for ignored regions; TODO confirm.
    gt_boxes = [
        [121, 120, 140, 150, 0],
        [100, 100, 120, 130, 0],
        [121, 120, 140, 150, -1],
        [100, 100, 120, 130, -1],
        [1, 1, 8, 8, 3],
        [13, 10, 23, 20, 4],
    ]
    # One entry per ground-truth row above (presumably the image each row
    # belongs to: two rows for index 0, four for index 1).
    gt_batch_inds = [0, 0, 1, 1, 1, 1]
    rois = np.asarray(rois, dtype=np.float32)
    gt_boxes = np.asarray(gt_boxes, dtype=np.float32)
    gt_batch_inds = np.asarray(gt_batch_inds, dtype=np.float32)
    roi_batch_inds = np.asarray(roi_batch_inds, dtype=np.float32)

    # Overlap matrix indexed as [roi, gt], using only the four box columns.
    # NOTE(review): np.float was removed in NumPy 1.24; np.float64 is the
    # equivalent dtype.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, :4], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))

    # Best-matching ground-truth row for every roi, and the overlap with it.
    # Neither value is used again in the visible part of this excerpt.
    gt_assignment = overlaps.argmax(axis=1)  # (R)
    max_overlaps = overlaps[np.arange(rois.shape[0], ), gt_assignment]

    # Move everything to the GPU; the two index tensors are cast to int64.
    rois = Variable(torch.from_numpy(rois)).cuda()
    gt_boxes = Variable(torch.from_numpy(gt_boxes)).cuda()
    gt_batch_inds = Variable(torch.from_numpy(gt_batch_inds)).cuda().long()
    roi_batch_inds = Variable(torch.from_numpy(roi_batch_inds)).cuda().long()

    # Build the target layer (first argument 0.55, 'fastrcnn' box encoding)
    # and run it once on the toy inputs.
    roi_target = RoITarget(0.55, box_encoding='fastrcnn')
    labels, deltas, bbwght = roi_target(rois, roi_batch_inds, gt_boxes, gt_batch_inds)

    # NOTE(review): the result of .cpu() is discarded, so `labels` itself is
    # not changed by this call.
    labels.cpu()
    print(labels.size(), deltas.size(), bbwght.size())
Exemplo n.º 15
0
def sample_rpn_outputs_wrt_gt_boxes(boxes,
                                    scores,
                                    gt_boxes,
                                    is_training=False,
                                    only_positive=False):
    """Sample RPN boxes for the box head and for the mask head.

    The boxes are first filtered by ``sample_rpn_outputs`` (called with
    ``with_nms=True``). When both ground truth and boxes are available, the
    surviving boxes are split into foreground / background by their best
    overlap with the ground truth and randomly subsampled. Otherwise a small
    random set of boxes is returned for both heads.

    Parameters
    ----------
    boxes, scores: RPN outputs, forwarded to ``sample_rpn_outputs``
    gt_boxes: ground-truth boxes; only the first four columns are used
    is_training, only_positive: forwarded to ``sample_rpn_outputs``

    Returns
    -------
    A 6-tuple ``(boxes, scores, batch_inds, mask_boxes, mask_scores,
    mask_batch_inds)``: the first three entries are the samples kept for the
    box head, the last three the samples kept for the mask head.
    """
    boxes, scores, batch_inds = sample_rpn_outputs(boxes,
                                                   scores,
                                                   is_training=is_training,
                                                   only_positive=only_positive,
                                                   with_nms=True)

    if gt_boxes.size > 0 and boxes.size > 0:
        # B x G overlaps. np.float64 is what the removed alias np.float
        # resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(boxes[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, 0:4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # B
        max_overlaps = overlaps[np.arange(boxes.shape[0]), gt_assignment]  # B

        ## rcnn foreground: boxes with high overlap, plus the best box of
        ## every gt regardless of its overlap
        high_overlap_inds = np.where(
            max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        gt_argmax_overlaps = overlaps.argmax(axis=0)  # G
        fg_inds = np.union1d(gt_argmax_overlaps, high_overlap_inds)

        ## mask foreground bbox with high overlap, capped at masks_per_image
        mask_fg_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
        if mask_fg_inds.size > cfg.FLAGS.masks_per_image:
            mask_fg_inds = np.random.choice(mask_fg_inds,
                                            size=cfg.FLAGS.masks_per_image,
                                            replace=False)

        ## limit rcnn foreground bbox
        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)

        ## limit rcnn background bbox
        ## TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        bg_rois = int(
            max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8))
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        keep_inds = np.append(fg_inds, bg_inds)

        ## quick fix for mask foreground is null
        # Compare by value: `is 0` only worked through CPython's small-int
        # caching and raises a SyntaxWarning on Python 3.8+.
        if mask_fg_inds.size == 0:
            mask_fg_inds = keep_inds
    else:
        # No ground truth (or no boxes): every box is background.
        bg_inds = np.arange(boxes.shape[0])
        bg_rois = int(
            min(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction), 8))
        if bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)

        keep_inds = bg_inds
        mask_fg_inds = bg_inds

    return boxes[keep_inds, :], scores[keep_inds], batch_inds[keep_inds], \
           boxes[mask_fg_inds, :], scores[mask_fg_inds], batch_inds[mask_fg_inds]
Exemplo n.º 16
0
def encode(gt_boxes,
           all_anchors,
           feature_height,
           feature_width,
           stride,
           image_height,
           image_width,
           ignore_cross_boundary=True):
    """Match anchors to ground truth and encode RPN learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4),
    feature_height: height of feature
    feature_width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
        (not used by this function)
    image_height: height of image
    image_width: width of image
    ignore_cross_boundary: when exactly ``True``, anchors reported by
        ``_get_cross_boundary`` are set to ignore
    Returns
    --------
    labels: int32 array reshaped to (1, feature_height, feature_width, -1);
        1 is foreground, 0 is background and -1 is ignored
    bbox_targets: float32 regression targets, reshaped the same way
    bbox_inside_weights: float32, 1.0 on foreground anchors and 0 elsewhere,
        reshaped the same way
    """
    # TODO: speedup this module
    allow_border = cfg.FLAGS.allow_border
    all_anchors = all_anchors.reshape([-1, 4])
    total_anchors = all_anchors.shape[0]
    has_gt = gt_boxes.size > 0

    labels = np.empty((total_anchors, ), dtype=np.int32)
    labels.fill(-1)

    if has_gt:
        # Jitter/clip only when there is ground truth: the result is used
        # nowhere else, and this keeps an empty gt array away from the helpers.
        jittered_gt_boxes = jitter_gt_boxes(gt_boxes[:, :4])
        clipped_gt_boxes = clip_boxes(jittered_gt_boxes,
                                      (image_height, image_width))

        # np.float64 is what the removed alias np.float resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            np.ascontiguousarray(clipped_gt_boxes, dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)  # (A)
        max_overlaps = overlaps[np.arange(total_anchors), gt_assignment]
        best_anchor_per_gt = overlaps.argmax(axis=0)  # G
        gt_max_overlaps = overlaps[best_anchor_per_gt,
                                   np.arange(overlaps.shape[1])]

        # bg label: less than threshold IOU
        labels[max_overlaps < cfg.FLAGS.rpn_bg_threshold] = 0
        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.FLAGS.rpn_fg_threshold] = 1

        # ignore cross-boundary anchors
        if ignore_cross_boundary is True:
            cb_inds = _get_cross_boundary(all_anchors, image_height,
                                          image_width, allow_border)
            labels[cb_inds] = -1

        # this is sensitive to boxes of little overlaps, use with caution!
        # fg label: for each gt, hard-assign every anchor that ties with its
        # highest overlap, despite how small that overlap is
        tied_best_anchors = np.where(overlaps == gt_max_overlaps)[0]
        labels[tied_best_anchors] = 1

        # subsample positive labels if there are too many
        max_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > max_fg:
            disable_inds = np.random.choice(fg_inds,
                                            size=(len(fg_inds) - max_fg),
                                            replace=False)
            labels[disable_inds] = -1
    else:
        # if there is no gt
        labels[:] = 0

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_fg = np.sum(labels == 1)
    num_bg = max(min(cfg.FLAGS.rpn_batch_size - num_fg, num_fg * 3), 8)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    if has_gt:
        # Targets are computed against the original gt boxes, not the
        # jittered/clipped copies used for matching.
        bbox_targets = _compute_targets(all_anchors,
                                        gt_boxes[gt_assignment, :])
    else:
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1.0  #0.1

    output_shape = (1, feature_height, feature_width, -1)
    labels = labels.reshape(output_shape)
    bbox_targets = bbox_targets.reshape(output_shape)
    bbox_inside_weights = bbox_inside_weights.reshape(output_shape)

    return labels, bbox_targets, bbox_inside_weights
Exemplo n.º 17
0
def encode(gt_boxes, rois, num_classes):
    """Match RoIs to ground-truth boxes and encode box-head learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    rois: an array of shape (R x 4), [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: float32 array of R labels; the matched class for sampled
        foreground RoIs, 0 for sampled background RoIs and -1 for ignored RoIs
    bbox_targets: regression targets with one row per RoI, as produced by
        ``_compute_targets`` and scattered back with ``_unmap``; zeros of
        shape (R, 4 * num_classes) when there is no ground truth
    bbox_inside_weights: weights laid out like ``bbox_targets``
    """
    all_rois = rois
    num_rois = rois.shape[0]
    if gt_boxes.size > 0:
        # R x G matrix. np.float64 is what the removed alias np.float
        # resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # R
        max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]

        # Start from "ignore" everywhere; only sampled rois get a real label.
        labels = np.zeros([num_rois], dtype=np.float32)
        labels[:] = -1

        # sample rois as to 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(
            min(fg_inds.size,
                cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # int() keeps the sample size integral for np.random.choice, like the
        # foreground count above.
        bg_rois = int(
            max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64))
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        # ignore rois with overlaps between fg_threshold and bg_threshold
        in_between = ((max_overlaps > cfg.FLAGS.bg_threshold) &
                      (max_overlaps < cfg.FLAGS.fg_threshold))
        labels[np.where(in_between)[0]] = -1

        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print('keep_inds')
            print(keep_inds)
            print('fg_inds')
            print(fg_inds)
            print('bg_inds')
            print(bg_inds)
            print('bg_rois:', bg_rois)
            print('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)

            LOG('ROIEncoder: %d positive rois, %d negative rois' %
                (len(fg_inds), len(bg_inds)))

        # Targets are computed for the sampled rois only, then scattered back
        # to one row per input roi (fill value 0).
        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds], num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds,
                                     0)

    else:
        # there is no gt: everything is background, with all but `bg_rois`
        # randomly chosen rois switched to "ignore"
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(
            int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)),
            64)
        if bg_rois < num_rois:
            ignore_inds = np.random.choice(np.arange(num_rois),
                                           size=num_rois - bg_rois,
                                           replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights
Exemplo n.º 18
0
def matching_box(boxes,
                 image_inds,
                 gt_boxes_list,
                 bg_overlap_threshold=0.5,
                 fg_overlap_threshold=0.6):
    """Match every box to the best-overlapping ground-truth box of its image.

    Parameters
    ----------
    boxes: torch tensor/Variable of boxes, one row per box
    image_inds: torch tensor giving, for each box, the index into
        ``gt_boxes_list`` of the image it belongs to
    gt_boxes_list: list of np.ndarray, one per image, with rows
        [x1, y1, x2, y2, class]
    bg_overlap_threshold: boxes whose best overlap is below this value get
        match index -1
    fg_overlap_threshold: boxes whose best overlap is below this value get
        label 0

    Returns
    -------
    (match_labels, match_inds, match_boxes) as torch tensors (int64, int32
    and float32), moved to the GPU when ``boxes`` is on the GPU.

    NOTE: results are concatenated image by image, so they line up with the
    rows of ``boxes`` only when ``boxes`` is already grouped by ascending
    image index.
    """
    boxes_np = boxes.data.cpu().numpy() if boxes.is_cuda \
        else boxes.data.numpy()
    image_inds_np = image_inds.cpu().numpy() if image_inds.is_cuda \
        else image_inds.numpy()

    num_boxes = boxes_np.shape[0]
    assert num_boxes == image_inds_np.size

    match_labels = []
    match_inds = []
    match_boxes = []

    for i, gt_boxes in enumerate(gt_boxes_list):
        boxes_im = boxes_np[image_inds_np == i]
        num_boxes_im = boxes_im.shape[0]

        # Defaults for "nothing matched": index -1, label 0, zero box.
        match = np.zeros((num_boxes_im, ), dtype=np.int32) - 1
        labels = np.zeros((num_boxes_im, ), dtype=np.int64)
        match_box = np.zeros((num_boxes_im, 4), dtype=np.float32)

        if gt_boxes.size > 0 and boxes_im.size > 0:
            # B x G. np.float64 is what the removed alias np.float
            # resolved to.
            overlaps = cython_bbox.bbox_overlaps(
                np.ascontiguousarray(boxes_im, dtype=np.float64),
                np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

            gt_assignment = overlaps.argmax(axis=1)  # (B)
            max_overlaps = overlaps[np.arange(num_boxes_im), gt_assignment]

            match[:] = gt_assignment[:]
            match[max_overlaps < bg_overlap_threshold] = -1
            # The matched box is recorded even for boxes below the thresholds.
            match_box[:, :4] = gt_boxes[gt_assignment, :4]

            labels[:] = gt_boxes[gt_assignment, 4]
            labels[max_overlaps < fg_overlap_threshold] = 0

        match_labels.append(labels)
        match_inds.append(match)
        match_boxes.append(match_box)

    results = tuple(
        torch.from_numpy(np.concatenate(parts, axis=0))
        for parts in (match_labels, match_inds, match_boxes))
    if boxes.is_cuda:
        return tuple(t.cuda() for t in results)
    return results
Exemplo n.º 19
0
def encode(gt_boxes, all_anchors, height, width, stride):
    """Match anchors of one stride to ground truth and encode learning targets.

    Parameters
    ---------
    gt_boxes: an array of shape (G x 5), [x1, y1, x2, y2, class]
    all_anchors: an array of shape (h, w, A, 4), or None to build it with
        ``anchors_plane(height, width, stride=stride)``
    height: height of feature
    width: width of feature
    stride: downscale factor w.r.t the input size, e.g., [4, 8, 16, 32]
    Returns
    --------
    labels: float32 array reshaped to (1, height, width, -1); 1 is
        foreground, 0 is background and -1 is ignored
    bbox_targets: regression targets, reshaped to (1, height, width, -1)
    bbox_inside_weights: 1 on foreground anchors and 0 elsewhere, reshaped to
        (1, height, width, -1)

    All three outputs have this layout on every path, including the early
    return taken when no ground-truth box is assigned to this stride.
    """
    # TODO: speedup this module
    if all_anchors is None:
        all_anchors = anchors_plane(height, width, stride=stride)

    # Only anchors lying completely inside the image are matched.
    all_anchors = all_anchors.reshape((-1, 4))
    inds_inside = np.where((all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0)
                           & (all_anchors[:, 2] < width * stride)
                           & (all_anchors[:, 3] < height * stride))[0]
    anchors = all_anchors[inds_inside, :]
    total_anchors = all_anchors.shape[0]
    num_inside = len(inds_inside)
    output_shape = (1, height, width, -1)

    # choose boxes to assign to this stride
    # TODO gt assignment outside
    # NOTE(review): the level test below is `K == ks + 4` although `ks`
    # already contains a +4 offset; confirm this is the intended rule.
    areas = (gt_boxes[:, 3] - gt_boxes[:, 1] + 1) * (gt_boxes[:, 2] -
                                                     gt_boxes[:, 0] + 1)
    ks = np.floor(4 + np.log2(np.sqrt(areas) / 224.0))
    K = int(np.log2(stride))
    inds = np.where(K == ks + 4)[0]
    if inds.size == 0:
        # No gt for this stride: all-zero outputs, reshaped exactly like the
        # regular return value so callers always get the same layout.
        labels = np.zeros((total_anchors, ), dtype=np.float32)
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_inside_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        return (labels.reshape(output_shape),
                bbox_targets.reshape(output_shape),
                bbox_inside_weights.reshape(output_shape))
    gt_boxes = gt_boxes[inds]

    labels = np.zeros((anchors.shape[0], ), dtype=np.float32)
    # np.float64 is what the removed alias np.float resolved to.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    gt_assignment = overlaps.argmax(axis=1)  # (A)
    max_overlaps = overlaps[np.arange(num_inside), gt_assignment]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # G

    # fg label: for each gt, assign anchor with highest overlap despite its overlaps
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.fg_threshold] = 1

    # subsample positive labels if there are too many
    num_fg = int(cfg.FLAGS.fg_rpn_fraction * cfg.FLAGS.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds,
                                        size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1

    # TODO: mild hard negative mining
    # subsample negative labels if there are too many
    num_bg = cfg.FLAGS.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds,
                                        size=(len(bg_inds) - num_bg),
                                        replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[gt_assignment, :])
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = 1

    # mapping to whole outputs: anchors outside the image are ignored (-1)
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)

    labels = labels.reshape(output_shape)
    bbox_targets = bbox_targets.reshape(output_shape)
    bbox_inside_weights = bbox_inside_weights.reshape(output_shape)

    return labels, bbox_targets, bbox_inside_weights
Exemplo n.º 20
0
def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width,
           indexs):
    """Encode ground-truth masks into per-ROI mask learning targets.

    Every ROI is assigned to the ground-truth box with which
    ``cython_bbox.bbox_overlaps`` reports the largest overlap.  ROIs whose
    best overlap reaches ``cfg.FLAGS.mask_threshold`` are kept (randomly
    subsampled down to at most ``cfg.FLAGS.masks_per_image``).  For each kept
    ROI the assigned ground-truth mask is cropped to the ROI and resized to
    ``(mask_height, mask_width)``.

    Params
    ------
    gt_masks: ground-truth masks, indexed as (G, image_height, image_width)
    gt_boxes: ground-truth boxes, one row per box; the first four columns are
              [x1, y1, x2, y2] and the last column is read as the class id
    rois:     candidate boxes, one row per ROI; the first four columns are
              [x1, y1, x2, y2].  NOTE: when ground truth is present, negative
              entries are clipped to 0 *in place*, so the caller's array is
              modified.
    num_classes: K, size of the last axis of the returned targets
    mask_height, mask_width: height and width of the output masks
    indexs:   returned unchanged

    Returns
    -------
    labels: int32 array of shape (N,); class id for the sampled ROIs and -1
            for every other ROI
    mask_targets: float32 array of shape (N, mask_height, mask_width, K);
            the resized mask is written into the class channel of each
            sampled ROI, everything else is 0
    mask_inside_weights: float32 array with the same shape as mask_targets;
            1.0 on the class channel of each sampled ROI, 0 elsewhere
    mask_rois: the first four columns of ``rois`` (after clipping), or an
            (N, 4) float32 zero array when there is no ground truth
    indexs: the ``indexs`` argument
    """
    total_masks = rois.shape[0]
    target_shape = (total_masks, mask_height, mask_width, num_classes)

    # -1 marks ROIs that are not sampled for mask learning.
    labels = np.full((total_masks, ), -1, dtype=np.int32)
    mask_targets = np.zeros(target_shape, dtype=np.float32)
    # Same shape as mask_targets in both branches (the no-gt branch used to
    # allocate (N, mask_height, mask_height, K) by mistake).
    mask_inside_weights = np.zeros(target_shape, dtype=np.float32)

    if gt_boxes.size == 0:
        # there is no gt
        mask_rois = np.zeros((total_masks, 4), dtype=np.float32)
        return labels, mask_targets, mask_inside_weights, mask_rois, indexs

    # N x G
    # np.float64 is what the removed ``np.float`` alias resolved to.
    overlaps = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # shape is N
    max_overlaps = overlaps[np.arange(len(gt_assignment)),
                            gt_assignment]  # N

    # sample positive rois whose best overlap reaches the mask threshold
    keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
    num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
    if keep_inds.size > 0 and num_masks < keep_inds.size:
        keep_inds = np.random.choice(keep_inds,
                                     size=num_masks,
                                     replace=False)

    labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]

    # Clip after the overlap computation, exactly as before; this writes
    # into the caller's array.
    rois[rois < 0] = 0

    # Loop-invariant sizes.  cv2.resize takes dsize as integer
    # (width, height), so convert once here.
    gt_height = gt_masks.shape[1]
    gt_width = gt_masks.shape[2]
    out_width = int(mask_width)
    out_height = int(mask_height)
    enlarged_width = out_width * 20
    enlarged_height = out_height * 20

    # TODO: mask ground truth accuracy check
    # The upscaled mask depends only on the gt index, so compute it once per
    # gt instead of once per ROI.
    enlarged_masks = {}
    for i in keep_inds:
        gt_idx = int(gt_assignment[i])
        if gt_idx not in enlarged_masks:
            enlarged_masks[gt_idx] = cv2.resize(
                gt_masks[gt_idx, :, :].astype(np.float32),
                (enlarged_width, enlarged_height),
                interpolation=cv2.INTER_CUBIC)
        enlarged = enlarged_masks[gt_idx]

        # Map the ROI from image coordinates to the enlarged mask grid.
        roi = rois[i, :4]
        y1 = int(round(roi[1] * enlarged_height / float(gt_height)))
        y2 = int(round(roi[3] * enlarged_height / float(gt_height)))
        x1 = int(round(roi[0] * enlarged_width / float(gt_width)))
        x2 = int(round(roi[2] * enlarged_width / float(gt_width)))

        # NOTE(review): a ROI that rounds to zero width or height yields an
        # empty crop here; presumably cv2.resize rejects that - confirm.
        cropped = enlarged[y1:y2, x1:x2]
        cropped = cv2.resize(cropped.astype(np.float32),
                             (out_width, out_height),
                             interpolation=cv2.INTER_CUBIC)

        mask_targets[i, :, :, labels[i]] = cropped
        mask_inside_weights[i, :, :, labels[i]] = 1.0

    mask_rois = rois[:, :4]
    return labels, mask_targets, mask_inside_weights, mask_rois, indexs
Exemplo n.º 21
0
    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        For every image, ground-truth boxes are greedily matched one-to-one
        with proposal boxes (best remaining overlap first) and the overlap of
        each match is recorded.  Recall at a threshold is the fraction of
        ground-truth boxes whose recorded overlap reaches that threshold.

        Args:
            candidate_boxes: optional sequence indexed by image, each entry
                an array of proposal boxes.  When None, the entries of
                ``self.roidb[i]['boxes']`` whose ``gt_classes`` is 0 are used.
            thresholds: overlap thresholds to evaluate; defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate; one of
                'all', 'small', 'medium', 'large', '96-128', '128-256',
                '256-512', '512-inf'.  Compared against ``seg_areas``.
            limit: if given, only the first ``limit`` boxes of each image
                are used.

        Returns:
            results: dictionary of results with keys
                'ar': average recall (mean of 'recalls')
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': sorted vector of all recorded ground-truth
                    overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                  '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [ [0**2, 1e5**2],    # all
                        [0**2, 32**2],     # small
                        [32**2, 96**2],    # medium
                        [96**2, 1e5**2],   # large
                        [96**2, 128**2],   # 96-128
                        [128**2, 256**2],  # 128-256
                        [256**2, 512**2],  # 256-512
                        [512**2, 1e5**2],  # 512-inf
                      ]
        # ``in`` works on both Python 2 and 3 (dict.has_key is Python 2 only).
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                # No proposals: this image's gt boxes still count in num_pos
                # but contribute no overlaps.
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 is what the removed ``np.float`` alias resolved to.
            overlaps = cython_bbox.bbox_overlaps(boxes.astype(np.float64),
                                                 gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # Each match consumes one proposal and one gt box, so at most
            # min(#proposals, #gt) matches exist.  Iterating once per gt box
            # would hit the assert below as soon as the proposals run out;
            # unmatched gt boxes simply keep an overlap of 0.
            num_matches = min(boxes.shape[0], gt_boxes.shape[0])
            for j in range(num_matches):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        # NOTE(review): num_pos is 0 when no gt box falls inside the area
        # range, making the division below 0/0 - confirm callers expect that.
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}
Exemplo n.º 22
0
def encode(gt_boxes, rois, num_classes):
    """Match ROIs to ground-truth boxes and encode box learning targets.

    Each ROI is assigned to the ground-truth box with which
    ``cython_bbox.bbox_overlaps`` reports the largest overlap.  ROIs are then
    sampled into foreground (overlap >= ``cfg.FLAGS.fg_threshold``) and
    background (overlap < ``cfg.FLAGS.bg_threshold``); everything else is
    ignored.  Regression targets are computed by ``_compute_targets`` for the
    sampled ROIs only and scattered back to all ROIs with ``_unmap``.

    Parameters
    ---------
    gt_boxes: an array with one row per ground-truth box,
              [x1, y1, x2, y2, class]
    rois: an array with one row per ROI; the first four columns are
          [x1, y1, x2, y2]
    num_classes: scalar, number of classes

    Returns
    --------
    labels: float32 array of shape (R,); the class id for sampled foreground
            ROIs, 0 for sampled background ROIs and -1 for ignored ROIs
    bbox_targets: regression targets for all R ROIs, zero for ROIs that were
            not sampled; (R, 4 * num_classes) zeros when there is no ground
            truth (presumably the same shape otherwise - determined by
            ``_compute_targets``)
    bbox_inside_weights: weights laid out like bbox_targets, zero for ROIs
            that were not sampled
    """
    num_rois = rois.shape[0]

    if gt_boxes.size > 0:
        # R x G matrix
        # np.float64 is what the removed ``np.float`` alias resolved to.
        overlaps = cython_bbox.bbox_overlaps(
            np.ascontiguousarray(rois[:, 0:4], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)  # R
        max_overlaps = overlaps[np.arange(num_rois), gt_assignment]  # R

        # Start with everything ignored (-1).  ROIs whose overlap is neither
        # >= fg_threshold nor < bg_threshold are never touched below, so they
        # stay ignored without any further assignment.
        labels = np.full((num_rois, ), -1, dtype=np.float32)

        # sample rois as to 1:3
        fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
        fg_rois = int(min(fg_inds.size,
                          cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))
        if fg_inds.size > 0 and fg_rois < fg_inds.size:
            fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
        labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]

        # TODO: sampling strategy
        bg_inds = np.where(max_overlaps < cfg.FLAGS.bg_threshold)[0]
        # Up to three backgrounds per foreground, but never fewer than 64.
        bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)
        if bg_inds.size > 0 and bg_rois < bg_inds.size:
            bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
        labels[bg_inds] = 0

        keep_inds = np.append(fg_inds, bg_inds)
        if _DEBUG:
            print('keep_inds')
            print(keep_inds)
            print('fg_inds')
            print(fg_inds)
            print('bg_inds')
            print(bg_inds)
            print('bg_rois:', bg_rois)
            print('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)

            LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds)))

        # Targets are computed for the sampled ROIs only, then scattered back
        # to one row per input ROI (fill value 0).
        bbox_targets, bbox_inside_weights = _compute_targets(
            rois[keep_inds, 0:4],
            gt_boxes[gt_assignment[keep_inds], :4],
            labels[keep_inds],
            num_classes)
        bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
        bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)

    else:
        # there is no gt: every ROI is background (label 0) with zero targets
        labels = np.zeros((num_rois, ), np.float32)
        bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
        bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
        bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
        if bg_rois < num_rois:
            # Keep bg_rois backgrounds and mark a random remainder as ignored.
            bg_inds = np.arange(num_rois)
            ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
            labels[ignore_inds] = -1

    return labels, bbox_targets, bbox_inside_weights