Example No. 1
def proposal_layer(bbox_pred, iou_pred, cls_pred, anchors, ls):
    box_pred = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32), ls, ls) * cfg.INP_SIZE

    box_pred = np.reshape(box_pred, [-1, 4])

    iou_pred = np.reshape(iou_pred, [-1, 1])

    cls_pred = np.reshape(cls_pred, [-1, cfg.NUM_CLASSES])

    cls_inds = np.argmax(cls_pred, axis=1)
    cls_prob = cls_pred[np.arange(cls_pred.shape[0]), cls_inds][:, np.newaxis]

    scores = iou_pred * cls_prob

    # filter out boxes with scores below the confidence threshold
    keep = np.where(scores >= cfg.COEF_THRESH)[0]
    # keep the top-N scores before applying NMS
    keep = keep[np.argsort(-scores[keep, 0])[:cfg.PRE_NMS_TOP_N]]

    box_pred = box_pred[keep]
    cls_inds = cls_inds[keep]
    scores = scores[keep]

    # apply nms with top-n-score boxes
    keep = np.zeros(len(box_pred), dtype=np.int8)
    for i in range(cfg.NUM_CLASSES):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue

        keep_in_cls = nms_detection(np.hstack([box_pred[inds], scores[inds]]),
                                    cfg.NMS_THRESH)

        keep[inds[keep_in_cls]] = 1

    keep = np.where(keep > 0)

    box_pred = box_pred[keep]
    cls_inds = cls_inds[keep].astype(np.int8)
    scores = scores[keep][:, 0]

    # clip boxes inside image
    box_pred = clip_boxes(np.ascontiguousarray(box_pred, dtype=np.float32),
                          cfg.INP_SIZE, cfg.INP_SIZE)

    return box_pred, cls_inds, scores
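
Example 1 leans on an nms_detection helper that is not shown here. Below is a minimal NumPy sketch of the usual greedy NMS over rows of [x1, y1, x2, y2, score], assuming the inclusive pixel-coordinate convention common to these examples; the actual helper in the source repository may differ.

import numpy as np

def nms_detection(dets, thresh):
    # dets: (N, 5) array of [x1, y1, x2, y2, score]; returns the kept row indices.
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the highest-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose overlap with the kept box exceeds the threshold
        order = order[1:][iou <= thresh]
    return keep
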
Example No. 2
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """A simplified version compared to fast/er RCNN
     For details please see the technical report
  """
  if type(cfg_key) == bytes:
      cfg_key = cfg_key.decode('utf-8')
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  '''
  scores = tf.reshape(rpn_cls_prob, shape=(-1, 2))
  scores = scores[:, 1:]
  '''
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = scores.reshape((-1, 1))
  
  rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
  proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
  proposals = clip_boxes(proposals, im_info[:2])

  # Pick the top region proposals
  order = scores.ravel().argsort()[::-1]
  if pre_nms_topN > 0:
    order = order[:pre_nms_topN]
  proposals = proposals[order, :]
  scores = scores[order]

  # Non-maximal suppression
  keep = nms(np.hstack((proposals, scores)), nms_thresh)

  # Pick the top region proposals after NMS
  if post_nms_topN > 0:
    keep = keep[:post_nms_topN]
  proposals = proposals[keep, :]
  scores = scores[keep]

  # Only support single image as input
  batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
  blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

  return blob, scores
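
Both examples above decode anchor deltas with bbox_transform_inv. Here is a hedged NumPy sketch following the py-faster-rcnn convention (anchors first, deltas second, inclusive [x1, y1, x2, y2] boxes); note that Example 1 calls a variant with a different argument order and extra scale arguments, so treat this only as an illustration of the transform.

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # boxes: (N, 4) anchors as [x1, y1, x2, y2]; deltas: (N, 4) as [dx, dy, dw, dh]
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]

    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes
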
Example No. 3
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """A layer that just selects the top region proposals
     without using non-maximal suppression,
     For details please see the technical report
  """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
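
clip_boxes is another shared helper in these examples. A minimal sketch, assuming boxes are laid out as [x1, y1, x2, y2] (possibly repeated per class) and im_shape is (height, width) as in im_info[:2]:

import numpy as np

def clip_boxes(boxes, im_shape):
    # Clip boxes to lie inside the image; the strided slices also handle (N, 4 * K) layouts.
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)  # x1
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)  # y1
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)  # x2
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)  # y2
    return boxes
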
Example No. 4
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride,
                    eval_mode):
    """

    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :return:
    """
    rpn_bbox_cls_prob = rpn_softmax(rpn_bbox_cls)
    anchor = Anchors(feat_stride=feat_stride)
    # all_anchors (A * H * W, 4)
    anchors, A = anchor.get_anchors()
    num_anchors = A
    # (1,  2 * k, H, W)
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    # (1,  4 * k, H, W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert rpn_bbox_cls_prob.shape[0] == 1, 'Only support 1 batch_size'

    if not eval_mode:
        # training mode
        pre_nms_topN = cfg.train_rpn_pre_nms_top_n
        post_nms_topN = cfg.train_rpn_post_nms_top_n
        nms_thresh = cfg.train_rpn_nms_thresh
        min_size = cfg.train_rpn_min_size
    else:
        # evaluation / test mode
        pre_nms_topN = cfg.test_rpn_pre_nms_top_n
        post_nms_topN = cfg.test_rpn_post_nms_top_n
        nms_thresh = cfg.test_rpn_nms_thresh
        min_size = cfg.test_rpn_min_size
    # For the predicted cls scores, the first 9 channels are background and the last 9 are foreground
    scores = rpn_bbox_cls_prob[:, num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # (1, 4 * k, H, W) -> (1, H, W, 4 * A)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    # Decode the predicted deltas against the anchors: from (dx, dy, dw, dh) to actual box coordinates
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_size)

    # 3. remove predicted boxes with either height or width < threshold
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
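
_filter_boxes, used in step 3 above, simply drops proposals whose width or height falls below min_size. A plausible sketch; the concrete helper in each repository may additionally scale min_size by the image scale, as Examples 5 and 6 do with im_info[2].

import numpy as np

def _filter_boxes(boxes, min_size):
    # Keep boxes whose width and height are both >= min_size (pixel units).
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
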
Example No. 5
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase==0:
            cfg_key = 'TRAIN'
        elif self.phase==1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN =  cfg[cfg_key].N_DETS_PER_MODULE

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # DO NMS ONLY IN TRAINING TIME
            # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION 
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]


        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0,0,0,16,16]],dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
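
The anchor-shift enumeration in step 1 is easier to see on a toy feature map. The sketch below uses made-up base anchors and a 2x3 map with stride 16, purely to show how A base anchors are broadcast over K = H * W cell shifts into K * A row-ordered anchors:

import numpy as np

# Toy base anchors (A = 3) centered on the first cell, as [x1, y1, x2, y2].
base_anchors = np.array([[-8, -8, 8, 8],
                         [-16, -8, 16, 8],
                         [-8, -16, 8, 16]], dtype=np.float32)
feat_stride, height, width = 16, 2, 3  # tiny 2x3 feature map

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A = base_anchors.shape[0]
K = shifts.shape[0]            # K = height * width = 6 cells
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(anchors.shape)           # (18, 4): every cell gets a copy of every base anchor
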
Example No. 6
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase == 0:
            cfg_key = 'TRAIN'
        elif self.phase == 1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
            score_thresh = cfg[cfg_key].SCORE_THRESH

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[-3].data  # For multi-class
        bbox_deltas = bottom[-2].data
        im_info = bottom[-1].data[0, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride[0]
        shift_y = np.arange(0, height) * self._feat_stride[0]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        num_classes = scores.shape[1] // (A * self._num_feats)  # integer division so the reshape below gets an int
        anchors = self._anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        self.anchors = anchors

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape(
            (-1, num_classes, A * self._num_feats)).transpose(
                (0, 2, 1)).reshape((-1, num_classes))

        # Convert anchors into proposals via bbox transformations
        new_anchors = np.concatenate([anchors[:, np.newaxis, :]] *
                                     self._num_feats,
                                     axis=1).reshape((-1, 4))
        proposals = bbox_transform_inv(new_anchors, bbox_deltas)
        for i in range(self._num_refine):
            # Do this because a combination of bbox_transform_inv and _compute_targets
            # will cause a larger 3rd and 4th entry of coordinates
            # We do not do this at the last regression, just to follow the original code
            proposals[:, 2:4] -= 1
            refine_delta = bottom[i].data
            refine_delta = refine_delta.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            proposals = bbox_transform_inv(proposals, refine_delta)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        if self._subsampled:
            anchor_map = np.zeros((height, width, A))
            for i in range(A):
                stride = self._feat_stride[i // len(self._shifts) ** 2] // self._feat_stride[0]
                anchor_map[::stride, ::stride, i] = 1
            anchor_map = anchor_map.reshape((K * A))
            subsampled_inds = np.where(anchor_map)[0]
            proposals = proposals[subsampled_inds, :]
            scores = scores[subsampled_inds, :]

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep, :]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN
        #
        max_score = np.max(scores[:, 1:], axis=1).ravel()
        order = max_score.argsort()[::-1]
        try:
            thresh_idx = np.where(max_score[order] >= score_thresh)[0].max()
        except ValueError:
            thresh_idx = 0  # Nothing greater than score_thresh, just keep the highest-scoring one
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        order = order[:thresh_idx + 1]
        proposals = proposals[order, :]
        scores = scores[order, :]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # DO NMS ONLY IN TRAINING TIME
            # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack(
                (batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
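
Example 6's score filter (steps 4-5) keeps the best foreground class score per anchor, sorts, then cuts at both pre_nms_topN and a score threshold, falling back to the single best anchor when nothing clears the threshold. A tiny standalone sketch with made-up scores:

import numpy as np

# Toy per-anchor class scores: column 0 is background, columns 1+ are foreground classes.
scores = np.array([[0.9, 0.05, 0.05],
                   [0.2, 0.70, 0.10],
                   [0.1, 0.30, 0.60],
                   [0.6, 0.35, 0.05]])
score_thresh, pre_nms_topN = 0.5, 3

max_score = np.max(scores[:, 1:], axis=1)        # best foreground score per anchor
order = max_score.argsort()[::-1]                # highest first
above = np.where(max_score[order] >= score_thresh)[0]
thresh_idx = above.max() if above.size else 0    # keep at least the best anchor
order = order[:pre_nms_topN][:thresh_idx + 1]
print(order)                                     # -> [1 2]
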
Example No. 7
def test_net(test_split, net, batchsize, use_kld=cfg.USE_KLD, use_reg=cfg.USE_REG, threshold=cfg.OVERLAP_THRESHOLD,
             topk=cfg.TOPK, vis=False):
    print('validate split: %s' % test_split)
    rpn_topn = cfg.RPN_TOPN
    # dp = get_data_provider(data_split=test_split, batchsize=batchsize)
    dp = DDPNDataProvider(data_split=test_split, batchsize=batchsize)
    num_query = dp.get_num_query()
    num_right = 0

    if cfg.NTHREADS > 1:
        try:
            import torch
            dataloader = torch.utils.data.DataLoader(dp,
                                                     batch_size=batchsize,
                                                     shuffle=False,
                                                     num_workers=int(cfg.NTHREADS))
        except:
            cfg.NTHREADS = 1
            dataloader = dp
    else:
        dataloader = dp
    count = 0
    for data in dataloader:
        if data is None:
            break
        data = map(np.array, data)
        my_complete_data = functools.partial(complete_data, batchsize=batchsize)
        gt_boxes, qvec, cvec, img_feat, bbox, img_shape, spt_feat, query_label, query_label_mask, \
        query_bbox_targets, query_bbox_inside_weights, query_bbox_outside_weights, valid_data, iid_list = map(
            my_complete_data, data)

        tp_qvec = qvec.copy()
        tp_cvec = cvec.copy()
        qvec = np.transpose(qvec, (1, 0))
        cvec = np.transpose(cvec, (1, 0))
        query_bbox_targets = query_bbox_targets.reshape(-1, 4)
        query_bbox_inside_weights = query_bbox_inside_weights.reshape(-1, 4)
        query_bbox_outside_weights = query_bbox_outside_weights.reshape(-1, 4)
        # net.blobs['queries'].reshape(*(qvec.shape))
        # net.blobs['query_cont'].reshape(*(cvec.shape))
        # net.blobs['img_feat'].reshape(*(img_feat.shape))
        # net.blobs['spt_feat'].reshape(*(spt_feat.shape))
        # net.blobs['query_label'].reshape(*query_label.shape)
        # net.blobs['query_label_mask'].reshape(*query_label_mask.shape)
        # net.blobs['query_bbox_targets'].reshape(*query_bbox_targets.shape)
        # net.blobs['query_bbox_inside_weights'].reshape(*query_bbox_inside_weights.shape)
        # net.blobs['query_bbox_outside_weights'].reshape(*query_bbox_outside_weights.shape)
        # forward_kwargs = {  'qvec': qvec.astype(np.float32, copy=False), \
        #                     'cvec': cvec.astype(np.float32, copy=False), \
        #                     'img_feat': img_feat.astype(np.float32, copy=False), \
        #                     'spt_feat': spt_feat.astype(np.float32, copy=False), \
        #                     'query_label': query_label.astype(np.float32, copy=False), \
        #                     'query_label_mask': query_label_mask.astype(np.float32, copy=False), \
        #                     'query_bbox_targets': query_bbox_targets.astype(np.float32, copy=False), \
        #                     'query_bbox_inside_weights': query_bbox_inside_weights.astype(np.float32, copy=False), \
        #                     'query_bbox_outside_weights': query_bbox_outside_weights.astype(np.float32, copy=False)}
        net.blobs['qvec'].data.reshape(*qvec.shape)
        net.blobs['qvec'].data[...] = qvec

        net.blobs['cvec'].data.reshape(*cvec.shape)
        net.blobs['cvec'].data[...] = cvec

        net.blobs['img_feat'].data.reshape(*img_feat.shape)
        net.blobs['img_feat'].data[...] = img_feat

        net.blobs['spt_feat'].data.reshape(*spt_feat.shape)
        net.blobs['spt_feat'].data[...] = spt_feat

        net.blobs['query_label'].data.reshape(*query_label.shape)
        net.blobs['query_label'].data[...] = query_label

        net.blobs['query_label_mask'].data.reshape(*query_label_mask.shape)
        net.blobs['query_label_mask'].data[...] = query_label_mask

        net.blobs['query_bbox_targets'].data.reshape(*query_bbox_targets.shape)
        net.blobs['query_bbox_targets'].data[...] = query_bbox_targets

        net.blobs['query_bbox_inside_weights'].data.reshape(*query_bbox_inside_weights.shape)
        net.blobs['query_bbox_inside_weights'].data[...] = query_bbox_inside_weights

        net.blobs['query_bbox_outside_weights'].data.reshape(*query_bbox_outside_weights.shape)
        net.blobs['query_bbox_outside_weights'].data[...] = query_bbox_outside_weights

        blobs_out = net.forward()
        # query_emb_tile = net.blobs['query_emb_tile'].data

        rois = bbox.copy()
        rois = rois.reshape(-1, 4)
        query_score_pred = net.blobs['query_score_pred'].data
        if use_reg:
            query_bbox_pred = net.blobs['query_bbox_pred'].data
            query_bbox_pred = bbox_transform_inv(rois, query_bbox_pred)
        else:
            query_bbox_pred = rois

        query_inds = np.argsort(-query_score_pred, axis=1)

        rois = rois.reshape(batchsize, rpn_topn, 4)
        query_bbox_pred = query_bbox_pred.reshape(batchsize, rpn_topn, 4)
        for i in range(batchsize):
            if valid_data[i] != 0:
                right_flag = False
                t_query_bbox_pred = clip_boxes(query_bbox_pred[i], img_shape[i])
                t_rois = clip_boxes(rois[i], img_shape[i])
                for j in range(topk):
                    query_ind = query_inds[i, j]

                    # overlaps = bbox_overlaps(
                    #     np.ascontiguousarray(query_bbox_pred[query_ind][np.newaxis], dtype=np.float),
                    #     np.ascontiguousarray(gt_boxes, dtype=np.float) )
                    iou = calc_iou(t_query_bbox_pred[query_ind], gt_boxes[i])
                    # print '%.2f percent:  %.2f'%((100 * float(i) / num_query), 100*iou)
                    if iou >= threshold:
                        num_right += 1
                        right_flag = True
                        break
                    # if overlaps[0].max() > threshold:
                    #     # json.dump([1], open(save_dir + '/right.json', 'w'))
                    #     print overlaps[0].max()
                    #     num_right += 1
                    #     break

                # debug pred
                if vis:
                    debug_dir = 'visual_pred_%s_%s' % (cfg.IMDB_NAME, test_split)
                    img_path = dp.get_img_path(int(iid_list[i]))
                    img = cv2.imread(img_path)
                    img.shape
                    debug_pred(debug_dir, count, tp_qvec[i], tp_cvec[i], img, gt_boxes[i], t_rois[query_ind],
                               t_query_bbox_pred[query_ind], iou)

            percent = 100 * float(count) / num_query
            sys.stdout.write('\r' + ('%.2f' % percent) + '%')
            sys.stdout.flush()
            count += 1
            if count >= num_query:
                break

    accuracy = num_right / float(num_query)
    print('accuracy: %f\n' % accuracy)
    return accuracy
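
Example 7 counts a prediction as correct when calc_iou against the ground-truth box reaches the overlap threshold. The helper is not shown; a minimal sketch of IoU between two [x1, y1, x2, y2] boxes, assuming the inclusive pixel-coordinate convention:

def calc_iou(box, gt_box):
    # Intersection-over-union of two [x1, y1, x2, y2] boxes.
    ix1, iy1 = max(box[0], gt_box[0]), max(box[1], gt_box[1])
    ix2, iy2 = min(box[2], gt_box[2]), min(box[3], gt_box[3])
    iw, ih = max(0.0, ix2 - ix1 + 1), max(0.0, iy2 - iy1 + 1)
    inter = iw * ih
    area_a = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    area_b = (gt_box[2] - gt_box[0] + 1) * (gt_box[3] - gt_box[1] + 1)
    return inter / float(area_a + area_b - inter)
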
Example No. 8
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(image, info, gt_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, 1:5]
        box_deltas = bbox_pred.data
        if cfg.TRAIN.CLASS_AGNOSTIC:
            box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
            box_deltas = box_deltas.view(-1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
            box_deltas = box_deltas.view(-1, 4 * len(imdb.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, info)
        pred_boxes /= im_scales[0]

        im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if cfg.TRAIN.CLASS_AGNOSTIC:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j*4:(j+1)*4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
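
The box_deltas handling above multiplies by the bbox regression stds and adds the means before decoding, undoing the target normalization used at training time. A short PyTorch sketch of just that step; the std/mean values are the common py-faster-rcnn defaults and the class count is hypothetical:

import torch

# Assumed defaults matching cfg.TRAIN.BBOX_NORMALIZE_STDS / _MEANS in py-faster-rcnn.
bbox_normalize_stds = torch.tensor([0.1, 0.1, 0.2, 0.2])
bbox_normalize_means = torch.tensor([0.0, 0.0, 0.0, 0.0])

num_classes = 21                                   # e.g. Pascal VOC; hypothetical here
box_deltas = torch.randn(300, 4 * num_classes)     # 300 RoIs with per-class deltas
box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
box_deltas = box_deltas.view(-1, 4 * num_classes)  # back to the per-class layout
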
Example No. 10
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]

        pre_nms_topN = self.cf.rpn_pre_nms_top_n
        post_nms_topN = self.cf.rpn_post_nms_top_n
        nms_thresh = self.cf.rpn_nms_thresh

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
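
Example 10 returns a fixed-size (batch_size, post_nms_topN, 5) tensor, with the batch index in column 0 and unused rows left as zero padding. A small sketch of how downstream code might mask out that padding; the masking rule is an assumption, not part of the original layer:

import torch

output = torch.zeros(2, 4, 5)                       # pretend post_nms_topN = 4
output[0, :, 0] = 0                                 # column 0 holds the batch index
output[0, :3, 1:] = torch.tensor([[0., 0., 15., 15.],
                                  [8., 8., 31., 31.],
                                  [4., 4., 19., 19.]])

# Rows whose box coordinates are all zero are padding; drop them per image.
valid = output[0, :, 1:].abs().sum(dim=1) > 0
rois_img0 = output[0][valid]
print(rois_img0.shape)                              # torch.Size([3, 5])
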
Example No. 11
    def __call__(self, locs, scores, anchor_base, batch_size, feature_shape, image_size, min_scale=1.):
        '''
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes **centered** on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        '''
        # NOTE: when test, remember
        # faster_rcnn.eval()
        # to set self.training = False
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms
        
        # the first set of _num_anchors channels are bg probs, the second set are the fg probs
        # !NOTE:WHY
        scores = scores[:, self.parent_model.n_anchor:, :, :]

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors(batch_size, K*9, 4); the same process to rpn_score
        bbox_deltas = locs.permute(0,2,3,1).contiguous().reshape(batch_size, -1, 4)
        scores = scores.permute(0,2,3,1).contiguous().reshape(batch_size, -1)
        
        # ipdb.set_trace()
        ## 1.1 generate A anchor boxes **centered** on cell i; anchor tensor shape: (batch, 9*feat_h*feat_w, 4), torch tensor
        anchor = _enumerate_shifted_anchor(batch_size, np.array(anchor_base), 
                                           self.parent_model.feat_stride, feature_shape)
        ## 1.2 Convert anchors into proposal with bbox transformations.
        roi = loc2bbox(anchor, bbox_deltas)
        ## 2 Clip predicted boxes to image:just clip, the number of roi is not changed 
        roi = clip_boxes(roi, image_size, batch_size)
        ## 3 remove predicted boxes with either height or width < threshold
        min_size = self.min_size * min_scale
        ws = roi[:,:,2] - roi[:,:,0]
        hs = roi[:,:,3] - roi[:,:,1]
        # !NOTE should change to numpy???
        keep = np.where((ws.numpy() >= min_size) & (hs.numpy() >= min_size))[1]
        roi_keep = roi[:,keep,:]
        scores_keep = scores[:,keep]
        ## 4 sort all (proposal, score) pairs by score from highest to lowest
        _, order = torch.sort(scores_keep, 1, True)

        for i in range(batch_size):
            
            roi_single = roi_keep[i]
            score_single = scores_keep[i]
            order_single = order[i]

            ## 5 Take top pre_nms_topN (e.g. 6000).
            if n_pre_nms > 0 and n_pre_nms < scores_keep.numel():
                order_single = order_single[:n_pre_nms]
            roi_single = roi_single[order_single,:]
            score_single = score_single[order_single]

            # 6. apply nms (e.g. threshold = 0.7)
            keep = non_maximum_suppression(
                cp.ascontiguousarray(cp.asarray(roi_single)),
                thresh=self.nms_thresh)

            # ipdb.set_trace()
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)
            if n_post_nms > 0:
                keep = keep[:n_post_nms]
            roi_single = roi_single[keep,:]    

            # store roi_single (note: with batch_size > 1 only the last image's rois are returned)
            output = roi_single 

        return output, anchor     
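
loc2bbox here is the batched torch analogue of the NumPy bbox_transform_inv sketched after Example 2. A hedged sketch, assuming (batch, N, 4) tensors, [x1, y1, x2, y2] anchors, [dx, dy, dw, dh] deltas, and the inclusive (+1) width convention used elsewhere in these examples (some repositories omit the +1):

import torch

def loc2bbox(anchors, locs):
    # anchors, locs: (B, N, 4) tensors; returns decoded boxes of the same shape.
    w = anchors[..., 2] - anchors[..., 0] + 1.0
    h = anchors[..., 3] - anchors[..., 1] + 1.0
    cx = anchors[..., 0] + 0.5 * w
    cy = anchors[..., 1] + 0.5 * h

    pcx = locs[..., 0] * w + cx
    pcy = locs[..., 1] * h + cy
    pw = torch.exp(locs[..., 2]) * w
    ph = torch.exp(locs[..., 3]) * h

    return torch.stack((pcx - 0.5 * pw, pcy - 0.5 * ph,
                        pcx + 0.5 * pw, pcy + 0.5 * ph), dim=-1)
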
Example No. 12
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, fl_cls_prob, fl_bbox_pred, feat_stride=[16,], anchor_scales = [8, 16, 32], base_size = 10, ratios =[0.333, 0.5, 0.667, 1.0, 1.5, 2.0, 3.0], pre_nms_topN = 2000, max_nms_topN = 400, isHardware=False, num_stddev=2.0):
        """
        Parameters
        ----------
        rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                                                 NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
        rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
        im_info: a list of [image_height, image_width, scale_ratios]
        cfg_key: 'TRAIN' or 'TEST'
        _feat_stride: the downsampling ratio of feature map to the original input image
        anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
        ----------
        Returns
        ----------
        rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]
        """
        _anchors = generate_anchors(base_size, ratios, anchor_scales)
        _num_anchors = _anchors.shape[0]
        im_info = im_info[0]

        assert rpn_cls_prob_reshape.shape[0] == 1, \
                'Only single item batches are supported'

        # Convert fixed-point ints to floats for internal calculations!
        rpn_cls_prob_reshape = convert_to_float_py(rpn_cls_prob_reshape, fl_cls_prob)
        rpn_bbox_pred = convert_to_float_py(rpn_bbox_pred, fl_bbox_pred)

        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh        = cfg[cfg_key].RPN_NMS_THRESH
        min_size          = cfg[cfg_key].RPN_MIN_SIZE

        height, width = rpn_cls_prob_reshape.shape[1:3]

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # (1, H, W, A)
        scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1],
                                                [1, height, width, _num_anchors])

        # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
        # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line
        scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

        bbox_deltas = rpn_bbox_pred
        #im_info = bottom[2].data[0, :]

        if DEBUG:
                print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
                print('scale: {}'.format(im_info[2]))
                print('min_size: {}'.format(min_size))
                print('max_nms_topN: {}'.format(max_nms_topN))
                print('post_nms_topN: {}'.format(post_nms_topN))

        # 1. Generate proposals from bbox deltas and shifted anchors
        if DEBUG:
                print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, isHardware)
        proposals = proposals.astype(bbox_deltas.dtype)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        #KM:  Move filtering into NMS (after estimating parameters)
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        #keep = _filter_boxes(proposals, min_size * im_info[2])
        #proposals = proposals[keep, :]
        #
        #print '[Ref Model Log] Num total Proposals before NMS : ' + str(proposals.shape)
        #scores = scores[keep]

        # # remove irregular boxes, too fat too tall
        # keep = _filter_irregular_boxes(proposals)
        # proposals = proposals[keep, :]
        # scores = scores[keep]

        # Hardware modeling             
        if (isHardware): 
        #if (0): 
                #proposals1 = np.copy(proposals)
                #scores1 = np.copy(scores)
                #KM:  Proposal inputs to NMS need to be in same order as HW or final results will be different!
                proposals1 = np.zeros(proposals.shape)
                scores1 = np.zeros(scores.shape)
                idy = 0
                for k in range(0,A):
                        for j in range(0,width):
                                for i in range(0,height):
                                        idx = (i*width*A)+(j*A)+k
                                        scores1[idy] = scores[idx]
                                        proposals1[idy] = proposals[idx]
                                        print_msg(str(k) + '.' + str(j) + '.' + str(i) + ' Proposal ' + str(idy) + ' -> [' + str(int(8*scores1[idy])) + '] ' + str((16*proposals1[idy,:]).astype(int)),1)
                                        idy = idy+1
                prop, score = nms_hw(proposals1, scores1, num_stddev, nms_thresh, min_size, im_info[2], max_nms_topN, post_nms_topN)
                batch_inds = np.zeros((prop.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, prop.astype(np.float32, copy=False)))                             
        else:
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                        order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]
                keep = nms(np.hstack((proposals, scores)), nms_thresh)
                if post_nms_topN > 0:
                        keep = keep[:post_nms_topN]
                proposals = proposals[keep, :]
                scores = scores[keep]
                print('Number of proposals : ' + str(len(keep)))
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        return blob
    def forward(self, scores, bbox_delta, im_info, cfg_key):
        scores = scores[:, self._num_anchors:, :, :]

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_delta.size(0)
        assert (batch_size == 1) # Only support batch size = 1

        # Get the full anchor
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchor.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors_reshape = anchors.reshape((K * A, 4)).astype(np.float32, copy=False)

        # Convert the anchor into proposal
        bbox_delta = bbox_delta.permute(0, 2, 3, 1).contiguous()
        bbox_delta = bbox_delta.view(-1, 4)
        proposals = bbox_transform_inv(torch.from_numpy(anchors_reshape).type_as(bbox_delta), bbox_delta)
        proposals = clip_boxes(proposals, im_info)

        # choose the proposals
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(1, -1)

        # pick the top region proposals
        scores, order = scores.view(-1).sort(descending=True)
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN].view(-1, 1)
        proposals = proposals[order.data, :]

        # scores_keep = scores
        # _, order = torch.sort(scores_keep, 1, True)
        # if pre_nms_topN > 0:
        #     order_single = order[0]
        #     scores_single = scores[0]
        #     order_single = order_single[:pre_nms_topN]
        # proposals = proposals[order_single, :]
        # scores = scores_single[order_single].view(-1, 1)


        # Non-maximal suppression
        keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

        # pick the  top region proposals after nms
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep, :]

        # TODO: batch_size > 1
        # padding batch ids at the first row
        output = scores.new(post_nms_topN, 5).zero_()
        num_proposal = proposals.size(0)
        output[:num_proposal, 1:] = proposals

        return output, anchors_reshape
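
Example 12 builds its base anchors with generate_anchors(base_size, ratios, anchor_scales). For reference, here is a compact sketch equivalent in spirit to the standard py-faster-rcnn generator (enumerate aspect ratios around a base_size window, then scale each); the original implementation differs in structure but produces the same kind of inclusive [x1, y1, x2, y2] anchors.

import numpy as np

def generate_anchors(base_size=16, ratios=(0.5, 1, 2), scales=(8, 16, 32)):
    # One base anchor per (ratio, scale) pair, centered on a base_size x base_size window.
    cx = cy = (base_size - 1) / 2.0
    base_area = float(base_size * base_size)
    anchors = []
    for ratio in ratios:
        # pick w, h with w * h close to base_area and h / w = ratio
        w = np.round(np.sqrt(base_area / ratio))
        h = np.round(w * ratio)
        for scale in scales:
            ws, hs = w * scale, h * scale
            anchors.append([cx - 0.5 * (ws - 1), cy - 0.5 * (hs - 1),
                            cx + 0.5 * (ws - 1), cy + 0.5 * (hs - 1)])
    return np.array(anchors, dtype=np.float32)
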