def select_bg(self, Phi_labels, boxes, labels, bbox_pred, keeps_Y, good_gt_overlap, M, im_shape_w, im_shape_h):
    """Find B in p(B|Xb).

    Walks the ``M`` candidate items and drops ("ignores") every item that
    must not be treated as background; the surviving indices are returned.

    An item ``k`` is ignored when any of the following holds:

    * ``k`` is a key of ``keeps_Y`` and ``keeps_Y[k] == Phi_labels[k]``;
    * ``k`` is in ``good_gt_overlap``, ``Phi_labels[k] == labels[k]`` and
      ``labels[k] > 0``;
    * the box decoded for ``k`` (using the regression columns of class
      ``labels[k]``) has an ``IoU_target`` value above
      ``cfg.TRAIN.IGNORANCE`` with at least one box decoded the same way
      for an item that ``keeps_Y`` maps to that same label.

    Args:
        Phi_labels: per-item labels, indexed by item.
        boxes: array whose rows are indexed by item and fed to
            ``bbox_transform_inv``.
        labels: per-item labels; ``labels[k]`` selects the regression columns.
        bbox_pred: regression outputs; columns ``4*c:4*(c+1)`` are read for
            label ``c``.
        keeps_Y: dict mapping item index -> label.
        good_gt_overlap: container of item indices (only membership-tested).
        M: number of items.
        im_shape_w, im_shape_h: forwarded to ``clip_boxes`` as the tuple
            ``(im_shape_w, im_shape_h)``.

    Returns:
        tuple: ``(selected_item, prob_dpp)`` where ``selected_item`` is an
        ndarray of the surviving item indices and ``prob_dpp`` is a list
        holding one ``1.0`` per surviving item.
    """
    # Invert keeps_Y (item -> label) into label -> [items].
    # ``items()`` exists on both Python 2 and 3 dicts; ``iteritems()`` does not.
    kept_by_label = {}
    for item_idx, item_label in keeps_Y.items():
        kept_by_label.setdefault(item_label, []).append(item_idx)

    im_shape = (im_shape_w, im_shape_h)
    # A set keeps the final membership tests O(1) instead of O(len(ignores)).
    ignored = set()
    for k in range(M):
        if (k in keeps_Y and keeps_Y[k] == Phi_labels[k]) \
                or (k in good_gt_overlap and Phi_labels[k] == labels[k] and labels[k] > 0):
            ignored.add(k)
            continue

        label_k = labels[k]
        if label_k not in kept_by_label:
            continue

        kept_idx = kept_by_label[label_k]
        cols = slice(4 * label_k, 4 * (label_k + 1))

        pbox = bbox_transform_inv(boxes[[k], :], bbox_pred[[k], cols])
        pbox = clip_boxes(pbox, im_shape)
        # Repeat the single box so it lines up row-for-row with the kept boxes.
        pbox = np.reshape(np.tile(pbox, len(kept_idx)), (len(kept_idx), 4))

        kept_boxes = bbox_transform_inv(boxes[kept_idx, :], bbox_pred[kept_idx, cols])
        kept_boxes = clip_boxes(kept_boxes, im_shape)

        if np.max(IoU_target(pbox, kept_boxes)) > cfg.TRAIN.IGNORANCE:
            ignored.add(k)

    survivors = [k for k in range(M) if k not in ignored]
    ones = np.ones((M,))
    selected_item = np.array(survivors)
    prob_dpp = [ones[k] for k in survivors]
    return selected_item, prob_dpp
def update_rl(rl_in, h_start, w_start, h_end, w_end, t, rois_seq, cls_probs_seq, bbox_preds_seq, cls_probs_uptonow, pred_bboxes_uptonow, keeps, im_shape, bin_ctrs, height, width, rl_in_upsamp_height, rl_in_upsamp_width, thresh=0.0):
    """Update the RL state after a time step and refresh the running detections.

    Steps, in order:

    1. For ``t > 1`` the accumulated class probabilities / boxes are reduced
       to the rows listed in ``keeps[0]`` and ``keeps[0]`` is reset to ``[]``
       (``keeps`` is modified in place).
    2. If ``rois_seq`` is given, the new predictions are decoded with
       ``bbox_transform_inv``, clipped to ``im_shape``, stacked under the
       accumulated ones, and ``_get_nms_keep`` recomputes ``keeps``.
    3. The first ``cfg.DIMS_NONHIST`` channels of ``rl_in`` inside the window
       ``[h_start:h_end, w_start:w_end]`` are set to -1 (in place).
    4. If ``rois_seq`` is given, ``do_hist_update`` refreshes ``rl_in``.

    Returns:
        tuple: ``(rl_in, keeps, cls_probs_uptonow, pred_bboxes_uptonow)``.
    """
    has_new_rois = rois_seq is not None

    if t > 1:
        # Carry over only the survivors recorded for the previous steps.
        survivors = keeps[0]
        cls_probs_uptonow = cls_probs_uptonow[survivors, :]
        pred_bboxes_uptonow = pred_bboxes_uptonow[survivors, :]
        keeps[0] = []

    # Potentially perform per-time-step NMS
    if has_new_rois:
        # Current preds at this fix merged with survivors from previous steps
        new_bboxes = clip_boxes(bbox_transform_inv(rois_seq, bbox_preds_seq), im_shape)
        cls_probs_uptonow = np.vstack([cls_probs_uptonow, cls_probs_seq])
        pred_bboxes_uptonow = np.vstack([pred_bboxes_uptonow, new_bboxes])

        # Perform on-the-fly NMS (used when performing class-specific
        # history updates)
        keeps = _get_nms_keep(keeps, cls_probs_uptonow, pred_bboxes_uptonow, thresh)

    # Update non-history part of RL state
    rl_in[:, h_start:h_end, w_start:w_end, :cfg.DIMS_NONHIST] = -1

    if has_new_rois:
        rl_in, _ = do_hist_update(
            rl_in, cls_probs_uptonow, pred_bboxes_uptonow, keeps, bin_ctrs,
            height, width, rl_in_upsamp_height, rl_in_upsamp_width)

    return rl_in, keeps, cls_probs_uptonow, pred_bboxes_uptonow
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, rpn_reject_inds):
    """Select the top region proposals without non-maximal suppression.

    Anchors whose flat index appears in ``rpn_reject_inds`` are removed
    before ranking. For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[0][:2]`` is passed to ``clip_boxes``.
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.
        rpn_reject_inds: ndarray of flat anchor indices to discard (may be
            empty).

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    im_info = im_info[0]

    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))

    # Reject via RPN. A boolean mask is used instead of writing a sentinel
    # value into ``scores``: nothing is mutated and no float equality test
    # is needed to find the survivors.
    if rpn_reject_inds.size != 0:
        keep_mask = np.ones(scores.shape[0], dtype=bool)
        keep_mask[rpn_reject_inds] = False
        anchors = anchors[keep_mask]
        scores = scores[keep_mask]
        rpn_bbox_pred = rpn_bbox_pred[keep_mask]

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        # Indices of the rpn_top_n highest scores, best first.
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def pad_rois(rois, im_info, is_training):
    """Pad rois to utilize contextual information and to alleviate truncation.

    Columns 1-4 of ``rois`` are read as the box coordinates (column 0 is not
    used). Each side is pushed outwards by ``cfg.POOL_PAD_RATIO`` times the
    box extent and then pulled back by a jitter term; the jitter fraction is
    random per roi when ``is_training`` is true and fixed at 0.5 otherwise.
    The result is clipped with ``clip_boxes`` against ``im_info[:2]``.

    Returns:
        ndarray: float32 blob whose first column is all zeros (batch index)
        followed by the 4 padded coordinates.
    """
    pad_ratio = cfg.POOL_PAD_RATIO
    count = rois.shape[0]

    widths = rois[:, 3] - rois[:, 1]
    heights = rois[:, 4] - rois[:, 2]
    pad_w = pad_ratio * widths
    pad_h = pad_ratio * heights

    # Jitter fractions: width is drawn before height, as in the original.
    if is_training:
        frac_w = npr.rand(count)
        frac_h = npr.rand(count)
    else:
        frac_w = np.ones(count) * 0.5
        frac_h = np.ones(count) * 0.5

    padded = np.zeros((count, 4), dtype=np.float32)
    padded[:, 0] = rois[:, 1] - (pad_w - frac_w * (1 + 2 * pad_ratio) / 15 * widths)
    padded[:, 1] = rois[:, 2] - (pad_h - frac_h * (1 + 2 * pad_ratio) / 15 * heights)
    padded[:, 2] = rois[:, 3] + (pad_w - (1 - frac_w) * (1 + 2 * pad_ratio) / 15 * widths)
    padded[:, 3] = rois[:, 4] + (pad_h - (1 - frac_h) * (1 + 2 * pad_ratio) / 15 * heights)

    padded = clip_boxes(padded, im_info[:2])

    zero_col = np.zeros((padded.shape[0], 1), dtype=np.float32)
    return np.hstack((zero_col, padded.astype(np.float32, copy=False)))
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    :param rpn_cls_prob: RPN class probabilities; channels ``num_anchors:``
        of the last axis are used as scores.
    :param rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
    :param im_info: per the original note, [M, N, scale_factor] - the
        information needed to rescale an arbitrary image to M x N. Only
        ``im_info[:2]`` is used here (passed to ``clip_boxes``).
    :param cfg_key: key into ``cfg`` (``str`` or ``bytes``).
    :param _feat_stride: unused here (original note: feat_stride=16, used to
        compute anchor offsets).
    :param anchors: anchor boxes, one row per score.
    :param num_anchors: number of anchors per location.
    :return: ``(blob, scores)`` - ``blob`` has a leading all-zero batch index
        column followed by the 4 proposal coordinates (float32); ``scores``
        holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # Decode the box corner coordinates from anchors + predicted values.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals: sort by score in descending order and
    # keep the first pre_nms_topN (original note: e.g. 6000) entries.
    order = scores.ravel().argsort()[::-1]  # flatten, then descending order
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]  # box coordinates
    scores = scores[order]  # box scores

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS: keep the first post_nms_topN
    # (original note: e.g. 300) survivors as the output proposals.
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors, reject_inds_1, reject_inds_2):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN; anchors listed in
    ``reject_inds_1`` or ``reject_inds_2`` are discarded before ranking.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[0][:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.
        reject_inds_1, reject_inds_2: ndarrays of flat anchor indices to
            discard (each may be empty).

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Reject via frcn and rpn. A boolean mask is used instead of writing a
    # sentinel value into ``scores``: nothing is mutated and no float
    # equality test is needed to find the survivors.
    keep_mask = np.ones(scores.shape[0], dtype=bool)
    if reject_inds_1.size != 0:
        keep_mask[reject_inds_1] = False
    if reject_inds_2.size != 0:
        keep_mask[reject_inds_2] = False
    proposals = proposals[keep_mask]
    scores = scores[keep_mask]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``); per the original
            note it selects TRAIN or TEST.
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    # pre_nms_topN : number of top-scoring rois kept before NMS
    # post_nms_topN: number of top-scoring rois kept after NMS
    # nms_thresh   : NMS threshold (original note: TRAIN.RPN_NMS_THRESH = 0.7)
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes.
    # Original note: along the 4th axis the first 9 entries are background
    # scores and the last 9 are foreground scores.
    # Shape example from the original notes, assuming rpn_cls_prob is
    # (1, 38, 50, 18): scores (1, 38, 50, 9) -> (17100, 1) and
    # rpn_bbox_pred (1, 38, 50, 36) -> (17100, 4).
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    # bbox_transform_inv computes proposals from the anchors and offsets.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # clip_boxes adjusts the box coordinates so they lie inside the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals.
    # Flatten, argsort (ascending indices), then reverse: ``order`` holds the
    # indices from highest to lowest score.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        # Keep only the first pre_nms_topN indices.
        order = order[:pre_nms_topN]
    # Keep the boxes and scores of the selected top-scoring entries.
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression; np.hstack joins them as (box, score) rows.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS (first post_nms_topN).
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input.
    # An extra batch_inds column is prepended, so the first column of blob is
    # all zeros (the original author was unsure how it is used downstream).
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors, gt_boxes, gt_texts, gt_pair):
    """Turn RPN outputs into ranked, NMS-filtered region proposals (PyTorch).

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, viewed as rows of 4 values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.
        gt_boxes, gt_texts, gt_pair: currently unused; the only code that
            read them was a disabled debugging block.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates; ``scores`` is a
        column of the matching scores.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always restore the column shape; previously scores stayed 1-D when
    # pre_nms_topN <= 0 and the concatenation below failed.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]
    # Till now, proposals and scores consist of the RPN_PRE_NMS_TOP_N
    # top anchor regions ranked only by rpn_cls_prob.

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into region proposals for a batch of images.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Each image ``im_i`` in ``range(im_info.shape[0])`` is processed
    independently. When ``post_nms_topN > 0`` the proposals removed by NMS
    are not dropped: the first ``post_nms_topN`` NMS survivors get +2 added
    to their score so they rank first, and the top ``post_nms_topN`` entries
    of that re-ranked list are returned. The returned scores therefore
    include the +2 offset for those survivors.

    Args:
        rpn_cls_prob: RPN class probabilities, indexed by image on axis 0;
            channels ``num_anchors:`` of the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, indexed by image on axis 0.
        im_info: per-image info; ``im_info[im_i, :2]`` goes to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes shared by all images.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - per-image results concatenated along
        axis 0; the first column of ``blob`` is the image index.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    blob = []
    scores = []
    for im_i in range(im_info.shape[0]):
        # Get the scores and bounding boxes (copies, so inputs stay intact)
        scores_im_i = rpn_cls_prob[im_i, :, :, num_anchors:].copy().reshape((-1, 1))
        rpn_bbox_pred_im_i = rpn_bbox_pred[im_i].copy().reshape((-1, 4))
        proposals = bbox_transform_inv(anchors, rpn_bbox_pred_im_i)
        proposals = clip_boxes(proposals, im_info[im_i, :2])

        # Pick the top region proposals
        order = scores_im_i.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores_im_i = scores_im_i[order]

        # Non-maximal suppression
        keep = nms(np.hstack((proposals, scores_im_i)), nms_thresh)

        # Pick the top region proposals after NMS
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
            # not filter out the remaining, but re-ranking and then clip
            scores_im_i[keep] += 2
            order = scores_im_i.ravel().argsort()[::-1]
            keep = order[:post_nms_topN]
        proposals = proposals[keep, :]
        scores_im_i = scores_im_i[keep]
        scores.append(scores_im_i)

        # multi image as input: the batch column carries the image index.
        # np.full pins the column to float32 regardless of how NumPy promotes
        # an integer scalar multiplied with a float32 array.
        batch_inds = np.full((proposals.shape[0], 1), im_i, dtype=np.float32)
        blob_im_i = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        blob.append(blob_im_i)

    scores = np.concatenate(scores)
    blob = np.concatenate(blob)

    return blob, scores
def unnormalize_box(Phi_labels, bbox_targets, boxes, stds, means,M, im_shape_w, im_shape_h):
    """Un-normalize boxes by using stds and means.

    For every item ``i`` the 4 regression targets of its class
    (columns ``4*Phi_labels[i] .. 4*Phi_labels[i]+3`` of ``bbox_targets``)
    are gathered, rescaled with that class' row of ``stds`` / ``means``,
    decoded against ``boxes`` with ``bbox_transform_inv`` and clipped with
    ``clip_boxes``.

    Args:
        Phi_labels: per-item class labels, length ``M``.
        bbox_targets: normalized regression targets, one row per item.
        boxes: reference boxes passed to ``bbox_transform_inv``.
        stds, means: per-class normalization statistics, indexed by label on
            axis 0.
        M: number of items.
        im_shape_w, im_shape_h: forwarded to ``clip_boxes`` as the tuple
            ``(im_shape_w, im_shape_h)``.

    Returns:
        The decoded, clipped boxes returned by ``clip_boxes``.
    """
    # Index with integer labels directly. The previous ``(4 * labels) / 4``
    # round trip is true division on Python 3 and produced a float index,
    # which NumPy rejects.
    label_idx = np.asarray(Phi_labels).ravel().astype(np.intp, copy=False)

    # Gather bbox_targets[i, 4*label_i + j] for j in 0..3 -> shape (M, 4).
    rows = np.arange(M)[:, np.newaxis]
    cols = 4 * label_idx[:, np.newaxis] + np.arange(4)
    bbox_target = bbox_targets[rows, cols]

    bbox_target = bbox_target * stds[label_idx, :] + means[label_idx, :]
    unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target)
    unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets, (im_shape_w, im_shape_h))
    return unnormalized_bbox_targets
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals (PyTorch).

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Parameters (shapes as stated by the original author):
    - rpn_cls_prob : (1, H, W, 2A) float Variable
    - rpn_bbox_pred: (1, H, W, 4A)
    - im_info      : indexed as ``im_info[0]``; the first two entries of that
                     row are passed to ``clip_boxes``
    - cfg_key      : train or test key into ``cfg`` (``str`` or ``bytes``)
    - _feat_stride : 16 (unused here)
    - anchors      : (HWA, 4) float Variable
    - num_anchors  : A = 9

    Returns:
    - blob   : Variable (N_nms, 5) [0; x1 y1 x2 y2]
    - scores : Variable (N_nms, 1)
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # train 12000; test 6000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # train 2000 ; test 300
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]      # (1, H, W, A) pos score only
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))       # (HWA, 4)
    scores = scores.contiguous().view(-1, 1)          # (HWA, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])    # (HWA, 4)

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always restore the column shape; previously scores stayed 1-D when
    # pre_nms_topN <= 0 and ``scores[:, 0]`` below failed.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression (this nms takes boxes and scores separately)
    keep = nms(proposals, scores[:, 0], nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def forward(self, out_cls, out_ellipse):
    """Generate ellipse/box proposals from the network outputs.

    out_cls: (feat_height, feat_width, anchors, 2) FloatVariable
    out_ellipse: (feat_height, feat_width, anchors, 5) FloatVariable

    Pipeline:
      1. decode ellipses from ``self._anchors`` and the predicted deltas,
         then convert them to boxes (padded by ``ELLIPSE_PAD``);
      2. clip the boxes with ``self._im_info[:2]``;
      3. drop boxes rejected by ``_filter_boxes``;
      4./5. sort by score (descending) and keep the first
         ``TEST.RPN_PRE_NMS_TOP_N`` when that setting is positive;
      6./7. run GPU or CPU NMS and keep the first
         ``TEST.RPN_POST_NMS_TOP_N`` when that setting is positive;
      8. return ``(boxes, ellipses, scores)`` with ``scores`` flattened.
    """
    settings = self._cfg

    # Softmax over the last axis; channel 1 is used as the score.
    class_prob = nn.functional.softmax(out_cls, dim=3)
    scores = class_prob[..., 1].contiguous().data.view(-1, 1)
    ellipse_deltas = out_ellipse.data.view(-1, 5)

    # 1. Generate proposals from ellipse deltas and shifted anchors:
    #    anchors -> ellipses -> bounding boxes
    ellipses = ellipse_transform_inv(self._anchors, ellipse_deltas)
    boxes = ellipse2box(ellipses, settings['ELLIPSE_PAD'])

    # 2. clip predicted boxes to image
    boxes = clip_boxes(boxes, self._im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # NOTE(review): the original comment says min_size should be converted
    # to the input image scale stored in im_info[2], but the threshold is
    # passed unscaled here - confirm which is intended.
    valid = _filter_boxes(boxes, settings['TEST.RPN_MIN_SIZE'])
    boxes, ellipses, scores = boxes[valid, :], ellipses[valid, :], scores[valid]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    ranking = torch.sort(scores.view(-1), dim=0, descending=True)[1]
    pre_nms_top_n = settings['TEST.RPN_PRE_NMS_TOP_N']
    if pre_nms_top_n > 0:
        ranking = ranking[:pre_nms_top_n]
    boxes, ellipses, scores = boxes[ranking, :], ellipses[ranking, :], scores[ranking]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    nms_fn = gpu_nms if settings['USE_GPU_NMS'] else cpu_nms
    dets = np.hstack((boxes.cpu().numpy(), scores.cpu().numpy()))
    survivors = nms_fn(dets, settings['TEST.RPN_NMS_THRESH'])
    # type_as(scores) puts the index tensor alongside ``scores`` before the
    # cast back to long.
    survivors = torch.from_numpy(np.array(survivors)).type_as(scores).long()
    post_nms_top_n = settings['TEST.RPN_POST_NMS_TOP_N']
    if post_nms_top_n > 0:
        survivors = survivors[:post_nms_top_n]

    boxes = boxes[survivors, :]
    ellipses = ellipses[survivors, :]
    scores = scores[survivors].view(-1)

    return (boxes, ellipses, scores)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered foreground (text) proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    :param rpn_cls_prob: (1, H, W, Ax2) softmax result of rpn scores; the
        last axis is read as ``A`` consecutive (background, foreground)
        pairs and entry 1 of each pair is used as the score.
    :param rpn_bbox_pred: (1, H, W, Ax4) 1x1 conv result for rpn bbox.
    :param im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
    :param cfg_key: key into ``cfg`` (``str`` or ``bytes``).
    :param anchors: anchor boxes, one row per score.
    :param num_anchors: number of anchors per location (A).
    :return: ``(blob, scores)`` - unlike the usual rois blob, the first
        column of ``blob`` is the score (float32), followed by the 4
        proposal coordinates.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes for foreground (text).
    # The order in last dim is related to network.py:
    # self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
    # (the older layout was: scores = rpn_cls_prob[:, :, :, num_anchors:])
    height, width = rpn_cls_prob.shape[1:3]  # height/width of the feature map
    paired = np.reshape(rpn_cls_prob, [1, height, width, num_anchors, 2])
    scores = paired[:, :, :, :, 1].reshape((-1, 1))

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh, not cfg.USE_GPU_NMS)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    blob = np.hstack(
        (scores.astype(np.float32, copy=False),
         proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top region proposals without non-maximal suppression.

    For details please see the technical report.

    Parameters (shapes as stated by the original author):
    - rpn_cls_prob : (1, H, W, 2A) float Variable
    - rpn_bbox_pred: (1, H, W, 4A)
    - im_info      : indexed as ``im_info[0]``; the first two entries of that
                     row are passed to ``clip_boxes``
    - _feat_stride : 16 (unused here)
    - anchors      : (HWA, 4) float Variable
    - num_anchors  : A = 9

    Returns:
    - blob   : (rpn_top_n, 5) float Variable [0; x1 y1 x2 y2]
    - scores : (rpn_top_n, 1) float Variable
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    im_info = im_info[0]

    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view(-1, 4)
    scores = scores.contiguous().view(-1, 1)

    length = scores.size(0)
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = torch.from_numpy(
            npr.choice(length, size=rpn_top_n, replace=True)).long()
        # Move the indices to the GPU only when the scores live there; the
        # unconditional .cuda() call broke CPU-only runs.
        if scores.is_cuda:
            top_inds = top_inds.cuda()
    else:
        # Indices of the rpn_top_n highest scores, best first.
        top_inds = scores.sort(0, descending=True)[1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.view(rpn_top_n)

    # Do the selection here
    anchors = anchors[top_inds].contiguous()
    rpn_bbox_pred = rpn_bbox_pred[top_inds].contiguous()
    scores = scores[top_inds].contiguous()

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat([batch_inds, proposals], 1)
    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top region proposals without non-maximal suppression.

    For details please see the technical report.

    Exactly ``cfg.TEST.RPN_TOP_N`` proposals are produced: the highest
    scoring ones when enough candidates exist, otherwise a random sample
    with replacement. Returns ``(blob, scores)`` where ``blob`` has a leading
    all-zero batch index column followed by the 4 box coordinates.
    """
    # Original note: TEST.RPN_TOP_N = 5000, only used when TEST.MODE = 'top'
    # (the default TEST.MODE is 'nms').
    top_n = cfg.TEST.RPN_TOP_N

    # Probability scores, one row per candidate box.
    fg_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    # RPN box outputs (original note: tx, ty, tw, th), one row per box.
    box_deltas = rpn_bbox_pred.reshape((-1, 4))

    # Number of candidate boxes.
    n_candidates = fg_scores.shape[0]
    if n_candidates >= top_n:
        # Sort from high to low along axis 0 and keep the first top_n indices.
        ranked = fg_scores.argsort(0)[::-1]
        picked = ranked[:top_n].reshape(top_n, )
    else:
        # Fewer boxes than top_n: sample with repetition to reach top_n.
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        picked = npr.choice(n_candidates, size=top_n, replace=True)

    # Do the selection here (select / reorder by index).
    chosen_anchors = anchors[picked, :]
    chosen_deltas = box_deltas[picked, :]
    chosen_scores = fg_scores[picked]

    # Convert anchors into proposals via bbox transformations, then clip the
    # predicted boxes so they stay inside the image.
    proposals = clip_boxes(
        bbox_transform_inv(chosen_anchors, chosen_deltas), im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0 (same leading zero column as in proposal_layer).
    zero_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois_blob = np.hstack((zero_col, proposals.astype(np.float32, copy=False)))
    return rois_blob, chosen_scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Take the foreground scores along the channel axis. Original note: of
    # the 18 channels the first 9 are background probabilities and only the
    # last 9 are foreground probabilities.
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    # Apply the RPN output to the initial boxes to get the first set of
    # transformed proposal coordinates.
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clip proposals that extend past the image so they lie inside it.
    proposals = clip_boxes(proposals, im_info[:2])

    # Sort the boxes by foreground score and keep the top ones; ``order``
    # holds the box indices.
    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        # Keep the pre_nms_topN best-scoring boxes (original note: 12000
        # when training, 6000 when testing).
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]  # coordinates of the kept boxes
    scores = scores[order]  # scores of the kept boxes

    # Run NMS on the remaining proposals to remove duplicates (original
    # note: threshold 0.7), then keep the first n.
    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Keep RPN_POST_NMS_TOP_N of the remaining proposals to obtain the final
    # rois and their rpn scores.
    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        # Original note: 2000 when training, 300 when testing.
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]  # coordinates of the kept boxes
    scores = scores[keep]  # scores of the kept boxes

    # Only support single image as input.
    # Original note: roi pooling follows, so the index of the image within
    # the batch is inserted in front of the box coordinates; the batch size
    # is 1 here, so every inserted index is 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the top region proposals per image, without NMS.

    For details please see the technical report.

    Each image ``im_i`` in ``range(im_info.shape[0])`` contributes exactly
    ``cfg.TEST.RPN_TOP_N`` proposals: the highest scoring ones when enough
    candidates exist, otherwise a random sample with replacement.

    Args:
        rpn_cls_prob: RPN class probabilities, indexed by image on axis 0.
            Channels ``num_anchors:`` of the last axis are used when
            ``cfg.TRAIN.RPN_FL_SOFTMAX`` is truthy, otherwise all channels.
        rpn_bbox_pred: RPN regression output, indexed by image on axis 0.
        im_info: per-image info; ``im_info[im_i, :2]`` goes to ``clip_boxes``.
        _feat_stride: unused.
        anchors: anchor boxes shared by all images.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - per-image results concatenated along
        axis 0; the first column of ``blob`` is the image index.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    # Loop-invariant: first channel that holds the scores.
    scores_ind_start = num_anchors if cfg.TRAIN.RPN_FL_SOFTMAX else 0

    blob = []
    scores = []
    # Get the scores and bounding boxes
    for im_i in range(im_info.shape[0]):
        scores_im_i = rpn_cls_prob[im_i, :, :, scores_ind_start:].copy().reshape((-1, 1))
        rpn_bbox_pred_im_i = rpn_bbox_pred[im_i].copy().reshape((-1, 4))

        length = scores_im_i.shape[0]
        if length < rpn_top_n:
            # Random selection, maybe unnecessary and loses good proposals
            # But such case rarely happens
            top_inds = npr.choice(length, size=rpn_top_n, replace=True)
        else:
            # Indices of the rpn_top_n highest scores, best first.
            top_inds = scores_im_i.argsort(0)[::-1]
            top_inds = top_inds[:rpn_top_n]
            top_inds = top_inds.reshape(rpn_top_n, )

        # Do the selection here. The selected anchors go into a per-image
        # variable: overwriting ``anchors`` made every later image index
        # into the already-subsampled anchors of the first one.
        anchors_im_i = anchors[top_inds, :]
        rpn_bbox_pred_im_i = rpn_bbox_pred_im_i[top_inds, :]
        scores_im_i = scores_im_i[top_inds]
        scores.append(scores_im_i)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors_im_i, rpn_bbox_pred_im_i)

        # Clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[im_i, :2])

        # multi image as input: the batch column carries the image index
        # (built directly as float32).
        batch_inds = np.full((proposals.shape[0], 1), im_i, dtype=np.float32)
        blob_im_i = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        blob.append(blob_im_i)

    scores = np.concatenate(scores)
    blob = np.concatenate(blob)
    return blob, scores
def proposal_layer_test_caption_compact(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):
    """Turn RPN outputs into ranked, NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; the channels from
            ``num_anchors`` onward of the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, reshaped to rows of 4 values.
        im_info: image info; ``im_info[0][:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        anchor_scales, anchor_ratios: anchor configuration; only their
            lengths are used, to derive
            ``num_anchors = len(scales) * len(ratios)``.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates (float32);
        ``scores`` holds the matching scores, one row per proposal.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    scales = np.array(anchor_scales)
    ratios = np.array(anchor_ratios)
    num_anchors = scales.shape[0] * ratios.shape[0]

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer_fpn(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn multi-level RPN outputs into NMS-filtered region proposals.

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Every level ``idx`` is decoded and reduced to its top ``pre_nms_topN``
    candidates independently; the candidates of all levels are then
    concatenated and a single NMS pass is run over them.

    Args:
        rpn_cls_prob: sequence of per-level RPN class probabilities;
            channels ``num_anchors:`` of the last axis are used as scores.
        rpn_bbox_pred: sequence of per-level RPN regression outputs. It is
            no longer modified in place.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: per-level anchor boxes, indexed like ``rpn_cls_prob``.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` has a leading all-zero batch
        index column followed by the 4 proposal coordinates; ``scores`` is a
        column of the matching scores.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    proposals_total = []
    scores_total = []

    for idx, level_cls_prob in enumerate(rpn_cls_prob):
        # Get the scores and bounding boxes. The reshaped deltas stay in a
        # local so the caller's rpn_bbox_pred sequence is left untouched.
        scores = level_cls_prob[:, :, :, num_anchors:].contiguous().view(-1, 1)
        level_bbox_pred = rpn_bbox_pred[idx].view((-1, 4))
        proposals = bbox_transform_inv(anchors[idx], level_bbox_pred)
        proposals = clip_boxes(proposals, im_info[:2])

        # Pick the top region proposals of this level
        scores, order = scores.view(-1).sort(descending=True)
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN]
        # Always restore the column shape; previously scores stayed 1-D when
        # pre_nms_topN <= 0 and the column-wise concatenation below failed.
        scores = scores.view(-1, 1)
        proposals = proposals[order.data, :]

        proposals_total.append(proposals)
        scores_total.append(scores)

    proposals = torch.cat(proposals_total)
    scores = torch.cat(scores_total)

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered region proposals (PyTorch).

    A simplified version compared to fast/er RCNN.
    For details please see the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities; channels ``num_anchors:`` of
            the last axis are used as scores.
        rpn_bbox_pred: RPN regression output, viewed as rows of 4 values.
        im_info: image info; ``im_info[:2]`` is passed to ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: number of anchors per location.

    Returns:
        tuple: ``(blob, scores)`` - ``blob`` is ``[0, x1, y1, x2, y2]`` per
        row (leading all-zero batch index column); ``scores`` is a column of
        the matching scores.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_), which the
    # exact-type comparison silently left undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # original note: 12000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # original note: 2000
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # original note: 0.7

    # Get the scores and bounding boxes.
    # Shape example from the original notes: inputs [1, 57, 38, 18] and
    # [1, 57, 38, 36] give scores [1, 57, 38, 9] -> [19494, 1] and
    # rpn_bbox_pred [19494, 4].
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)

    # Shift and scale the anchors by the predicted values so the result
    # approaches the ground truth; rows are [x1, y1, x2, y2].
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals.
    # ``scores`` are the sorted values, ``order`` the matching indices.
    scores, order = scores.view(-1).sort(descending=True)
    # Keep the pre_nms_topN highest-scoring entries.
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always restore the column shape; previously scores stayed 1-D when
    # pre_nms_topN <= 0 and the concatenation below failed.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS (the post_nms_topN best).
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep, ]

    # Only support single image as input: prepend an all-zero batch index
    # column (original note: [2000, 1] zeros -> blob [2000, 5]).
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (NumPy version).

    Args:
        rpn_cls_prob: 4-D score array; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, reshaped here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the reshaped deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with a zero
        batch-index column followed by the four box coordinates; ``scores``
        is the matching score column.
    """
    # isinstance also accepts bytes subclasses such as numpy.bytes_, which
    # the previous exact-type comparison let through undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # Only consumed by the disabled small-box filter below.
    min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Small-box filtering is disabled in this variant; kept for reference.
    # removed predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    # keep = _filter_boxes(proposals, min_size * im_info[2])
    # proposals = proposals[keep, :]
    # scores = scores[keep]

    # Pick the top region proposals (indices in descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (torch version).

    Args:
        rpn_cls_prob: 4-D score tensor; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, flattened here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the flattened deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` has a zero batch-index column followed by
        the four box coordinates; ``scores`` is the matching score column.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_).
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally; previously this only happened
    # inside the branch above, so a non-positive limit left `scores` 1-D and
    # `scores.squeeze(1)` below raised a dimension error.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression (this nms takes boxes and a 1-D score vector)
    keep = nms(proposals, scores.squeeze(1), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (NumPy, no clipping).

    Unlike the sibling implementations, this variant does not clip the
    decoded boxes to the image: the ``clip_boxes`` call is commented out.

    Args:
        rpn_cls_prob: 4-D score array; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, reshaped here to rows of 4.
        im_info: unused while clipping is disabled.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the reshaped deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with a zero
        batch-index column followed by the four box coordinates; ``scores``
        is the matching score column.
    """
    # isinstance also accepts bytes subclasses such as numpy.bytes_, which
    # the previous exact-type comparison let through undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    # Number of top-scoring boxes kept before NMS (author's note: 12000).
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    # Number of boxes kept per image after NMS (author's note: 2000).
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # NMS threshold used to drop redundant boxes (author's note: 0.7).
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # Clipping is disabled in this variant:
    # proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (indices in descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the ``cfg.TEST.RPN_TOP_N`` best-scoring proposals without NMS.

    Args:
        rpn_cls_prob: 4-D score tensor; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, flattened here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        _feat_stride: unused.
        anchors: anchor boxes, one row per score.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` has a zero batch-index column followed by
        the four box coordinates; ``scores`` is the matching score column.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view(-1, 4)
    scores = scores.contiguous().view(-1, 1)

    length = scores.size(0)
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        # Place the sampled indices on the anchors' device instead of
        # unconditionally calling .cuda(), which failed on CPU-only runs.
        top_inds = torch.from_numpy(
            npr.choice(length, size=rpn_top_n, replace=True)).long().to(anchors.device)
    else:
        top_inds = scores.sort(0, descending=True)[1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.view(rpn_top_n)

    # Do the selection here
    anchors = anchors[top_inds, :].contiguous()
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous()
    scores = scores[top_inds].contiguous()

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = proposals.data.new(proposals.size(0), 1).zero_()
    # Wrapped in Variable so that __C.TEST.MODE = 'top' works.
    blob = torch.cat((Variable(batch_inds), proposals), 1)
    return blob, scores
def proposal_layer_all(rpn_bbox_pred, im_info, anchors, rpn_cls_prob=None):
    """Return every decoded RoI; drl-RPN later decides which are forwarded
    to the class-specific module.

    Args:
        rpn_bbox_pred: box deltas; its first three dimensions are read as
            (batch, height, width) before it is reshaped to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        anchors: anchor boxes decoded together with the reshaped deltas.
        rpn_cls_prob: score array, only read when ``cfg.DRL_RPN.USE_AGNO`` is
            set; it must not be ``None`` in that case.

    Returns:
        ``(rois_all, roi_obs_vol, not_keep_ids)``:
        ``rois_all`` is a float32 array with a zero batch-index column
        followed by the box coordinates of all proposals;
        ``roi_obs_vol`` is an all-zero int32 volume of shape
        (batch, height, width, ``cfg.NBR_ANCHORS``);
        ``not_keep_ids`` holds the proposal indices rejected by
        class-agnostic NMS, or a (1, 1) zero placeholder when
        ``cfg.DRL_RPN.USE_AGNO`` is off.
    """
    # Get the bounding boxes
    batch_sz, height, width = rpn_bbox_pred.shape[0:3]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Create initial (all-zeros) observation RoI volume
    roi_obs_vol = np.zeros((batch_sz, height, width, cfg.NBR_ANCHORS),
                           dtype=np.int32)

    if cfg.DRL_RPN.USE_AGNO:
        # If this branch is used, we only consider RoIs among survivors from
        # class-agnostic NMS when choosing RoIs with drl-RPN.
        # (The pre/post-NMS top-N settings were read here before but never
        # used, so those locals have been removed.)
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        scores = rpn_cls_prob[:, :, :, cfg.NBR_ANCHORS:]
        scores = scores.reshape((-1, 1))
        keep_ids_all = np.arange(scores.shape[0], dtype=np.int32)

        # Non-maximal suppression
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        keep_ids = keep_ids_all[keep]
        not_keep_ids = np.setdiff1d(keep_ids_all, keep_ids)
    else:
        not_keep_ids = np.zeros((1, 1), dtype=np.int32)

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    rois_all = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return rois_all, roi_obs_vol, not_keep_ids
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, info, cfg_key, anchors, anchors_3d, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs plus their 3-D anchors.

    ``anchors_3d`` is reordered and filtered with exactly the same indices as
    the 2-D proposals, so the returned rows stay aligned.

    Args:
        rpn_cls_prob: 4-D score tensor; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: adjustment factors for the anchors, flattened here to
            rows of 4.
        info: bounds information passed unchanged to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        anchors: anchor boxes decoded together with the flattened deltas.
        anchors_3d: per-anchor rows carried along with the proposals.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores, anchors_3d)``: ``blob`` has a zero batch-index column
        followed by the four box coordinates; ``scores`` is the matching score
        column; ``anchors_3d`` holds the rows of the kept proposals.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_).
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    # Scores start at the back half of the anchor/score channels.
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)  # collapse into a single column
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, info)  # make sure they are within bounds

    # Pick the top 'pre_nms_topN' region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally; previously this only happened
    # inside the branch above, so a non-positive limit left `scores` 1-D and
    # `scores.squeeze(1)` below raised a dimension error.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]
    anchors_3d = anchors_3d[order.data, :]

    # Non-maximal suppression
    keep = nms(proposals, scores.squeeze(1), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    anchors_3d = anchors_3d[keep, :]

    # Only support single image as input, so every batch index is 0
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores, anchors_3d
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (NumPy version).

    Steps:
      1. Refine the anchors with ``rpn_bbox_pred``.
      2. Keep the ``pre_nms_topN`` anchors with the highest predicted scores.
      3. Apply non-maximal suppression.
      4. From the NMS survivors, keep the first ``post_nms_topN`` proposals.

    Args:
        rpn_cls_prob: 4-D score array; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, reshaped here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the reshaped deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with a zero
        batch-index column followed by the four box coordinates; ``scores``
        is the matching score column.
    """
    # isinstance also accepts bytes subclasses such as numpy.bytes_, which
    # the previous exact-type comparison let through undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (indices in descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Pick the ``cfg.TEST.RPN_TOP_N`` highest-scoring proposals, skipping NMS.

    The scores are the channels of ``rpn_cls_prob`` from ``num_anchors``
    onward on the last axis.  When fewer than ``RPN_TOP_N`` candidates exist,
    indices are drawn at random with replacement so that the output still has
    ``RPN_TOP_N`` rows.

    Returns:
        ``(blob, scores)``: ``blob`` has a zero batch-index column followed by
        the four clipped box coordinates; ``scores`` is the matching column.
    """
    top_n = cfg.TEST.RPN_TOP_N

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:].contiguous().view(-1, 1)
    deltas = rpn_bbox_pred.view(-1, 4)
    num_candidates = fg_scores.size(0)

    if num_candidates >= top_n:
        # Enough candidates: take the indices of the best `top_n` scores.
        _, ranked = fg_scores.sort(0, descending=True)
        picked = ranked[:top_n].view(top_n)
    else:
        # Too few candidates: sample with replacement.  This may lose good
        # proposals, but the situation rarely happens.
        sampled = npr.choice(num_candidates, size=top_n, replace=True)
        picked = torch.from_numpy(sampled).long().to(anchors.device)

    # Gather the chosen rows from every per-anchor tensor.
    chosen_anchors = anchors[picked, :].contiguous()
    chosen_deltas = deltas[picked, :].contiguous()
    scores = fg_scores[picked].contiguous()

    # Decode the anchors into boxes, then clip them to the image.
    proposals = clip_boxes(
        bbox_transform_inv(chosen_anchors, chosen_deltas), im_info[:2])

    # Only a single input image is supported, so the batch index is always 0.
    zero_column = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat([zero_column, proposals], 1)
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (torch version).

    Args:
        rpn_cls_prob: 4-D score tensor; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, flattened here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the flattened deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` has a zero batch-index column followed by
        the four box coordinates; ``scores`` is the matching score column.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_).
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally; previously this only happened
    # inside the branch above, so a non-positive limit left `scores` 1-D and
    # the concatenation with `proposals` below failed.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]  # author's note: (300, 4) at test time
    scores = scores[keep]

    # Our RPN implementation only supports a single input image,
    # so all batch inds are 0.
    # In other words, all these RoIs belong to one image; if multiple input
    # images are supported later, each RoI will have to record which image
    # (i.e. which batch entry) it belongs to.
    batch_inds = Variable(proposals.data.new(proposals.size(0), 1).zero_())
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores
def predict(self, blobs):
    """Run the network on one prepared input and return scores and boxes.

    :param blobs: mapping with a ``'data'`` entry (network input) and an
        ``'im_info'`` entry that unpacks into (height, width, scale)
    :return: ``(scores, pred_boxes)`` -- scores reshaped to one row per RoI,
        and the regressed boxes clipped with ``[height, width]``, as a NumPy
        array
    """
    im_info = blobs['im_info']
    img_h, img_w, im_scale = im_info

    # Forward pass; the first returned value is not needed here.
    _, scores, bbox_pred, rois = self.net.test_image(blobs['data'], im_info)

    # Column 0 of each RoI is skipped; the boxes are divided by the scale.
    boxes = rois[:, 1:5] / im_scale

    # Flatten everything after the first axis so each RoI is a single row.
    num_scores = scores.shape[0]
    scores = np.reshape(scores, [num_scores, -1])
    num_preds = bbox_pred.shape[0]
    box_deltas = np.reshape(bbox_pred, [num_preds, -1])

    # Apply bounding-box regression deltas, then clip the resulting boxes.
    decoded = bbox_transform_inv(t.from_numpy(boxes), t.from_numpy(box_deltas))
    pred_boxes = clip_boxes(decoded, [img_h, img_w]).numpy()

    return scores, pred_boxes
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Pick the ``cfg.TEST.RPN_TOP_N`` highest-scoring proposals, skipping NMS.

    NumPy implementation.  Only the first row of ``im_info`` is used.  When
    fewer than ``RPN_TOP_N`` candidates exist, indices are drawn at random
    with replacement so that the output still has ``RPN_TOP_N`` rows.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with a zero
        batch-index column followed by the four clipped box coordinates;
        ``scores`` is the matching score column.
    """
    top_n = cfg.TEST.RPN_TOP_N
    first_im_info = im_info[0]

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))
    num_candidates = fg_scores.shape[0]

    if num_candidates >= top_n:
        # Enough candidates: indices of the best `top_n` scores, descending.
        ranked = fg_scores.argsort(0)[::-1]
        picked = ranked[:top_n].reshape(top_n, )
    else:
        # Too few candidates: sample with replacement.  This may lose good
        # proposals, but the situation rarely happens.
        picked = npr.choice(num_candidates, size=top_n, replace=True)

    # Gather the chosen rows from every per-anchor array.
    chosen_anchors = anchors[picked, :]
    chosen_deltas = deltas[picked, :]
    scores = fg_scores[picked]

    # Decode the anchors into boxes, then clip them to the image.
    proposals = clip_boxes(
        bbox_transform_inv(chosen_anchors, chosen_deltas), first_im_info[:2])

    # Only a single input image is supported, so the batch index is always 0.
    zero_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((zero_column, proposals.astype(np.float32, copy=False)))
    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (NumPy version).

    Args:
        rpn_cls_prob: 4-D score array; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, reshaped here to rows of 4.
        im_info: image info; only its first row is used, and the first two
            entries of that row are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the reshaped deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with a zero
        batch-index column followed by the four box coordinates; ``scores``
        is the matching score column.
    """
    # isinstance also accepts bytes subclasses such as numpy.bytes_, which
    # the previous exact-type comparison let through undecoded.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    im_info = im_info[0]
    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals (indices in descending score order)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into ranked, NMS-filtered RoIs (torch version).

    Args:
        rpn_cls_prob: 4-D score tensor; channels from ``num_anchors`` onward
            on the last axis are used as scores.
        rpn_bbox_pred: box deltas, flattened here to rows of 4.
        im_info: image info; its first two entries are given to ``clip_boxes``.
        cfg_key: ``str`` or ``bytes`` key selecting the ``cfg`` section.
        _feat_stride: unused.
        anchors: anchor boxes decoded together with the flattened deltas.
        num_anchors: channel offset at which the used scores start.

    Returns:
        ``(blob, scores)``: ``blob`` has a zero batch-index column followed by
        the four box coordinates; ``scores`` is the matching score column.
    """
    # isinstance also accepts bytes subclasses (e.g. numpy.bytes_).
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Restore the column shape unconditionally; previously this only happened
    # inside the branch above, so a non-positive limit left `scores` 1-D and
    # `scores.squeeze(1)` below raised a dimension error.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression (this nms takes boxes and a 1-D score vector)
    keep = nms(proposals, scores.squeeze(1), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)

    return blob, scores