def proposal_layer(bbox_pred, iou_pred, cls_pred, anchors, ls):
    """Decode raw detector outputs into per-class NMS-filtered detections.

    Boxes are decoded by ``bbox_transform_inv`` and scaled by
    ``cfg.INP_SIZE``.  Each box is scored as ``iou_pred`` times its largest
    class value.  Boxes scoring below ``cfg.COEF_THRESH`` are dropped, the
    best ``cfg.PRE_NMS_TOP_N`` are kept, and NMS runs independently for each
    class index.

    Returns:
        ``(box_pred, cls_inds, scores)``: boxes after ``clip_boxes``, the
        int8 class index of every kept box, and the 1-D score of every
        kept box.
    """
    decoded = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32),
        ls, ls)
    box_pred = np.reshape(decoded * cfg.INP_SIZE, [-1, 4])
    iou_pred = np.reshape(iou_pred, [-1, 1])
    cls_pred = np.reshape(cls_pred, [-1, cfg.NUM_CLASSES])

    # Best class per box and the value the network assigned to it.
    cls_inds = np.argmax(cls_pred, axis=1)
    row_ids = np.arange(cls_pred.shape[0])
    best_cls_prob = cls_pred[row_ids, cls_inds][:, np.newaxis]
    scores = iou_pred * best_cls_prob

    # Keep boxes scoring at least the threshold, best-first, capped at top-N.
    candidates = np.where(scores >= cfg.COEF_THRESH)[0]
    ranking = np.argsort(-scores[candidates, 0])[:cfg.PRE_NMS_TOP_N]
    selected = candidates[ranking]
    box_pred = box_pred[selected]
    cls_inds = cls_inds[selected]
    scores = scores[selected]

    # Class-wise NMS: mark every survivor in a shared flag array.
    survivors = np.zeros(len(box_pred), dtype=np.int8)
    for cls_id in range(cfg.NUM_CLASSES):
        members = np.where(cls_inds == cls_id)[0]
        if len(members) > 0:
            dets = np.hstack([box_pred[members], scores[members]])
            kept_local = nms_detection(dets, cfg.NMS_THRESH)
            survivors[members[kept_local]] = 1

    final = np.where(survivors > 0)
    box_pred = box_pred[final]
    cls_inds = cls_inds[final].astype(np.int8)
    scores = scores[final][:, 0]

    # Clip boxes to the square network input.
    box_pred = clip_boxes(
        np.ascontiguousarray(box_pred, dtype=np.float32),
        cfg.INP_SIZE, cfg.INP_SIZE)
    return box_pred, cls_inds, scores
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Build RPN proposals: decode, clip, rank, NMS, truncate.

    A simplified version compared to fast/er RCNN; for details see the
    technical report.

    Args:
        rpn_cls_prob: 4-D array; the last-axis channels from ``num_anchors``
            onward are used as the objectness scores.
        rpn_bbox_pred: box deltas, reshaped here to ``(-1, 4)``.
        im_info: sequence whose first two entries are passed to
            ``clip_boxes``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``).
        _feat_stride: unused.
        anchors: anchors passed to ``bbox_transform_inv``.
        num_anchors: number of anchors per location.

    Returns:
        ``(blob, scores)`` where ``blob`` has a leading all-zero batch-index
        column followed by the four box coordinates, and ``scores`` holds
        the matching score rows.
    """
    # Accept the key as bytes too.  isinstance (rather than an exact type
    # comparison) also covers bytes subclasses such as numpy.bytes_.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))

    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input, so every batch index is 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
    """Select the ``cfg.TEST.RPN_TOP_N`` best-scoring proposals, without NMS.

    For details please see the technical report.

    Returns:
        ``(blob, scores)``: ``blob`` is a zero batch-index column followed
        by the clipped proposal coordinates; ``scores`` are the rows of the
        score column that were selected.
    """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    fg_scores = rpn_cls_prob[:, :, :, num_anchors:]
    deltas = rpn_bbox_pred.reshape((-1, 4))
    fg_scores = fg_scores.reshape((-1, 1))
    total = fg_scores.shape[0]

    if total >= rpn_top_n:
        ranked = fg_scores.argsort(0)[::-1]
        top_inds = ranked[:rpn_top_n].reshape(rpn_top_n, )
    else:
        # Fewer candidates than requested: sample with replacement.
        # Random selection may be unnecessary and can lose good proposals,
        # but this case rarely happens.
        top_inds = npr.choice(total, size=rpn_top_n, replace=True)

    # Gather the chosen anchors, deltas and scores.
    chosen_anchors = anchors[top_inds, :]
    chosen_deltas = deltas[top_inds, :]
    chosen_scores = fg_scores[top_inds]

    # Anchors -> proposals, then clip to the image.
    boxes = bbox_transform_inv(chosen_anchors, chosen_deltas)
    boxes = clip_boxes(boxes, im_info[:2])

    # Output rois blob.  Only a single input image is supported, so all
    # batch indices are 0.
    zeros_col = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    rois = np.hstack((zeros_col, boxes.astype(np.float32, copy=False)))
    return rois, chosen_scores
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride, eval_mode):
    """Produce the RoI blob for one image from raw RPN outputs.

    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :param eval_mode: truthy selects the ``cfg.test_rpn_*`` settings,
        falsy selects the ``cfg.train_rpn_*`` settings
    :return: array whose first column is the (all-zero) batch index and
        whose remaining columns are the kept proposals
    """
    cls_prob = rpn_softmax(rpn_bbox_cls)

    anchor_gen = Anchors(feat_stride=feat_stride)
    # all_anchors (A * H * W, 4)
    anchors, num_anchors = anchor_gen.get_anchors()

    # (1, 2 * k, H, W)
    cls_prob = np.transpose(cls_prob, [0, 3, 1, 2])
    # (1, 4 * k, H, W)
    bbox_deltas = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert cls_prob.shape[0] == 1, 'Only support 1 batch_size'

    if eval_mode:
        # evaluation mode
        pre_nms_topN = cfg.test_rpn_pre_nms_top_n
        post_nms_topN = cfg.test_rpn_post_nms_top_n
        nms_thresh = cfg.test_rpn_nms_thresh
        min_size = cfg.test_rpn_min_size
    else:
        # training mode
        pre_nms_topN = cfg.train_rpn_pre_nms_top_n
        post_nms_topN = cfg.train_rpn_post_nms_top_n
        nms_thresh = cfg.train_rpn_nms_thresh
        min_size = cfg.train_rpn_min_size

    # Of the predicted class channels, the first num_anchors are background
    # and the remaining ones are foreground.
    fg_prob = cls_prob[:, num_anchors:, :, :]

    # (1, 4 * k, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
    bbox_deltas = bbox_deltas.reshape((-1, 4))

    # scores: (1, A, H, W) -> (1, H, W, A) -> (1 * H * W * A, 1),
    # rows ordered by (h, w, a)
    scores = fg_prob.transpose((0, 2, 3, 1))
    scores = scores.reshape((-1, 1))

    # 1. regress the actual boxes from the anchors and the predicted deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_size)

    # 3. remove predicted boxes with either height or width < threshold
    big_enough = _filter_boxes(proposals, min_size)
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]

    # Output rois blob.  Only a single input image is supported, so all
    # batch indices are 0.
    zeros_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((zeros_col, proposals.astype(np.float32, copy=False)))
def forward(self, bottom, top):
    """Turn RPN outputs into a RoI blob (and optionally a score blob).

    Reads ``bottom[0].data`` (class probabilities; channels from
    ``self._num_anchors`` onward are used as foreground scores),
    ``bottom[1].data`` (box deltas) and ``bottom[2].data[0, :]`` (image
    info: height, width, scale).  Writes the RoIs to ``top[0]`` and, when
    a second top exists, the scores to ``top[1]``.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS to remaining proposals (training phase only)
    take after_nms_topN proposals after NMS
    return the top proposals (-> RoIs top, scores top)
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    if self.phase == 0:
        cfg_key = 'TRAIN'
    elif self.phase == 1:
        cfg_key = 'TEST'
    else:
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

    # NMS settings are only read for TRAIN; they are only used when
    # self.phase == 0 further down.
    if cfg_key == 'TRAIN':
        nms_thresh = cfg[cfg_key].NMS_THRESH
        post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
        pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS
    if cfg_key == 'TEST':
        pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
    min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (if in training mode)
    # 7. take after_nms_topN
    # 8. return the top proposals (-> RoIs top)
    if self.phase == 0:
        # DO NMS ONLY IN TRAINING TIME
        # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    if proposals.shape[0] == 0:
        # Nothing survived: emit one placeholder RoI so the blob is not empty.
        blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
def forward(self, bottom, top):
    """Turn multi-class detection outputs into a RoI blob and score blob.

    Reads ``bottom[-3].data`` (class scores), ``bottom[-2].data`` (box
    deltas) and ``bottom[-1].data[0, :]`` (image info).  The first
    ``self._num_refine`` bottoms supply extra box deltas that are applied
    one after another.  Writes the RoIs to ``top[0]`` and, when a second
    top exists, the per-class scores to ``top[1]``.  Also stores the
    enumerated anchors on ``self.anchors``.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals, cut at the score threshold
    apply NMS to remaining proposals (training phase only)
    take after_nms_topN proposals after NMS
    return the top proposals (-> RoIs top, scores top)
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    if self.phase == 0:
        cfg_key = 'TRAIN'
    elif self.phase == 1:
        cfg_key = 'TEST'
    else:
        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

    # A score threshold is only configured for TEST.
    score_thresh = None
    if cfg_key == 'TRAIN':
        nms_thresh = cfg[cfg_key].NMS_THRESH
        post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
        pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS
    if cfg_key == 'TEST':
        pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
        score_thresh = cfg[cfg_key].SCORE_THRESH
    min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

    scores = bottom[-3].data  # For multi-class
    bbox_deltas = bottom[-2].data
    im_info = bottom[-1].data[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride[0]
    shift_y = np.arange(0, height) * self._feat_stride[0]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    # Floor division keeps the class count an int on Python 3 as well
    # (it is used as a reshape dimension below).
    num_classes = scores.shape[1] // (A * self._num_feats)
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    self.anchors = anchors

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Scores: move channels last, split them into
    # (num_classes, A * num_feats), swap those two axes, and flatten to
    # one row of num_classes scores per box.
    scores = scores.transpose((0, 2, 3, 1)).reshape(
        (-1, num_classes, A * self._num_feats)).transpose(
        (0, 2, 1)).reshape((-1, num_classes))

    # Convert anchors into proposals via bbox transformations; each anchor
    # is repeated self._num_feats times.
    new_anchors = np.concatenate(
        [anchors[:, np.newaxis, :]] * self._num_feats,
        axis=1).reshape((-1, 4))
    proposals = bbox_transform_inv(new_anchors, bbox_deltas)

    for i in range(self._num_refine):
        # Do this because a combination of bbox_transform_inv and
        # _compute_targets will cause a larger 3rd and 4th entry of
        # coordinates.  We do not do this at the last regression, just to
        # follow the original code.
        proposals[:, 2:4] -= 1
        refine_delta = bottom[i].data
        refine_delta = refine_delta.transpose((0, 2, 3, 1)).reshape((-1, 4))
        proposals = bbox_transform_inv(proposals, refine_delta)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    if self._subsampled:
        # Mark, per anchor index, the grid cells kept at that anchor's
        # stride relative to the base stride.
        anchor_map = np.zeros((height, width, A))
        for i in range(A):
            # Floor division: the quotient is used as a list index.
            stride = self._feat_stride[
                i // len(self._shifts) ** 2] // self._feat_stride[0]
            anchor_map[::stride, ::stride, i] = 1
        anchor_map = anchor_map.reshape((K * A))
        subsampled_inds = np.where(anchor_map)[0]
        proposals = proposals[subsampled_inds, :]
        scores = scores[subsampled_inds, :]

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep, :]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    # Column 0 is skipped when ranking.
    max_score = np.max(scores[:, 1:], axis=1).ravel()
    order = max_score.argsort()[::-1]
    if score_thresh is None:
        # NOTE(review): no threshold is configured outside TEST.  The
        # previous bare ``except`` swallowed the resulting NameError and
        # kept only the best row; that behaviour is preserved here --
        # confirm it is intended for training.
        thresh_idx = 0
    else:
        above = np.where(max_score[order] >= score_thresh)[0]
        # Nothing reaches score_thresh: just keep the largest one.
        thresh_idx = above.max() if above.size else 0
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    order = order[:thresh_idx + 1]
    proposals = proposals[order, :]
    scores = scores[order, :]

    # 6. apply nms (if in training mode)
    # 7. take after_nms_topN
    # 8. return the top proposals (-> RoIs top)
    if self.phase == 0:
        # DO NMS ONLY IN TRAINING TIME
        # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    if proposals.shape[0] == 0:
        # Nothing survived: emit one placeholder RoI so the blob is not empty.
        blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
    else:
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
def test_net(test_split, net, batchsize, use_kld=cfg.USE_KLD, use_reg=cfg.USE_REG, threshold=cfg.OVERLAP_THRESHOLD, topk=cfg.TOPK, vis=False):
    """Evaluate ``net`` on ``test_split`` and return its grounding accuracy.

    A query counts as correct when any of its ``topk`` highest-scoring
    boxes reaches an IoU of at least ``threshold`` with the ground-truth
    box.

    Args:
        test_split: name of the data split handed to ``DDPNDataProvider``.
        net: network exposing ``blobs`` and ``forward()``.
        batchsize: number of queries per forward pass.
        use_kld: unused in this function.
        use_reg: when truthy, refine the input boxes with the network's
            ``query_bbox_pred`` output via ``bbox_transform_inv``.
        threshold: minimum IoU for a prediction to count as correct.
        topk: how many top-scoring boxes per query are checked.
        vis: when truthy, load each image and call ``debug_pred``.

    Returns:
        ``num_right / num_query`` as a float.
    """
    print('validate split: %s' % test_split)
    rpn_topn = cfg.RPN_TOPN
    dp = DDPNDataProvider(data_split=test_split, batchsize=batchsize)
    num_query = dp.get_num_query()
    num_right = 0

    dataloader = dp
    if cfg.NTHREADS > 1:
        try:
            import torch
            dataloader = torch.utils.data.DataLoader(
                dp, batch_size=batchsize, shuffle=False,
                num_workers=int(cfg.NTHREADS))
        except Exception:
            # Best effort: without a usable DataLoader, iterate the
            # provider directly in this process.
            cfg.NTHREADS = 1
            dataloader = dp

    my_complete_data = functools.partial(complete_data, batchsize=batchsize)
    count = 0
    for data in dataloader:
        if data is None:
            break
        data = map(np.array, data)
        gt_boxes, qvec, cvec, img_feat, bbox, img_shape, spt_feat, query_label, query_label_mask, \
            query_bbox_targets, query_bbox_inside_weights, query_bbox_outside_weights, valid_data, iid_list = map(
                my_complete_data, data)

        # Keep the untransposed copies for visualisation.
        tp_qvec = qvec.copy()
        tp_cvec = cvec.copy()
        qvec = np.transpose(qvec, (1, 0))
        cvec = np.transpose(cvec, (1, 0))
        query_bbox_targets = query_bbox_targets.reshape(-1, 4)
        query_bbox_inside_weights = query_bbox_inside_weights.reshape(-1, 4)
        query_bbox_outside_weights = query_bbox_outside_weights.reshape(-1, 4)

        net_inputs = (
            ('qvec', qvec),
            ('cvec', cvec),
            ('img_feat', img_feat),
            ('spt_feat', spt_feat),
            ('query_label', query_label),
            ('query_label_mask', query_label_mask),
            ('query_bbox_targets', query_bbox_targets),
            ('query_bbox_inside_weights', query_bbox_inside_weights),
            ('query_bbox_outside_weights', query_bbox_outside_weights),
        )
        for blob_name, value in net_inputs:
            # NOTE(review): reshape is called on ``.data`` and its result is
            # discarded, so (assuming ``.data`` is a NumPy array) this only
            # fails on an element-count mismatch and does not resize the
            # blob -- confirm whether ``net.blobs[name].reshape`` was meant.
            net.blobs[blob_name].data.reshape(*value.shape)
            net.blobs[blob_name].data[...] = value

        net.forward()

        rois = bbox.copy()
        rois = rois.reshape(-1, 4)
        query_score_pred = net.blobs['query_score_pred'].data
        if use_reg:
            query_bbox_pred = net.blobs['query_bbox_pred'].data
            query_bbox_pred = bbox_transform_inv(rois, query_bbox_pred)
        else:
            query_bbox_pred = rois

        # Box indices per query, best score first.
        query_inds = np.argsort(-query_score_pred, axis=1)
        rois = rois.reshape(batchsize, rpn_topn, 4)
        query_bbox_pred = query_bbox_pred.reshape(batchsize, rpn_topn, 4)

        for i in range(batchsize):
            if valid_data[i] != 0:
                t_query_bbox_pred = clip_boxes(query_bbox_pred[i], img_shape[i])
                t_rois = clip_boxes(rois[i], img_shape[i])
                for j in range(topk):
                    query_ind = query_inds[i, j]
                    iou = calc_iou(t_query_bbox_pred[query_ind], gt_boxes[i])
                    if iou >= threshold:
                        num_right += 1
                        break

                # debug pred: uses the last box examined in the loop above
                if vis:
                    debug_dir = 'visual_pred_%s_%s' % (cfg.IMDB_NAME, test_split)
                    img_path = dp.get_img_path(int(iid_list[i]))
                    img = cv2.imread(img_path)
                    # Attribute access fails right here if the image could
                    # not be loaded, before debug_pred is called.
                    img.shape
                    debug_pred(debug_dir, count, tp_qvec[i], tp_cvec[i], img,
                               gt_boxes[i], t_rois[query_ind],
                               t_query_bbox_pred[query_ind], iou)

                percent = 100 * float(count) / num_query
                sys.stdout.write('\r' + ('%.2f' % percent) + '%')
                sys.stdout.flush()
                count += 1
                if count >= num_query:
                    break

    accuracy = num_right / float(num_query)
    print('accuracy: %f\n' % accuracy)
    return accuracy
rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(image, info, gt_boxes) scores = cls_prob.data boxes = rois.data[:, 1:5] box_deltas = bbox_pred.data if cfg.TRAIN.CLASS_AGNOSTIC: box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means box_deltas = box_deltas.view(-1, 4) else: box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means box_deltas = box_deltas.view(-1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas) pred_boxes = clip_boxes(pred_boxes, info) pred_boxes /= im_scales[0] im2show = np.copy(im) for j in range(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if cfg.TRAIN.CLASS_AGNOSTIC: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j*4:(j+1)*4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order]
def forward(self, input):
    """Generate a fixed-size batch of RPN proposals.

    ``input`` is indexed as ``(class scores, bbox deltas, im_info)``.  The
    score channels from ``self._num_anchors`` onward are used as the
    foreground scores.  For every image the proposals are decoded, clipped,
    ranked, truncated to ``rpn_pre_nms_top_n``, passed through NMS and
    truncated to ``rpn_post_nms_top_n``.

    Returns:
        Tensor of shape ``(batch_size, post_nms_topN, 5)``.  Column 0 holds
        the image index and columns 1-4 the box; rows beyond the number of
        surviving proposals keep zero coordinates.
    """
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]

    pre_nms_topN = self.cf.rpn_pre_nms_top_n
    post_nms_topN = self.cf.rpn_post_nms_top_n
    nms_thresh = self.cf.rpn_nms_thresh

    batch_size = bbox_deltas.size(0)
    feat_height = scores.size(2)
    feat_width = scores.size(3)

    # One (x, y, x, y) offset per feature-map cell.
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    xs, ys = np.meshgrid(xs, ys)
    offsets = np.vstack((xs.ravel(), ys.ravel(),
                         xs.ravel(), ys.ravel())).transpose()
    shifts = torch.from_numpy(offsets)
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Base anchors are converted to the scores' tensor type and kept on self.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Bring deltas and scores into the same row order as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations,
    # then clip predicted boxes to image.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # Rank every image's proposals by descending score in one call.
    _, order = torch.sort(scores, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        img_proposals = proposals[i]
        img_scores = scores[i]
        img_order = order[i]

        # take top pre_nms_topN (e.g. 6000)
        if 0 < pre_nms_topN < scores.numel():
            img_order = img_order[:pre_nms_topN]

        img_proposals = img_proposals[img_order, :]
        img_scores = img_scores[img_order].view(-1, 1)

        # apply nms (e.g. threshold = 0.7), then take after_nms_topN
        # (e.g. 300)
        dets = torch.cat((img_proposals, img_scores), 1)
        kept = nms(dets, nms_thresh)
        kept = kept.long().view(-1)
        if post_nms_topN > 0:
            kept = kept[:post_nms_topN]
        img_proposals = img_proposals[kept, :]
        img_scores = img_scores[kept, :]

        # Rows past the last surviving proposal stay zero-padded.
        n_kept = img_proposals.size(0)
        output[i, :, 0] = i
        output[i, :n_kept, 1:] = img_proposals

    return output
def __call__(self, locs, scores, anchor_base, batch_size, feature_shape, image_size, min_scale=1.):
    """Build region proposals from RPN location and score maps.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes **centered** on cell i
      apply predicted bbox deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS to remaining proposals
    take after_nms_topN proposals after NMS

    NOTE: when testing, remember to call ``faster_rcnn.eval()`` so that
    ``self.parent_model.training`` is False and the test-time limits apply.

    Returns:
        ``(output, anchor)``: ``output`` is the proposal set produced by the
        final iteration of the per-image loop, and ``anchor`` holds the
        enumerated anchors.
    """
    if self.parent_model.training:
        n_pre_nms, n_post_nms = self.n_train_pre_nms, self.n_train_post_nms
    else:
        n_pre_nms, n_post_nms = self.n_test_pre_nms, self.n_test_post_nms

    # the first set of _num_anchors channels are bg probs, the second set
    # are the fg probs
    scores = scores[:, self.parent_model.n_anchor:, :, :]

    # Reorder deltas and scores to match the anchor enumeration:
    # (batch_size, K*A, 4) and (batch_size, K*A).
    bbox_deltas = locs.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.reshape(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.reshape(batch_size, -1)

    # 1.1 generate A anchor boxes **centered** on each cell
    anchor = _enumerate_shifted_anchor(
        batch_size, np.array(anchor_base),
        self.parent_model.feat_stride, feature_shape)

    # 1.2 convert anchors into proposals with the bbox transformations
    roi = loc2bbox(anchor, bbox_deltas)

    # 2. clip predicted boxes to image (the number of rois is unchanged)
    roi = clip_boxes(roi, image_size, batch_size)

    # 3. remove predicted boxes with either height or width < threshold
    min_size = self.min_size * min_scale
    widths = roi[:, :, 2] - roi[:, :, 0]
    heights = roi[:, :, 3] - roi[:, :, 1]
    size_ok = (widths.numpy() >= min_size) & (heights.numpy() >= min_size)
    # Second np.where output: indices along the box axis.
    keep = np.where(size_ok)[1]
    roi_keep = roi[:, keep, :]
    scores_keep = scores[:, keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    _, order = torch.sort(scores_keep, 1, True)

    for i in range(batch_size):
        roi_single = roi_keep[i]
        score_single = scores_keep[i]
        order_single = order[i]

        # 5. take top pre_nms_topN (e.g. 6000)
        if 0 < n_pre_nms < scores_keep.numel():
            order_single = order_single[:n_pre_nms]
        roi_single = roi_single[order_single, :]
        score_single = score_single[order_single]

        # 6. apply nms (e.g. threshold = 0.7)
        nms_input = cp.ascontiguousarray(cp.asarray(roi_single))
        keep = non_maximum_suppression(nms_input, thresh=self.nms_thresh)

        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi_single = roi_single[keep, :]

        # store roi_single; each iteration replaces the previous result
        output = roi_single

    return output, anchor
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, fl_cls_prob, fl_bbox_pred, feat_stride=[16,], anchor_scales = [8, 16, 32], base_size = 10, ratios =[0.333, 0.5, 0.667, 1.0, 1.5, 2.0, 3.0], pre_nms_topN = 2000, max_nms_topN = 400, isHardware=False, num_stddev=2.0):
    """Generate RPN RoIs, optionally through the hardware-model NMS path.

    Parameters
    ----------
    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg,
        in fixed point; converted to float with ``fl_cls_prob``.
        The last-axis channels from A onward are used as the fg scores.
    rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN, in fixed
        point; converted to float with ``fl_bbox_pred``.
    im_info: a list of [image_height, image_width, scale_ratios]
    cfg_key: 'TRAIN' or 'TEST'
    fl_cls_prob, fl_bbox_pred: passed to ``convert_to_float_py``.
    feat_stride: the downsampling ratio of feature map to the original
        input image
    anchor_scales, base_size, ratios: passed to ``generate_anchors``.
    pre_nms_topN: proposals kept before NMS on the software path.
    max_nms_topN, num_stddev: passed to ``nms_hw`` on the hardware path.
    isHardware: select the hardware-model path (``nms_hw``) instead of the
        software sort + ``nms`` path.
    ----------
    Returns
    ----------
    rpn_rois : rows of [0, x1, y1, x2, y2]
    """
    _anchors = generate_anchors(base_size, ratios, anchor_scales)
    _num_anchors = _anchors.shape[0]
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    # Convert fixed point int to floats for internal calculations!
    rpn_cls_prob_reshape = convert_to_float_py(rpn_cls_prob_reshape, fl_cls_prob)
    rpn_bbox_pred = convert_to_float_py(rpn_bbox_pred, fl_bbox_pred)

    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    height, width = rpn_cls_prob_reshape.shape[1:3]

    # The first _num_anchors channels are taken as bg probs and the rest as
    # fg probs, i.e. the "(1, H, W, 2, A)" layout of the old trained model
    # (VGGnet_fast_rcnn_iter_70000.ckpt).  The interleaved alternative was
    # computed and then immediately overwritten; that dead store is gone:
    #   np.reshape(np.reshape(p, [1, H, W, A, 2])[:, :, :, :, 1], [1, H, W, A])
    scores = rpn_cls_prob_reshape[:, :, :, _num_anchors:]
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('min_size: {}'.format(min_size))
        print('max_nms_topN: {}'.format(max_nms_topN))
        print('post_nms_topN: {}'.format(post_nms_topN))

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Inputs are already channel-last, so a plain reshape gives rows
    # ordered by (h, w, a), matching the anchors.
    bbox_deltas = bbox_deltas.reshape((-1, 4))  # (HxWxA, 4)
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, isHardware)
    proposals = proposals.astype(bbox_deltas.dtype)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Min-size filtering is not done here: min_size and the image scale
    # are handed to nms_hw on the hardware path, and the software path
    # applies no size filter.

    if isHardware:
        # Proposal inputs to NMS need to be in the same order as HW or the
        # final results will be different: re-order from (h, w, a) rows to
        # anchor-major, then column, then row.
        proposals1 = np.zeros(proposals.shape)
        scores1 = np.zeros(scores.shape)
        idy = 0
        for k in range(0, A):
            for j in range(0, width):
                for i in range(0, height):
                    idx = (i * width * A) + (j * A) + k
                    scores1[idy] = scores[idx]
                    proposals1[idy] = proposals[idx]
                    print_msg(str(k) + '.' + str(j) + '.' + str(i) + ' Proposal ' + str(idy) + ' -> [' + str(int(8*scores1[idy])) + '] ' + str((16*proposals1[idy,:]).astype(int)),1)
                    idy = idy + 1

        prop, score = nms_hw(proposals1, scores1, num_stddev, nms_thresh,
                             min_size, im_info[2], max_nms_topN, post_nms_topN)
        batch_inds = np.zeros((prop.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, prop.astype(np.float32, copy=False)))
    else:
        # Software path: sort by score, keep top-N, NMS, keep top-N again.
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
        print('Number of proposals : ' + str(len(keep)))

        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob
def forward(self, scores, bbox_delta, im_info, cfg_key):
    """Turn RPN outputs into a block of region proposals for one image.

    Steps: build the shifted anchor grid, apply the predicted box deltas,
    clip the boxes, keep the highest-scoring ones, run NMS and pack the
    survivors into a 5-column tensor.

    Args:
        scores: 4-D tensor indexed as (batch, channel, height, width).
            Only channels from ``self._num_anchors`` onward are used
            (presumably the foreground scores -- confirm against the
            RPN head).
        bbox_delta: 4-D tensor; it is permuted to channel-last and viewed
            as rows of 4 values, one row per anchor. Batch size must be 1.
        im_info: Passed unchanged to ``clip_boxes``.
        cfg_key: Key into ``cfg`` selecting the ``RPN_PRE_NMS_TOP_N``,
            ``RPN_POST_NMS_TOP_N`` and ``RPN_NMS_THRESH`` settings.

    Returns:
        A tuple ``(output, anchors_reshape)``:
        ``output`` has 5 columns; column 0 is left at zero (batch id) and
        columns 1-4 hold the kept proposals. It has ``post_nms_topN`` rows
        (zero padded) when that limit is positive, otherwise exactly one
        row per kept proposal.
        ``anchors_reshape`` is the ``(K * A, 4)`` float32 NumPy array of
        all shifted anchors.

    Raises:
        AssertionError: If the batch size of ``bbox_delta`` is not 1.
    """
    scores = scores[:, self._num_anchors:, :, :]

    rpn_cfg = cfg[cfg_key]
    pre_nms_topN = rpn_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = rpn_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = rpn_cfg.RPN_NMS_THRESH

    batch_size = bbox_delta.size(0)
    assert (batch_size == 1)  # Only support batch size = 1

    # Enumerate every anchor: A base anchors shifted to each of the K
    # feature-map cells, flattened to (K * A, 4).
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    flat_x, flat_y = shift_x.ravel(), shift_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    A = self._num_anchors
    K = shifts.shape[0]
    anchors = (self._anchor.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    anchors_reshape = anchors.reshape((K * A, 4)).astype(np.float32,
                                                         copy=False)

    # Convert the anchors into proposals. Channel-last layout makes the
    # flattened rows follow the same (h, w, a) order as the anchors.
    bbox_delta = bbox_delta.permute(0, 2, 3, 1).contiguous().view(-1, 4)
    anchor_tensor = torch.from_numpy(anchors_reshape).type_as(bbox_delta)
    proposals = bbox_transform_inv(anchor_tensor, bbox_delta)
    proposals = clip_boxes(proposals, im_info)

    # Rank all proposals by score, best first.
    scores = scores.permute(0, 2, 3, 1).contiguous().view(-1)
    scores, order = scores.sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN]
    # Always make scores a column vector: the concatenation below needs a
    # 2-D tensor even when pre_nms_topN <= 0 disables the truncation.
    scores = scores.view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximal suppression
    keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]

    # TODO: batch_size > 1
    # Column 0 carries the batch id (always 0 here); columns 1-4 the boxes.
    # With no post-NMS limit there is nothing to pad to, so size the output
    # by the number of surviving proposals instead.
    num_proposal = proposals.size(0)
    num_rows = post_nms_topN if post_nms_topN > 0 else num_proposal
    # scores is only used as a dtype/device template for the output.
    output = scores.new(num_rows, 5).zero_()
    output[:num_proposal, 1:] = proposals
    return output, anchors_reshape