def detect(net, im):
    """Run the detector on a single image and return filtered detections.

    Arguments:
        net: network object handed straight to ``im_detect``
        im (ndarray): image; only ``im.shape[0:2]`` is read here

    Returns:
        scores (ndarray): per-box scores, each row multiplied by its own
            first-column score
        boxes (ndarray): boxes rescaled to the original image size,
            clipped to the image and filtered by minimum size
    """
    # im_detect works on a list of images: wrap the image, then unwrap
    # the single result.
    batch_scores, batch_boxes = im_detect(net, [im])
    scores, boxes = batch_scores[0], batch_boxes[0]

    # Keep only candidates whose first-column score beats the threshold.
    confident = np.where(scores[:, 0] > cfg.TEST.PROB)[0]
    scores, boxes = scores[confident, :], boxes[confident, :]

    # Undo the resize to the network input size. im_scales holds one
    # factor per image axis (index 0: rows/y, index 1: columns/x).
    im_shape = np.array(im.shape[0:2])
    im_scales = float(cfg.TEST.SCALES[0]) / im_shape
    boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1]
    boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0]

    # Clip to the image, then drop boxes rejected by the size filter.
    boxes = clip_boxes(boxes, im_shape)
    big_enough = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE)
    scores, boxes = scores[big_enough, :], boxes[big_enough, :]

    # Weight every score column by the first-column score of its row.
    first_col = np.tile(scores[:, 0], (len(CLASSES), 1)).transpose()
    scores = first_col * scores
    return scores, boxes
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes

    NOTE(review): the forward pass below feeds ``blobs['data']`` and
    ``blobs['rois']`` as the original placeholder comments requested, so
    ``_get_blobs`` must provide a 'rois' blob. A pure-RPN setup
    (``boxes=None``) is not handled here -- confirm against callers.
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # reshape network inputs to match the blobs about to be fed in
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward with the image blob and the proposals
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'rois': blobs['rois'].astype(np.float32, copy=False),
    }
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities (net output)
    # NOTE(review): output name 'cls_prob' mirrors the sibling im_detect
    # variants in this file -- confirm it matches the deployed prototxt.
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def im_detectreg(net, im, boxes=None):
    """Run the network on one image and return the regressed boxes only.

    Arguments:
        net (caffe.Net): network exposing 'data' and 'im_info' inputs, a
            'rois' blob and a 'one_bbox_pred' output
        im (ndarray): image to test; ``im.shape`` is used for clipping
        boxes (ndarray): forwarded to ``_get_blobs`` (may be None)

    Returns:
        pred_boxes (ndarray): the network's rois (rescaled to raw image
            space) after applying the predicted deltas and clipping
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # im_info row = (blob height, blob width, image scale)
    data_blob = blobs['data']
    blobs['im_info'] = np.array(
        [np.hstack((data_blob.shape[2], data_blob.shape[3], im_scales[0]))],
        dtype=np.float32)

    # reshape network inputs, then run the forward pass
    input_names = ('data', 'im_info')
    for name in input_names:
        net.blobs[name].reshape(*(blobs[name].shape))
    net_inputs = {name: blobs[name].astype(np.float32, copy=False)
                  for name in input_names}
    blobs_out = net.forward(**net_inputs)

    assert len(im_scales) == 1, "Only single-image batch implemented"

    # unscale the rois back to raw image space
    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]

    # The output is not named 'bbox_pred' and train snapshot saving is not
    # modified, so the deltas are de-normalized here with fixed stats.
    # TODO add means and stds global average
    bbox_means = np.array([0, 0, 0, 0], dtype=np.float32)
    bbox_stds = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
    box_deltas = blobs_out['one_bbox_pred'] * bbox_stds + bbox_means

    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    return clip_boxes(pred_boxes, im.shape)
def gao(net):
    """Debug helper: print and visualize head detections held in ``net``.

    Reads the 'data', 'cls_prob', 'cls_prob_repool_head', 'head_repool'
    and 'head_pred_repool' blobs of an already-forwarded net, decodes the
    boxes for column block 4:8, keeps those whose class-1 probability
    exceeds 0.05, prints diagnostics and calls ``vis_detections``.
    """
    from fast_rcnn.bbox_transform import clip_boxes, bbox_transform_inv

    # Rebuild a displayable image from the first item of the data blob:
    # C x H x W -> H x W x C, add the pixel means back, cast to uint8.
    im = net.blobs['data'].data.copy()[0, :, :, :].transpose(1, 2, 0)
    im += cfg.PIXEL_MEANS
    im = im.astype(np.uint8, copy=False)

    cls_prob = net.blobs['cls_prob'].data.copy()
    head_prob = net.blobs['cls_prob_repool_head'].data.copy()

    # rois carry a leading column that is skipped; 1:5 are the coordinates
    boxes = net.blobs['head_repool'].data.copy()[:, 1:5]

    # original author's note: bbox targets are (128, 8), labels are (128,)
    head_deltas = net.blobs['head_pred_repool'].data.copy()
    pred_boxes = clip_boxes(bbox_transform_inv(boxes, head_deltas), im.shape)

    # keep boxes whose class-1 probability is above 0.05
    inds = np.where(head_prob[:, 1] > 0.05)[0]
    head_scores = head_prob[inds, 1]
    head_boxes = pred_boxes[inds, 4:8]
    cls_head_dets = np.hstack(
        (head_boxes, head_scores[:, np.newaxis])
    ).astype(np.float32, copy=False)
    cls_boxes = cls_head_dets

    print(cls_head_dets.shape)
    print(cls_head_dets[0])

    # Disabled in the original (NMS over the head detections):
    #   keep = nms(cls_head_dets, cfg.TEST.NMS)
    #   head_NMSed = cls_head_dets[keep, :]
    #   cls_boxes = head_NMSed

    print(cls_head_dets.shape)
    print(cls_head_dets[0:10])
    print(cls_prob[0:10])
    print(head_prob[0:10])

    # Disabled in the original (probability plots):
    #   plt.figure()
    #   plt.plot(cls_prob[:, 1])
    #   plt.figure()
    #   plt.plot(cls_prob_repool_head[:, 1])
    #   plt.show()

    vis_detections(im, cls_boxes)
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    dedup = cfg.DEDUP_BOXES > 0
    if dedup:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # reshape network inputs: the number of rois (and the image size)
    # changes from call to call, and pycaffe's forward() refuses inputs
    # whose leading dimension differs from the blob's current shape.
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'rois': blobs['rois'].astype(np.float32, copy=False),
    }
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if dedup:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
def im_detect(net, im):
    """Run the RPN on an image, display confident proposals, return them.

    Arguments:
        net (caffe.Net): network exposing 'data' and 'im_info' inputs and
            'rois' / 'rpn_scores' blobs
        im (ndarray): image to test; it is drawn on IN PLACE by the
            visualization below

    Returns:
        scores (ndarray): copy of the 'rpn_scores' blob
        pred_boxes (ndarray): proposal boxes clipped to the resized image
            and divided by the image scale

    Side effects: opens an OpenCV window named "test", blocks on a key
    press and calls ``sys.exit(0)`` when ESC (key code 27) is pressed.
    """
    blobs, im_scale = _get_blobs(im)
    resized_shape = (int(im.shape[0] * im_scale),
                     int(im.shape[1] * im_scale),
                     im.shape[2])

    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    forward_kwargs = {
        'data': blobs['data'].astype(np.float32, copy=False),
        'im_info': blobs['im_info'].astype(np.float32, copy=False),
    }
    # Only the intermediate blobs are read afterwards, so the returned
    # output dict is not kept.
    net.forward(**forward_kwargs)

    # return the proposal rois themselves (no class-specific regression)
    rois = net.blobs['rois'].data.copy()
    pred_boxes = rois[:, 1:5]
    scores = net.blobs['rpn_scores'].data.copy()

    pred_boxes = clip_boxes(pred_boxes, resized_shape)
    pred_boxes = pred_boxes / im_scale

    color = (0, 0, 255)
    for i, bbox in enumerate(pred_boxes):
        score = scores[i]
        if score < 0.95:
            continue
        # OpenCV drawing functions need integer pixel coordinates; the
        # boxes are floats after the division above.
        x1, y1, x2, y2 = [int(coord) for coord in bbox[:4]]
        cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
        info = "{}".format(str(score)[:4])
        cv2.putText(im, info, (x1, y1),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    # NOTE(review): the collapsed source does not show whether the display
    # happened once per image or once per box; once per image is assumed.
    window = "test"
    cv2.imshow(window, im)
    if cv2.waitKey(-1) == 27:
        sys.exit(0)

    return scores, pred_boxes
def forward(self, bottom, top):
    """Decode proposals with the second block of predicted deltas.

    bottom[0] holds proposals (first column skipped), bottom[1] the
    predicted deltas (viewed as rows of 8 values, of which columns 4:8
    are used) and bottom[2] the image info. The decoded, clipped boxes
    are written to top[0] as 5-column rois whose first column is zero.
    """
    rois_in = bottom[0].data[:, 1:]
    deltas = bottom[1].data.reshape((-1, 8))
    im_info = bottom[2].data

    decoded = bbox_transform_inv(rois_in, deltas[:, 4:])
    decoded = clip_boxes(decoded, [im_info[0, 0], im_info[0, 1]])

    # prepend a zero column in front of the four box coordinates
    rois_out = np.zeros((decoded.shape[0], 5))
    rois_out[:, 1:] = decoded

    top[0].reshape(*rois_out.shape)
    top[0].data[...] = rois_out
def im_detect_split(net, im, boxes, use_wzctx):
    """Detect objects by forwarding each image of the data blob separately.

    Arguments:
        net (caffe.Net): network with 'data' and 'rois' inputs and
            'cls_prob' / 'bbox_pred_avg' outputs
        im (ndarray): image to test
        boxes (ndarray): R x 4 array of object proposals
        use_wzctx: accepted for interface compatibility; not read here

    Returns:
        scores (ndarray): R x 201 array of class scores
        boxes (ndarray): predicted boxes decoded from the R x (4*201)
            deltas and clipped to the image
    """
    blobs_all, im_scales = _get_blobs(im, boxes)

    num_boxes = boxes.shape[0]
    scores = np.zeros((num_boxes, 201), dtype=np.float32)
    box_deltas = np.zeros((num_boxes, 4 * 201), dtype=np.float32)

    for batch_idx in range(blobs_all['data'].shape[0]):
        # rois whose batch index (column 0) points at this image
        inds = np.where(blobs_all['rois'][:, 0] == batch_idx)[0]
        if inds.shape[0] == 0:
            continue

        data_blob = blobs_all['data'][[batch_idx]]
        rois_blob = blobs_all['rois'][inds]
        # the image is forwarded alone, so its batch index becomes 0
        rois_blob[:, 0] = 0

        # reshape network inputs
        net.blobs['data'].reshape(*(data_blob.shape))
        net.blobs['rois'].reshape(*(rois_blob.shape))

        # do forward
        blobs_out = net.forward(
            data=data_blob.astype(np.float32, copy=False),
            rois=rois_blob.astype(np.float32, copy=False))

        # scatter this image's results back into the full arrays
        scores[inds] = blobs_out['cls_prob']
        box_deltas[inds] = blobs_out['bbox_pred_avg']

    # Apply bounding-box regression deltas
    pred_boxes = clip_boxes(bbox_transform_inv(boxes, box_deltas), im.shape)
    return scores, pred_boxes
def transform_kp_to_box(gt_keyPoints, gt_boxes, im, fh=3):
    """Derive bounding boxes from ground-truth key points.

    Arguments:
        gt_keyPoints (ndarray): key points, one row per annotation, laid
            out so that each row reshapes to (kp_num, 2) = (x, y) pairs
        gt_boxes (ndarray): fallback boxes, used (when
            ``cfg.FILTER_INVALID_BOX`` is off) for rows whose key points
            sum to zero
        im (str): image path; read with ``cv2.imread`` only when
            ``cfg.CLIP_BOXES`` is set
        fh (number): the top edge is raised by (box height / fh) when a
            WIDER-face style is enabled

    Returns:
        boxes (ndarray): N x 4 array of (x1, y1, x2, y2)

    Fixes relative to the previous version:
      * ``kp_num`` is computed up front; it used to be defined only in
        the FILTER_INVALID_BOX branch although the other branch needs it.
      * integer division is explicit, so reshape gets an int on Python 3.
      * horizontal offsets are reshaped to columns; subtracting an (N,)
        vector from an (N, 1) column silently produced an (N, N) matrix.
    """
    kp_num = int(cfg.TRAIN.ATTRIBUTES[0]['gt_keyPoints'] // 2)

    if cfg.FILTER_INVALID_BOX:
        gt_keyPoints = gt_keyPoints.reshape([-1, kp_num, 2])
        x1 = np.min(gt_keyPoints[:, :, 0], 1).reshape(-1, 1)
        y1 = np.min(gt_keyPoints[:, :, 1], 1).reshape(-1, 1)
        x2 = np.max(gt_keyPoints[:, :, 0], 1).reshape(-1, 1)
        y2 = np.max(gt_keyPoints[:, :, 1], 1).reshape(-1, 1)

        def _x_gap(right_idx, left_idx):
            # column vector of x-distances between two key points
            return (gt_keyPoints[:, right_idx, 0] -
                    gt_keyPoints[:, left_idx, 0]).reshape(-1, 1)

        if cfg.WIDER_FACE_STYLE == 1:
            y1 = y1 - (y2 - y1) / fh
        elif cfg.WIDER_FACE_STYLE == 2:
            if kp_num == 19:  # aflw-full
                y1 = y1 - (y2 - y1) / fh
                x1 = x1 - _x_gap(1, 0)
                x2 = x2 + _x_gap(5, 4)
            elif kp_num == 29:  # cofw
                y1 = y1 - (y2 - y1) / fh
                x1 = x1 - _x_gap(4, 0)
                x2 = x2 + _x_gap(1, 6)
        boxes = np.hstack([x1, y1, x2, y2])
    else:
        boxes = np.zeros([gt_keyPoints.shape[0], 4])
        for i, gt_keyPoint in enumerate(gt_keyPoints):
            if sum(gt_keyPoint) != 0:
                points = gt_keyPoint.reshape([kp_num, 2])
                x1 = np.min(points[:, 0])
                y1 = np.min(points[:, 1])
                x2 = np.max(points[:, 0])
                y2 = np.max(points[:, 1])
                if cfg.WIDER_FACE_STYLE:
                    y1 = y1 - (y2 - y1) / fh
                boxes[i] = [x1, y1, x2, y2]
            else:
                # no usable key points: fall back to the annotated box
                boxes[i] = gt_boxes[i]

    if cfg.CLIP_BOXES:
        im_shape = cv2.imread(im).shape[0:2]
        boxes = clip_boxes(boxes, im_shape)
    return boxes
def compute_rois_offset(rois, offset, im_info=None):
    """Apply bounding-box offsets to regions of interest.

    Arguments:
        rois (ndarray): N x 4 boxes
        offset (ndarray): N x 4 regression offsets; treated as normalized
            when ``cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED`` is set
        im_info: optional; when given, its first two entries are passed
            to ``clip_boxes`` to clip the result

    Returns:
        ndarray: the rois after applying the (de-normalized) offsets
    """
    assert rois.shape[1] == 4
    assert offset.shape[1] == 4

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # reverse the precomputed mean/stdev normalization of the targets
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        raw_offset = offset * stds + means
    else:
        raw_offset = offset.copy()

    shifted = bbox_transform_inv(rois, raw_offset)
    if im_info is None:
        return shifted
    return clip_boxes(shifted, im_info[:2])
def forward(self, bottom, top):
    """Decode rois with the predicted deltas and emit them for re-pooling.

    bottom[0] holds rois (columns 1:5 are the box), bottom[1] the box
    deltas and bottom[2] the image info (row 0, entries 0 and 1 are used
    as the clipping shape). Columns 4 onward of the decoded boxes are
    written to top[0] with a leading zero column.
    """
    rois_in = bottom[0].data.copy()[:, 1:5]
    deltas = bottom[1].data.copy()
    im_info = bottom[2].data.copy()
    clip_shape = (im_info[0, 0], im_info[0, 1])

    decoded = clip_boxes(bbox_transform_inv(rois_in, deltas), clip_shape)

    # skip the first 4-column block of the decoded boxes
    kept = decoded[:, 4:]
    batch_col = np.zeros((kept.shape[0], 1), dtype=np.float32)
    rois_repool = np.hstack((batch_col, kept))

    top[0].reshape(*(rois_repool.shape))
    top[0].data[...] = rois_repool.astype(np.float32, copy=False)
def unnormalize_box(Phi_labels, bbox_targets, boxes, stds, means, M,
                    im_shape_w, im_shape_h):
    """Un-normalize the per-label box targets and decode them into boxes.

    Arguments:
        Phi_labels (ndarray): length-M integer label per row; selects the
            4-column block ``4*label : 4*label + 4`` of ``bbox_targets``
            and the row of ``stds`` / ``means``
        bbox_targets (ndarray): M x (4*K) normalized targets
        boxes (ndarray): M x 4 boxes the targets are relative to
        stds, means (ndarray): per-label normalization stats (K x 4)
        M (int): number of rows
        im_shape_w, im_shape_h: forwarded to ``clip_boxes`` as the tuple
            ``(im_shape_w, im_shape_h)``

    Returns:
        ndarray: M x 4 decoded, clipped boxes

    The stats used to be indexed with ``Phi_argmax / 4``, which is a
    float array under true division and cannot be used as an index. That
    quotient is just the labels, so they are used directly.
    """
    labels = np.asarray(Phi_labels)

    # Row i takes columns 4*label_i .. 4*label_i + 3 of its own row.
    rows = np.arange(M)[:, np.newaxis]
    cols = 4 * labels[:, np.newaxis] + np.arange(4)
    bbox_target = bbox_targets[rows, cols]

    bbox_target = bbox_target * stds[labels, :] + means[labels, :]

    unnormalized_bbox_targets = bbox_transform_inv(boxes, bbox_target)
    # NOTE(review): the shape is passed as (w, h); confirm this is the
    # order clip_boxes expects.
    unnormalized_bbox_targets = clip_boxes(unnormalized_bbox_targets,
                                           (im_shape_w, im_shape_h))
    return unnormalized_bbox_targets
def im_detect(net, im, boxes=None):
    """Detect object classes in an image using the network's own rois.

    Arguments:
        net (caffe.Net): network with 'data' and 'im_info' inputs, a
            'rois' blob and 'cls_prob' / 'bbox_pred' outputs
        im (ndarray): color image to test
        boxes (ndarray): forwarded to ``_get_blobs`` (may be None)

    Returns:
        scores (ndarray): the 'cls_prob' output
        boxes (ndarray): rois rescaled to the raw image, refined with the
            'bbox_pred' deltas and clipped to the image
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # im_info row = (blob height, blob width, image scale)
    data_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[data_blob.shape[2], data_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # reshape network inputs, then do forward
    input_names = ('data', 'im_info')
    for name in input_names:
        net.blobs[name].reshape(*(blobs[name].shape))
    net_inputs = {name: blobs[name].astype(np.float32, copy=False)
                  for name in input_names}
    blobs_out = net.forward(**net_inputs)

    # rois live in the scaled image; bring them back to raw image space
    rois = net.blobs['rois'].data.copy()
    boxes = rois[:, 1:5] / im_scales[0]

    scores = blobs_out['cls_prob']

    # Apply bounding-box regression deltas
    pred_boxes = bbox_transform_inv(boxes, blobs_out['bbox_pred'])
    pred_boxes = clip_boxes(pred_boxes, im.shape)
    return scores, pred_boxes
def run_single(sess, net, inputs, outputs, im, boxes, relations, bbox_reg,
               multi_iter):
    """Run the graph on one image and collect class/box/relation outputs.

    Arguments:
        sess: session whose ``run`` evaluates ``outputs``
        net: model object; only ``net.keep_prob`` is read (fed with 1)
        inputs (dict): placeholders keyed by name
        outputs: fetches handed to ``sess.run``
        im (ndarray): image
        boxes (ndarray): object proposals
        relations: candidate (subject, object) index pairs
        bbox_reg (bool): apply the predicted box deltas when true
        multi_iter: sequence; its last element selects which entry of
            each output to read

    Returns:
        dict with 'scores', 'boxes' and 'relations' (a num_roi x num_roi
        x P array filled at the listed pairs, zero elsewhere)
    """
    blobs, im_scales = _get_blobs(im, boxes)

    relations = np.array(relations, dtype=np.int32)  # all possible combinations
    num_roi = blobs['rois'].shape[0]
    num_rel = relations.shape[0]

    feed_dict = {
        inputs['ims']: blobs['data'],
        inputs['rois']: blobs['rois'],
        inputs['relations']: relations,
        net.keep_prob: 1,
    }
    graph_inputs = data_utils.create_graph_data(num_roi, num_rel, relations)
    for key in graph_inputs:
        feed_dict[inputs[key]] = graph_inputs[key]

    # compute relation rois
    feed_dict[inputs['rel_rois']] = data_utils.compute_rel_rois(
        num_rel, blobs['rois'], relations)

    ops_value = sess.run(outputs, feed_dict=feed_dict)

    mi = multi_iter[-1]
    rel_probs_flat = ops_value['rel_probs'][mi]

    # scatter the flat per-pair predictions into a dense pair matrix
    rel_probs = np.zeros([num_roi, num_roi, rel_probs_flat.shape[1]])
    for row, pair in enumerate(relations):
        rel_probs[pair[0], pair[1], :] = rel_probs_flat[row, :]

    cls_probs = ops_value['cls_probs'][mi]

    if bbox_reg:
        # Apply bounding-box regression deltas
        pred_boxes = clip_boxes(
            bbox_transform_inv(boxes, ops_value['bbox_deltas'][mi]),
            im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, cls_probs.shape[1]))

    return {'scores': cls_probs, 'boxes': pred_boxes, 'relations': rel_probs}
def im_detect_tensorflow(sess_tuple, im):
    """Detect object classes in an image with a TensorFlow session.

    Arguments:
        sess_tuple: (session, data placeholder, im_info placeholder,
            rois tensor, class-probability tensor, bbox-delta tensor)
        im (ndarray): color image to test (in BGR order)

    Returns:
        scores (ndarray): R x K array of object class scores
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    (sess, ph_data, ph_im_info,
     out_rois, out_cls_prob, out_bbox_pred) = sess_tuple

    blobs, im_scales = _get_blobs(im, rois=None)

    # im_info row = (blob height, blob width, image scale)
    caffe_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[caffe_blob.shape[2], caffe_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # Convert Caffe format to tensorflow format:
    # reverse the channel axis (BGR to RGB), N x C x H x W to N x H x W x C
    blobs['data'] = np.transpose(blobs['data'][:, ::-1, :, :], (0, 2, 3, 1))

    feed = {ph_data: blobs['data'], ph_im_info: blobs['im_info']}
    rois, scores, box_deltas = sess.run(
        (out_rois, out_cls_prob, out_bbox_pred), feed)

    assert len(im_scales) == 1, "Only single-image batch implemented"

    # unscale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply bounding-box regression deltas
    pred_boxes = clip_boxes(bbox_transform_inv(boxes, box_deltas), im.shape)
    return scores, pred_boxes
def pred_box_trans(rois, cls_pred, bbox_deltas, im_scale, im_shape):
    """Pick, for every roi, the regressed box of its top-scoring class.

    Arguments:
        rois (ndarray): N x 5 proposals; column 0 is skipped and columns
            1:5 are used as the box, in scaled-image coordinates
        cls_pred (ndarray): N x K class scores
        bbox_deltas (ndarray): N x (4*K) per-class box deltas
        im_scale: factor between the raw and the scaled image
        im_shape: shape handed to ``clip_boxes``

    Returns:
        pred_boxes (ndarray): N x 4 boxes of the arg-max class, multiplied
            back by ``im_scale`` (i.e. in scaled-image coordinates)

    The per-row selection used to grow the result with ``np.vstack``
    inside a loop, which is quadratic in N; it is now a single gather.
    """
    boxes = rois[:, 1:5] / im_scale
    boxes = bbox_transform_inv(boxes, bbox_deltas)
    boxes = clip_boxes(boxes, im_shape)

    # best class per roi, then its 4 columns: 4*c .. 4*c + 3
    cat_ids = np.argmax(cls_pred, axis=1)
    rows = np.arange(cat_ids.shape[0])[:, np.newaxis]
    cols = cat_ids[:, np.newaxis] * 4 + np.arange(4)

    # The loop version stacked onto a float64 array, so keep that dtype
    # promotion (this also yields a fresh array for the in-place scale).
    out_dtype = np.result_type(np.float64, boxes.dtype)
    pred_boxes = boxes[rows, cols].astype(out_dtype)

    pred_boxes *= im_scale
    return pred_boxes
def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, im_shape,
                          nms=True, clip=True, min_score=0.0):
    """Turn raw network outputs into final detections.

    Arguments:
        cls_prob: tensor wrapper of per-roi class probabilities (``.data``)
        bbox_pred: tensor wrapper of per-roi box deltas (``.data``)
        rois: tensor wrapper of rois; columns 1:5 are the box
        im_info: ``im_info[0][2]`` is the image scale
        im_shape: shape used for clipping
        nms (bool): run ``nms_detections`` (threshold 0.3) on the result
        clip (bool): clip boxes to ``im_shape``
        min_score (float): minimum top-class probability to keep a roi

    Returns:
        (pred_boxes, scores, class names) for the kept detections

    Changes from the previous version: ``np.float`` (removed in NumPy
    1.24) is replaced by ``np.float64``, and the deltas are sliced
    instead of rebuilt row by row, so an empty selection keeps its 2-D
    shape.
    """
    # find the top class and its probability for every roi
    scores, inds = cls_prob.data.max(1)
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

    # drop class 0 and low-confidence rois
    keep = np.where((inds > 0) & (scores >= min_score))[0]
    scores, inds = scores[keep], inds[keep]

    # Apply bounding-box regression deltas.
    # Columns 4 onward are used for every kept roi. A per-class selection
    # (columns inds*4 : inds*4 + 4) was disabled in the original.
    # NOTE(review): confirm the delta layout matches what
    # bbox_transform_inv expects for more than two classes.
    box_deltas = bbox_pred.data.cpu().numpy()[keep]
    box_deltas = box_deltas[:, 4:].astype(np.float64)

    boxes = rois.data.cpu().numpy()[keep, 1:5] / im_info[0][2]
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    if clip:
        pred_boxes = clip_boxes(pred_boxes, im_shape)

    # nms
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds = nms_detections(pred_boxes, scores, 0.3,
                                                  inds=inds)

    return pred_boxes, scores, self.classes[inds]
def caption(self, im_path, gt_objects=None, gt_regions=None, thr=0.0,
            nms=False, top_N=100, clip=True, use_beam_search=False):
    """Caption the regions of the image stored at ``im_path``.

    Arguments:
        im_path (str): image file, read with ``cv2.imread``
        gt_objects, gt_regions (ndarray or None): optional ground truth;
            their first four columns are rescaled IN PLACE by the image
            scale before being passed to the model
        thr, nms, top_N: accepted for interface compatibility; not read
        clip (bool): clip predicted boxes to the image
        use_beam_search (bool): forwarded to the model call

    Returns:
        (region captions, log-probabilities, predicted boxes)
    """
    image = cv2.imread(im_path)
    im_data, im_scales = self.get_image_blob_noscale(image)
    scale = im_scales[0]

    # bring ground truth into the coordinates of the network input
    for gt in (gt_objects, gt_regions):
        if gt is not None:
            gt[:, :4] = gt[:, :4] * scale

    im_info = np.array([[im_data.shape[1], im_data.shape[2], scale]],
                       dtype=np.float32)

    # the region branch is the third element of the model's output
    region_result = self(im_data, im_info, gt_objects,
                         gt_regions=gt_regions,
                         use_beam_search=use_beam_search)[2]
    region_caption, bbox_pred, region_rois, logprobs = region_result[:]

    # rois back to raw image space, then apply the predicted deltas
    boxes = region_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]
    box_deltas = bbox_pred.data.cpu().numpy()
    pred_boxes = bbox_transform_inv_hdn(boxes, box_deltas)
    if clip:
        pred_boxes = clip_boxes(pred_boxes, image.shape)

    return (region_caption.numpy(), logprobs.numpy(), pred_boxes)
def object_detection(self, image_path, gt_boxes=None):
    """Detect objects in the image stored at ``image_path``.

    Arguments:
        image_path (str): image file, read with ``cv2.imread``
        gt_boxes (ndarray or None): optional boxes; their first four
            columns are rescaled IN PLACE by the image scale (they are
            not otherwise used here)

    Returns:
        (cls_id, object_score, regressed_boxes): one entry per
        (roi, class) pair whose probability exceeds 1/150 and whose
        class index is greater than 0.

    Changes from the previous version: ``np.float`` (removed in NumPy
    1.24) is replaced by ``np.float64``, and the per-pair deltas and
    scores are gathered with one fancy-indexing step, so an empty
    selection has shape (0, 4) rather than (0,).
    """
    min_score = 1 / 150.
    image = cv2.imread(image_path)
    im_data, im_scales = self.get_image_blob_noscale(image)
    if gt_boxes is not None:
        gt_boxes[:, :4] = gt_boxes[:, :4] * im_scales[0]

    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
        dtype=np.float32)

    # the object branch is the first element of the model's output
    object_result = self(im_data, im_info)[0]
    cls_prob_object, bbox_object, object_rois = object_result[:]
    prob_object = F.softmax(cls_prob_object)
    prob = prob_object.cpu().data.numpy()
    boxes = object_rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

    # every (roi, class) pair above the threshold, class 0 excluded
    box_id, cls_id = np.where(prob > min_score)
    foreground = cls_id > 0
    box_id, cls_id = box_id[foreground], cls_id[foreground]

    # pick columns 4*c .. 4*c + 3 of the roi's row for each kept pair
    box_deltas = bbox_object.data.cpu().numpy()
    cols = cls_id[:, np.newaxis] * 4 + np.arange(4)
    new_box_delta = box_deltas[box_id[:, np.newaxis], cols].astype(np.float64)

    regressed_boxes = bbox_transform_inv_hdn(boxes[box_id], new_box_delta)
    regressed_boxes = clip_boxes(regressed_boxes, image.shape)

    object_score = prob[box_id, cls_id].astype(np.float64)
    return (cls_id, object_score, regressed_boxes)
def compute_kernel(self, labels, boxes, Phi, loc_argmax,
                   unnormalized_bbox_targets, im_shape_w, im_shape_h):
    """Compute the DPP kernel matrix for the rois of one image.

    Arguments:
        labels (ndarray): label per roi; rows with label 0 get a fixed
            IoU-with-target of 0.5
        boxes (ndarray): M x 4 rois
        Phi (ndarray): per-roi quality term, scaled here by the IoU with
            the target
        loc_argmax (ndarray): box deltas applied to ``boxes``
        unnormalized_bbox_targets (ndarray): target boxes
        im_shape_w, im_shape_h: passed to ``clip_boxes`` as ``(w, h)``

    Returns:
        (IoU, S, L, IoU_with_gt_all, pred_boxes, det(L + I))
    """
    M = boxes.shape[0]  # number of rois of 1 image in the minibatch

    pred_boxes = clip_boxes(bbox_transform_inv(boxes, loc_argmax),
                            (im_shape_w, im_shape_h))

    IoU_with_gt_all = IoU_target(pred_boxes, unnormalized_bbox_targets)
    # nonzero argmax labels for background images will have wrong target boxes
    background_rows = np.where(labels == 0)[0]
    IoU_with_gt_all[background_rows] = 0.5

    # class-similarity between every pair of rois
    class_idx = labels - 1
    sim_images = self.sim_classes[class_idx, :][:, class_idx]

    # Compute IoU, S, Phi, L
    IoU = pair_IoU(pred_boxes)
    S = np.multiply(IoU, sim_images) + self.epsilon * np.eye(M, M)
    Phi = np.multiply(IoU_with_gt_all, Phi)

    phi_repeated = np.reshape(np.repeat(Phi, M), (M, M))
    phi_tiled = np.reshape(np.tile(Phi, M), (M, M))
    L = phi_repeated * S * phi_tiled

    det_L_I = np.linalg.det(L + np.eye(M))
    return IoU, S, L, IoU_with_gt_all, pred_boxes, det_L_I
def forward(self, bottom, top):
    """Merge multi-scale RON proposals, filter them and emit rois.

    Steps:
      1. gather rois / scores from the bottoms selected by
         ``cfg.USED_RPN_NO`` (rois first, then scores, offset by
         ``len(cfg.MULTI_SCALE_RPN_NO)``)
      2. keep proposals above the probability threshold, relaxing the
         threshold by 0.1 at a time while nothing survives
      3. clip to the image and drop boxes rejected by ``_filter_boxes``
      4. either apply NMS and keep the top ``RON_NMS_TOP_N``, or sort by
         score and keep the top ``PRE_RON_NMS_TOP_N``
      5. optionally replace everything by the ground-truth annotations
      6. write rois (top[0]), scores (top[1]) and source-map ids (top[2])

    Changes from the previous version: ``np.int`` (removed in NumPy 1.24)
    is replaced by the builtin ``int`` it aliased; debug prints use the
    call form valid on Python 2 and 3; ground-truth scores are reshaped
    to a column of any length instead of exactly one row.
    """
    cfg_key = str('TRAIN' if self.phase == 0 else 'TEST')  # either 'TRAIN' or 'TEST'
    enable_nms = cfg[cfg_key].ENABLE_NMS
    nms_thresh = cfg[cfg_key].NMS
    pre_nms_topN = cfg[cfg_key].PRE_RON_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RON_NMS_TOP_N

    im_info = bottom[-1].data[0, :]

    # RON: accumulate proposals from every used prediction map
    rois = np.zeros((0, 4), dtype=np.float32)
    rois_scores = np.zeros((0, 1), dtype=np.float32)  # 2 class
    rois_rpn_nos = np.zeros((0, 1), dtype=int)
    RPN_NO_sum = len(cfg.MULTI_SCALE_RPN_NO)
    for used_rpn_no in cfg.USED_RPN_NO:
        if used_rpn_no in cfg.MULTI_SCALE_RPN_NO:
            rpn_no = cfg.MULTI_SCALE_RPN_NO.index(used_rpn_no)
            map_rois = bottom[rpn_no].data[0]
            rois = np.concatenate((rois, map_rois), axis=0)
            rois_scores = np.concatenate(
                (rois_scores, bottom[rpn_no + RPN_NO_sum].data[0]), axis=0)
            # remember which prediction map each proposal came from
            rois_rpn_nos = np.concatenate(
                (rois_rpn_nos,
                 np.repeat([int(used_rpn_no)],
                           map_rois.shape[0]).reshape(-1, 1)),
                axis=0)

    # reshape rois (-1, 4)
    if len(rois.shape) == 4:
        rois = rois.reshape(rois.shape[0], rois.shape[1])

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. The bottoms already hold decoded proposals, so no bbox
    #    transformation is applied here.
    proposals = rois.copy()
    scores = rois_scores.copy()

    # 1.5 filter boxes according to prob scores; if nothing passes, relax
    #     the threshold in steps of 0.1 (never below 0)
    pro_thresh = cfg[cfg_key].PROB
    while True:
        keeps = np.where(scores[:, 0] > pro_thresh)[0]
        if len(keeps) == 0 and pro_thresh - 0.1 >= 0:
            pro_thresh = pro_thresh - 0.1
        else:
            break
    scores = scores[keeps, :]
    proposals = proposals[keeps, :]
    rois_rpn_nos = rois_rpn_nos[keeps, :]

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    #    (the threshold is used as-is, not rescaled by im_info[2])
    keep = _filter_boxes(proposals, cfg[cfg_key].RON_MIN_SIZE)
    proposals = proposals[keep, :]
    scores = scores[keep]
    rois_rpn_nos = rois_rpn_nos[keep]

    # 4-8. rank the proposals and keep the best ones
    if enable_nms:
        nms_keep = nms(np.hstack((proposals, scores)), nms_thresh)
        nms_keep = nms_keep[:post_nms_topN]
        proposals = proposals[nms_keep, :]
        scores = scores[nms_keep]
        rois_rpn_nos = rois_rpn_nos[nms_keep]
    else:
        order = scores.ravel().argsort()[::-1]
        order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        rois_rpn_nos = rois_rpn_nos[order]

    # using gt as roi
    # NOTE(review): rois_rpn_nos is not replaced in this branch, so
    # top[2] keeps the length of the filtered proposals.
    if cfg[cfg_key].USING_GT:
        gt_kps = cfg.TRAIN.ANNOINFOS[:, 5:]
        gt_boxes = cfg.TRAIN.ANNOINFOS[:, :4]
        if cfg.TRANSFORM_KP_TO_BOX:
            gt_boxes = transform_kp_to_box(gt_kps, gt_boxes,
                                           cfg.TRAIN.VISUAL_ANCHORS_IMG)
        proposals = gt_boxes * cfg.TRAIN.VISUAL_ANCHORS_IMG_SCALE
        scores = cfg.TRAIN.ANNOINFOS[:, 4].reshape(-1, 1)

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores

    # [Optional] output the source prediction-map id of every proposal
    if len(top) > 2:
        top[2].reshape(*(rois_rpn_nos.shape))
        top[2].data[...] = rois_rpn_nos
def forward(self, bottom, top):
    """Build subject/object training blobs for relationship prediction.

    Reads rpn boxes (bottom[0]), box deltas (bottom[1]), class scores
    (bottom[2]), image info (bottom[3]), an image index (bottom[4]) and
    the image data (bottom[5]). Detections are decoded and NMS-filtered
    per class, matched against the annotated subject/object boxes of the
    image, and the matched pairs are written to the tops named in
    ``self._name_to_top_map``. When nothing usable is produced the blobs
    of a previous call (``self._prev_blob``) are reused.

    NOTE(review): this block is Python 2 only (``xrange``, ``iteritems``)
    and uses ``np.float``, which NumPy 1.24 removed.
    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- verify it against the upstream file.
    """
    # prep incoming data==========
    rpn_boxes = bottom[0].data.copy()
    bbox_pred = bottom[1].data
    scores = bottom[2].data
    im_info = bottom[3].data[0]
    im_idx = int(bottom[4].data)
    # C x H x W -> H x W x C for the OpenCV calls below
    im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()
    m = self.meta
    im_id = self._image_id[im_idx]
    r_anno = self.r_anno[im_id]
    # prep done============

    # prep blobs for forward
    blobs = {}
    s_classeme = []
    s_rois = []
    s_rois_encoded = []
    o_classeme = []
    o_rois = []
    o_rois_encoded = []
    relation_label = []
    gt_boxes = []
    if hasattr(r_anno, 'relationship'):
        # im_info[2] is used as the image scale: divide to get back to
        # original-image coordinates before decoding the deltas
        rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
        boxes = rpn_boxes_img_coor
        boxes = bbox_transform_inv(boxes, bbox_pred)
        boxes = clip_boxes(
            boxes, (im_info[0] / im_info[2], im_info[1] / im_info[2]))

        # rebuild an 8-bit image at the original size (only handed to the
        # visualization helpers further down)
        cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
        im_data = im_data.astype(np.uint8)
        origsz = (im_info[1] / im_info[2], im_info[0] / im_info[2])
        im_data = cv2.resize(im_data, origsz)

        thresh_final = .5
        res_locations = []
        res_classemes = []
        res_cls_confs = []
        boxes_tosort = []
        # class indices 1..100; index 0 is skipped
        for j in xrange(1, 101):
            inds = np.where(scores[:, j] > .3)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            # columns: 4 box coords, score, index of the source roi
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], inds[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            # pred_boxes = clip_boxes(pred_boxes, im.shape)
            if len(cls_scores) <= 0:
                boxes_tosort.append(cls_dets)
                continue
            res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
            # full score row of every kept roi
            res_classeme = scores[inds]
            # (class index, class score) per detection
            res_cls_conf = np.column_stack(
                (np.zeros(cls_scores.shape[0]) + j, cls_scores))
            keep = nms(cls_dets[:, :5], .3)  # nms threshold
            cls_dets = cls_dets[keep, :]
            res_loc = res_loc[keep]
            res_classeme = res_classeme[keep]
            res_cls_conf = res_cls_conf[keep]
            res_classemes.extend(res_classeme)
            res_locations.extend(res_loc)
            res_cls_confs.extend(res_cls_conf)
            boxes_tosort.append(cls_dets)
        try:
            # final class confidence
            inds = np.where(
                np.array(res_cls_confs)[:, 1] > thresh_final)[0]
            classemes = np.array(res_classemes)[inds]
            locations = np.array(res_locations)[inds]
            cls_confs = np.array(res_cls_confs)[inds]
            # decide what to pass to top
            # limit max
            w, h = self.meta['train/' + im_id + '/w'][...], self.meta['train/' + im_id + '/h'][...]
            # a single relationship is not wrapped in an array; wrap it so
            # the loop below can index it
            if not isinstance(r_anno.relationship, np.ndarray):
                r_anno.relationship = [r_anno.relationship]
            for r in xrange(len(r_anno.relationship)):
                if not hasattr(r_anno.relationship[r], 'phrase'):
                    continue
                predicate = r_anno.relationship[r].phrase[1]
                # annotations are stored as (ymin, ymax, xmin, xmax)
                ymin, ymax, xmin, xmax = r_anno.relationship[r].subBox
                sub_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(sub_bbox)
                ymin, ymax, xmin, xmax = r_anno.relationship[r].objBox
                obj_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(obj_bbox)
                # row 0: overlaps with the subject box, row 1: object box
                overlaps = bbox_overlaps(
                    np.ascontiguousarray([sub_bbox, obj_bbox],
                                         dtype=np.float),
                    np.ascontiguousarray(locations, dtype=np.float))
                if overlaps.shape[0] == 0:
                    continue
                # up to 40 best-overlapping detections, best first
                sub_sorted = overlaps[0].argsort()[-40:][::-1]
                obj_sorted = overlaps[1].argsort()[-40:][::-1]
                # trim candidates from the tail until overlap >= .6
                while len(sub_sorted) > 0 and overlaps[0][
                        sub_sorted[-1]] < .6:
                    sub_sorted = sub_sorted[:-1]
                while len(obj_sorted) > 0 and overlaps[1][
                        obj_sorted[-1]] < .6:
                    obj_sorted = obj_sorted[:-1]

                if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                    continue

                cnt = 0
                # only the single best candidate of each side is used
                for s in sub_sorted[:1]:  # sub_idx:
                    for o in obj_sorted[:1]:  # obj_idx:
                        if s != o and cnt < 20:
                            sub_clsmemes = classemes[s]
                            obj_clsmemes = classemes[o]
                            # encode each box against the [0, 0, w, h] box
                            sub_box_encoded = bbox_transform(
                                np.array([[0, 0, w, h]]),
                                np.array([locations[s]]))[0]
                            obj_box_encoded = bbox_transform(
                                np.array([[0, 0, w, h]]),
                                np.array([locations[o]]))[0]
                            relation = self.meta['meta/pre/name2idx/' + predicate][...]
                            # all done, now we put forward
                            s_classeme.append(sub_clsmemes)
                            o_classeme.append(obj_clsmemes)
                            # last column of a location is the index of
                            # the originating rpn box
                            s_rois.append(rpn_boxes[locations[s][-1]])
                            o_rois.append(rpn_boxes[locations[o][-1]])
                            s_rois_encoded.append(sub_box_encoded)
                            o_rois_encoded.append(obj_box_encoded)
                            relation_label.append(np.float32(relation))
                            cnt += 1
            # final step copy all the stuff for forward
            blobs['s_classeme'] = np.array(s_classeme)
            blobs['o_classeme'] = np.array(o_classeme)
            blobs['s_rois'] = np.array(s_rois)
            blobs['o_rois'] = np.array(o_rois)
            blobs['s_rois_encoded'] = np.array(s_rois_encoded)
            blobs['o_rois_encoded'] = np.array(o_rois_encoded)
            blobs['relation_label'] = np.array(relation_label)
        # NOTE(review): bare except hides every failure above (including
        # KeyboardInterrupt) and silently falls back to the previous blobs
        except:
            blobs = self._prev_blob
        # no pair was matched: reuse the previous blobs
        if blobs['s_classeme'].shape[0] == 0:
            blobs = self._prev_blob
    else:
        blobs = self._prev_blob

    # NOTE(review): boxes_tosort, rpn_boxes_img_coor and thresh_final are
    # only bound when the image has a 'relationship' annotation; if these
    # calls really sit at this level they fail in the else case -- confirm.
    visualize_gt(im_data, gt_boxes)
    visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m, thresh_final)
    for blob_name, blob in blobs.iteritems():
        top_ind = self._name_to_top_map[blob_name]
        # Reshape net's input blobs
        top[top_ind].reshape(*(blob.shape))
        # Copy data into net's input blobs
        top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    # this becomes a dummy for forward in case things fail:
    # keep only the first row of every blob and mark its label as -1
    if blobs['relation_label'][0] != -1:
        for blob_name, blob in blobs.iteritems():
            blobs[blob_name] = blob[0, np.newaxis]
            if blob_name == 'relation_label':
                blobs[blob_name][...] = -1
        self._prev_blob = blobs
def _generate_rpn_rois(self, scores, bbox_deltas, im_info):
    """Decode RPN outputs into a ranked, NMS-filtered set of proposals.

    Arguments:
        scores (ndarray): per-anchor scores; the layout is taken to be
            (1, A, H, W), i.e. the last two axes are the score-map size
        bbox_deltas (ndarray): per-anchor regression deltas, taken to be
            (1, 4 * A, H, W)
        im_info: sequence whose first two entries are the clipping bounds
            handed to clip_boxes; the third entry is only printed ("scale")

    Returns:
        proposals (ndarray): N x 4 boxes left after top-N selection and NMS
    """
    phase_cfg = cfg[str(self.phase)]  # keyed by 'TRAIN' or 'TEST'
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # Step 1: build every shifted anchor and apply the predicted deltas.
    height, width = scores.shape[-2:]
    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # One (x, y, x, y) offset per score-map cell, in row-major cell order.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * self._feat_stride,
                                 np.arange(0, height) * self._feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast A base anchors (1, A, 4) against K cell shifts (K, 1, 4)
    # to get (K, A, 4), then flatten to (K * A, 4).
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = (self._anchors.reshape((1, A, 4)) +
               shifts.reshape((K, 1, 4))).reshape((K * A, 4))

    # Bring deltas and scores into the same (h, w, a) row order as the
    # anchors: channels go last, then everything is flattened.
    channels_last = (0, 2, 3, 1)
    bbox_deltas = bbox_deltas.transpose(channels_last).reshape((-1, 4))
    scores = scores.transpose(channels_last).reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # Step 2: clip the decoded boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # Step 3 (dropping small boxes) is intentionally not performed here.

    # Steps 4-5: rank by score, best first, and keep the pre-NMS top N.
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # Steps 6-8: NMS, keep the post-NMS top N, return the survivors.
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    # assert len(survivors) == post_nms_topN, \
    #     '{} vs {}'.format(len(survivors), post_nms_topN)
    return proposals[survivors, :]
def im_detect2(net, im, boxes=None):
    """Run detection (and optionally mask prediction) on a single image.

    Arguments:
        net (caffe.Net): network to run
        im (ndarray): image to test; its shape is used to clip boxes
        boxes (ndarray): R x 4 array of object proposals, or None when the
            network produces its own RoIs (cfg.TEST.HAS_RPN)

    Returns:
        (scores, pred_boxes) when cfg.TEST.MASK_REG is off, otherwise
        (rois_final, rois_class_score, rois_class_ind, masks_out,
         scores, pred_boxes).
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs.
        # This assignment had been commented out, leaving `scores` unbound
        # and the function failing with a NameError in SVM mode.
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = net.blobs['cls_prob'].data.copy()

    if cfg.TEST.MASK_REG:
        rois_class_score = blobs_out['rois_class_score']
        rois_class_ind = blobs_out['rois_class_ind']
        rois_final = blobs_out['rois_final']
        # Nx2x14x14 where N is the number of boxes. DO NOT drop the class
        # channel here. Read alongside the other mask outputs so it is bound
        # whether or not bounding-box regression is enabled.
        masks_out = blobs_out['mask_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = net.blobs['bbox_pred'].data.copy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if cfg.TEST.MASK_REG:
        return (rois_final, rois_class_score, rois_class_ind, masks_out,
                scores, pred_boxes)
    return scores, pred_boxes
def predict(self, inputs):
    """Decode box encodings into proposals, printing intermediate state.

    Arguments:
        inputs: indexable with three entries. inputs[0] holds class
            predictions and inputs[1] box encodings; the sample shapes noted
            by the author are (1, 38, 63, 24) and (1, 38, 63, 48).
            inputs[2] is stored as image_shape but not otherwise used.

    Returns:
        proposals: boxes remaining after clipping, filtering, top-N and NMS
        boxdecode: the decoded boxes before column reordering and clipping
        anchors: the anchors returned by generate_anchors
    """
    # class_prediction, inputs[0] (1, 38, 63, 24))
    # box_encodings, inputs[1] (1, 38, 63, 48))
    # image_shape
    print('input_0.shape=', inputs[0].shape)
    print('input_1.shape=', inputs[1].shape)
    print('input_2.shape=', inputs[2])
    image_shape = inputs[2]
    scales = config.cfg.POSTPROCESSOR.SCALES
    aspect_ratios = config.cfg.POSTPROCESSOR.ASPECT_RATIOS
    height_stride = config.cfg.POSTPROCESSOR.HEIGHT_STRIDE
    width_stride = config.cfg.POSTPROCESSOR.WIDTH_STRIDE
    _num_anchors = len(scales) * len(aspect_ratios)
    print("_num_anchors:", _num_anchors)
    # Keep only the channels from index _num_anchors onwards
    # (presumably the foreground scores -- TODO confirm against the model).
    scores = inputs[0][:, :, :, _num_anchors:]
    bbox_deltas = inputs[1]
    # box: one row of 4 values per anchor
    bbox_deltas = bbox_deltas.reshape((-1, 4))
    # scores: one row per anchor
    scores = scores.reshape((-1, 1))
    print("scores:", scores.shape)
    # anchors: generated for the single feature map given by inputs[0]
    height, width = inputs[0].shape[1], inputs[0].shape[2]
    feature_map_shape_list = [(height, width)]
    anchors = generate_anchors(
        scales=[scale for scale in scales],
        aspect_ratios=[aspect_ratio for aspect_ratio in aspect_ratios],
        base_anchor_size=None,
        anchor_stride=[height_stride, width_stride],
        anchor_offset=None,
        feature_map_shape_list=feature_map_shape_list)
    # Hard-coded selection parameters.
    pre_nms_topN = 6000
    post_nms_topN = 100
    nms_thresh = 0.699999988079
    min_size = 16
    # Sample values recorded by the author:
    # box_encodings, inputs[1] (1, 38, 63, 48))
    # bbox_deltas:', (28728, 4)
    # clip_window:', array([ 0, 0, 600, 1002]))
    # tf clip_to_window
    print("============== clip_to_window ===================")
    proposals = bbox_transform_inv_tf(anchors, bbox_deltas)
    boxdecode = proposals
    # clip_window is only printed; the clip_to_window call is disabled below.
    clip_window = np.array([0, 0, height, width])
    print("clip_window:", clip_window)
    # ('proposals_clip :', (1829, 4))
    # proposals_clip = clip_to_window(proposals, clip_window)
    boxdecode = proposals
    # The third entry (used below as a scale factor) is 0.
    im_info = np.array([height, width, 0])
    # Swap columns 0<->1 and 2<->3 before clipping (presumably converting
    # between (y, x) and (x, y) box order -- TODO confirm).
    proposals = proposals[:, (1, 0, 3, 2)]
    proposals = clip_boxes(proposals, im_info[:2])
    print("proposals_clip clip_to_window :", proposals.shape)
    print("proposals_clip clip_to_window[0] :", proposals[0])
    # Sample values recorded by the author:
    # array([ 0. , 0. , 37.36838, 15.30636], dtype=float32))
    # ', array([ 0. , 0. , 15.30636, 37. ], dtype=float32))
    # anchors[0]:', array([ 0. , 0. , 45.254834, 22.627417]))
    # proposals1 0:', array([11.60263 , 3.129001, 41.311607, 18.966888], dtype=float32))
    # ('proposals1 1:', array([ 0.22217222, 1.6537127 , 100.95798 , 44.21667 ],
    # dtype=float32))
    # im_info:', array([38, 63, 0]))
    # proposals = clip_boxes(proposals, im_info[:2])
    print("proposals clip_boxes :", proposals.shape)
    print("proposals clip_boxes [0]:", proposals[0])
    # im_info:[:2]', array([38, 63]
    print("im_info:[:2]", im_info[:2])
    # im_info[2] is 0 (set above), so the size threshold passed here is 0.
    keep = self._filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    print("proposals3:", proposals.shape)
    # 'scores.shape1', (28728, 1
    print("scores.shape1", scores.shape)
    scores = scores[keep]
    print("scores.shape2", scores.shape)
    # Rank by score, highest first, and keep at most pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    # proposals4:', (6000, 4))
    print("proposals4 pre:", proposals.shape)
    scores = scores[order]
    # TODO: decide whether the nms routine should be rewritten
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # ('proposals5:', (100, 4))
    print("proposals final:", proposals.shape)
    return proposals, boxdecode, anchors
def im_detect_array(net, imgs):
    """Detect object classes in a batch of images using the network's RPN.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        imgs (sequence of ndarray): images to test, one scale per image

    Returns:
        scores (list of ndarray): one R_i x K array of class scores per image
        pred_boxes (list of ndarray): one R_i x (4*K) array of predicted
            bounding boxes per image, in that image's raw coordinate space

    Raises:
        NotImplementedError: when cfg.TEST.HAS_RPN is off. This function
            takes no external proposals, and the former non-RPN code path
            read `boxes` before assignment and indexed Python lists with
            `inv_index`, so it could never complete.
    """
    if not cfg.TEST.HAS_RPN:
        raise NotImplementedError(
            "im_detect_array only supports cfg.TEST.HAS_RPN = True")

    blobs, im_scales = _get_blobs(imgs)
    assert len(im_scales) == len(imgs), "Only one scale per image implemented"
    num_images = len(imgs)

    # One (height, width, scale) row per image, shaped (N, 1, 3).
    im_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[[im_blob.shape[2], im_blob.shape[3], im_scales[i]]]
         for i in range(im_blob.shape[0])],
        dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    net.blobs['im_info'].reshape(*(blobs['im_info'].shape))

    # do forward
    blobs_out = net.forward(
        data=blobs['data'].astype(np.float32, copy=False),
        im_info=blobs['im_info'].astype(np.float32, copy=False))

    # Column 0 of each RoI is its batch index. Find where each image's RoIs
    # begin so per-RoI outputs can be split per image.
    # NOTE(review): searchsorted requires the batch indices to be sorted
    # ascending -- confirm the proposal layer guarantees this.
    rois = net.blobs['rois'].data.copy()
    dividers = [np.searchsorted(rois[:, 0], idx, 'left')
                for idx in range(1, num_images)]

    # Unscale back to raw image space using each image's OWN scale
    # (previously every image was divided by im_scales[0]).
    boxes = [image_rois / im_scales[idx]
             for idx, image_rois
             in enumerate(np.split(rois[:, 1:5], dividers))]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = np.split(net.blobs['cls_score'].data, dividers)
    else:
        # use softmax estimated probabilities
        scores = np.split(blobs_out['cls_prob'], dividers)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas, image by image
        box_deltas = np.split(blobs_out['bbox_pred'], dividers)
        pred_boxes = []
        for idx in range(num_images):
            decoded = bbox_transform_inv(boxes[idx], box_deltas[idx])
            pred_boxes.append(clip_boxes(decoded, imgs[idx].shape))
    else:
        # Simply repeat the boxes, once for each class. `scores` is a list,
        # so the tiling has to be done per image.
        pred_boxes = [np.tile(image_boxes, (1, image_scores.shape[1]))
                      for image_boxes, image_scores in zip(boxes, scores)]

    return scores, pred_boxes
def im_detect(feature_net, embed_net, recurrent_net, im, boxes=None,
              use_box_at=-1):
    """Score, localize and caption the regions proposed for one image.

    Arguments:
        feature_net (caffe.Net): CNN model for extracting features
        embed_net (caffe.Net): A word embedding layer
        recurrent_net (caffe.Net): Recurrent model for generating captions
            and locations
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        use_box_at (int32): Use predicted box at a given timestep, default
            to the last one (use_box_at=-1)

    Returns:
        scores (ndarray): R x 1 array of object class scores
        pred_boxes (ndarray)): R x 4 array of predicted bounding boxes
        captions (list): length R list of list of word tokens (captions)
    """
    # Statistics needed to undo the normalization of the bbox targets.
    target_mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS).reshape((1, 4))
    target_std = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS).reshape((1, 4))

    blobs, im_scales = _get_blobs(im, boxes)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    image_blob = blobs['data']
    blobs['im_info'] = np.array(
        [[image_blob.shape[2], image_blob.shape[3], im_scales[0]]],
        dtype=np.float32)

    # Size the network inputs for this image, then run the feature CNN.
    net_blobs = feature_net.blobs
    net_blobs['data'].reshape(*(blobs['data'].shape))
    net_blobs['im_info'].reshape(*(blobs['im_info'].shape))
    feature_net.forward(data=image_blob, im_info=blobs['im_info'])

    region_features = net_blobs['region_features'].data.copy()
    rois = net_blobs['rois'].data.copy()
    # Detection score: column 1 of the class probabilities.
    scores = net_blobs['cls_probs'].data[:, 1].copy()
    # Proposal boxes, mapped back to the raw image scale.
    boxes = rois[:, 1:5] / im_scales[0]
    proposal_n = rois.shape[0]

    feat_args = {'input_features': region_features}
    # Optional context input: the global feature, repeated per proposal
    # (tiling pattern follows the global feature's shape).
    opt_args = {}
    if ('global_features' in feature_net.blobs
            and 'global_features' in recurrent_net.blobs):
        opt_args['global_features'] = np.tile(
            net_blobs['global_features'].data, (1, proposal_n, 1))

    # Box offsets come straight from the feature net when it exposes a
    # 'bbox_pred' blob; otherwise the recurrent net predicts them.
    if 'bbox_pred' in feature_net.blobs:
        captions, _, logprobs = _greedy_search(
            embed_net, recurrent_net, feat_args, opt_args, proposal_n,
            pred_bbox=False)
        box_offsets = net_blobs['bbox_pred'].data
    else:
        captions, box_offsets, logprobs = _greedy_search(
            embed_net, recurrent_net, feat_args, opt_args, proposal_n,
            pred_bbox=True, use_box_at=use_box_at)

    # Un-normalize the offsets, then decode and clip the boxes.
    box_deltas = box_offsets * target_std + target_mean
    pred_boxes = clip_boxes(bbox_transform_inv(boxes, box_deltas), im.shape)

    return scores, pred_boxes, captions
def forward(self, bottom, top):
    """Sample training RoIs and scatter them into four per-level slots.

    bottom[0] supplies candidate RoIs (0, x1, y1, x2, y2), bottom[1] the
    ground-truth boxes (x1, y1, x2, y2, label) and bottom[2] the image.
    top[0..3] receive the RoIs of levels 2..5, top[4] the labels (-1 where
    a slot is unused), top[5] the bbox targets, top[6] the inside weights
    and top[7] the outside weights (1 where the inside weight is positive).
    """
    # Proposal ROIs coming from the RPN (or any other source)
    candidates = bottom[0].data
    aaa = candidates[:]
    # TODO(rbg): it's annoying that sometimes I have extra info before
    # and other times after box coordinates -- normalize to one format
    gt_boxes = bottom[1].data
    im = bottom[2].data

    # Ground-truth boxes join the candidate set, with batch index 0 and
    # their label column dropped.
    batch_col = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    candidates = np.vstack(
        (candidates, np.hstack((batch_col, gt_boxes[:, :-1]))))

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    rois, labels, bbox_targets, bbox_weights, layer_indexs = _sample_rois(
        candidates, gt_boxes, fg_rois_per_image, rois_per_image,
        self._num_classes, sample_type='fpn', k0=4)

    # Debug visualization, switched off by default.
    vis = False
    if vis:
        ind = np.where(labels != 0)[0]
        im_shape = im.shape
        means = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (21, 1)).ravel()
        stds = np.tile(
            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (21, 1)).ravel()
        bbox_targets = bbox_targets * stds + means
        pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_targets)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        l = labels[ind]
        ro = rois[ind, 1:]
        b = bbox_targets[ind, :]
        p = pred_boxes[ind, :] * bbox_weights[ind, :]
        # Pick, for every foreground RoI, the 4 columns of its own class.
        r = [p[i, l[i] * 4:l[i] * 4 + 4] for i in range(p.shape[0])]
        r_ = np.vstack(r)
        vis_all_detection(im, aaa[:, 1:], l, 1)

    # Each of the four levels owns a fixed slot of self._batch_rois rows.
    slot = self._batch_rois
    total_rows = slot * 4
    target_width = self._num_classes * 4
    rois_ = np.zeros((total_rows, 5), dtype=rois.dtype)
    labels_all = np.ones((total_rows, ), dtype=labels.dtype) * -1
    bbox_targets_all = np.zeros((total_rows, target_width),
                                dtype=bbox_targets.dtype)
    bbox_weights_all = np.zeros((total_rows, target_width),
                                dtype=bbox_weights.dtype)

    rois_all = []
    for level in range(4):
        # Levels are numbered from 2 in layer_indexs.
        on_level = (layer_indexs == (level + 2))
        count = sum(on_level)
        first = slot * level
        rows = range(first, first + count)
        rois_[rows, :] = rois[on_level, :]
        rois_all.append(rois_[range(first, first + slot), :])
        labels_all[rows] = labels[on_level]
        bbox_targets_all[rows, :] = bbox_targets[on_level, :]
        bbox_weights_all[rows, :] = bbox_weights[on_level, :]

    # top[0..3]: per-level RoIs, top[4]: labels, top[5]: bbox targets,
    # top[6]: inside weights, top[7]: outside weights.
    outputs = rois_all[:4] + [
        labels_all,
        bbox_targets_all,
        bbox_weights_all,
        np.array(bbox_weights_all > 0).astype(np.float32),
    ]
    for top_ind, values in enumerate(outputs):
        top[top_ind].reshape(*values.shape)
        top[top_ind].data[...] = values
def __call__(self, x, bbox_deltas, im_info):
    """Turn RPN class scores and box deltas into RoIs for one image.

    Arguments:
        x: RPN class probabilities as a chainer.Variable or raw array; the
            first num_anchors channels are background, the rest foreground
        bbox_deltas: predicted box deltas as a chainer.Variable or raw
            array, taken to be laid out as (1, 4 * A, H, W)
        im_info: sequence whose first two entries bound the clipping and
            whose third entry scales the minimum box size

    Returns:
        rois (chainer.Variable): N x 5 float32 array on the GPU; column 0 is
            the batch index (always 0), columns 1-4 the proposal box
    """
    # Unwrap Variables and bring everything to host memory. Previously only
    # GPU-backed inputs were unwrapped, so a CPU-backed Variable was passed
    # straight into the NumPy code below. to_cpu returns arrays that are
    # already on the CPU unchanged.
    host_arrays = []
    for value in (x, bbox_deltas):
        if isinstance(value, chainer.Variable):
            value = value.data
        host_arrays.append(chainer.cuda.to_cpu(value))
    x, bbox_deltas = host_arrays

    assert x.shape[0] == 1, 'Only single item batches are supported'

    if self.train:
        pre_nms_topN = self.TRAIN_RPN_PRE_NMS_TOP_N
        post_nms_topN = self.TRAIN_RPN_POST_NMS_TOP_N
        nms_thresh = self.TRAIN_RPN_NMS_THRESH
        min_size = self.TRAIN_RPN_MIN_SIZE
    else:
        pre_nms_topN = self.TEST_RPN_PRE_NMS_TOP_N
        post_nms_topN = self.TEST_RPN_POST_NMS_TOP_N
        nms_thresh = self.TEST_RPN_NMS_THRESH
        min_size = self.TEST_RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = x[:, self.num_anchors:, :, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self.feat_stride
    shift_y = np.arange(0, height) * self.feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self.num_anchors
    K = shifts.shape[0]
    anchors = self.anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))
    # The result is always placed on the GPU, as before.
    blob = chainer.cuda.cupy.asarray(blob, np.float32)
    rois = chainer.Variable(blob, volatile=not self.train)
    return rois
def im_detect(net, im, boxes=None, num_classes=21):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        num_classes (int): number of classes (background included) used to
            tile the bbox normalization means/stds

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes,
            divided by the image scale before being returned
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # Rows of the network output that belong to real (non all-zero) RoIs.
    # Only set in RPN mode. It used to share the name `index` with the dedup
    # indices above, so in non-RPN mode the outputs were filtered by the
    # wrong indices (or by an unbound name).
    valid = None
    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # Boxes stay in the scaled image space here; the division by
        # im_scales[0] happens once, on return.
        boxes = rois[:, 1:5]
        valid = np.where(np.sum(boxes, axis=1) != 0)[0]
        boxes = boxes[valid, :]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
    if valid is not None:
        scores = scores[valid]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        if valid is not None:
            box_deltas = box_deltas[valid, :]
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Undo the precomputed mean/stdev normalization of the targets
            means = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                (num_classes, 1)).ravel()
            stds = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                (num_classes, 1)).ravel()
            box_deltas = box_deltas * stds + means
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        # Clip against the network input blob, passed as
        # (height, width, channels), since boxes are still in scaled space.
        data_shape = blobs['data'].shape
        pred_boxes = clip_boxes(
            pred_boxes, (data_shape[2], data_shape[3], data_shape[1]))
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    vis = False
    if vis:
        vis_rois_detection(blobs['data'].astype(np.float32, copy=False),
                           pred_boxes / im_scales[0])

    # NOTE(review): in non-RPN mode the caller-supplied boxes are presumably
    # already in raw image space, in which case this final division (and the
    # clipping against the blob size) would be wrong -- confirm.
    return scores, pred_boxes / im_scales[0]
def proposal_layer_3d(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, calib,
                      cfg_key, _feat_stride=[8, ], anchor_scales=[1.0, 1.0]):
    """Build bird-view, image and 3D proposal blobs from RPN outputs.

    Arguments:
        rpn_cls_prob_reshape (ndarray): class probabilities; axes 1 and 2
            are read as (height, width) and the last axis is reshaped to
            (num_anchors, 2), of which index 1 is kept as the score
        rpn_bbox_pred (ndarray): box deltas, reshaped to rows of 6 values
        im_info: only im_info[0] is used; its first two entries bound the
            clipping and its third scales the minimum box size
        calib: indexable; entries 3, 2 and 0 are passed to lidar_cnr_to_img
        cfg_key: key into cfg selecting the NMS / top-N settings
            ('TRAIN' or 'TEST' per the original comment)
        _feat_stride: stride multiplied into the cell grid
        anchor_scales: not used by the active code (only referenced in a
            commented-out generate_anchors call)

    Returns:
        blob_bv, blob_img, blob_3d: float32 arrays for the kept proposals in
        bird-view, image and 3D form, each with a leading all-zero
        batch-index column.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    #layer_params = yaml.load(self.param_str_)
    _anchors = generate_anchors_bv()
    # _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]
    # Shapes observed by the author:
    #print "aaaaaaa",_anchors.shape (4,4)
    #print "bbbbbbb",im_info (601,601,1)
    #print "ccccccc", calib.shape (4,12)
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'
    # cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    # print rpn_cls_prob_reshape.shape
    height, width = rpn_cls_prob_reshape.shape[1:3]
    # scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    # Split the last axis into (anchor, 2) pairs and keep entry 1 of each
    # pair as the score.
    scores = np.reshape(
        np.reshape(rpn_cls_prob_reshape,
                   [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
        [1, height, width, _num_anchors])

    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])

    # 1. Generate proposals from bbox deltas and shifted anchors
    if DEBUG:
        print 'score map size: {}'.format(scores.shape)

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # The deltas are flattened directly into rows of 6 values; the
    # channel-first transpose used by the 2D layer is disabled here.
    # bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 6))
    bbox_deltas = bbox_deltas.reshape((-1, 6))
    # print "bbox_deltas",bbox_deltas.shape
    # print anchors.shape

    # Scores are likewise flattened without a transpose, one row per anchor.
    # scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    scores = scores.reshape((-1, 1))
    # print np.sort(scores.ravel())[-30:]

    # convert anchors bv to anchors_3d
    anchors_3d = bv_anchor_to_lidar(anchors)
    # Convert anchors into proposals via bbox transformations
    proposals_3d = bbox_transform_inv_3d(anchors_3d, bbox_deltas)
    # convert back to lidar_bv
    proposals_bv = lidar_3d_to_bv(proposals_3d)
    lidar_corners = lidar_3d_to_corners(proposals_3d)
    proposals_img = lidar_cnr_to_img(lidar_corners,
                                     calib[3], calib[2], calib[0])

    if DEBUG:
        # print "bbox_deltas: ", bbox_deltas[:10]
        # print "proposals number: ", proposals_3d[:10]
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape:", proposals_img.shape

    # 2. clip predicted boxes to image
    proposals_bv = clip_boxes(proposals_bv, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    # The same selection is applied to all three representations and to the
    # scores so that their rows stay aligned.
    keep = _filter_boxes(proposals_bv, min_size * im_info[2])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    # TODO: pass real image_info
    # Second filter, on the image-plane boxes, against a hard-coded
    # [375, 1242] size.
    keep = _filter_img_boxes(proposals_img, [375, 1242])
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after clip"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape
        print "proposals_img shape: ", proposals_img.shape

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals_bv = proposals_bv[order, :]
    proposals_3d = proposals_3d[order, :]
    proposals_img = proposals_img[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    # NMS is computed on the bird-view boxes only; the resulting selection
    # is then applied to every representation.
    keep = nms(np.hstack((proposals_bv, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals_bv = proposals_bv[keep, :]
    proposals_3d = proposals_3d[keep, :]
    proposals_img = proposals_img[keep, :]
    scores = scores[keep]

    if DEBUG:
        print "proposals after nms"
        print "proposals_bv shape: ", proposals_bv.shape
        print "proposals_3d shape: ", proposals_3d.shape

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals_bv.shape[0], 1), dtype=np.float32)
    blob_bv = np.hstack((batch_inds,
                         proposals_bv.astype(np.float32, copy=False)))
    blob_img = np.hstack(
        (batch_inds, proposals_img.astype(np.float32, copy=False)))
    blob_3d = np.hstack((batch_inds,
                         proposals_3d.astype(np.float32, copy=False)))

    if DEBUG:
        print "blob shape ====================:"
        print blob_bv.shape
        print blob_img.shape
        # print '3d', blob_3d[:10]
        # print lidar_corners[:10]
        # print 'bv', blob_bv[:10]
        # print 'img', blob_img[:10]

    return blob_bv, blob_img, blob_3d
def forward_t(self, bottom, top):
    """Merge the RoIs produced by every RPN branch into one proposal blob.

    With ``N = len(cfg.MULTI_SCALE_RPN_NO)``, bottoms ``0 .. N-1`` supply the
    RoIs and bottoms ``N .. 2N-1`` the matching scores; ``bottom[6]`` supplies
    ``im_info``.  The merged boxes are clipped to the image, filtered with
    ``cfg.TEST.RON_MIN_SIZE``, ranked by score, truncated, optionally NMS-ed
    and written to ``top[0]`` behind an all-zero batch-index column.
    ``top[1]``, when present, receives the surviving scores.
    """
    if self.phase == 0:
        phase_name = str('TRAIN')
    else:
        phase_name = str('TEST')
    phase_cfg = cfg[phase_name]

    top_n_before_nms = phase_cfg.Frozen_PRE_NMS_TOP_N
    top_n_after_nms = phase_cfg.Frozen_POST_NMS_TOP_N
    overlap_thresh = phase_cfg.Frozen_NMS_THRESH
    # Looked up but never used below: the size filter takes its threshold
    # from cfg.TEST.RON_MIN_SIZE instead.
    min_size = phase_cfg.Frozen_MIN_SIZE

    im_info = bottom[6].data[0, :]

    # Gather the per-branch RoIs and scores; the empty seeds fix the column
    # count and dtype of the merged arrays.
    num_branches = len(cfg.MULTI_SCALE_RPN_NO)
    roi_parts = [np.zeros((0, 4), dtype=np.float32)]
    score_parts = [np.zeros((0, 1), dtype=np.float32)]
    for branch in range(num_branches):
        roi_parts.append(bottom[branch].data[0])
        score_parts.append(bottom[branch + num_branches].data[0])
    rois = np.concatenate(roi_parts, axis=0)
    scores = np.concatenate(score_parts, axis=0)

    # Collapse a 4-D roi array down to (rows, 4).
    if rois.ndim == 4:
        rois = rois.reshape(rois.shape[0], rois.shape[1])

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # The RoIs are used as-is (no bbox-delta decoding); clip them to the image.
    proposals = clip_boxes(rois, im_info[:2])

    # Drop boxes that the size filter rejects.
    big_enough = _filter_boxes(proposals, cfg.TEST.RON_MIN_SIZE)
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]

    # Rank by score, best first, and keep at most top_n_before_nms.
    ranking = scores.ravel().argsort()[::-1]
    if top_n_before_nms > 0:
        ranking = ranking[:top_n_before_nms]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # Optional NMS followed by the post-NMS cap.
    if phase_cfg.Frozen_NMS:
        survivors = nms(np.hstack((proposals, scores)), overlap_thresh)
        if top_n_after_nms > 0:
            survivors = survivors[:top_n_after_nms]
        proposals = proposals[survivors, :]
        scores = scores[survivors]

    # Only one input image is supported, so every batch index is 0.
    zero_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    roi_blob = np.hstack((zero_col,
                          proposals.astype(np.float32, copy=False)))
    top[0].reshape(*roi_blob.shape)
    top[0].data[...] = roi_blob

    # [Optional] scores output.
    if len(top) > 1:
        top[1].reshape(*scores.shape)
        top[1].data[...] = scores
def im_detect(net, im, feat_blob, boxes=None):
    """Run the detector on one image and also return an intermediate blob.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        feat_blob (str): name of the feature blob to be extracted
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): with cfg.TEST.SVM the raw 'cls_score' blob;
            otherwise an R x 2 array whose first column is the last column
            of 'pid_prob' and whose second column is its complement
        boxes (ndarray): predicted boxes (regressed and clipped when
            cfg.TEST.BBOX_REG is set, otherwise the input boxes tiled once
            per score column)
        features (ndarray): copy of the data held by blob `feat_blob`
    """
    blobs, im_scales = _get_blobs(im, boxes)
    use_rpn = cfg.TEST.HAS_RPN
    dedup = cfg.DEDUP_BOXES > 0 and not use_rpn

    # Distinct image ROIs can alias to the same feature-map ROI.  Hash the
    # quantised ROIs and run the network on the unique subset only.
    if dedup:
        weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
        roi_hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(weights)
        _, first_idx, inv_index = np.unique(roi_hashes, return_index=True,
                                            return_inverse=True)
        blobs['rois'] = blobs['rois'][first_idx, :]
        boxes = boxes[first_idx, :]

    # Besides 'data' the network takes either 'im_info' (RPN) or 'rois'.
    if use_rpn:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[2], data.shape[3], im_scales[0]]],
            dtype=np.float32)
        side_input = 'im_info'
    else:
        side_input = 'rois'
    input_names = ('data', side_input)

    # Reshape the network inputs, then run the forward pass.
    for name in input_names:
        net.blobs[name].reshape(*blobs[name].shape)
    net_inputs = {}
    for name in input_names:
        net_inputs[name] = blobs[name].astype(np.float32, copy=False)
    blobs_out = net.forward(**net_inputs)

    if use_rpn:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        net_rois = net.blobs['rois'].data.copy()
        # Unscale back to raw image space.
        boxes = net_rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # Use the raw scores before softmax under the assumption they were
        # trained as linear SVMs.
        scores = net.blobs['cls_score'].data
    else:
        # The last column of pid_prob is the non-person box score.
        non_person = blobs_out['pid_prob'][:, -1][:, np.newaxis]
        scores = np.hstack([non_person, 1. - non_person])

    if cfg.TEST.BBOX_REG:
        # Apply the bounding-box regression deltas and clip to the image.
        pred_boxes = clip_boxes(
            bbox_transform_inv(boxes, blobs_out['bbox_pred']), im.shape)
    else:
        # Simply repeat the boxes, once for each class.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    features = net.blobs[feat_blob].data.copy()

    # Expand the de-duplicated results back to the original set of boxes.
    if dedup:
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]
        features = features[inv_index, :]

    return scores, pred_boxes, features
def _im_detect(net, im, roidb, blob_names=None):
    """Run the RPN-based detector on one image and collect feature blobs.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        roidb (an roidb item): supplies 'boxes', 'gt_classes' and 'gt_pids'
            when the network has a 'gt_boxes' input
        blob_names (list of str): names of the feature blobs to extract,
            or None for none

    Returns:
        boxes (ndarray): predicted boxes in raw image space
        scores (ndarray): with cfg.TEST.SVM the raw 'cls_score' blob;
            otherwise an R x 2 array whose first column is the first column
            of 'pid_prob' and whose second column is its complement
        features (dict of ndarray): {blob name: copy of that blob's data}
    """
    im_blob, im_scales = get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"

    inputs = {
        'data': im_blob,
        'im_info': np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32),
    }
    if 'gt_boxes' in net.blobs:
        # Supply gt_boxes as input. Used to get pid_labels for proposals.
        inputs['gt_boxes'] = get_gt_boxes_blob(
            roidb['boxes'], roidb['gt_classes'], roidb['gt_pids'], im_scales)

    # Reshape every network input, then run the forward pass.
    net_inputs = {}
    for name, blob in inputs.items():
        net.blobs[name].reshape(*blob.shape)
        net_inputs[name] = blob.astype(np.float32, copy=False)
    blobs_out = net.forward(**net_inputs)

    # Unscale the network's rois back to raw image space.
    net_rois = net.blobs['rois'].data.copy()
    boxes = net_rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # Use the raw scores before softmax under the assumption they were
        # trained as linear SVMs.
        scores = net.blobs['cls_score'].data
    else:
        # The first column of pid_prob is the non-person box score.
        non_person = blobs_out['pid_prob'][:, 0][:, np.newaxis]
        scores = np.hstack([non_person, 1. - non_person])

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each class.
        boxes = np.tile(boxes, (1, scores.shape[1]))
    else:
        deltas = blobs_out['bbox_pred']
        # The bbox_pred weights are no longer scaled and shifted at snapshot
        # time, so the normalisation has to be undone here.
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS and \
                cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            class_count = deltas.shape[1] // 4
            stds = np.tile(cfg.TRAIN.BBOX_NORMALIZE_STDS, class_count)
            means = np.tile(cfg.TRAIN.BBOX_NORMALIZE_MEANS, class_count)
            deltas = deltas * stds + means
        boxes = clip_boxes(bbox_transform_inv(boxes, deltas), im.shape)

    features = {}
    if blob_names is not None:
        for blob in blob_names:
            features[blob] = net.blobs[blob].data.copy()

    return boxes, scores, features
def im_detect(net, im, boxes=None,
              extra_boxes=np.zeros((0, 4), dtype=np.float32),
              dc_boxes=np.zeros((0, 4), dtype=np.float32)):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        extra_boxes (ndarray): external proposals (default: empty 0 x 4);
            when cfg.TEST.EXTERNAL_ROIS is set they are projected with
            _project_im_rois and fed to the 'extra_rois' network input
        dc_boxes (ndarray): boxes (default: empty 0 x 4) projected the same
            way and fed to the 'dc_rois' network input -- presumably
            "don't care" regions, TODO confirm

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes

    NOTE(review): the end of this function, including its return statement,
    is missing from this file, so the return contract above is carried over
    unverified.  The indentation below was reconstructed from
    whitespace-collapsed text -- confirm the nesting of the `else` branches.
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # External proposals are only meaningful when the config enables them.
    if extra_boxes.shape[0] > 0:
        assert cfg.TEST.EXTERNAL_ROIS == True, "To use external proposals, the proper \ configuration parameter must be set"

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        assert cfg.TEST.EXTERNAL_ROIS == False
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes, return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # With an RPN the network needs the blob height, width and image scale.
    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        if cfg.TEST.EXTERNAL_ROIS:
            # Size the extra inputs and project both box sets with
            # _project_im_rois before they are fed to the network.
            net.blobs['extra_rois'].reshape(*(extra_boxes.shape))
            sc_extra_boxes, _ = _project_im_rois(extra_boxes, im_scales)
            net.blobs['dc_rois'].reshape(*(dc_boxes.shape))
            sc_dc_boxes, _ = _project_im_rois(dc_boxes, im_scales)
    else:
        assert cfg.TEST.EXTERNAL_ROIS == False
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False)
        if cfg.TEST.EXTERNAL_ROIS:
            forward_kwargs['extra_rois'] = sc_extra_boxes
            forward_kwargs['dc_rois'] = sc_dc_boxes
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.VIEWPOINTS:
        # The viewpoint output is looked up under 'viewpoint_pred' first and
        # under 'viewpoints_pd' when that key is absent.
        try:
            viewpoints = blobs_out['viewpoint_pred']
        except KeyError, e:
            viewpoints = blobs_out['viewpoints_pd']
        except:
            # NOTE(review): the source is truncated here -- the body of this
            # handler and the remainder of the function are not in the file.
def forward(self, bottom, top):
    """Generate RPN proposals over four strides and split them by level.

    Inputs (``bottom``):
        0      -- ``im_info``; row 0 is read as image size and scale
        1 - 4  -- bbox deltas for strides 4, 8, 16 and 32
        5 - 8  -- class probabilities for strides 4, 8, 16 and 32

    Outputs (``top``):
        0 - 3  -- the RoIs assigned to pyramid levels 2 .. 5, one per top
        4      -- all RoIs, concatenated in level order

    Every output row is a zero batch index followed by the 4 box columns.

    Algorithm, per stride: generate anchors at every score-map cell, apply
    the predicted deltas, clip to the image and drop boxes below the minimum
    size.  The surviving proposals of all strides are then ranked by score,
    truncated, NMS-ed, truncated again and assigned to a pyramid level from
    their area.  A level that received no RoI borrows one from the nearest
    level holding more than one, shifting rows through the levels in between.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    im_info = bottom[0].data[0, :]
    cls_prob_dict = {
        'stride32': bottom[8].data,
        'stride16': bottom[7].data,
        'stride8': bottom[6].data,
        'stride4': bottom[5].data,
    }
    bbox_pred_dict = {
        'stride32': bottom[4].data,
        'stride16': bottom[3].data,
        'stride8': bottom[2].data,
        'stride4': bottom[1].data,
    }

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors, per stride.
    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride,
                                       scales=np.array(self._scales),
                                       ratios=self._ratios)
        # The first _num_anchors channels are bg probs, the rest fg probs.
        scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)]

        height, width = scores.shape[-2:]
        if DEBUG:
            print('score map size: {}'.format(scores.shape))

        # Enumerate all shifts.
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Add A anchors (1, A, 4) to K cell shifts (K, 1, 4) to get the
        # (K, A, 4) shifted anchors, then flatten to (K*A, 4).
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4), rows ordered by
        # (h, w, a) so they line up with the anchors.
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        # Same story for the scores: (1, A, H, W) -> (H*W*A, 1).
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations.
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposal_list.append(proposals[keep, :])
        score_list.append(scores[keep])

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 8. Assign every proposal to a pyramid level in [2, 5] from its area.
    widths = proposals[:, 2] - proposals[:, 0]
    heights = proposals[:, 3] - proposals[:, 1]
    areas = widths * heights
    # Degenerate boxes would otherwise reach log(0); the guarded area is the
    # one that has to feed the level formula.
    areas[areas <= 0] = 1e-6
    image_area = im_info[0] * im_info[1]
    alpha = np.sqrt(areas) / (224.0 / np.sqrt(image_area))
    raw_level = np.log(alpha) / np.log(2.0)
    # Clamp on the array itself.  The previous code clamped a Python list
    # with boolean "masks", which in Python 2 silently overwrote the levels
    # of the first two proposals and in Python 3 raises TypeError.
    levels = np.clip(4 + np.round(raw_level).astype(np.int32), 2, 5)

    num_levels = 4
    # A level without proposals becomes a (0, 4) array, so every entry can be
    # sliced and stacked uniformly.
    rois_layers = [proposals[levels == (lvl + 2), :]
                   for lvl in range(num_levels)]

    # 9. Give every empty level one RoI, borrowed from the nearest level that
    #    holds more than one.  Nearest means offsets -1, +1, -2, +2, -3, +3.
    for i in range(num_levels):
        if rois_layers[i].shape[0] != 0:
            continue
        donor = None
        for offset in (-1, 1, -2, 2, -3, 3):
            candidate = i + offset
            if 0 <= candidate < num_levels and \
                    rois_layers[candidate].shape[0] > 1:
                donor = candidate
                break
        if donor is None:
            # Not enough RoIs anywhere; the level stays empty.
            continue

        # Shift one row per level along the chain from the donor towards i,
        # so each level in between hands its row on and receives a new one.
        step = 1 if donor > i else -1
        for k in range(i, donor, step):
            src = k + step
            if rois_layers[src].shape[0] == 0:
                # An empty level in between is fed straight from the donor.
                src = donor
            if rois_layers[src].shape[0] == 0:
                break  # donor exhausted
            if step > 0:
                # Borrowing from above takes the first row ...
                row = rois_layers[src][:1, :]
                rest = rois_layers[src][1:, :]
            else:
                # ... borrowing from below takes the last row.
                row = rois_layers[src][-1:, :]
                rest = rois_layers[src][:-1, :]
            rois_layers[k] = row
            rois_layers[src] = rest

    # 10. Write one rois blob per level plus the concatenation of all levels.
    #     Only a single input image is supported, so all batch inds are 0.
    #     NOTE(review): a level that could not be filled is emitted as a
    #     zero-row blob -- confirm the consuming layers accept that.
    rpn_rois = np.zeros((proposals.shape[0], proposals.shape[1]),
                        dtype=np.float32)
    count = 0
    for i in range(num_levels):
        level_rois = rois_layers[i]
        batch_inds_i = np.zeros((level_rois.shape[0], 1), dtype=np.float32)
        blob_i = np.hstack(
            (batch_inds_i, level_rois.astype(np.float32, copy=False)))
        top[i].reshape(*(blob_i.shape))
        top[i].data[...] = blob_i
        rpn_rois[count:level_rois.shape[0] + count, :] = level_rois
        count += level_rois.shape[0]

    batch_inds = np.zeros((rpn_rois.shape[0], 1), dtype=np.float32)
    blob_rpn_rois = np.hstack(
        (batch_inds, rpn_rois.astype(np.float32, copy=False)))
    top[4].reshape(*(blob_rpn_rois.shape))
    top[4].data[...] = blob_rpn_rois
def im_detect(net, im, boxes=None, num_classes=21):
    """Detect object classes in an image given object proposals.

    With an RPN (cfg.TEST.HAS_RPN) the network's own rois are used; all-zero
    roi rows are dropped together with their scores and deltas.  The
    predicted boxes are kept in network-input scale, clipped to the input
    blob size, and only unscaled on return.

    Without an RPN the caller-supplied `boxes` are used directly, clipped to
    `im.shape` and returned unscaled.
    NOTE(review): this assumes caller-supplied boxes are in original-image
    coordinates, as in the stock Fast R-CNN `im_detect` -- confirm.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)
        num_classes (int): number of classes used to tile the precomputed
            bbox normalisation means/stds (box_deltas has 4 columns per class)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)
    use_rpn = cfg.TEST.HAS_RPN
    dedup = cfg.DEDUP_BOXES > 0 and not use_rpn

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if dedup:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, unique_index, inv_index = np.unique(hashes, return_index=True,
                                               return_inverse=True)
        blobs['rois'] = blobs['rois'][unique_index, :]
        boxes = boxes[unique_index, :]

    if use_rpn:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if use_rpn:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if use_rpn:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        scores = net.blobs['cls_score'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']
    box_deltas = blobs_out['bbox_pred'] if cfg.TEST.BBOX_REG else None

    if use_rpn:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # Kept in network-input scale; unscaled on return.
        boxes = rois[:, 1:5]
        # Drop all-zero roi rows together with their scores and deltas.
        # This filter has its own index variable: it previously shared one
        # with the dedup step and was applied on the non-RPN path as well.
        valid = np.where(np.sum(boxes, axis=1) != 0)[0]
        boxes = boxes[valid, :]
        scores = scores[valid]
        if box_deltas is not None:
            box_deltas = box_deltas[valid, :]
        data_shape = blobs['data'].shape
        # (height, width, channels) of the network input blob
        clip_shape = (data_shape[2], data_shape[3], data_shape[1])
    else:
        clip_shape = im.shape

    if box_deltas is not None:
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Undo the precomputed mean/stdev normalisation of the targets.
            means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                            (num_classes, 1)).ravel()
            stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS),
                           (num_classes, 1)).ravel()
            box_deltas = box_deltas * stds + means
        # Apply bounding-box regression deltas
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, clip_shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if dedup:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    if use_rpn:
        # unscale back to raw image space
        pred_boxes = pred_boxes / im_scales[0]
    return scores, pred_boxes
def im_detect(net, im, boxes=None):
    """Run the network on one image and return raw scores, probs and fc7.

    Bounding-box regression and ROI de-duplication are both switched off in
    this variant: the boxes are only tiled once per score column.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        cls_score (ndarray): data of the network's 'cls_score' blob
        scores (ndarray): the 'cls_score' data when cfg.TEST.SVM is set,
            otherwise the 'cls_prob' network output
        fc7 (ndarray): data of the network's 'fc7' blob (not copied)
        boxes (ndarray): the boxes repeated once per score column
    """
    blobs, im_scales = _get_blobs(im, boxes)
    use_rpn = cfg.TEST.HAS_RPN

    # Besides 'data' the network takes either 'im_info' (RPN) or 'rois'.
    if use_rpn:
        data = blobs['data']
        blobs['im_info'] = np.array(
            [[data.shape[2], data.shape[3], im_scales[0]]],
            dtype=np.float32)
        side_input = 'im_info'
    else:
        side_input = 'rois'
    input_names = ('data', side_input)

    # Reshape the network inputs, then run the forward pass.
    for name in input_names:
        net.blobs[name].reshape(*blobs[name].shape)
    net_inputs = {}
    for name in input_names:
        net_inputs[name] = blobs[name].astype(np.float32, copy=False)
    blobs_out = net.forward(**net_inputs)

    if use_rpn:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        net_rois = net.blobs['rois'].data.copy()
        # Unscale back to raw image space.
        boxes = net_rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # Use the raw scores before softmax under the assumption they were
        # trained as linear SVMs.
        scores = net.blobs['cls_score'].data
    else:
        # Use softmax estimated probabilities.
        scores = blobs_out['cls_prob']

    # Simply repeat the boxes, once for each class.
    pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    fc7 = net.blobs['fc7'].data
    raw_cls_score = net.blobs['cls_score'].data[:, :]
    return raw_cls_score, scores, fc7, pred_boxes
def test_net(net, imdb, vis=0):
    """Test RON network on an image database.

    Images are pushed through `im_detect` in batches of
    cfg.TEST.BATCH_SIZE.  Per image, boxes are thresholded on objectness,
    rescaled to the original image size, clipped, size-filtered and
    NMS-ed per class.  All detections are pickled to 'detections.pkl' in
    the output directory and handed to `imdb.evaluate_detections`.

    The last batch is shortened when the number of images is not a multiple
    of the batch size; it previously indexed past the end of the image list.
    NOTE(review): this relies on `im_detect` accepting a list shorter than
    the configured batch size -- confirm.

    Arguments:
        net: network passed to `im_detect` and `get_output_dir`
        imdb: image database (image_index, image_path_at, num_classes,
            classes and evaluate_detections are used)
        vis: when truthy, every per-class detection set is visualised
    """
    num_images = len(imdb.image_index)
    # all_boxes[class][image] = N x 5 array of detections
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, net)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    batch_size = cfg.TEST.BATCH_SIZE

    for i in range(0, num_images, batch_size):
        _t['misc'].tic()
        # Never read past the last image.
        batch_len = min(batch_size, num_images - i)
        ims = [cv2.imread(imdb.image_path_at(i + im_i))
               for im_i in range(batch_len)]

        _t['im_detect'].tic()
        batch_scores, batch_boxes = im_detect(net, ims)
        _t['im_detect'].toc()

        for im_i in range(batch_len):
            im = ims[im_i]
            scores = batch_scores[im_i]
            boxes = batch_boxes[im_i]

            # filter boxes according to prob scores
            confident = np.where(scores[:, 0] > cfg.TEST.PROB)[0]
            scores = scores[confident, :]
            boxes = boxes[confident, :]

            # change boxes according to input size and the original image size
            im_shape = im.shape[0:2]
            im_scales = float(cfg.TEST.SCALES[0]) / np.array(im_shape)
            boxes[:, 0::2] = boxes[:, 0::2] / im_scales[1]
            boxes[:, 1::2] = boxes[:, 1::2] / im_scales[0]

            # filter boxes with small sizes
            boxes = clip_boxes(boxes, im_shape)
            big_enough = filter_boxes(boxes, cfg.TEST.RON_MIN_SIZE)
            scores = scores[big_enough, :]
            boxes = boxes[big_enough, :]

            # Weight every class score by the objectness in column 0.
            scores = np.tile(scores[:, 0],
                             (imdb.num_classes, 1)).transpose() * scores

            for j in range(1, imdb.num_classes):
                inds = np.where(scores[:, j] > cfg.TEST.DET_MIN_PROB)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, :]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                nms_keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[nms_keep, :]
                if len(nms_keep) > cfg.TEST.BOXES_PER_CLASS:
                    cls_dets = cls_dets[:cfg.TEST.BOXES_PER_CLASS, :]
                all_boxes[j][i + im_i] = cls_dets

                if vis:
                    vis_detections(im, imdb.classes[j], cls_dets)
        _t['misc'].toc()

        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def forward(self, bottom, top):
    """Turn RPN scores and bbox deltas into a ranked list of RoIs.

    Inputs (``bottom``): 0 -- class probabilities, 1 -- bbox deltas,
    2 -- ``im_info`` (row 0 is read as image size and scale).

    Steps: generate the anchors for every score-map cell, apply the
    predicted deltas, clip to the image, drop boxes below the minimum size,
    rank by score, keep the top pre-NMS candidates, run NMS and keep the top
    post-NMS candidates.  ``top[0]`` receives the RoIs behind an all-zero
    batch-index column; ``top[1]``, when present, receives their scores.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # Phase 0 selects the TRAIN settings, anything else the TEST settings.
    phase_cfg = cfg.TRAIN if self.phase == 0 else cfg.TEST
    top_n_before_nms = phase_cfg.RPN_PRE_NMS_TOP_N
    top_n_after_nms = phase_cfg.RPN_POST_NMS_TOP_N
    overlap_thresh = phase_cfg.RPN_NMS_THRESH
    min_box_size = phase_cfg.RPN_MIN_SIZE

    num_anchors = self._num_anchors
    # The first num_anchors channels are bg probs; the remaining ones are
    # the fg probs, which are the ones wanted here.
    fg_scores = bottom[0].data[:, num_anchors:, :, :]
    deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors.
    map_h, map_w = fg_scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(fg_scores.shape))

    # One (x, y, x, y) shift per score-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, map_w) * self._feat_stride,
                                 np.arange(0, map_h) * self._feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast the A base anchors (1, A, 4) against the K shifts (K, 1, 4)
    # to get (K, A, 4), then flatten to (K*A, 4).
    num_cells = shifts.shape[0]
    all_anchors = self._anchors.reshape((1, num_anchors, 4)) + \
        shifts.reshape((num_cells, 1, 4))
    all_anchors = all_anchors.reshape((num_cells * num_anchors, 4))

    # Bring deltas and scores into the anchors' (h, w, a) row order:
    #   deltas (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4)
    #   scores (1, A, H, W)   -> (1, H, W, A)   -> (H*W*A, 1)
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(all_anchors, deltas)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. Remove boxes with either height or width below the threshold
    #    (min size is converted to the input image scale in im_info[2]).
    big_enough = _filter_boxes(proposals, min_box_size * im_info[2])
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]

    # 4./5. Rank by score, best first, and keep the top pre-NMS candidates.
    ranking = scores.ravel().argsort()[::-1]
    if top_n_before_nms > 0:
        ranking = ranking[:top_n_before_nms]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6./7. NMS, then keep the top post-NMS candidates.
    survivors = nms(np.hstack((proposals, scores)), overlap_thresh)
    if top_n_after_nms > 0:
        survivors = survivors[:top_n_after_nms]
    proposals = proposals[survivors, :]
    scores = scores[survivors]

    # 8. Output the rois blob.  Only a single input image is supported, so
    #    all batch inds are 0.
    zero_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    roi_blob = np.hstack((zero_col,
                          proposals.astype(np.float32, copy=False)))
    top[0].reshape(*roi_blob.shape)
    top[0].data[...] = roi_blob

    # [Optional] output scores blob.
    if len(top) > 1:
        top[1].reshape(*scores.shape)
        top[1].data[...] = scores
def forward(self, bottom, top):
    """Build RoI proposals from every configured feature stride into ``top[0]``.

    For each stride in ``self._feat_stride`` the foreground scores and bbox
    deltas are cropped to the un-padded image extent, applied to the shifted
    anchors, clipped to the image and filtered by minimum size.  The
    per-stride results are stacked, ranked by score, passed through NMS and,
    when ``post_nms_topN`` is positive, padded by re-sampling kept indices so
    that exactly ``post_nms_topN`` rows are written.

    Args:
        bottom: input blobs.  ``bottom[0]`` holds im_info (row 0 is read;
            its first two entries are handed to ``clip_boxes`` and the third
            scales ``min_size``).  ``bottom[1]``..``bottom[5]`` are the bbox
            predictions for strides 4, 8, 16, 32, 64 and
            ``bottom[6]``..``bottom[10]`` the class probabilities for the
            same strides.
        top: output blobs.  ``top[0]`` is reshaped to, and filled with, an
            (N, 5) array whose first column is the batch index (always 0).

    Raises:
        ValueError: if ``bottom[1]`` contains more than one image.
    """
    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = self._min_sizes

    im_info = bottom[0].data[0, :]
    batch_size = bottom[1].data.shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    cls_prob_dict = {
        'stride64': bottom[10].data,
        'stride32': bottom[9].data,
        'stride16': bottom[8].data,
        'stride8': bottom[7].data,
        'stride4': bottom[6].data,
    }
    bbox_pred_dict = {
        'stride64': bottom[5].data,
        'stride32': bottom[4].data,
        'stride16': bottom[3].data,
        'stride8': bottom[2].data,
        'stride4': bottom[1].data,
    }

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                       ratios=self._ratios)
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = cls_prob_dict['stride' + str(s)][:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Bring the bbox deltas into the same row order as the anchors:
        # crop to (height, width) via _clip_pad, move channels last, then
        # flatten to rows of 4 (ordered by h, w, a from slowest to fastest).
        bbox_deltas = _clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores, flattened to a single column.
        scores = _clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposal_list.append(proposals[keep, :])
        score_list.append(scores[keep])

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det, nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    if post_nms_topN > 0 and len(keep) == 0:
        # Nothing survived NMS, so there is no index to re-sample from
        # (npr.choice would raise on an empty pool).  Emit post_nms_topN
        # dummy boxes (0, 0, 16, 16) so the output keeps its fixed size.
        proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
        proposals[:, 2] = 16
        proposals[:, 3] = 16
    else:
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob
def forward(self, bottom, top):
    """Turn RPN scores and bbox deltas into ranked, NMS-filtered RoI proposals.

    Algorithm:

        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS with the configured threshold to remaining proposals
        take after_nms_topN proposals after NMS
        return the top proposals (-> RoIs top, scores top)

    Args:
        bottom: input blobs.  ``bottom[0]`` holds the class probabilities
            (channels from ``self._num_anchors`` onward are used as the
            foreground scores), ``bottom[1]`` the bbox deltas and
            ``bottom[2]`` im_info (row 0 is read; its first two entries are
            handed to ``clip_boxes`` and the third scales ``min_size``).
        top: output blobs.  ``top[0]`` receives an (N, 5) array whose first
            column is the batch index (always 0); if a second top exists it
            receives the matching scores.

    Raises:
        AssertionError: if ``bottom[0]`` contains more than one item.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    if DEBUG:
        # Single parenthesised argument: prints the same text on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # min_size is converted to the input image scale stored in im_info[2],
    # so the threshold is applied in the same coordinates as the proposals.
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores
def forward(self, bottom, top):
    """Turn precomputed anchors plus RPN outputs into NMS-filtered proposals.

    Unlike a layer that enumerates anchors itself, this variant reads the
    anchors directly from ``bottom[0]`` and applies the predicted deltas to
    them.  Steps:

        apply predicted bbox deltas to the supplied anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS with the configured threshold to remaining proposals
        take after_nms_topN proposals after NMS
        return the top proposals (-> RoIs top, scores top)

    Args:
        bottom: input blobs.  ``bottom[0]`` holds the anchors, ``bottom[1]``
            the class probabilities (channel 0 is dropped), ``bottom[2]``
            the bbox deltas and ``bottom[3]`` im_info (row 0 is read; its
            first two entries are handed to ``clip_boxes`` and the third
            scales ``min_size``).
        top: output blobs.  ``top[0]`` receives an (N, 5) array whose first
            column is the batch index (always 0); if a second top exists it
            receives the matching scores.

    Side effects:
        Updates ``self._num_anchors`` and writes the raw scores to a MATLAB
        file named ``scores`` through ``sio.savemat`` on every call.
    """
    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Floor division keeps the anchor count an integer under both
    # Python 2 and Python 3 (plain `/` would produce a float on Python 3).
    self._num_anchors = bottom[0].shape[0] // 2

    # Drop channel 0 of the class probabilities and keep the rest as scores
    # (presumably channel 0 is the background probability -- TODO confirm).
    scores = bottom[1].data[:, 1:, :, :]
    # NOTE(review): this dumps the scores to disk on every forward pass; it
    # looks like leftover debugging output -- confirm before relying on it.
    sio.savemat('scores', {'scores': scores})
    bbox_deltas = bottom[2].data
    im_info = bottom[3].data[0, :]
    anchors = bottom[0].data

    # Take the first item and index 0 of the last axis of the deltas, then
    # flatten to rows of 4 so they line up with the anchors
    # (assumes the flattened row order matches bottom[0] -- TODO confirm).
    bbox_deltas = bbox_deltas[0, :, :, 0]
    bbox_deltas = bbox_deltas.reshape((-1, 4))

    # Flatten the scores to a single column, one row per proposal.
    scores = scores.reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    top[0].reshape(*(blob.shape))
    top[0].data[...] = blob

    # [Optional] output scores blob
    if len(top) > 1:
        top[1].reshape(*(scores.shape))
        top[1].data[...] = scores