def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors, is_tfchannel=False):
    """Turn RPN outputs into a scored, NMS-filtered list of proposals.

    A simplified version compared to fast/er RCNN. For details please see
    the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities. With ``is_tfchannel`` it is
            reshaped to ``(-1, 2)`` and column 1 is used as the score;
            otherwise channels ``num_anchors:`` of the last axis are used.
        rpn_bbox_pred: RPN box regression output, reshaped to ``(-1, 4)``.
            It is never modified in place.
        im_info: per-image info; only row 0 is read. Elements 0-1 are the
            clipping bounds passed to ``clip_boxes`` and element 2 scales
            ``RPN_MIN_SIZE``.
        cfg_key: key into ``cfg`` (``str`` or ``bytes``) selecting the
            settings to use.
        _feat_stride: unused.
        anchors: anchor boxes passed to ``bbox_transform_inv``.
        num_anchors: offset of the scores inside ``rpn_cls_prob`` when
            ``is_tfchannel`` is false.
        is_tfchannel: selects the score layout described above.

    Returns:
        ``(blob, scores)``: ``blob`` is float32 with a leading all-zero
        column (batch index) followed by the proposal columns; ``scores``
        is the flattened score of each kept proposal.
    """
    # isinstance (rather than an exact type comparison) also decodes bytes
    # subclasses such as numpy.bytes_.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')
    stage_cfg = cfg[cfg_key]
    pre_nms_topN = stage_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = stage_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = stage_cfg.RPN_NMS_THRESH

    im_info = im_info[0]

    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))

    bbox_deltas = rpn_bbox_pred.reshape((-1, 4))
    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        # reshape() normally returns a view, so scaling it in place would
        # silently rewrite the caller's array. Work on a private copy; the
        # in-place operators keep the original dtype.
        bbox_deltas = bbox_deltas.copy()
        bbox_deltas *= cfg.TRAIN.RPN_NORMALIZE_STDS
        bbox_deltas += cfg.TRAIN.RPN_NORMALIZE_MEANS

    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Filter boxes that are too small (threshold is scaled by im_info[2])
    if 'RPN_MIN_SIZE' in stage_cfg.keys():
        min_size = stage_cfg.RPN_MIN_SIZE
        if min_size > 0:
            keep = _filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores.flatten()
def inference(val_func, inputs, data_dict):
    """Run the detector on one image and collect per-class detections.

    Args:
        val_func: callable invoked as ``val_func(feed_dict=...)``; it must
            return four values, of which the last three are used as
            ``scores``, ``pred_boxes`` and ``rois``.
        inputs: indexable; ``inputs[0]`` and ``inputs[1]`` are used as the
            feed-dict keys for the image batch and ``im_info``.
        data_dict: dict whose ``'data'`` entry is the image array.

    Returns:
        A shallow copy of ``data_dict`` with an added ``'result_boxes'``
        list of ``DetBox`` objects (x, y, width, height, tag, score),
        truncated to the ``config.test_max_boxes_per_image`` best scores.
    """
    image = data_dict['data']
    ori_shape = image.shape

    # Deliberately an equality test: only an explicit False/0 disables
    # resizing, exactly as before.
    if config.eval_resize == False:  # noqa: E712
        resized_img, scale = image, 1
    else:
        resized_img, scale = dataset.resize_img_by_short_and_max_size(
            image, config.eval_image_short_size, config.eval_image_max_size)
    height, width = resized_img.shape[0:2]

    # Mean subtraction happens before the channel order is reversed
    # (presumably BGR <-> RGB -- confirm against the training pipeline).
    resized_img = resized_img.astype(np.float32) - config.image_mean
    resized_img = np.ascontiguousarray(resized_img[:, :, [2, 1, 0]])

    im_info = np.array(
        [[height, width, scale, ori_shape[0], ori_shape[1], 0]],
        dtype=np.float32)

    feed_dict = {inputs[0]: resized_img[None, :, :, :], inputs[1]: im_info}
    _, scores, pred_boxes, rois = val_func(feed_dict=feed_dict)

    # Column 0 of rois is skipped; the rest is mapped back to the
    # original image scale.
    boxes = rois[:, 1:5] / scale
    if cfg.TEST.BBOX_REG:
        pred_boxes = bbox_transform_inv(boxes, pred_boxes)
        pred_boxes = clip_boxes(pred_boxes, ori_shape)

    pred_boxes = pred_boxes.reshape(-1, config.num_classes, 4)
    result_boxes = []
    # Class 0 is skipped (presumably background).
    for j in range(1, config.num_classes):
        inds = np.where(scores[:, j] > config.test_cls_threshold)[0]
        cls_scores = scores[inds, j]
        cls_bboxes = pred_boxes[inds, j, :]
        cls_dets = np.hstack(
            (cls_bboxes, cls_scores[:, np.newaxis])).astype(
                np.float32, copy=False)

        keep = nms(cls_dets, config.test_nms)
        # np.float no longer exists in current NumPy, and
        # np.array(..., copy=False) raises on NumPy 2 when a conversion
        # copy is required; np.asarray copies only when needed.
        cls_dets = np.asarray(cls_dets[keep, :], dtype=np.float64)
        for db in cls_dets:
            dbox = DetBox(
                db[0], db[1], db[2] - db[0], db[3] - db[1],
                tag=config.class_names[j], score=db[-1])
            result_boxes.append(dbox)

    if len(result_boxes) > config.test_max_boxes_per_image:
        result_boxes = sorted(
            result_boxes, reverse=True, key=lambda t_res: t_res.score) \
            [:config.test_max_boxes_per_image]

    result_dict = data_dict.copy()
    result_dict['result_boxes'] = result_boxes
    return result_dict
def proposal_without_nms_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                               feat_stride, anchors, num_anchors,
                               is_tfchannel=False):
    """Decode, clip, size-filter and rank RPN proposals without running NMS.

    Args:
        rpn_cls_prob: RPN class probabilities. With ``is_tfchannel`` it is
            reshaped to ``(-1, 2)`` and column 1 is used as the score;
            otherwise channels ``num_anchors:`` of the last axis are used.
        rpn_bbox_pred: RPN box regression output, reshaped to ``(-1, 4)``.
            It is never modified in place.
        im_info: per-image info; only row 0 is read. Elements 0-1 are the
            clipping bounds and element 2 scales ``RPN_MIN_SIZE``.
        cfg_key: ``'TRAIN'`` selects ``cfg.TRAIN``; any other value uses
            ``cfg.TEST`` for the top-N limit. May be ``str`` or ``bytes``.
        feat_stride: unused.
        anchors: anchor boxes passed to ``bbox_transform_inv``.
        num_anchors: offset of the scores inside ``rpn_cls_prob`` when
            ``is_tfchannel`` is false.
        is_tfchannel: selects the score layout described above.

    Returns:
        ``(proposals, scores, proposals_addone)``: proposals sorted by
        descending score, their flattened scores, and a copy of the
        proposals with columns 2 and 3 increased by one.
    """
    # isinstance (rather than an exact type comparison) also decodes bytes
    # subclasses such as numpy.bytes_, which would otherwise fail the
    # 'TRAIN' comparison and silently select the TEST settings.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
    else:
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
    im_info = im_info[0]

    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))

    bbox_deltas = rpn_bbox_pred.reshape((-1, 4))
    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        # reshape() normally returns a view, so scaling it in place would
        # silently rewrite the caller's array. Work on a private copy; the
        # in-place operators keep the original dtype.
        bbox_deltas = bbox_deltas.copy()
        bbox_deltas *= cfg.TRAIN.RPN_NORMALIZE_STDS
        bbox_deltas += cfg.TRAIN.RPN_NORMALIZE_MEANS

    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Filter boxes; a key other than 'TRAIN'/'TEST' disables the filter.
    min_size = 0
    if cfg_key == 'TRAIN':
        if 'RPN_MIN_SIZE' in cfg.TRAIN.keys():
            min_size = cfg.TRAIN.RPN_MIN_SIZE
    elif cfg_key == 'TEST':
        if 'RPN_MIN_SIZE' in cfg.TEST.keys():
            min_size = cfg.TEST.RPN_MIN_SIZE

    if min_size > 0:
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order].flatten()

    # Why add one: tf nms assumes x2, y2 do not include the border.
    proposals_addone = np.array(proposals)
    proposals_addone[:, 2] += 1
    proposals_addone[:, 3] += 1
    return proposals, scores, proposals_addone
def inference(val_func, inputs, data_dict):
    """Run the detector on one image and collect per-class detections.

    Args:
        val_func: callable invoked as ``val_func(feed_dict=...)``; it must
            return four values, of which the last three are used as
            ``scores``, ``pred_boxes`` and ``rois``.
        inputs: indexable; ``inputs[0]`` and ``inputs[1]`` are used as the
            feed-dict keys for the image batch and ``im_info``.
        data_dict: dict whose ``'data'`` entry is the image array.

    Returns:
        A shallow copy of ``data_dict`` with an added ``'result_boxes'``
        list of ``DetBox`` objects (x, y, width, height, tag, score),
        truncated to the ``config.test_max_boxes_per_image`` best scores.
    """
    image = data_dict['data']
    ori_shape = image.shape

    # Deliberately an equality test: only an explicit False/0 disables
    # resizing, exactly as before.
    if config.eval_resize == False:  # noqa: E712
        resized_img, scale = image, 1
    else:
        resized_img, scale = dataset.resize_img_by_short_and_max_size(
            image, config.eval_image_short_size, config.eval_image_max_size)
    height, width = resized_img.shape[0:2]

    # Mean subtraction happens before the channel order is reversed
    # (presumably BGR <-> RGB -- confirm against the training pipeline).
    resized_img = resized_img.astype(np.float32) - config.image_mean
    resized_img = np.ascontiguousarray(resized_img[:, :, [2, 1, 0]])

    im_info = np.array(
        [[height, width, scale, ori_shape[0], ori_shape[1], 0]],
        dtype=np.float32)

    feed_dict = {inputs[0]: resized_img[None, :, :, :], inputs[1]: im_info}
    _, scores, pred_boxes, rois = val_func(feed_dict=feed_dict)

    # Column 0 of rois is skipped; the rest is mapped back to the
    # original image scale.
    boxes = rois[:, 1:5] / scale
    if cfg.TEST.BBOX_REG:
        pred_boxes = bbox_transform_inv(boxes, pred_boxes)
        pred_boxes = clip_boxes(pred_boxes, ori_shape)

    pred_boxes = pred_boxes.reshape(-1, config.num_classes, 4)
    result_boxes = []
    # Class 0 is skipped (presumably background).
    for j in range(1, config.num_classes):
        inds = np.where(scores[:, j] > config.test_cls_threshold)[0]
        cls_scores = scores[inds, j]
        cls_bboxes = pred_boxes[inds, j, :]
        cls_dets = np.hstack(
            (cls_bboxes, cls_scores[:, np.newaxis])).astype(
                np.float32, copy=False)

        keep = nms(cls_dets, config.test_nms)
        # np.float no longer exists in current NumPy, and
        # np.array(..., copy=False) raises on NumPy 2 when a conversion
        # copy is required; np.asarray copies only when needed.
        cls_dets = np.asarray(cls_dets[keep, :], dtype=np.float64)
        for db in cls_dets:
            dbox = DetBox(
                db[0], db[1], db[2] - db[0], db[3] - db[1],
                tag=config.class_names[j], score=db[-1])
            result_boxes.append(dbox)

    if len(result_boxes) > config.test_max_boxes_per_image:
        result_boxes = sorted(
            result_boxes, reverse=True, key=lambda t_res: t_res.score) \
            [:config.test_max_boxes_per_image]

    result_dict = data_dict.copy()
    result_dict['result_boxes'] = result_boxes
    return result_dict
def coco_results_one_category_kernel(data_pack):
    """Convert one category's detections into COCO-style result dicts.

    Args:
        data_pack: dict with the keys
            ``'cat_id'``: copied into every result as ``'category_id'``.
            ``'ann_type'``: ``'bbox'`` or ``'segm'``.
            ``'binary_thresh'``: forwarded to ``mask_voc2coco``
            (``'segm'`` only).
            ``'all_im_info'``: sequence of per-image dicts; ``'index'`` is
            always read, ``'width'``/``'height'`` only for ``'segm'``.
            ``'boxes'``: per-image arrays aligned with ``all_im_info``;
            columns 0-3 are used as x1, y1, x2, y2 and the last column as
            the score.
            ``'masks'``: per-image masks (``'segm'`` only).

    Returns:
        A flat list of result dicts. ``'bbox'`` results carry
        ``[x, y, w, h]`` with ``w = x2 - x1 + 1`` and ``h = y2 - y1 + 1``;
        ``'segm'`` results carry the encoded mask under ``'segmentation'``.
        Images without detections contribute nothing.

    Raises:
        ValueError: if ``ann_type`` is neither ``'bbox'`` nor ``'segm'``.
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']

    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # Fail up front instead of printing and later hitting an unbound
        # local name.
        raise ValueError('unimplemented ann_type: {}'.format(ann_type))

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # astype() always copies, so the clipping below never touches the
        # caller's array. np.float no longer exists in current NumPy.
        dets = boxes[im_ind].astype(np.float64)
        if len(dets) == 0:
            continue

        scores = dets[:, -1]
        if ann_type == 'bbox':
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k],
            } for k in range(dets.shape[0])]
        else:  # 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4],
                                        height, width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k],
            } for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """Keep the ``cfg.TEST.RPN_TOP_N`` best-scoring proposals, skipping NMS.

    Scores are taken from channels ``num_anchors:`` of the last axis of
    ``rpn_cls_prob``. When fewer candidates than ``RPN_TOP_N`` exist, the
    indices are drawn at random with replacement so the output always has
    ``RPN_TOP_N`` rows. ``_feat_stride`` is unused.

    Returns:
        ``(blob, scores)``: ``blob`` is float32 with a leading all-zero
        batch-index column followed by the clipped proposal columns;
        ``scores`` holds the matching scores as a column.
    """
    top_n = cfg.TEST.RPN_TOP_N
    image_info = im_info[0]

    flat_scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    flat_deltas = rpn_bbox_pred.reshape((-1, 4))
    candidate_count = flat_scores.shape[0]

    if candidate_count >= top_n:
        # Highest scores first, then cut to the requested amount.
        ranked = flat_scores.argsort(0)[::-1]
        selected = ranked[:top_n].reshape(top_n, )
    else:
        # Too few candidates: sample with replacement. This may be
        # unnecessary and can lose good proposals, but rarely happens.
        selected = npr.choice(candidate_count, size=top_n, replace=True)

    picked_anchors = anchors[selected, :]
    picked_deltas = flat_deltas[selected, :]
    picked_scores = flat_scores[selected]

    # Anchors -> proposals via the bbox transform, clipped to the image.
    boxes = clip_boxes(bbox_transform_inv(picked_anchors, picked_deltas),
                       image_info[:2])

    # The RPN implementation only supports a single input image, so every
    # batch index is 0.
    zero_column = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    rois_blob = np.hstack((zero_column, boxes.astype(np.float32, copy=False)))
    return rois_blob, picked_scores
def proposal_without_nms_layer(rpn_cls_prob_fg, rpn_bbox_pred, im_info,
                               anchors):
    """Decode, clip and rank RPN proposals without running NMS.

    Args:
        rpn_cls_prob_fg: scores, one per anchor; reshaped to ``(-1, 1)``.
        rpn_bbox_pred: 2-D box regression output with at least four
            columns. It is never modified in place.
        im_info: per-image info; only row 0 is read and its first two
            elements are passed to ``clip_boxes``.
        anchors: anchor boxes passed to ``bbox_transform_inv``.

    Returns:
        ``(proposals, scores, proposals_addone, order)``: proposals sorted
        by descending score, their flattened scores, a copy of the
        proposals with columns 2 and 3 increased by one, and the indices
        that were selected from the unsorted input.
    """
    pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
    im_info = im_info[0]

    scores = rpn_cls_prob_fg.reshape((-1, 1))

    # Clamp on a private copy so the caller's array is left untouched.
    # Columns 2-3 are capped at 20 (presumably the log-scale size deltas,
    # to keep the exponent in bbox_transform_inv bounded -- confirm).
    bbox_deltas = np.array(rpn_bbox_pred)
    bbox_deltas[:, 2:4] = np.minimum(20, bbox_deltas[:, 2:4])

    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order].flatten()

    # Why add one: tf nms assumes x2, y2 do not include the border.
    proposals_addone = np.array(proposals)
    proposals_addone[:, 2] += 1
    proposals_addone[:, 3] += 1
    return proposals, scores, proposals_addone, order
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchors, num_anchors, is_tfchannel=False):
    """Turn RPN outputs into a scored, NMS-filtered list of proposals.

    A simplified version compared to fast/er RCNN. For details please see
    the technical report.

    Args:
        rpn_cls_prob: RPN class probabilities. With ``is_tfchannel`` it is
            reshaped to ``(-1, 2)`` and column 1 is used as the score;
            otherwise channels ``num_anchors:`` of the last axis are used.
        rpn_bbox_pred: RPN box regression output, reshaped to ``(-1, 4)``.
            It is never modified in place.
        im_info: per-image info; only row 0 is read. Elements 0-1 are the
            clipping bounds passed to ``clip_boxes`` and element 2 scales
            ``RPN_MIN_SIZE``.
        cfg_key: ``'TRAIN'`` selects ``cfg.TRAIN``; any other value uses
            ``cfg.TEST`` for the NMS settings. May be ``str`` or ``bytes``.
        _feat_stride: unused.
        anchors: anchor boxes passed to ``bbox_transform_inv``.
        num_anchors: offset of the scores inside ``rpn_cls_prob`` when
            ``is_tfchannel`` is false.
        is_tfchannel: selects the score layout described above.

    Returns:
        ``(blob, scores)``: ``blob`` is float32 with a leading all-zero
        column (batch index) followed by the proposal columns; ``scores``
        is the flattened score of each kept proposal.
    """
    # isinstance (rather than an exact type comparison) also decodes bytes
    # subclasses such as numpy.bytes_, which would otherwise fail the
    # 'TRAIN' comparison and silently select the TEST settings.
    if isinstance(cfg_key, bytes):
        cfg_key = cfg_key.decode('utf-8')

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
    else:
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH

    im_info = im_info[0]

    # Get the scores and bounding boxes
    if is_tfchannel:
        scores = rpn_cls_prob.reshape(-1, 2)[:, 1]
    else:
        scores = rpn_cls_prob[:, :, :, num_anchors:]
    scores = scores.reshape((-1, 1))

    bbox_deltas = rpn_bbox_pred.reshape((-1, 4))
    if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \
            and cfg.TRAIN.RPN_NORMALIZE_TARGETS:
        # reshape() normally returns a view, so scaling it in place would
        # silently rewrite the caller's array. Work on a private copy; the
        # in-place operators keep the original dtype.
        bbox_deltas = bbox_deltas.copy()
        bbox_deltas *= cfg.TRAIN.RPN_NORMALIZE_STDS
        bbox_deltas += cfg.TRAIN.RPN_NORMALIZE_MEANS

    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # Filter boxes; a key other than 'TRAIN'/'TEST' disables the filter.
    min_size = 0
    if cfg_key == 'TRAIN':
        if 'RPN_MIN_SIZE' in cfg.TRAIN.keys():
            min_size = cfg.TRAIN.RPN_MIN_SIZE
    elif cfg_key == 'TEST':
        if 'RPN_MIN_SIZE' in cfg.TEST.keys():
            min_size = cfg.TEST.RPN_MIN_SIZE

    if min_size > 0:
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores.flatten()