def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    blobs['labels'] = np.zeros((1, cfg.MODEL.NUM_CLASSES), dtype=np.int32)
    return blobs, im_scale
def im_detect_mask_aug(model, im, boxes, blob_conv):
    """Performs mask detection with test-time augmentations."""
    assert not cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'

    # Collect masks computed under different transformations
    masks_ts = []

    # Compute masks for the original image (identity transform)
    _, im_scale_i, _ = blob_utils.get_image_blob(
        im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    masks_i = im_detect_mask(model, im_scale_i, boxes, blob_conv)
    masks_ts.append(masks_i)

    # Perform mask detection on the horizontally flipped image
    if cfg.TEST.MASK_AUG.H_FLIP:
        masks_hf = im_detect_mask_hflip(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes, blob_conv)
        masks_ts.append(masks_hf)

    # Compute detections at different scales
    # for scale in cfg.TEST.MASK_AUG.SCALES:
    #     max_size = cfg.TEST.MASK_AUG.MAX_SIZE
    #     masks_scl = im_detect_mask_scale(
    #         model, im, scale, max_size, boxes, blob_conv)
    #     masks_ts.append(masks_scl)
    #     if cfg.TEST.MASK_AUG.SCALE_H_FLIP:
    #         masks_scl_hf = im_detect_mask_scale(
    #             model, im, scale, max_size, boxes, blob_conv, hflip=True)
    #         masks_ts.append(masks_scl_hf)

    # Compute masks at different aspect ratios
    # for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS:
    #     masks_ar = im_detect_mask_aspect_ratio(
    #         model, im, aspect_ratio, boxes, blob_conv)
    #     masks_ts.append(masks_ar)
    #     if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP:
    #         masks_ar_hf = im_detect_mask_aspect_ratio(
    #             model, im, aspect_ratio, boxes, blob_conv, hflip=True)
    #         masks_ts.append(masks_ar_hf)

    # Combine the predicted soft masks
    if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG':
        masks_c = np.mean(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX':
        masks_c = np.amax(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG':

        def logit(y):
            return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

        logit_masks = [logit(y) for y in masks_ts]
        logit_masks = np.mean(logit_masks, axis=0)
        masks_c = 1.0 / (1.0 + np.exp(-logit_masks))
    else:
        raise NotImplementedError(
            'Heuristic {} not supported'.format(cfg.TEST.MASK_AUG.HEUR))

    return masks_c
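# Illustration (not part of the original code): a minimal, self-contained
# sketch of the LOGIT_AVG heuristic above. Soft masks from the different
# test-time transforms are averaged in logit space and mapped back to
# probabilities with a sigmoid; the probability values below are made up.
def _logit_avg_demo():
    def logit(y):
        return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

    # Two hypothetical per-transform soft-mask values for the same two pixels
    masks_ts = [np.array([0.2, 0.9]), np.array([0.4, 0.8])]
    mean_logit = np.mean([logit(m) for m in masks_ts], axis=0)
    return 1.0 / (1.0 + np.exp(-mean_logit))  # combined, e.g. [~0.29, ~0.86]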
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            'rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)
        ]
        # Note the spelling: Facebook Detectron uses 'rpn_roi_probs_fpn'.
        # This name is not used during training and is harmless.
        score_names = [
            'rpn_rois_prob_fpn' + str(l) for l in range(k_min, k_max + 1)
        ]
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(
            [return_dict[roi_name].cpu().numpy() for roi_name in rois_names])
        scores = np.concatenate([
            return_dict[score_name].cpu().numpy()
            for score_name in score_names
        ]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = return_dict['rpn_rois'].cpu().numpy(), \
            return_dict['rpn_roi_probs'].cpu().numpy()
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes.
    # Scale proposals back to the original input image scale.
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
def im_detect_mask_scale(
        model, im, target_scale, target_max_size, boxes, blob_conv,
        hflip=False):
    """Computes masks at the given scale."""
    if hflip:
        masks_scl = im_detect_mask_hflip(
            model, im, target_scale, target_max_size, boxes, blob_conv)
    else:
        _, im_scale, _ = blob_utils.get_image_blob(
            im, target_scale, target_max_size)
        masks_scl = im_detect_mask(model, im_scale, boxes, blob_conv)
    return masks_scl
def im_detect_mask_hflip(
        model, im, target_scale, target_max_size, boxes, blob_conv):
    """Performs mask detection on the horizontally flipped image.
    Function signature is the same as for im_detect_mask_aug.
    """
    # Compute the masks for the flipped image
    im_hf = im[:, ::-1, :]
    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])

    _, im_scale, _ = blob_utils.get_image_blob(
        im_hf, target_scale, target_max_size)
    # im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size)
    masks_hf = im_detect_mask(model, im_scale, boxes_hf, blob_conv)

    # Invert the predicted soft masks
    masks_inv = masks_hf[:, :, :, ::-1]
    return masks_inv
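# Sketch (an assumption about what box_utils.flip_boxes does, not a copy of
# it): horizontally flipping [x1, y1, x2, y2] boxes inside an image of width W
# mirrors the x coordinates, which is why the soft masks predicted on the
# flipped image can simply be flipped back with masks_hf[:, :, :, ::-1].
def _flip_boxes_sketch(boxes, im_width):
    flipped = boxes.copy()
    flipped[:, 0] = im_width - boxes[:, 2] - 1  # new x1 comes from the old x2
    flipped[:, 2] = im_width - boxes[:, 0] - 1  # new x2 comes from the old x1
    return flipped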
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'), core.ScopedName('rpn_roi_probs')])
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes.
    # Scale proposals back to the original input image scale.
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
def im_detect_mask_aspect_ratio(
        model, im, aspect_ratio, boxes, blob_conv, hflip=False):
    """Computes mask detections at the given width-relative aspect ratio."""
    # Perform mask detection on the transformed image
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)
    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        masks_ar = im_detect_mask_hflip(
            model, im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes_ar,
            blob_conv)
    else:
        _, im_scale, _ = blob_utils.get_image_blob(
            im_ar, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
        masks_ar = im_detect_mask(model, im_scale, boxes_ar, blob_conv)

    return masks_ar
def im_proposals(model, im, roidb=None):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    inputs['data'] = [torch.from_numpy(inputs['data'])]
    inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]
    if roidb is not None:
        inputs['roidb'] = [[roidb]]

    return_dict = model(**inputs)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois = [
            return_dict['rpn_rois_fpn' + str(l)]
            for l in range(k_min, k_max + 1)
        ]
        scores = [
            return_dict['rpn_rois_prob_fpn' + str(l)]
            for l in range(k_min, k_max + 1)
        ]
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(rois)
        scores = np.concatenate(scores).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes = return_dict['rpn_rois'].data.cpu().numpy()
        scores = return_dict['rpn_roi_probs'].data.cpu().numpy().squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes.
    # Scale proposals back to the original input image scale.
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
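# Illustration (not part of the original code): the final step shared by all
# im_proposals variants above. The network emits proposals as
# (batch_idx, x1, y1, x2, y2) rows in the resized-input coordinate frame, so
# dropping column 0 and dividing by im_scale returns boxes in the original
# image's coordinates. The numbers below are made up.
def _proposals_to_image_coords_demo():
    rois = np.array([[0., 100., 40., 300., 200.]])  # (batch_idx, x1, y1, x2, y2)
    im_scale = 2.0                                  # resized input / original size
    return rois[:, 1:] / im_scale                   # -> [[50., 20., 150., 100.]]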
def im_classify_bbox(model, im, box_proposals, timers=None):
    """Classify the given box proposals on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Build the RoI blob: scale the proposals to the network input resolution
    # and prepend the batch index column
    sampled_rois = box_proposals * inputs['im_info'][0, 2]
    repeated_batch_idx = blob_utils.zeros((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))
    inputs['rois'] = sampled_rois

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(inputs)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)
    workspace.RunNet(model.net.Proto().name)

    if cfg.MODEL.TYPE == 'region_classification':
        cls_prob = core.ScopedName('cls_prob')
    elif cfg.MODEL.TYPE == 'region_memory':
        cls_prob = core.ScopedName('final/cls_prob')
    else:
        raise NotImplementedError
    cls_scores = workspace.FetchBlob(cls_prob)
    timers['im_detect_bbox'].toc()

    timers['misc_bbox'].tic()
    timers['misc_bbox'].toc()

    return cls_scores
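# Sketch (an assumption, not a copy of _add_multilevel_rois): with
# cfg.FPN.MULTILEVEL_ROIS, each RoI is typically routed to an FPN level with
# the heuristic from the FPN paper, k = floor(k0 + log2(sqrt(area) / 224)),
# clipped to [ROI_MIN_LEVEL, ROI_MAX_LEVEL]. The hypothetical helper below
# only illustrates that mapping for [batch_idx, x1, y1, x2, y2] rois.
def _map_rois_to_fpn_levels_sketch(rois, k_min=2, k_max=5, k0=4, s0=224.):
    widths = rois[:, 3] - rois[:, 1] + 1
    heights = rois[:, 4] - rois[:, 2] + 1
    areas = widths * heights
    levels = np.floor(k0 + np.log2(np.sqrt(areas) / s0 + 1e-6))
    return np.clip(levels, k_min, k_max).astype(np.int32)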
def get_model(cfg_file, weights_file):
    merge_cfg_from_file(cfg_file)
    cfg.TRAIN.WEIGHTS = ''  # NOTE: do not download pretrained model weights
    cfg.TEST.WEIGHTS = weights_file
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    # Build the model according to the cfg
    model = initialize_model_from_cfg(weights_file)
    return model


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    args = parse_args()
    model = get_model(args.cfg, args.wts)
    img = cv2.imread(args.img)

    # im_scale = im_conv_body_only(model, img, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        img, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    with c2_utils.NamedCudaScope(0):
        # workspace.FeedBlob(core.ScopedName('data'), im_blob)
        # workspace.RunNet(model.net.Proto().name)
        # blob = workspace.FetchBlob('rois')
        cls_b, _, _ = infer_engine.im_detect_all(model, img, None)
        blobs = workspace.Blobs()
        print(blobs)
        mask_logits = workspace.FetchBlob(core.ScopedName('mask_logits'))
        # print(mask_logits)
        print(mask_logits.shape)
        np.save('/data1/shuai/adas/code/mask_logits.npy', mask_logits)
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()

    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    # Caffe2 version kept for reference:
    # cls_probs, box_preds = [], []
    # for lvl in range(k_min, k_max + 1):
    #     suffix = 'fpn{}'.format(lvl)
    #     cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
    #     box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    # for k, v in inputs.items():
    #     workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    # workspace.RunNet(model.net.Proto().name)
    # cls_probs = workspace.FetchBlobs(cls_probs)
    # box_preds = workspace.FetchBlobs(box_preds)
    return_dict = model(**inputs)
    cls_probs = return_dict['cls_score']
    box_preds = return_dict['bbox_pred']

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((  # e.g. [1, 9, 80, 112, 160]
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((  # e.g. [1, 9, 4, 112, 160]
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.data.cpu().numpy().squeeze().ravel()
        box_pred = box_pred.data.cpu().numpy()

        # In some cases [especially for very small img sizes], it's possible
        # that candidate_inds is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl k_max, which has small spatial resolution,
        # we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        # Keep only candidates above the threshold, then take the top
        # pre_nms_topn of those
        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        # Transform the flat indices back into the 5-d shape of cls_prob;
        # inds_5d columns are (batch, anchor_id, class, y, x)
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4]  - scores
    #   detections[:, 5]  - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
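# Illustration (not part of the original code): how the flat indices selected
# in im_detect_bbox map back to anchors. np.unravel_index recovers
# (batch, anchor_id, class, y, x) from an index into the raveled class
# probability volume, and the (x, y) cell coordinates become image-space boxes
# by multiplying with the level stride and adding the cell anchor. The shapes,
# indices, and zero anchors below are made up.
def _decode_retinanet_index_demo():
    cls_prob_shape = (1, 9, 80, 112, 160)   # (N, A, classes, H, W)
    flat_inds = np.array([12345, 678901])   # hypothetical selected indices
    inds_5d = np.array(np.unravel_index(flat_inds, cls_prob_shape)).transpose()
    anchor_ids, classes = inds_5d[:, 1], inds_5d[:, 2]
    y, x = inds_5d[:, 3], inds_5d[:, 4]
    stride = 2. ** 3                        # stride of FPN level 3
    cell_anchors = np.zeros((9, 4))         # stands in for anchors[lvl]
    boxes = np.column_stack((x, y, x, y)).astype(np.float32)
    boxes = boxes * stride + cell_anchors[anchor_ids, :]
    return boxes, classes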