def im_detect_all(model, im, box_proposals, timers=None):
    """Detect boxes on one image and return them with a scalar score.

    Args:
        model: Detection model forwarded to ``im_detect_bbox``.
        im: Input image forwarded to ``im_detect_bbox``.
        box_proposals: Proposals passed as ``boxes=`` to ``im_detect_bbox``.
        timers: Optional mapping of name -> ``Timer``. A fresh
            ``defaultdict(Timer)`` is created when omitted.

    Returns:
        Tuple ``(cls_boxes, total_uc)``. ``cls_boxes`` is the third value
        returned by ``box_results_with_nms_and_limit``. ``total_uc`` is the
        result of ``calc_ent_class`` on the fourth value (presumably an
        entropy-based uncertainty -- confirm against ``calc_ent_class``),
        or ``1.0`` when no detections survive post-processing.
    """
    timers = defaultdict(Timer) if timers is None else timers

    timers['im_detect_bbox'].tic()
    raw_scores, raw_boxes, im_scale = im_detect_bbox(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
    )
    timers['im_detect_bbox'].toc()

    timers['misc_bbox'].tic()
    scores, boxes, cls_boxes, entropy_m = box_results_with_nms_and_limit(
        raw_scores, raw_boxes
    )
    timers['misc_bbox'].toc()

    # The entropy helper is always invoked, even when nothing was detected;
    # its result is only discarded afterwards in the empty case.
    entropy_uc = calc_ent_class(entropy_m)
    total_uc = entropy_uc if scores.shape[0] > 0 else 1.0
    return cls_boxes, total_uc
def get_detections_from_im(cfg, model, im, image_id, feat_blob_name,
                           MIN_BOXES, MAX_BOXES, conf_thresh=0.2,
                           bboxes=None):
    """Run detection on one image and return features for the kept boxes.

    Args:
        cfg: Config object; ``cfg.TEST.SCALE``, ``cfg.TEST.MAX_SIZE`` and
            ``cfg.TEST.NMS`` are read.
        model: Detection model forwarded to ``infer_engine.im_detect_bbox``.
        im: Input image; its first two dimensions are reported back.
        image_id: Unused here; kept for interface compatibility.
        feat_blob_name: Name of the workspace blob holding per-box features.
        MIN_BOXES: Lower bound on the number of boxes returned.
        MAX_BOXES: Upper bound on the number of boxes returned.
        conf_thresh: Minimum per-box best class score for a box to be kept.
        bboxes: Optional precomputed boxes passed as ``boxes=``.

    Returns:
        Tuple ``(feat_map, box_features, boxes, img_shape)``: the
        ``gpu_0/res5_2_branch2c`` blob, the selected rows of the feature
        blob, the selected boxes rescaled by ``1 / im_scale``, and
        ``np.array([np.size(im, 0), np.size(im, 1)])``.
    """
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        feat_map = workspace.FetchBlob("gpu_0/res5_2_branch2c")
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))

        # Unscale the RoIs back to raw image space; column 0 of `rois` is
        # skipped and columns 1..4 are used as the box.
        cls_boxes = rois[:, 1:5] / im_scale

        # Class index 0 is skipped; for every other class keep, per box, the
        # best score among the boxes that survive that class's NMS.
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        # Outside [MIN_BOXES, MAX_BOXES], fall back to the top-scoring boxes.
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

    img_shape = [np.size(im, 0), np.size(im, 1)]
    return (feat_map, box_features[keep_boxes], cls_boxes[keep_boxes],
            np.array(img_shape))
def im_detect_w_features_func(model, im, box_proposals, timers=None):
    """Run box (and optionally mask / keypoint) inference on one image.

    Args:
        model: Detection model forwarded to the ``im_detect`` helpers.
        im: Input image; ``im.shape[0]`` and ``im.shape[1]`` are used when
            building segmentation results.
        box_proposals: Proposals forwarded to the bbox detection helpers.
        timers: Optional mapping of name -> ``Timer``. A fresh
            ``defaultdict(Timer)`` is created when omitted.

    Returns:
        Tuple ``(cls_boxes, cls_segms, cls_keyps, cls_feats)``.
        ``cls_segms`` is ``None`` unless ``cfg.MODEL.MASK_ON`` is set and at
        least one box was detected; likewise ``cls_keyps`` for
        ``cfg.MODEL.KEYPOINTS_ON``. ``cls_feats`` is the fourth value
        returned by ``box_results_with_nms_and_limit``. When
        ``cfg.RETINANET.RETINANET_ON`` is set, only ``cls_boxes`` is
        populated and the other three entries are ``None``.
    """
    if timers is None:
        timers = defaultdict(Timer)

    # Handle RetinaNet testing separately for now. The tuple has the same
    # arity as the regular return so callers can always unpack four values.
    if cfg.RETINANET.RETINANET_ON:
        cls_boxes = test_retinanet.im_detect_bbox(model, im, timers)
        return cls_boxes, None, None, None

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale = im_detect.im_detect_bbox_aug(
            model, im, box_proposals)
    else:
        scores, boxes, im_scale = im_detect.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
        )
    timers['im_detect_bbox'].toc()

    # score and boxes are from the whole image after score thresholding and
    # nms (they are not separated by class)
    # cls_boxes boxes and scores are separated by class and in the format
    # used for evaluating results
    timers['misc_bbox'].tic()
    # cls_feats is the extra fourth value from box_results_with_nms_and_limit
    scores, boxes, cls_boxes, cls_feats = box_results_with_nms_and_limit(
        scores, boxes)
    timers['misc_bbox'].toc()

    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect.im_detect_mask_aug(model, im, boxes)
        else:
            masks = im_detect.im_detect_mask(model, im_scale, boxes)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        cls_segms = im_detect.segm_results(
            cls_boxes, masks, boxes, im.shape[0], im.shape[1]
        )
        timers['misc_mask'].toc()
    else:
        cls_segms = None

    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect.im_detect_keypoints_aug(model, im, boxes)
        else:
            heatmaps = im_detect.im_detect_keypoints(model, im_scale, boxes)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = im_detect.keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()
    else:
        cls_keyps = None

    return cls_boxes, cls_segms, cls_keyps, cls_feats
def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    conf_thresh=0.2,
    bboxes=None,
):
    """Detect objects on one image and return region features and boxes.

    Args:
        cfg: Config object; ``cfg.TEST.SCALE``, ``cfg.TEST.MAX_SIZE``,
            ``cfg.TEST.NMS`` and ``cfg.MODEL.NUM_CLASSES`` are read.
        model: Detection model forwarded to ``infer.im_detect_bbox``.
        im: Input image.
        image_id: Unused here; kept for interface compatibility.
        feat_blob_name: Name of the workspace blob holding region features.
        MIN_BOXES: Lower bound on the number of boxes returned.
        MAX_BOXES: Upper bound on the number of boxes returned.
        conf_thresh: Minimum best class score for a box to be kept;
            must be non-negative.
        bboxes: Optional precomputed boxes passed as ``boxes=``.

    Returns:
        Tuple ``(region_feat, cls_prob, box_info)``: the selected rows of
        the feature blob, the selected rows of the score matrix, and an
        array whose columns are four box values, the best class index and
        its score.
    """
    assert conf_thresh >= 0
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)

        best_score = np.zeros((num_rpn, ), dtype=np.float32)
        best_class = np.zeros((num_rpn, ), dtype=np.float32)
        best_box = np.zeros((num_rpn, 4), dtype=np.float32)

        # NOTE(review): `cls_boxes` is stacked whole with the score column
        # and the first four columns are taken as the box. This assumes
        # `cls_boxes` has exactly four columns -- confirm against
        # `infer.im_detect_bbox`; a per-class layout would need slicing.
        # Column 0 of the scores matrix is for the background class.
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            survivors = np.array(nms(dets, cfg.TEST.NMS))
            # Only NMS survivors whose score beats the running best update.
            improved = survivors[
                cls_scores[survivors] > best_score[survivors]]
            best_score[improved] = cls_scores[improved]
            best_class[improved] = cls_ind
            best_box[improved] = dets[improved, :4]

        keep_boxes = np.where(best_score >= conf_thresh)[0]
        num_confident = len(keep_boxes)
        # Outside [MIN_BOXES, MAX_BOXES], fall back to the top-scoring boxes.
        if num_confident < MIN_BOXES or num_confident > MAX_BOXES:
            limit = MIN_BOXES if num_confident < MIN_BOXES else MAX_BOXES
            keep_boxes = np.argsort(best_score)[::-1][:limit]

        # The class label is the best-scoring class recorded above.
        objects = best_class[keep_boxes]
        obj_prob = best_score[keep_boxes]
        obj_boxes = best_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    box_info = np.concatenate(
        (obj_boxes,
         np.reshape(objects, (-1, 1)),
         np.reshape(obj_prob, (-1, 1))),
        axis=-1)
    return region_feat[keep_boxes], cls_prob, box_info
def detect_image(detectron_model, image, args):
    """Extract object boxes, features, classes and confidences from an image.

    Parameters
    ----------
    detectron_model
        Detectron model.
    image : np.ndarray
        Image in BGR format.
    args : argparse.Namespace
        Parsed command-line arguments; ``gpu_id``, ``feat_name`` and
        ``max_boxes`` are read.

    Returns
    -------
    np.ndarray, np.ndarray, np.ndarray, np.ndarray
        Object bounding boxes, features, classes and confidences, in that
        order, for the ``args.max_boxes`` highest-scoring proposals.
    """
    scores, cls_boxes, im_scale = detectron_test.im_detect_bbox(
        detectron_model,
        image,
        detectron_config.TEST.SCALE,
        detectron_config.TEST.MAX_SIZE,
        boxes=None,
    )
    num_proposals = scores.shape[0]

    blob_prefix = f"gpu_{args.gpu_id}"
    rois = workspace.FetchBlob(f"{blob_prefix}/rois")
    features = workspace.FetchBlob(f"{blob_prefix}/{args.feat_name}")

    # Boxes come from the RoI blob (columns 1..4), rescaled by 1 / im_scale.
    cls_boxes = rois[:, 1:5] / im_scale

    best_score = np.zeros((num_proposals, ), dtype=np.float32)
    best_class = np.zeros((num_proposals, ), dtype=np.int32)
    best_box = np.zeros((num_proposals, 4), dtype=np.float32)

    # Class index 0 is skipped; each remaining class runs its own NMS.
    for cls_ind in range(1, detectron_config.MODEL.NUM_CLASSES):
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        survivors = np.array(detectron_nms(dets, detectron_config.TEST.NMS))
        # Only NMS survivors whose score beats the running best update.
        improved = survivors[cls_scores[survivors] > best_score[survivors]]
        best_score[improved] = cls_scores[improved]
        best_class[improved] = cls_ind
        best_box[improved] = dets[improved, :4]

    ranked = np.argsort(best_score)[::-1]
    keep_boxes = ranked[:args.max_boxes]

    return (
        best_box[keep_boxes, :],
        features[keep_boxes, :],
        best_class[keep_boxes],
        best_score[keep_boxes],
    )
def run_mask_net(model, image, layer, unit_index):
    """Return one unit's activation at the spatial centre of a layer.

    Runs bbox detection and then the mask head on a fixed box, reads the
    blob ``gpu_0/<layer>`` from the workspace, and returns the value at
    ``[0, unit_index, H // 2, W // 2]``, where H and W are the blob's
    third and fourth dimensions.

    Args:
        model: Detection model forwarded to the detection helpers.
        image: Input image forwarded to ``im_detect_bbox``.
        layer: Blob name (without the ``gpu_0/`` prefix) to read.
        unit_index: Index along the blob's second dimension.

    Returns:
        The selected activation value.
    """
    with c2_utils.NamedCudaScope(0):
        _scores, _boxes, im_scale = im_detect_bbox(
            model, image, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=None)

        # Use a fixed box: we don't want a box tightly wrapped around the
        # stimulus.
        fixed_box = np.array([[0, 0, 399, 399]])
        im_detect_mask(model, im_scale, fixed_box)

        activities = workspace.blobs['gpu_0/{}'.format(layer)]
        centre_row = int(activities.shape[2] / 2)
        centre_col = int(activities.shape[3] / 2)
        return activities[0, unit_index, centre_row, centre_col]
def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    background=False,
    conf_thresh=0.2,
    bboxes=None,
):
    """Run detection on one image and return features of the kept boxes.

    Args:
        cfg: Config object; ``cfg.TEST.SCALE``, ``cfg.TEST.MAX_SIZE`` and
            ``cfg.TEST.NMS`` are read.
        model: Detection model forwarded to ``im_detect_bbox``.
        im: Input image.
        image_id: Unused here; kept for interface compatibility.
        feat_blob_name: Name of the workspace blob holding per-box features.
        MIN_BOXES: Lower bound on the number of boxes returned.
        MAX_BOXES: Upper bound on the number of boxes returned.
        background: When true, class column 0 also takes part in scoring.
        conf_thresh: Minimum per-box best class score for a box to be kept.
        bboxes: Optional precomputed boxes passed as ``boxes=``.

    Returns:
        The rows of the feature blob that belong to the selected boxes.
    """
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("cpu_0/cls_prob")
        rois = workspace.FetchBlob("cpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))

        # Unscale the RoIs back to raw image space; column 0 of `rois` is
        # skipped and columns 1..4 are used as the box.
        cls_boxes = rois[:, 1:5] / im_scale

        # Column 0 of the scores matrix is for the background class, so it
        # is skipped unless the caller asked for it explicitly.
        start_index = 0 if background else 1

        for cls_ind in range(start_index, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        # Outside [MIN_BOXES, MAX_BOXES], fall back to the top-scoring boxes.
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

    return box_features[keep_boxes]
def get_detections_from_im(cfg, model, im, image_id, feat_blob_name,
                           MIN_BOXES, MAX_BOXES, conf_thresh=0.2,
                           bboxes=None, meta=False):
    """Run detection on one image and return box features as torch tensors.

    Args:
        cfg: Config object; ``cfg.TEST.SCALE``, ``cfg.TEST.MAX_SIZE`` and
            ``cfg.TEST.NMS`` are read.
        model: Detection model forwarded to ``infer_engine.im_detect_bbox``.
        im: Input image.
        image_id: Identifier echoed back in the metadata dict.
        feat_blob_name: Name of the workspace blob holding per-box features.
        MIN_BOXES: Lower bound on the number of boxes returned.
        MAX_BOXES: Upper bound on the number of boxes returned.
        conf_thresh: Minimum per-box best class score for a box to be kept.
        bboxes: Optional precomputed boxes passed as ``boxes=``.
        meta: When true, return a dict with image metadata, boxes, features
            and class indices instead of the feature tensor alone.

    Returns:
        A tensor of the selected box features, or (when ``meta`` is true) a
        dict with keys ``image_id``, ``image_h``, ``image_w``,
        ``num_boxes``, ``boxes``, ``features`` and ``object``.
    """
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        best_score = np.zeros((rois.shape[0]))

        # Unscale the RoIs back to raw image space.
        cls_boxes = rois[:, 1:5] / im_scale

        # Class index 0 is skipped; each remaining class runs its own NMS
        # and raises the per-box best score where it is beaten.
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            survivors = np.array(nms(dets, cfg.TEST.NMS))
            candidate = cls_scores[survivors]
            current = best_score[survivors]
            best_score[survivors] = np.where(
                candidate > current, candidate, current)

        keep_boxes = np.where(best_score >= conf_thresh)[0]
        num_confident = len(keep_boxes)
        # Outside [MIN_BOXES, MAX_BOXES], fall back to the top-scoring boxes.
        if num_confident < MIN_BOXES or num_confident > MAX_BOXES:
            limit = MIN_BOXES if num_confident < MIN_BOXES else MAX_BOXES
            keep_boxes = np.argsort(best_score)[::-1][:limit]

        objects = np.argmax(cls_prob[keep_boxes], axis=1)

    feature_tensor = torch.from_numpy(box_features[keep_boxes])
    if not meta:
        return feature_tensor

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': torch.from_numpy(cls_boxes[keep_boxes]).round().int(),
        'features': feature_tensor,
        'object': torch.from_numpy(objects),
    }
def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    """Detect objects on one image and return a dict describing them.

    Args:
        cfg: Config object; ``cfg.TEST.SCALE``, ``cfg.TEST.MAX_SIZE``,
            ``cfg.TEST.NMS`` and ``cfg.MODEL.NUM_CLASSES`` are read.
        model: Detection model forwarded to ``infer.im_detect_bbox``.
        im: Input image; its first two dimensions are reported back.
        image_id: Identifier echoed back in the result.
        featmap_blob_name: Unused here; kept for interface compatibility.
        feat_blob_name: Name of the workspace blob holding region features.
        MIN_BOXES: Lower bound on the number of boxes returned.
        MAX_BOXES: Upper bound on the number of boxes returned.
        conf_thresh: Best class score a box must strictly exceed to be
            kept; must be non-negative.
        bboxes: Optional precomputed boxes passed as ``boxes=``.

    Returns:
        Dict with keys ``image_id``, ``image_h``, ``image_w``,
        ``num_boxes``, ``boxes``, ``region_feat``, ``object``, ``obj_prob``
        and ``cls_prob``.
    """
    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)

        best_score = np.zeros((num_rpn, ), dtype=np.float32)
        best_class = np.zeros((num_rpn, ), dtype=np.int32)
        best_box = np.zeros((num_rpn, 4), dtype=np.float32)

        # Class index 0 is skipped. Each class reads its own four box
        # columns out of `cls_boxes` before running NMS.
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            first_col = cls_ind * 4
            class_boxes = cls_boxes[:, first_col:(first_col + 4)]
            dets = np.hstack(
                (class_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            survivors = np.array(nms(dets, cfg.TEST.NMS))
            # Only NMS survivors whose score beats the running best update.
            improved = survivors[
                cls_scores[survivors] > best_score[survivors]]
            best_score[improved] = cls_scores[improved]
            best_class[improved] = cls_ind
            best_box[improved] = dets[improved, :4]

        keep_boxes = np.where(best_score > conf_thresh)[0]
        num_confident = len(keep_boxes)
        # Outside [MIN_BOXES, MAX_BOXES], fall back to the top-scoring boxes.
        if num_confident < MIN_BOXES or num_confident > MAX_BOXES:
            limit = MIN_BOXES if num_confident < MIN_BOXES else MAX_BOXES
            keep_boxes = np.argsort(best_score)[::-1][:limit]

        objects = best_class[keep_boxes]
        obj_prob = best_score[keep_boxes]
        obj_boxes = best_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    # Sanity check: every predicted label must be a valid class index.
    assert (np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0)

    result = {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob,
    }
    return result
def im_detect_bbox_aug(model, im, box_proposals=None):
    """Performs bbox detection with test-time augmentations.

    Function signature is the same as for im_detect_bbox.

    Detections are computed on the horizontally flipped image (if
    ``cfg.TEST.BBOX_AUG.H_FLIP``), at every scale in
    ``cfg.TEST.BBOX_AUG.SCALES`` (optionally also flipped), and finally on
    the original image. Scores and boxes are then combined according to
    ``cfg.TEST.BBOX_AUG.SCORE_HEUR`` / ``COORD_HEUR`` (``'ID'``, ``'AVG'``
    or ``'UNION'``).

    Args:
        model: Detection model forwarded to the detection helpers.
        im: Input image.
        box_proposals: Optional proposals forwarded to every detection call.

    Returns:
        Tuple ``(scores_c, boxes_c, im_scale_i)``: the combined scores, the
        combined boxes, and the image scale from the identity-transform
        pass.

    Raises:
        AssertionError: If the augmentation config is inconsistent.
        NotImplementedError: If a score/coord heuristic is not supported.
    """
    assert not cfg.TEST.BBOX_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'
    assert not cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION', \
        'Coord heuristic must be union whenever score heuristic is union'
    assert not cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION' or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Score heuristic must be union whenever coord heuristic is union'
    assert not cfg.MODEL.FASTER_RCNN or \
        cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION', \
        'Union heuristic must be used to combine Faster RCNN predictions'

    # Collect detections computed under different transformations
    scores_ts = []
    boxes_ts = []

    def add_preds_t(scores_t, boxes_t):
        # Takes exactly the two values every call site passes; a third,
        # unused transform-type parameter used to make every call fail.
        scores_ts.append(scores_t)
        boxes_ts.append(boxes_t)

    # Perform detection on the horizontally flipped image
    if cfg.TEST.BBOX_AUG.H_FLIP:
        scores_hf, boxes_hf, _ = im_detect_bbox_hflip(
            model,
            im,
            cfg.TEST.SCALE,
            cfg.TEST.MAX_SIZE,
            box_proposals=box_proposals
        )
        add_preds_t(scores_hf, boxes_hf)

    # Compute detections at different scales
    for scale in cfg.TEST.BBOX_AUG.SCALES:
        max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
        scores_scl, boxes_scl = im_detect_bbox_scale(
            model, im, scale, max_size, box_proposals
        )
        add_preds_t(scores_scl, boxes_scl)

        if cfg.TEST.BBOX_AUG.SCALE_H_FLIP:
            scores_scl_hf, boxes_scl_hf = im_detect_bbox_scale(
                model, im, scale, max_size, box_proposals, hflip=True
            )
            add_preds_t(scores_scl_hf, boxes_scl_hf)

    # Compute detections for the original image (identity transform) last to
    # ensure that the Caffe2 workspace is populated with blobs corresponding
    # to the original image on return (postcondition of im_detect_bbox)
    scores_i, boxes_i, im_scale_i = im_detect_bbox(
        model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=box_proposals
    )
    add_preds_t(scores_i, boxes_i)

    # Combine the predicted scores
    if cfg.TEST.BBOX_AUG.SCORE_HEUR == 'ID':
        scores_c = scores_i
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'AVG':
        scores_c = np.mean(scores_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.SCORE_HEUR == 'UNION':
        scores_c = np.vstack(scores_ts)
    else:
        raise NotImplementedError('Score heur {} not supported'.format(
            cfg.TEST.BBOX_AUG.SCORE_HEUR))

    # Combine the predicted boxes
    if cfg.TEST.BBOX_AUG.COORD_HEUR == 'ID':
        boxes_c = boxes_i
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'AVG':
        boxes_c = np.mean(boxes_ts, axis=0)
    elif cfg.TEST.BBOX_AUG.COORD_HEUR == 'UNION':
        boxes_c = np.vstack(boxes_ts)
    else:
        raise NotImplementedError('Coord heur {} not supported'.format(
            cfg.TEST.BBOX_AUG.COORD_HEUR))

    return scores_c, boxes_c, im_scale_i