def box_results_with_nms_and_limit(scores, boxes, confs=None):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False)
        if cfg.STD_NMS:
            confs_j = confs[inds, j * 4:(j + 1) * 4]
            nms_dets, _ = py_nms_utils.soft(dets_j, confidence=confs_j)
        elif cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes

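An illustrative sketch (not from the source) of the shape contract above on synthetic detections. It assumes a Detectron-style `cfg` is already loaded (so `cfg.MODEL.NUM_CLASSES` matches the hypothetical 81 below and the `cfg.TEST.*` keys are populated) and that `cfg.STD_NMS` is disabled so `confs` can stay `None`.

import numpy as np

# Synthetic inputs: 300 proposals, 81 classes (hypothetical values).
num_dets, num_classes = 300, 81
rng = np.random.RandomState(0)
scores = rng.rand(num_dets, num_classes).astype(np.float32)
boxes = np.zeros((num_dets, 4 * num_classes), dtype=np.float32)
boxes[:, 0::4] = rng.rand(num_dets, num_classes) * 300.  # x0
boxes[:, 1::4] = rng.rand(num_dets, num_classes) * 300.  # y0
boxes[:, 2::4] = boxes[:, 0::4] + rng.rand(num_dets, num_classes) * 100. + 1.  # x1 > x0
boxes[:, 3::4] = boxes[:, 1::4] + rng.rand(num_dets, num_classes) * 100. + 1.  # y1 > y0
scores_out, boxes_out, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
# scores_out: (N,); boxes_out: (N, 4); cls_boxes[j]: (n_j, 5) rows of
# [x0, y0, x1, y1, score] for each foreground class j >= 1.
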
def get_detections_from_im(cfg, model, im, image_id, feat_blob_name,
                           MIN_BOXES, MAX_BOXES, conf_thresh=0.2,
                           bboxes=None):
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        feat_map = workspace.FetchBlob("gpu_0/res5_2_branch2c")
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        objects = np.argmax(cls_prob[keep_boxes], axis=1)
        img_shape = [np.size(im, 0), np.size(im, 1)]

    return feat_map, box_features[keep_boxes], cls_boxes[keep_boxes], \
        np.array(img_shape)

def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    conf_thresh=0.2,
    bboxes=None,
):
    assert conf_thresh >= 0
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)
        max_conf = np.zeros((num_rpn, ), dtype=np.float32)
        max_cls = np.zeros((num_rpn, ), dtype=np.float32)
        max_box = np.zeros((num_rpn, 4), dtype=np.float32)
        # unscale back to raw image space
        # cls_boxes = rois[:, 1:5] / im_scale

        # Column 0 of the scores matrix is for the background class
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            inds_update = np.where(cls_scores[keep] > max_conf[keep])
            kinds = keep[inds_update]
            max_conf[kinds] = cls_scores[kinds]
            max_cls[kinds] = cls_ind
            max_box[kinds] = dets[kinds][:, :4]

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        # Predict the class label using the scores
        objects = max_cls[keep_boxes]
        obj_prob = max_conf[keep_boxes]
        obj_boxes = max_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    return region_feat[keep_boxes], cls_prob, np.concatenate(
        (obj_boxes, np.reshape(objects, (-1, 1)),
         np.reshape(obj_prob, (-1, 1))), axis=-1)

def proposals_for_one_image(self, im_info, bboxes, scores):
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # - bbox will be (4 * A, H, W) format from conv output
    # - transpose to (H, W, 4 * A)
    # - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
    #   in slowest to fastest order to match the enumerated anchors
    bboxes = bboxes.transpose((1, 2, 0)).reshape((-1, 4))

    # Same story for the scores:
    # - scores are (A, H, W) format from conv output
    # - transpose to (H, W, A)
    # - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #   to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    proposals = bboxes[order, :]
    scores = scores[order]

    # 2. clip proposals to image (may result in proposals with zero area
    # that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < min_size
    keep = _filter_boxes(proposals, min_size, im_info)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

    return proposals, scores

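`_filter_boxes` is called above but not defined in this section. For reference, here is a sketch consistent with the Detectron helper of that name; treat the details, in particular the scaling of `min_size` by `im_info[2]`, as an assumption rather than the canonical source.

import numpy as np

def _filter_boxes_sketch(boxes, min_size, im_info):
    """Keep boxes with both sides >= min_size and center inside the image.
    Sketch of the presumed helper; im_info = [height, width, scale]."""
    min_size *= im_info[2]  # min_size is given in raw-image pixels; scale it
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    x_ctr = boxes[:, 0] + ws / 2.
    y_ctr = boxes[:, 1] + hs / 2.
    keep = np.where((ws >= min_size) & (hs >= min_size) &
                    (x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
    return keep
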
def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    background=False,
    conf_thresh=0.2,
    bboxes=None,
):
    with c2_utils.NamedCudaScope(0):
        # mod by zhang: call the local im_detect_bbox (infer_engine removed)
        scores, cls_boxes, im_scale = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("cpu_0/cls_prob")
        rois = workspace.FetchBlob("cpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        start_index = 1
        # Column 0 of the scores matrix is for the background class
        if background:
            start_index = 0
        for cls_ind in range(start_index, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        # Predict the class label using the scores. Slice off the background
        # column(s), not rows: the original indexed rows by mistake
        # (`cls_prob[keep_boxes][start_index:]`). Note the resulting indices
        # are offset by start_index, and `objects` is unused below.
        objects = np.argmax(cls_prob[keep_boxes][:, start_index:], axis=1)

    return box_features[keep_boxes]

def mix_box_results_with_nms_and_limit(scores, boxes, mix_boxes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` (#detections, 4 * #classes)
    `mix_boxes` (#detections, 4 * #classes, mix)
    `scores` (#detections, #classes)
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    mix_dets = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        mix_boxes_j = mix_boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = box_utils.nms(dets_j, cfg.TEST.NMS)
        nms_dets = dets_j[keep, :]
        mix_dets[j] = mix_boxes_j[keep]
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
                mix_dets[j] = mix_dets[j][keep]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes, mix_dets

def get_detections_from_im(cfg, model, im, image_id, feat_blob_name,
                           MIN_BOXES, MAX_BOXES, conf_thresh=0.2,
                           bboxes=None, meta=False):
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
        objects = np.argmax(cls_prob[keep_boxes], axis=1)

    if meta:
        return {
            "image_id": image_id,
            "image_h": np.size(im, 0),
            "image_w": np.size(im, 1),
            'num_boxes': len(keep_boxes),
            'boxes': torch.from_numpy(cls_boxes[keep_boxes]).round().int(),
            # base64.b64encode(cls_boxes[keep_boxes]),
            'features': torch.from_numpy(box_features[keep_boxes]),
            # base64.b64encode(box_features[keep_boxes]),
            'object': torch.from_numpy(objects),
            # base64.b64encode(objects)
        }
    else:
        return torch.from_numpy(box_features[keep_boxes])

def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)
        max_conf = np.zeros((num_rpn, ), dtype=np.float32)
        max_cls = np.zeros((num_rpn, ), dtype=np.int32)
        max_box = np.zeros((num_rpn, 4), dtype=np.float32)

        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes[:, (cls_ind * 4):(cls_ind * 4 + 4)],
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            inds_update = np.where(cls_scores[keep] > max_conf[keep])
            kinds = keep[inds_update]
            max_conf[kinds] = cls_scores[kinds]
            max_cls[kinds] = cls_ind
            max_box[kinds] = dets[kinds][:, :4]

        keep_boxes = np.where(max_conf > conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        objects = max_cls[keep_boxes]
        obj_prob = max_conf[keep_boxes]
        obj_boxes = max_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    # print('{} ({}x{}): {} boxes, box size {}, feature size {}, class size {}'.format(
    #     image_id, np.size(im, 0), np.size(im, 1), len(keep_boxes),
    #     cls_boxes[keep_boxes].shape, box_features[keep_boxes].shape, objects.shape))
    # print(cls_boxes[keep_boxes][:10, :], objects[:10], obj_prob[:10])
    assert (np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0)
    # assert(np.min(obj_prob[:10]) >= 0.2)
    # if np.min(obj_prob) < 0.2:
    #     print('confidence score too low!', np.min(obj_prob[:10]))
    # if np.max(cls_boxes[keep_boxes]) > max(np.size(im, 0), np.size(im, 1)):
    #     print('box is offscreen!', np.max(cls_boxes[keep_boxes]),
    #           np.size(im, 0), np.size(im, 1))
    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }

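A hypothetical driver for the variant above. The blob names, `cfg`, and `model` are placeholders standing in for an already-configured Detectron/Caffe2 workspace, not values from the source.

import cv2  # OpenCV; Detectron expects BGR images

im = cv2.imread('example.jpg')
result = get_detections_from_im(
    cfg, model, im, image_id=0,
    featmap_blob_name='gpu_0/res5_2_branch2c',  # assumed blob name
    feat_blob_name='gpu_0/roi_feat',            # assumed blob name
    MIN_BOXES=10, MAX_BOXES=100)
# result['region_feat']: (num_boxes, feat_dim); result['boxes']: (num_boxes, 4)
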
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl 7 which has small spatial resolution, we
        # take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4]  - scores
    #   detections[:, 5]  - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()
    return cls_boxes

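Every variant in this section delegates suppression to `box_utils.nms` (or a bare `nms` import). For reference, a minimal pure-NumPy greedy NMS sketch that mirrors what such a routine computes; it is illustrative, not the cython implementation these functions actually call.

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy NMS. dets: (N, 5) rows of [x0, y0, x1, y1, score].
    Returns indices of kept boxes, highest score first."""
    x0, y0, x1, y1, s = (dets[:, i] for i in range(5))
    areas = (x1 - x0 + 1) * (y1 - y0 + 1)
    order = s.argsort()[::-1]  # process boxes in decreasing score order
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the top box with all remaining boxes
        xx0 = np.maximum(x0[i], x0[order[1:]])
        yy0 = np.maximum(y0[i], y0[order[1:]])
        xx1 = np.minimum(x1[i], x1[order[1:]])
        yy1 = np.minimum(y1[i], y1[order[1:]])
        inter = np.maximum(0., xx1 - xx0 + 1) * np.maximum(0., yy1 - yy0 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes that do not overlap the top box too much
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep
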
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    # - bbox deltas will be (4 * A, H, W) format from conv output
    # - transpose to (H, W, 4 * A)
    # - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
    #   in slowest to fastest order to match the enumerated anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

    # Same story for the scores:
    # - scores are (A, H, W) format from conv output
    # - transpose to (H, W, A)
    # - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #   to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(
            -scores.squeeze(), pre_nms_topN
        )[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(
        all_anchors, bbox_deltas, (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    # that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < min_size
    keep = _filter_boxes(proposals, min_size, im_info)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores

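`box_utils.bbox_transform` turns anchors plus predicted deltas into proposal boxes. Below is a sketch of that transform for the (N, 4) delta layout used above, including the customary `log(1000/16)` clamp on width/height deltas; treat the exact constants and the weights handling as assumptions about the library rather than its definitive code.

import numpy as np

def bbox_transform_sketch(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
    """Apply (dx, dy, dw, dh) deltas to (x0, y0, x1, y1) anchors."""
    wx, wy, ww, wh = weights
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy = deltas[:, 0] / wx, deltas[:, 1] / wy
    dw, dh = deltas[:, 2] / ww, deltas[:, 3] / wh
    # Clamp dw/dh so np.exp cannot overflow on degenerate predictions
    dw = np.minimum(dw, np.log(1000. / 16.))
    dh = np.minimum(dh, np.log(1000. / 16.))
    pred_ctr_x = ctr_x + dx * widths
    pred_ctr_y = ctr_y + dy * heights
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights
    pred = np.zeros_like(boxes, dtype=np.float32)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w - 1.0
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h - 1.0
    return pred
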
def get_target_class_weights(inputs, outputs):
    # inputs: 'rois', 'label_mask', 'im_info', 'is_source', 'cls_pred',
    # 'bbox_pred' (outputs is unused)
    import numpy as np
    import detectron.utils.boxes as box_utils

    rois = inputs[0].data
    dc_mask = inputs[1].data.astype(bool)
    im_info = inputs[2].data
    is_source = inputs[3].data.astype(bool)
    cls_pred = inputs[4].data
    bbox_pred = inputs[5].data

    this_im_info = im_info[~is_source, :][0, :]
    im_shape = this_im_info[:2]
    im_scale = this_im_info[2]
    # im_info is extended with its index in the roidb.
    im_idx = int(this_im_info[3])

    rois = rois[~dc_mask, :]
    boxes = rois[:, 1:5] / im_scale
    box_deltas = bbox_pred
    scores = cls_pred

    # if cfg.TEST.BBOX_REG:
    #     # Apply bounding-box regression deltas
    #     if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
    #         # Remove predictions for bg class (compat with MSRA code)
    #         box_deltas = box_deltas[:, -4:]
    pred_boxes = box_utils.bbox_transform(
        boxes, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS
    )
    pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im_shape)
    #     if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
    #         pred_boxes = np.tile(pred_boxes, (1, scores.shape[1]))
    # else:
    #     # Simply repeat the boxes, once for each class
    #     pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    # if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN:
    #     # Map scores and predictions back to the original set of boxes
    #     scores = scores[inv_index, :]
    #     pred_boxes = pred_boxes[inv_index, :]

    # > scores, boxes <
    num_classes = cfg.MODEL.NUM_CLASSES
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    sum_softmax = np.zeros((num_classes,))
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = pred_boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        # if cfg.TEST.SOFT_NMS.ENABLED:
        #     nms_dets, _ = box_utils.soft_nms(
        #         dets_j,
        #         sigma=cfg.TEST.SOFT_NMS.SIGMA,
        #         overlap_thresh=cfg.TEST.NMS,
        #         score_thresh=0.0001,
        #         method=cfg.TEST.SOFT_NMS.METHOD
        #     )
        # else:
        keep = box_utils.nms(dets_j, cfg.TEST.NMS)
        nms_dets = dets_j[keep, :]
        # # Refine the post-NMS boxes using bounding-box voting
        # if cfg.TEST.BBOX_VOTE.ENABLED:
        #     nms_dets = box_utils.box_voting(
        #         nms_dets,
        #         dets_j,
        #         cfg.TEST.BBOX_VOTE.VOTE_TH,
        #         scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
        #     )
        sum_softmax[j] = nms_dets[:, -1].sum()

    # `model` is assumed to be a module-level global here.
    model.class_weight_db.update_class_weights(im_idx, sum_softmax)

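`box_utils.clip_tiled_boxes` is used here (and in several functions above) to clamp per-class tiled box columns to the image extent. A sketch of the presumed behavior, with `im_shape = (height, width)`:

import numpy as np

def clip_tiled_boxes_sketch(boxes, im_shape):
    """Clip (x0, y0, x1, y1) columns, tiled once per class, to image bounds."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x0
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y0
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y1
    return boxes
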
def box_results_with_nms_and_limit(scores, boxes, feats=None):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Shadows cls_boxes, remembering the indices of the corresponding feats.
    feat_pointers = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
            if feats is not None:
                # Cast to an array so the fancy indexing below works even if
                # nms returns a plain list.
                feat_pointers[j] = np.asarray(keep)
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    if feats is not None:
        # Before limiting the number of detections, first collect the global
        # image class distribution:
        sum_softmax = np.zeros((num_classes, ))
        for j in range(1, num_classes):
            sum_softmax[j] = cls_boxes[j][:, -1].sum()
        # print('ndetects, sum_probs, min_prob, nclasses:',
        #       sum([len(subl) for subl in cls_boxes]), sum_softmax.sum(),
        #       sum_softmax.min(), sum([ss != 0 for ss in sum_softmax]))
    else:
        sum_softmax = None

    topk_feats = None
    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            sorted_scores = np.sort(image_scores)
            image_thresh = sorted_scores[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]
                if feats is not None:
                    feat_pointers[j] = feat_pointers[j][keep]
            if feats is not None:
                # At most the top 8 objects from each image. This block must
                # stay inside the branch above, since it uses sorted_scores.
                image_thresh = max(0.5, sorted_scores[-8])
                for j in range(1, num_classes):
                    keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                    feat_pointers[j] = feat_pointers[j][keep]
                feat_pointers = list(
                    set([p for points in feat_pointers for p in points]))
                topk_feats = feats[feat_pointers]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes, sum_softmax, topk_feats

def box_results_with_nms_and_limit(scores, boxes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False
        )
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes

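Several variants above branch on `cfg.TEST.SOFT_NMS.ENABLED`. For intuition, here is a compact pure-NumPy sketch of the soft-NMS rescoring idea (Bodla et al., 2017): instead of discarding boxes that overlap the current best detection, their scores are decayed. This is illustrative only and not the `box_utils.soft_nms` implementation called above.

import numpy as np

def soft_nms_sketch(dets, sigma=0.5, overlap_thresh=0.3,
                    score_thresh=0.001, method='linear'):
    """dets: (N, 5) rows of [x0, y0, x1, y1, score].
    Returns (kept_dets_with_decayed_scores, kept_original_indices)."""
    dets = dets.copy()
    inds = np.arange(dets.shape[0])
    out, keep = [], []
    while dets.shape[0] > 0:
        m = dets[:, 4].argmax()           # select the current best box
        best = dets[m].copy()
        out.append(best)
        keep.append(inds[m])
        dets = np.delete(dets, m, axis=0)
        inds = np.delete(inds, m)
        # IoU of the remaining boxes with the selected box
        xx0 = np.maximum(best[0], dets[:, 0])
        yy0 = np.maximum(best[1], dets[:, 1])
        xx1 = np.minimum(best[2], dets[:, 2])
        yy1 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0., xx1 - xx0 + 1) * np.maximum(0., yy1 - yy0 + 1)
        area_b = (best[2] - best[0] + 1) * (best[3] - best[1] + 1)
        areas = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area_b + areas - inter)
        # Decay overlapping scores instead of suppressing boxes outright
        if method == 'linear':
            decay = np.where(iou > overlap_thresh, 1. - iou, 1.)
        else:  # 'gaussian'
            decay = np.exp(-(iou ** 2) / sigma)
        dets[:, 4] *= decay
        alive = dets[:, 4] >= score_thresh
        dets, inds = dets[alive], inds[alive]
    return np.asarray(out).reshape(-1, 5), np.asarray(keep, dtype=np.int64)
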
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    if model1 is None and os.environ.get('COSIM'):
        print("COSIM mode requires model1")
    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE,
                                  cfg.TEST.SIZEFIX)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is {1}".format(
                    i, model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is {1}".format(
                    i, model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i, model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    # assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i, model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    # logging.warning("int8_output is {} and fp32 output is {}".format(
                    #     int8_results[j], fp32_results[j]))
                    # if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))
                logging.warning("pass checking op[{0}] {1} output".format(
                    i, model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    batch_size = cls_probs[0].shape[0]
    # One fresh dict per image; the original `[boxes_all] * batch_size`
    # aliased the same defaultdict across the whole batch.
    boxes_all_list = [defaultdict(list) for _ in range(batch_size)]
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]
        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()
            # In some cases [especially for very small img sizes], it's
            # possible that candidate_ind is empty if we impose threshold
            # 0.05 at all levels. This will lead to errors since no
            # detections are found for this image. Hence, for lvl 7 which
            # has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if (len(candidate_inds) == 0):
                continue

            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]

            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring
        # by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4]  - scores
        #   detections[:, 5]  - classes
        detections = np.vstack(detections)
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)
    timers['misc_bbox'].toc()
    return cls_boxes_list

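Illustrative only: consuming the batched output of the variant above. `model`, `cfg`, and a batched input `ims` are assumed to be set up as in the function; the loop itself is not from the source.

cls_boxes_list = im_detect_bbox(model, ims)
for c in range(1, cfg.MODEL.NUM_CLASSES):
    # First image in the batch; reshape handles both [] and (n, 5) entries.
    dets = np.asarray(cls_boxes_list[0][c]).reshape(-1, 5)
    for x0, y0, x1, y1, score in dets:
        print('class {}: {:.3f} at ({:.1f}, {:.1f}, {:.1f}, {:.1f})'.format(
            c, score, x0, y0, x1, y1))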