def add_gt_proposals(self, proposals, targets):
    """
    Append each image's ground-truth boxes to that image's proposals.

    Arguments:
        proposals: list[BoxList]
        targets: list[BoxList]

    Returns:
        list[BoxList]: one entry per (proposal, target) pair, holding the
        proposal boxes followed by the ground-truth boxes.
    """
    # Every dummy field is allocated on the device of the first proposal.
    device = proposals[0].bbox.device

    gt_boxes = []
    for target in targets:
        gt_box = target.copy_with_fields(['labels'])
        num_gt = len(gt_box)
        # The later concatenation requires the same fields on both operands,
        # so the ground truth gets placeholder values for the fields that
        # only proposals carry: objectness of one and all-zero regression
        # targets.
        gt_box.add_field("objectness", torch.ones(num_gt, device=device))
        gt_box.add_field(
            "regression_targets", torch.zeros((num_gt, 4), device=device))
        gt_boxes.append(gt_box)

    merged = []
    for proposal, gt_box in zip(proposals, gt_boxes):
        merged.append(cat_boxlist((proposal, gt_box)))
    return merged
def forward(self, anchors, objectness, box_regression, targets=None):
    """
    Turn per-level predictions into per-image proposals.

    Arguments:
        anchors: list[list[BoxList]]
        objectness: list[tensor]
        box_regression: list[tensor]
        targets: list[BoxList]

    Returns:
        boxlists (list[BoxList]): the post-processed anchors, after
            applying box decoding and NMS
    """
    num_levels = len(objectness)

    # Transpose the nested anchor list so that the outer index lines up with
    # the entries of `objectness` / `box_regression`.
    anchors_by_level = list(zip(*anchors))
    per_level = [
        self.forward_for_single_feature_map(level_anchors, level_obj, level_reg)
        for level_anchors, level_obj, level_reg in zip(
            anchors_by_level, objectness, box_regression)
    ]

    # Transpose back and merge the results of all levels for each image.
    boxlists = [cat_boxlist(per_image) for per_image in zip(*per_level)]

    if num_levels > 1:
        boxlists = self.select_over_all_levels(boxlists)

    # append ground-truth bboxes to proposals
    if self.training and targets is not None:
        boxlists = self.add_gt_proposals(boxlists, targets)

    return boxlists
def get_det_result(self, locations, box_cls, box_regression, boxes):
    """
    Decode dense per-box predictions into detections.

    For every entry of `boxes`, the class scores above
    `self.pre_nms_thresh` are selected (at most `self.pre_nms_top_n` of
    them), the matching regression offsets are applied around the matching
    `locations`, and the resulting coordinates are rescaled from the
    `self.resolution` grid into the extent of that entry's box.

    Arguments:
        locations: tensor indexed by location; columns 0 and 1 are used as
            x and y.
        box_cls: per-box class scores; each entry is indexed as
            (location, class).  # NOTE(review): exact shapes not visible here
        box_regression: per-box regression values; columns 0..3 are used as
            left / top / right / bottom distances.
        boxes: BoxList providing `bbox`, `size` and a "scores" field.

    Returns:
        The concatenation (via `cat_boxlist`) of one BoxList per entry of
        `boxes`, each with "labels" and "scores" fields.
    """
    num_boxes = len(box_cls)
    grid_h, grid_w = self.resolution

    above_thresh = box_cls > self.pre_nms_thresh
    # Candidate count per box, capped at the configured maximum.
    top_n_per_box = above_thresh.view(num_boxes, -1).sum(1)
    top_n_per_box = top_n_per_box.clamp(max=self.pre_nms_top_n)

    parent_boxes = boxes.bbox
    image_size = boxes.size
    parent_scores = boxes.get_field("scores")

    per_box_results = []
    for idx in range(num_boxes):
        parent = parent_boxes[idx]
        parent_score = parent_scores[idx]
        mask = above_thresh[idx]

        cls_scores = box_cls[idx][mask]
        hits = mask.nonzero()
        loc_idx = hits[:, 0]
        # Labels are the class column index shifted by two.
        class_ids = hits[:, 1] + 2

        offsets = box_regression[idx][loc_idx]
        points = locations[loc_idx]

        top_n = top_n_per_box[idx]
        if mask.sum().item() > top_n.item():
            cls_scores, top_idx = cls_scores.topk(top_n, sorted=False)
            class_ids = class_ids[top_idx]
            offsets = offsets[top_idx]
            points = points[top_idx]

        # Box corners in grid coordinates.
        x1 = points[:, 0] - offsets[:, 0]
        y1 = points[:, 1] - offsets[:, 1]
        x2 = points[:, 0] + offsets[:, 2]
        y2 = points[:, 1] + offsets[:, 3]

        # Map grid coordinates into the extent of the parent box.
        span_x = parent[2] - parent[0]
        span_y = parent[3] - parent[1]
        x1 = x1 / grid_w * span_x + parent[0]
        y1 = y1 / grid_h * span_y + parent[1]
        x2 = x2 / grid_w * span_x + parent[0]
        y2 = y2 / grid_h * span_y + parent[1]

        decoded = BoxList(
            torch.stack([x1, y1, x2, y2], dim=-1), image_size, mode="xyxy")
        decoded.add_field("labels", class_ids)
        # Final score combines the class score with the parent box score.
        decoded.add_field(
            "scores", torch.sqrt(torch.sqrt(cls_scores) * parent_score))
        decoded = decoded.clip_to_image(remove_empty=False)
        decoded = remove_small_boxes(decoded, self.min_size)
        per_box_results.append(decoded)

    return cat_boxlist(per_box_results)
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    Boxes and scores are reshaped to one row per proposal with
    `num_classes` class-specific entries. Each class from 1 upwards is then
    processed independently (class 0 is the background) with either
    soft-NMS or plain NMS, optionally followed by bounding-box voting,
    depending on `cfg.TEST`. The merged result is finally limited to
    `self.detections_per_img` detections when that value is positive.
    """
    # Unwrap the boxlist to avoid additional overhead; with a multi-class
    # NMS this could be done directly on the boxlist.
    all_boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    all_scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = all_scores.device
    above_thresh = all_scores > self.score_thresh

    per_class = []
    for cls_id in range(1, num_classes):
        rows = above_thresh[:, cls_id].nonzero().squeeze(1)
        cls_boxes = all_boxes[rows, cls_id * 4:(cls_id + 1) * 4]
        candidates = BoxList(cls_boxes, boxlist.size, mode="xyxy")
        candidates.add_field("scores", all_scores[rows, cls_id])

        if cfg.TEST.SOFT_NMS.ENABLED:
            kept = boxlist_soft_nms(
                candidates,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=self.nms,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            kept = boxlist_nms(candidates, self.nms)

        # Refine the post-NMS boxes using bounding-box voting; the pre-NMS
        # candidates act as the voters.
        if cfg.TEST.BBOX_VOTE.ENABLED and cls_boxes.shape[0] > 0:
            kept = boxlist_box_voting(
                kept,
                candidates,
                cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)

        kept.add_field(
            "labels",
            torch.full((len(kept), ), cls_id, dtype=torch.int64,
                       device=device))
        per_class.append(kept)

    result = cat_boxlist(per_class)
    total = len(result)
    limit = self.detections_per_img

    # Limit to max_per_image detections **over all classes**
    if 0 < limit < total:
        final_scores = result.get_field("scores")
        # The (total - limit + 1)-th smallest score is the cut-off; every
        # detection scoring at least that much is kept.
        cutoff, _ = torch.kthvalue(final_scores.cpu(), total - limit + 1)
        keep = torch.nonzero(final_scores >= cutoff.item()).squeeze(1)
        result = result[keep]
    return result
def filter_results(boxlist, nms_thresh=0.5, detections_per_img=100):
    """Apply NMS to labelled detections and cap their number.

    When neither soft-NMS nor box voting is enabled in `cfg.TEST`, a single
    call to `boxlist_ml_nms` handles all classes. Otherwise every class
    from 1 to `cfg.MODEL.NUM_CLASSES - 1` is processed on its own.

    Arguments:
        boxlist: BoxList with "scores" and "labels" fields.
        nms_thresh: overlap threshold forwarded to the NMS routines.
        detections_per_img: maximum number of detections to keep; the cap
            is only applied when this value is positive.

    Returns:
        BoxList with the surviving detections.
    """
    num_classes = cfg.MODEL.NUM_CLASSES

    if not (cfg.TEST.SOFT_NMS.ENABLED or cfg.TEST.BBOX_VOTE.ENABLED):
        result = boxlist_ml_nms(boxlist, nms_thresh)
    else:
        all_boxes = boxlist.bbox
        all_scores = boxlist.get_field("scores")
        all_labels = boxlist.get_field("labels")

        per_class = []
        # Label 0 is the background and is skipped.
        for cls_id in range(1, num_classes):
            rows = (all_labels == cls_id).nonzero().view(-1)
            cls_boxes = all_boxes[rows, :].view(-1, 4)
            candidates = BoxList(cls_boxes, boxlist.size, mode="xyxy")
            candidates.add_field("scores", all_scores[rows])

            if cfg.TEST.SOFT_NMS.ENABLED:
                kept = boxlist_soft_nms(
                    candidates,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=nms_thresh,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD)
            else:
                kept = boxlist_nms(candidates, nms_thresh)

            # Refine the post-NMS boxes using bounding-box voting; the
            # pre-NMS candidates act as the voters.
            if cfg.TEST.BBOX_VOTE.ENABLED and cls_boxes.shape[0] > 0:
                kept = boxlist_box_voting(
                    kept,
                    candidates,
                    cfg.TEST.BBOX_VOTE.VOTE_TH,
                    scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)

            kept.add_field(
                "labels",
                torch.full((len(kept), ), cls_id, dtype=torch.int64,
                           device=all_scores.device))
            per_class.append(kept)

        result = cat_boxlist(per_class)

    # Limit to max_per_image detections **over all classes**
    total = len(result)
    if 0 < detections_per_img < total:
        final_scores = result.get_field("scores")
        # The (total - detections_per_img + 1)-th smallest score is the
        # cut-off; every detection scoring at least that much is kept.
        cutoff, _ = torch.kthvalue(
            final_scores.cpu(), total - detections_per_img + 1)
        keep = torch.nonzero(final_scores >= cutoff.item()).squeeze(1)
        result = result[keep]
    return result
def im_detect_bbox(model, ims):
    """Run box detection with optional test-time augmentation.

    The network is always run once at `cfg.TEST.SCALE` /
    `cfg.TEST.MAX_SIZE`. When `cfg.TEST.BBOX_AUG.ENABLED` is set,
    additional runs are added for each scale in `cfg.TEST.BBOX_AUG.SCALES`
    and, if `cfg.TEST.BBOX_AUG.H_FLIP` is set, for the variants called with
    the fifth argument `True` (presumably the horizontal-flip flag).

    Returns:
        `(results, None)` straight from the first run when
        `cfg.RPN.RPN_ONLY` is set; otherwise `(box_results, features)`,
        where `box_results` holds one merged BoxList per image and
        `features` holds one `(net_imgs_size, blob_conv)` pair per run.
    """
    per_image_boxes = [[] for _ in range(len(ims))]
    features = []
    semseg_runs = []

    results, net_imgs_size, blob_conv, semseg_pred = im_detect_bbox_net(
        model, ims, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    if cfg.RPN.RPN_ONLY:
        return results, None

    add_results(per_image_boxes, results)
    features.append((net_imgs_size, blob_conv))
    semseg_runs.append(semseg_pred)

    def run_augmented(scale, max_size, flip):
        # Every augmented run also receives the image sizes of the base run.
        aug_results, aug_sizes, aug_conv, aug_semseg = im_detect_bbox_net(
            model, ims, scale, max_size, flip, net_imgs_size)
        add_results(per_image_boxes, aug_results)
        features.append((aug_sizes, aug_conv))
        semseg_runs.append(aug_semseg)

    if cfg.TEST.BBOX_AUG.ENABLED:
        if cfg.TEST.BBOX_AUG.H_FLIP:
            run_augmented(cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, True)

        for scale in cfg.TEST.BBOX_AUG.SCALES:
            max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
            run_augmented(scale, max_size, False)
            if cfg.TEST.BBOX_AUG.H_FLIP:
                run_augmented(scale, max_size, True)

    box_results = [cat_boxlist(boxes) for boxes in per_image_boxes]

    if cfg.MODEL.FASTER_ON:
        box_results = [filter_results(boxes) for boxes in box_results]

    if cfg.MODEL.SEMSEG_ON:
        # Swap the first two axes so the leading axis matches the entries of
        # box_results instead of the runs, then average over the runs.
        # NOTE(review): the transpose assumes a 5-D stack - confirm.
        stacked = np.asarray(semseg_runs).transpose((1, 0, 2, 3, 4))
        for index, boxes in enumerate(box_results):
            boxes.add_field("semseg", np.mean(stacked[index], axis=0))

    return box_results, features
def im_detect_bbox(model, ims):
    """Run box detection with optional test-time augmentation.

    The network is always run once at `cfg.TEST.SCALE` /
    `cfg.TEST.MAX_SIZE`. When `cfg.TEST.BBOX_AUG.ENABLED` is set,
    additional runs are added for each scale in `cfg.TEST.BBOX_AUG.SCALES`
    and, if `cfg.TEST.BBOX_AUG.H_FLIP` is set, for the variants called with
    the fifth argument `True` (presumably the horizontal-flip flag).

    Returns:
        `(results, None)` straight from the first run when
        `cfg.RPN.RPN_ONLY` is set; otherwise `(box_results, features)`.
        With `cfg.MODEL.HAS_BOX`, `box_results` holds the merged and
        filtered detections per image; without it, only the first collected
        entry per image is returned. `features` holds one
        `(net_imgs_size, conv_features)` pair per run.
    """
    per_image_boxes = [[] for _ in range(len(ims))]
    features = []

    results, net_imgs_size, conv_features = im_detect_bbox_net(
        model, ims, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    if cfg.RPN.RPN_ONLY:
        return results, None

    add_results(per_image_boxes, results)
    features.append((net_imgs_size, conv_features))

    # Collect the (scale, max_size, flip) settings of the augmented runs in
    # the order in which they have to be executed.
    aug_runs = []
    if cfg.TEST.BBOX_AUG.ENABLED:
        use_flip = cfg.TEST.BBOX_AUG.H_FLIP
        if use_flip:
            aug_runs.append((cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, True))
        for scale in cfg.TEST.BBOX_AUG.SCALES:
            max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
            aug_runs.append((scale, max_size, False))
            if use_flip:
                aug_runs.append((scale, max_size, True))

    for scale, max_size, flip in aug_runs:
        # Every augmented run also receives the image sizes of the base run.
        aug_results, aug_sizes, aug_features = im_detect_bbox_net(
            model, ims, scale, max_size, flip, net_imgs_size)
        add_results(per_image_boxes, aug_results)
        features.append((aug_sizes, aug_features))

    if cfg.MODEL.HAS_BOX:
        nms_thresh, detections_per_img = get_detection_params()
        merged = [cat_boxlist(boxes) for boxes in per_image_boxes]
        box_results = [
            filter_results(boxes,
                           nms_thresh=nms_thresh,
                           detections_per_img=detections_per_img)
            for boxes in merged
        ]
    else:
        box_results = [boxes[0] for boxes in per_image_boxes]

    return box_results, features
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Compute the objectness and box-regression losses on sampled anchors.

    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    merged_anchors = [cat_boxlist(per_image) for per_image in anchors]
    labels, regression_targets = self.prepare_targets(merged_anchors, targets)

    # The sampler returns per-image masks; flatten them into index tensors
    # over the concatenation of all images.
    pos_masks, neg_masks = self.fg_bg_sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_masks, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_masks, dim=0)).squeeze(1)
    sampled_idx = torch.cat([pos_idx, neg_idx], dim=0)

    objectness, box_regression = concat_box_prediction_layers(
        objectness, box_regression)
    objectness = objectness.squeeze()

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    # Regression is evaluated on the positive samples only, but normalised
    # by the total number of sampled (positive and negative) anchors.
    regression_loss = smooth_l1_loss(
        box_regression[pos_idx],
        regression_targets[pos_idx],
        beta=cfg.RPN.SMOOTH_L1_BETA,
        size_average=False,
    )
    box_loss = regression_loss / (sampled_idx.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_idx], labels[sampled_idx])

    return objectness_loss, box_loss
def im_detect_bbox(model, ims):
    """Run box detection with optional test-time augmentation.

    The network is always run once at `cfg.TEST.SCALE` /
    `cfg.TEST.MAX_SIZE`. When `cfg.TEST.BBOX_AUG.ENABLED` is set,
    additional runs are added for each scale in `cfg.TEST.BBOX_AUG.SCALES`
    and, if `cfg.TEST.BBOX_AUG.H_FLIP` is set, for the variants called with
    the fifth argument `True` (presumably the horizontal-flip flag).

    Returns:
        `(results, None)` straight from the first run when
        `cfg.RPN.RPN_ONLY` is set; otherwise `(box_results, features)`,
        where `box_results` holds one merged BoxList per image (filtered
        when `cfg.MODEL.FASTER_ON` is set) and `features` holds one
        `(net_imgs_size, blob_conv)` pair per run.
    """
    per_image_boxes = [[] for _ in range(len(ims))]
    features = []

    results, net_imgs_size, blob_conv = im_detect_bbox_net(
        model, ims, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    if cfg.RPN.RPN_ONLY:
        return results, None

    add_results(per_image_boxes, results)
    features.append((net_imgs_size, blob_conv))

    def run_augmented(scale, max_size, flip):
        # Every augmented run also receives the image sizes of the base run.
        aug_results, aug_sizes, aug_conv = im_detect_bbox_net(
            model, ims, scale, max_size, flip, net_imgs_size)
        add_results(per_image_boxes, aug_results)
        features.append((aug_sizes, aug_conv))

    if cfg.TEST.BBOX_AUG.ENABLED:
        if cfg.TEST.BBOX_AUG.H_FLIP:
            run_augmented(cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, True)

        for scale in cfg.TEST.BBOX_AUG.SCALES:
            max_size = cfg.TEST.BBOX_AUG.MAX_SIZE
            run_augmented(scale, max_size, False)
            if cfg.TEST.BBOX_AUG.H_FLIP:
                run_augmented(scale, max_size, True)

    box_results = [cat_boxlist(boxes) for boxes in per_image_boxes]

    if cfg.MODEL.FASTER_ON:
        box_results = [filter_results(boxes) for boxes in box_results]

    return box_results, features
def filter_results(boxlist):
    """Threshold class-specific detections, apply NMS and cap their number.

    `boxlist` is expected to hold `cfg.MODEL.NUM_CLASSES` consecutive
    entries per proposal, so that the class of entry `i` is
    `i % num_classes` (class 0 being the background).

    Two paths exist:

    * Neither soft-NMS nor box voting enabled: a "labels" field is added to
      `boxlist` in place, background and low-scoring entries are masked out
      and `boxlist_ml_nms` handles all classes in one call.
    * Otherwise: every foreground class is processed on its own with
      soft-NMS or plain NMS, optionally followed by box voting.

    In both cases the result is limited to
    `cfg.FAST_RCNN.DETECTIONS_PER_IMG` detections when that value is
    positive.

    Arguments:
        boxlist: BoxList with a "scores" field.

    Returns:
        BoxList with "scores" and "labels" fields.
    """
    num_classes = cfg.MODEL.NUM_CLASSES

    if not cfg.TEST.SOFT_NMS.ENABLED and not cfg.TEST.BBOX_VOTE.ENABLED:
        # multiclass nms
        scores = boxlist.get_field("scores")
        device = scores.device

        # Build the class ids directly on the target device. Unlike a tiled
        # host array this always has exactly one label per box and avoids
        # the host-to-device copy.
        class_ids = torch.arange(
            boxlist.bbox.shape[0], dtype=torch.int64,
            device=device) % num_classes
        boxlist.add_field("labels", class_ids)

        # Both operands come from torch comparisons, so they share the mask
        # dtype of the running PyTorch version (bool on current releases);
        # a hard-coded uint8 mask triggers the deprecated byte-mask
        # indexing path.
        is_foreground = class_ids != 0
        inds_all = (scores > cfg.FAST_RCNN.SCORE_THRESH) & is_foreground
        result = boxlist_ml_nms(boxlist[inds_all], cfg.FAST_RCNN.NMS)
    else:
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)
        device = scores.device
        result = []

        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > cfg.FAST_RCNN.SCORE_THRESH
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            # Keep the pre-NMS candidates; box voting uses them as voters.
            boxlist_for_class_old = boxlist_for_class

            if cfg.TEST.SOFT_NMS.ENABLED:
                boxlist_for_class = boxlist_soft_nms(
                    boxlist_for_class,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=cfg.FAST_RCNN.NMS,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD)
            else:
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                cfg.FAST_RCNN.NMS)

            # Refine the post-NMS boxes using bounding-box voting
            if cfg.TEST.BBOX_VOTE.ENABLED and boxes_j.shape[0] > 0:
                boxlist_for_class = boxlist_box_voting(
                    boxlist_for_class,
                    boxlist_for_class_old,
                    cfg.TEST.BBOX_VOTE.VOTE_TH,
                    scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)

            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)

    number_of_detections = len(result)
    max_detections = cfg.FAST_RCNN.DETECTIONS_PER_IMG

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > max_detections > 0:
        cls_scores = result.get_field("scores")
        # The (number_of_detections - max_detections + 1)-th smallest score
        # is the cut-off; every detection scoring at least that much is
        # kept.
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(), number_of_detections - max_detections + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def select_over_all_levels(self, boxlists):
    """Apply NMS to the merged detections of every image and cap them.

    When neither soft-NMS nor box voting is enabled in `cfg.TEST`, a single
    `boxlist_ml_nms` call handles all classes of an image. Otherwise the
    labels 2 .. `self.num_classes` are processed one by one. Each image's
    result is limited to `self.fpn_post_nms_top_n` detections when that
    value is positive.

    Arguments:
        boxlists: list[BoxList], one per image, with "scores" and "labels"
            fields.

    Returns:
        list[BoxList]: the filtered detections, one entry per image.
    """
    results = []
    for boxlist in boxlists:
        if not (cfg.TEST.SOFT_NMS.ENABLED or cfg.TEST.BBOX_VOTE.ENABLED):
            # multiclass nms
            result = boxlist_ml_nms(boxlist, self.nms_thresh)
        else:
            all_scores = boxlist.get_field("scores")
            all_labels = boxlist.get_field("labels")
            all_boxes = boxlist.bbox

            per_class = []
            # skip the background: labels below 2 are never visited
            for cls_id in range(2, self.num_classes + 1):
                rows = (all_labels == cls_id).nonzero().view(-1)
                cls_boxes = all_boxes[rows, :].view(-1, 4)
                candidates = BoxList(cls_boxes, boxlist.size, mode="xyxy")
                candidates.add_field("scores", all_scores[rows])

                if cfg.TEST.SOFT_NMS.ENABLED:
                    kept = boxlist_soft_nms(
                        candidates,
                        sigma=cfg.TEST.SOFT_NMS.SIGMA,
                        overlap_thresh=self.nms_thresh,
                        score_thresh=0.0001,
                        method=cfg.TEST.SOFT_NMS.METHOD)
                else:
                    kept = boxlist_nms(candidates, self.nms_thresh,
                                       score_field="scores")

                # Refine the post-NMS boxes using bounding-box voting; the
                # pre-NMS candidates act as the voters.
                if cfg.TEST.BBOX_VOTE.ENABLED and cls_boxes.shape[0] > 0:
                    kept = boxlist_box_voting(
                        kept,
                        candidates,
                        cfg.TEST.BBOX_VOTE.VOTE_TH,
                        scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)

                kept.add_field(
                    "labels",
                    torch.full((len(kept), ), cls_id, dtype=torch.int64,
                               device=all_scores.device))
                per_class.append(kept)

            result = cat_boxlist(per_class)

        total = len(result)
        limit = self.fpn_post_nms_top_n

        # Limit to max_per_image detections **over all classes**
        if 0 < limit < total:
            final_scores = result.get_field("scores")
            # The (total - limit + 1)-th smallest score is the cut-off;
            # every detection scoring at least that much is kept.
            cutoff, _ = torch.kthvalue(final_scores.cpu(), total - limit + 1)
            keep = torch.nonzero(final_scores >= cutoff.item()).squeeze(1)
            result = result[keep]

        results.append(result)
    return results
def forward(self, box_cls_all, box_reg_all, centerness_all, locations,
            boxes_all):
    """Post-process multi-level predictions made for each image's boxes.

    The per-level tensors are split along dim 0 into one chunk per image
    (chunk sizes are the lengths of the entries of `boxes_all`). Every
    image is then decoded level by level, the levels are merged, NMS is
    applied through `select_over_all_levels`, and the image's input boxes
    are appended to the result.

    When `self.eval_hier` is set, each result additionally receives the
    fields "hier_boxes", "hier_scores" and "pboxes_scores", built from the
    per-level outputs by picking the best-scoring level for every row.

    Arguments:
        box_cls_all, box_reg_all, centerness_all: one tensor per feature
            level, concatenated over all images along dim 0.
        locations: one entry per feature level.
        boxes_all: one BoxList per image, carrying a "scores" field.

    Returns:
        list[BoxList]: one entry per image.
    """
    num_levels = len(box_cls_all)
    chunk_sizes = [len(image_boxes) for image_boxes in boxes_all]

    def split_per_image(level_tensors):
        # level-major list of per-image chunks -> image-major list of tuples
        chunks = [
            tensor.split(chunk_sizes, dim=0) for tensor in level_tensors
        ]
        return list(zip(*chunks))

    cls_per_image = split_per_image(box_cls_all)
    reg_per_image = split_per_image(box_reg_all)
    ctr_per_image = split_per_image(centerness_all)

    results = []
    hier_results = []
    for image_cls, image_reg, image_ctr, image_boxes in zip(
            cls_per_image, reg_per_image, ctr_per_image, boxes_all):
        level_detections = []
        level_hier_boxes = []
        level_hier_scores = []
        for level_loc, level_cls, level_reg, level_ctr in zip(
                locations, image_cls, image_reg, image_ctr):
            detections, h_boxes, h_scores = \
                self.forward_for_single_feature_map(
                    level_loc, level_cls, level_reg, level_ctr, image_boxes)
            level_detections.append(detections)
            level_hier_boxes.append(h_boxes)
            level_hier_scores.append(h_scores)

        results.append(cat_boxlist(level_detections))

        if self.eval_hier:
            # Stack the levels and flatten to one row per candidate with
            # one entry per level.
            # NOTE(review): layout of the per-level tensors is not visible
            # here - confirm against forward_for_single_feature_map.
            stacked_boxes = torch.stack(level_hier_boxes, dim=2)
            stacked_scores = torch.stack(level_hier_scores, dim=2)
            stacked_boxes = stacked_boxes.reshape(-1, num_levels, 4)
            stacked_scores = stacked_scores.reshape(-1, num_levels)

            # Keep, for every row, the box of the best-scoring level.
            best_scores, best_level = stacked_scores.max(dim=1)
            best_boxes = stacked_boxes[range(len(stacked_boxes)), best_level]

            hier_results.append([
                best_boxes.cpu().numpy(),
                best_scores.cpu().numpy(),
                image_boxes.get_field("scores"),
            ])

    results = self.select_over_all_levels(results)
    results = [
        cat_boxlist([detections, image_boxes])
        for detections, image_boxes in zip(results, boxes_all)
    ]

    if self.eval_hier:
        for detections, (h_boxes, h_scores, parent_scores) in zip(
                results, hier_results):
            detections.add_field("hier_boxes", h_boxes)
            detections.add_field("hier_scores", h_scores)
            detections.add_field("pboxes_scores", parent_scores)

    return results