def add_gt_proposals(self, proposals, targets):
    """
    Arguments:
        proposals: list[BoxList]
        targets: list[BoxList]
    """
    # Get the device we're operating on
    device = proposals[0].bbox.device

    # changed: convert targets to rotated boxes (width/height handling)
    # before appending them to the proposals
    new_targets = [target.set2rboxes() for target in targets]

    gt_boxes = [target.copy_with_fields([]) for target in new_targets]

    # later cat of bbox requires all fields to be present for all bbox
    # so we need to add a dummy for objectness that's missing
    for gt_box in gt_boxes:
        gt_box.add_field("objectness", torch.ones(len(gt_box), device=device))

    proposals = [
        cat_boxlist((proposal, gt_box))
        for proposal, gt_box in zip(proposals, gt_boxes)
    ]

    return proposals
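# A minimal sketch (assumed names, plain tensors in place of BoxList) of what
# appending the ground-truth boxes buys us: downstream samplers are guaranteed
# at least a few perfect positives per image even when the RPN output is poor.
import torch

def append_gt_sketch(proposal_boxes, gt_boxes):
    """proposal_boxes: (N, 5) xywha tensor; gt_boxes: (M, 5) xywha tensor."""
    merged = torch.cat([proposal_boxes, gt_boxes], dim=0)
    # the ones mirror the dummy "objectness" field added to the GT rows above
    objectness = torch.cat([
        torch.zeros(len(proposal_boxes)),  # stand-in for real RPN scores
        torch.ones(len(gt_boxes)),
    ])
    return merged, objectness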
def forward(self, locations, box_cls, box_regression, centerness, image_sizes):
    """
    Arguments:
        locations: list[tensor]
        box_cls: list[tensor]
        box_regression: list[tensor]
        centerness: list[tensor]
        image_sizes: list[(h, w)]
    Returns:
        boxlists (list[BoxList]): the post-processed anchors, after
            applying box decoding and NMS
    """
    sampled_boxes = []
    normal_factor = [16, 32, 64, 128, 256]  # one normalization factor per feature level
    for layer, (l, o, b, c) in enumerate(
            zip(locations, box_cls, box_regression, centerness)):
        sampled_boxes.append(
            self.forward_for_single_feature_map(l, o, b, c, image_sizes,
                                                normal_factor[layer]))

    boxlists = list(zip(*sampled_boxes))
    boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]
    # boxlists = self.select_over_all_levels(boxlists)

    return boxlists
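# A self-contained illustration of the `list(zip(*sampled_boxes))` transpose
# above: per-level results (one entry per image) are regrouped into per-image
# tuples (one entry per level) before `cat_boxlist` concatenates them.
per_level = [
    ["im0_lvl0", "im1_lvl0"],  # level 0
    ["im0_lvl1", "im1_lvl1"],  # level 1
]
per_image = list(zip(*per_level))
assert per_image == [("im0_lvl0", "im0_lvl1"), ("im1_lvl0", "im1_lvl1")]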
def forward(self, anchors, objectness, box_regression, targets=None):
    """
    Arguments:
        anchors: list[list[BoxList]]
        objectness: list[tensor]
        box_regression: list[tensor]
    Returns:
        boxlists (list[BoxList]): the post-processed anchors, after
            applying box decoding and NMS
    """
    sampled_boxes = []
    num_levels = len(objectness)
    anchors = list(zip(*anchors))
    for a, o, b in zip(anchors, objectness, box_regression):
        sampled_boxes.append(self.forward_for_single_feature_map(a, o, b))

    boxlists = list(zip(*sampled_boxes))
    boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]

    if num_levels > 1:
        boxlists = self.select_over_all_levels(boxlists)

    # append ground-truth bboxes to proposals
    if self.training and targets is not None:
        boxlists = self.add_gt_proposals(boxlists, targets)

    return boxlists
def filter_results(self, boxlist, num_classes, num_of_fwd_left):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 5)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)

    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 5 : (j + 1) * 5]
        boxlist_for_class = RBoxList(boxes_j, boxlist.size, mode="xywha")
        boxlist_for_class.add_field("scores", scores_j)
        if num_of_fwd_left == 0:
            boxlist_for_class.rescale(1. / self.shrink_margin)
        boxlist_for_class = self.nms_fn(
            boxlist_for_class, self.nms, score_field="scores"
        )
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
        )
        result.append(boxlist_for_class)

    result = cat_boxlist(result)
    number_of_detections = len(result)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
        )
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
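# A self-contained sketch of the `torch.kthvalue` capping trick used above:
# keeping the top-k of n scores means thresholding at the (n - k + 1)-th
# smallest value, which is exactly the k-th largest.
import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.3, 0.7])
k = 3
thresh, _ = torch.kthvalue(scores, scores.numel() - k + 1)
keep = torch.nonzero(scores >= thresh).squeeze(1)
assert keep.numel() == k  # holds here; tied scores may keep slightly more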
def __call__(self, image_mix_list, target_mix_list):
    crop_imgs = []
    crop_tars = []
    maxH, calW = 0, 0
    for i in range(len(image_mix_list)):
        img = image_mix_list[i]
        tar = target_mix_list[i]
        # img, tar = self.crop_tool(img, tar)
        crop_imgs.append(img)
        crop_tars.append(tar)
        np_img = np.array(img)
        H, W = np_img.shape[:2]
        if H > maxH:
            maxH = H
        calW += W

    # paste all crops side by side on a canvas of size (maxH, calW)
    mix_img = np.zeros((maxH, calW, 3))
    mix_tar = []
    shift = 0
    for i in range(len(crop_imgs)):
        crop_im = crop_imgs[i]
        crop_tar = crop_tars[i]
        np_img = np.array(crop_im)
        H, W = np_img.shape[:2]
        mix_img[:H, shift:W + shift] = np_img
        if crop_tar is not None:
            # shift the boxes horizontally to follow the pasted image
            crop_tar = crop_tar.shift(shift, 0, (calW, maxH))
            mix_tar.append(crop_tar)
        shift += W

    if len(mix_tar) > 0:
        cat_boxes = cat_boxlist(mix_tar)
    else:
        cat_boxes = None
    return Image.fromarray(mix_img.astype(np.uint8)), cat_boxes
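# A self-contained sketch (toy arrays, no PIL) of the horizontal mosaic above:
# crops are pasted left to right on a canvas of height max(H_i) and width
# sum(W_i), and boxes of crop i would be shifted right by sum(W_0..W_{i-1}).
import numpy as np

imgs = [np.full((4, 3, 3), 50, np.uint8), np.full((2, 5, 3), 200, np.uint8)]
maxH = max(im.shape[0] for im in imgs)
calW = sum(im.shape[1] for im in imgs)
canvas = np.zeros((maxH, calW, 3), np.uint8)
shift = 0
for im in imgs:
    h, w = im.shape[:2]
    canvas[:h, shift:shift + w] = im
    shift += w
assert canvas.shape == (4, 8, 3)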
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background
        for j in range(1, self.num_classes):
            inds = (labels == j).nonzero().view(-1)

            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms_thresh, score_field="scores"
            )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64,
                           device=scores.device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def sampling_boxes(boxes, max_num=128):
    """
    Given a list of RBoxList containing the `labels` field, sample up to
    `max_num` positive boxes (`labels > 0`) and up to `max_num // 4`
    negative boxes (`labels == 0`) per image.

    Arguments:
        boxes (list of RBoxList)
    """
    assert isinstance(boxes, (list, tuple))
    assert isinstance(boxes[0], RBoxList)
    assert boxes[0].has_field("labels")

    all_boxes = []
    positive_boxes = []
    positive_inds = []
    negative_boxes = []
    negative_inds = []
    for boxes_per_image in boxes:
        labels = boxes_per_image.get_field("labels")
        inds_mask = labels > 0
        inds = inds_mask.nonzero().squeeze(1)
        positive_boxes.append(boxes_per_image[inds][:max_num])
        positive_inds.append(inds_mask)

        neg_mask = labels == 0
        neg_inds = neg_mask.nonzero().squeeze(1)
        negative_box = boxes_per_image[neg_inds][-int(max_num / 4):]
        negative_boxes.append(negative_box)
        negative_inds.append(neg_mask)

        all_boxes.append(
            cat_boxlist([boxes_per_image[inds][:max_num], negative_box]))

    return positive_boxes, positive_inds, negative_boxes, negative_inds, all_boxes
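# A self-contained sketch of the label-mask sampling above (plain tensors in
# place of RBoxList): positives are the first `max_num` entries with label > 0,
# negatives the last quarter of the entries with label == 0.
import torch

labels = torch.tensor([0, 2, 0, 1, 0, 3])
max_num = 4
pos_inds = (labels > 0).nonzero().squeeze(1)[:max_num]         # tensor([1, 3, 5])
neg_inds = (labels == 0).nonzero().squeeze(1)[-max_num // 4:]  # tensor([4])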
def add_gt_proposals(self, proposals, targets):
    """
    Arguments:
        proposals: list[BoxList]
        targets: list[BoxList]
    """
    # Get the device we're operating on
    device = proposals[0].bbox.device

    gt_boxes = [target.copy_with_fields([]) for target in targets]

    # later cat of bbox requires all fields to be present for all bbox
    # so we need to add a dummy for objectness that's missing
    for gt_box in gt_boxes:
        gt_box.add_field("objectness", torch.ones(len(gt_box), device=device))

    proposals = [
        cat_boxlist((proposal, gt_box))
        for proposal, gt_box in zip(proposals, gt_boxes)
    ]

    return proposals
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
            N, -1)
        box_regression_per_level = box_regression_per_level.view(
            N, -1, 5, H, W)
        box_regression_per_level = box_regression_per_level.permute(
            0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(
            N, -1, 5)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)

    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 5)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_regression_pos = box_regression[sampled_pos_inds]
    regression_targets_pos = regression_targets[sampled_pos_inds]

    if self.edge_punished:
        # re-weight regression terms by the anchor width, normalized by the
        # mean width of the sampled positive anchors
        anchors_cat = torch.cat([anchor.bbox for anchor in anchors], 0)
        pos_anchors_w = anchors_cat[:, 2:3][sampled_pos_inds]
        pos_anchors_w_norm = pos_anchors_w / (torch.mean(pos_anchors_w) + 1e-10)
        box_regression_pos = pos_anchors_w_norm * box_regression_pos
        regression_targets_pos = pos_anchors_w_norm * regression_targets_pos

    plabels = labels[sampled_inds]

    if self.OHEM:
        cls_logits = objectness[sampled_inds]
        score_sig = torch.sigmoid(cls_logits)

        # pick hard positives: the lowest-scoring quarter of the positives
        pos_score_sig = score_sig[plabels == 1]
        pos_num = pos_score_sig.shape[0]
        hard_pos_num = int(pos_num / 4) + 1
        hp_vals, hp_indices = torch.topk(-pos_score_sig, hard_pos_num, dim=0)

        pos_label = plabels[plabels == 1][hp_indices]
        pos_logits = cls_logits[plabels == 1][hp_indices]
        pos_box_reg = box_regression_pos[hp_indices]
        pos_box_target = regression_targets_pos[hp_indices]

        # pick hard negatives: the highest-scoring quarter of the negatives
        neg_score_sig = score_sig[plabels != 1]
        neg_num = neg_score_sig.shape[0]
        hard_neg_num = int(neg_num / 4) + 1
        hn_vals, hn_indices = torch.topk(neg_score_sig, hard_neg_num, dim=0)

        neg_label = plabels[plabels != 1][hn_indices]
        neg_logits = cls_logits[plabels != 1][hn_indices]

        hard_labels = torch.cat([pos_label, neg_label], dim=0)
        hard_logits = torch.cat([pos_logits, neg_logits], dim=0)

        ohem_box_loss = smooth_l1_loss(
            pos_box_reg,
            pos_box_target,
            beta=1.0 / 9,
            size_average=False,
        ) / float(hard_pos_num + hard_neg_num)

        ohem_objectness_loss = F.binary_cross_entropy_with_logits(
            hard_logits, hard_labels.to(hard_logits.device))

        return ohem_objectness_loss, ohem_box_loss
    else:
        box_loss = smooth_l1_loss(
            box_regression_pos,
            regression_targets_pos,
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        score = objectness[sampled_inds]
        objectness_loss = F.binary_cross_entropy_with_logits(
            score, plabels.to(score.device))

        return objectness_loss, box_loss
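# A self-contained sketch of the OHEM selection above: keep the quarter of
# positives the classifier is least confident about (lowest sigmoid score)
# and the quarter of negatives it is most confidently wrong about (highest
# sigmoid score); `topk` on the negated scores selects the smallest values.
import torch

pos_scores = torch.tensor([0.9, 0.2, 0.7, 0.4])  # sigmoid scores of positives
neg_scores = torch.tensor([0.1, 0.8, 0.3, 0.6])  # sigmoid scores of negatives
hard_pos_k = pos_scores.numel() // 4 + 1
hard_neg_k = neg_scores.numel() // 4 + 1
_, hard_pos = torch.topk(-pos_scores, hard_pos_k)  # indices 1, 3 (scores 0.2, 0.4)
_, hard_neg = torch.topk(neg_scores, hard_neg_k)   # indices 1, 3 (scores 0.8, 0.6)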
def forward(self, features, proposals, targets=None):
    """
    Arguments:
        features (list[Tensor]): feature-maps from possibly several levels
        proposals (list[BoxList]): proposal boxes
        targets (list[BoxList], optional): the ground-truth targets.

    Returns:
        x (Tensor): the result of the feature extractor
        proposals (list[BoxList]): during training, the original proposals
            are returned. During testing, the predicted boxlists are returned
            with the `mask` field set
        losses (dict[Tensor]): During training, returns the losses for the
            head. During testing, returns an empty dict.
    """
    if self.training:
        # during training, only focus on positive boxes
        all_proposals = proposals
        positive_boxes, positive_inds, negative_boxes, negative_inds, proposals = sampling_boxes(
            proposals, self.max_num_positive)
        if self.cfg.MODEL.ROI_REC_HEAD.POS_ONLY:
            proposals = positive_boxes

    if self.training and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
        x = features
        pos_x = x[torch.cat(positive_inds, dim=0)][:self.max_num_positive]
        all_proposals = cat_boxlist(all_proposals)
        pos_proposals = all_proposals[torch.cat(
            positive_inds, dim=0)][:self.max_num_positive]
        if self.cfg.MODEL.ROI_REC_HEAD.POS_ONLY:
            x = pos_x
            proposals = pos_proposals
        else:
            neg_x = x[torch.cat(negative_inds, dim=0)][:self.max_num_positive]
            x = torch.cat([pos_x, neg_x], dim=0)
            neg_proposals = all_proposals[torch.cat(
                negative_inds, dim=0)][:self.max_num_positive]
            proposals = cat_boxlist([pos_proposals, neg_proposals])
    else:
        if not self.training:
            proposals = [
                proposal.rescale(self.cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN)
                for proposal in proposals
            ]
        x = self.feature_extractor(features, proposals)

    if self.training and self.cfg.MODEL.ROI_REC_HEAD.REC_DETACH:
        x = x.detach()

    rec_logits = self.predictor(x)

    if not self.training:
        if self.cfg.MODEL.ROI_REC_HEAD.STRUCT == "REF_TRANSFORMER":
            result = self.post_processor(rec_logits, proposals, self.transformer)
        else:
            result = self.post_processor(rec_logits, proposals)
        return x, result, {}

    if self.cfg.MODEL.ROI_REC_HEAD.STRUCT == "REF_TRANSFORMER":
        loss_rec = self.loss_evaluator(proposals, rec_logits, targets,
                                       self.transformer)
    else:
        loss_rec = self.loss_evaluator(proposals, rec_logits, targets)

    return x, proposals, dict(loss_rec=loss_rec)
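# A self-contained sketch of the REC_DETACH branch above: `detach()` cuts the
# autograd graph, so the recognition loss cannot backpropagate into the shared
# feature extractor.
import torch

features = torch.randn(2, 8, requires_grad=True)
head_in = features.detach()  # same values, no gradient history
assert head_in.requires_grad is False and features.requires_grad is True
# a loss built from head_in would leave features.grad untouched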
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level in zip(
        objectness, box_regression
    ):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
            N, -1
        )
        box_regression_per_level = box_regression_per_level.view(N, -1, 5, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 5)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 5)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_regression_pos = box_regression[sampled_pos_inds]
    regression_targets_pos = regression_targets[sampled_pos_inds]

    if self.edge_punished:
        # re-weight regression terms by the anchor width, normalized by the
        # mean width of the sampled positive anchors
        anchors_cat = torch.cat([anchor.bbox for anchor in anchors], 0)
        pos_anchors_w = anchors_cat[:, 2:3][sampled_pos_inds]
        pos_anchors_w_norm = pos_anchors_w / (torch.mean(pos_anchors_w) + 1e-10)
        box_regression_pos = pos_anchors_w_norm * box_regression_pos
        regression_targets_pos = pos_anchors_w_norm * regression_targets_pos

    box_loss = smooth_l1_loss(
        box_regression_pos,
        regression_targets_pos,
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    score = objectness[sampled_inds]
    plabels = labels[sampled_inds]
    objectness_loss = F.binary_cross_entropy_with_logits(
        score, plabels.to(score.device)
    )

    return objectness_loss, box_loss
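# A self-contained sketch of the per-level layout change above: regression
# maps arrive as (N, A*5, H, W) and must become (N, H*W*A, 5) so that rows
# line up with the flattened per-anchor labels.
import torch

N, A, H, W = 2, 3, 4, 4
reg = torch.randn(N, A * 5, H, W)
reg = reg.view(N, -1, 5, H, W)      # (N, A, 5, H, W)
reg = reg.permute(0, 3, 4, 1, 2)    # (N, H, W, A, 5)
reg = reg.reshape(N, -1, 5)         # (N, H*W*A, 5)
assert reg.shape == (2, H * W * A, 5)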