def merge_maskrcnn_benchmark_result(self, corners, results, im_scales=None, image_size=None):
    import numpy as np
    import torch
    from maskrcnn_benchmark.structures.boxlist_ops import BoxList

    def result_fmt(result):
        # Flatten a BoxList into an (N, 6) array: [x1, y1, x2, y2, label, score].
        bbox = result.bbox
        labels = result.extra_fields["labels"].reshape(-1, 1).float()
        scores = result.extra_fields["scores"].reshape(-1, 1)
        return torch.cat([bbox, labels, scores], dim=1).detach().cpu().numpy()

    input_BoxList = isinstance(results[0], BoxList)
    if input_BoxList:
        assert im_scales is not None and image_size is not None, \
            'im_scales and image_size are required when results are BoxLists'
        det_results = []
        for result, im_scale in zip(results, im_scales):
            det_result = result_fmt(result)
            # Undo the per-image resize so boxes are in original-image coordinates.
            det_result[:, :4] = det_result[:, :4] / np.array(
                [im_scale[1], im_scale[0], im_scale[1], im_scale[0]])
            det_results.append(det_result)
    else:
        det_results = results

    # Shift each crop's boxes by its corner offset, then merge duplicates with NMS.
    det_results = self.translate_bboxes(corners, det_results)
    if len(det_results) == 0:
        return []
    _, keep = self.nms(det_results[:, :4], det_results[:, 5])
    det_results = det_results[keep]

    if input_BoxList:
        merge_result = BoxList(torch.Tensor(det_results[:, :4]), image_size, 'xyxy')
        merge_result.add_field("labels", torch.Tensor(det_results[:, 4]))
        merge_result.add_field("scores", torch.Tensor(det_results[:, 5]))
    else:
        merge_result = det_results
    return merge_result
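# translate_bboxes is provided elsewhere in this class and is not shown here.
# A minimal sketch of what it presumably does, given its usage above: shift
# each crop's detections by that crop's corner offset, then stack everything
# into one array. The (x, y) corner layout is an assumption, not taken from
# the original source:
import numpy as np

def translate_bboxes(corners, det_results):
    translated = []
    for (x0, y0), det in zip(corners, det_results):
        det = det.copy()
        # Move boxes from crop-local to full-image coordinates.
        det[:, :4] += np.array([x0, y0, x0, y0], dtype=det.dtype)
        translated.append(det)
    return np.concatenate(translated, axis=0) if translated else np.zeros((0, 6))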
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        positive_loss (Tensor)
        negative_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    box_cls_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for box_cls_per_level, box_regression_per_level in zip(
        box_cls, box_regression
    ):
        N, A, H, W = box_cls_per_level.shape
        C = self.num_classes
        box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
        box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
        box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
        box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_cls_flattened.append(box_cls_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1)
    box_regression = cat(box_regression_flattened, dim=1)
    cls_prob = torch.sigmoid(box_cls)

    box_prob = []
    positive_numels = 0
    positive_losses = []
    for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
        zip(anchors, targets, cls_prob, box_regression)
    ):
        labels_ = targets_.get_field("labels").to(dtype=torch.long) - 1

        with torch.set_grad_enabled(False):
            # box_localization: a_{j}^{loc}, shape: [j, 4]
            box_localization = self.box_coder.decode(box_regression_, anchors_.bbox)

            # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
            object_box_iou = boxlist_iou(
                targets_,
                BoxList(box_localization, anchors_.size, mode='xyxy')
            )

            t1 = self.bbox_threshold
            t2 = object_box_iou.max(dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)

            # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
            object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(min=0, max=1)

            indices = torch.stack(
                [torch.arange(len(labels_)).type_as(labels_), labels_], dim=0
            )

            # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
            object_cls_box_prob = torch.sparse_coo_tensor(indices, object_box_prob)

            # image_box_prob: P{a_{j} \in A_{+}}, shape: [j, c]
            """
            from "start" to "end" implement:
                image_box_prob = torch.sparse.max(object_cls_box_prob, dim=0).t()
            """
            # start
            indices = torch.nonzero(
                torch.sparse.sum(object_cls_box_prob, dim=0).to_dense()
            ).t_()

            if indices.numel() == 0:
                image_box_prob = torch.zeros(
                    anchors_.bbox.size(0), self.num_classes
                ).type_as(object_box_prob)
            else:
                nonzero_box_prob = torch.where(
                    (labels_.unsqueeze(dim=-1) == indices[0]),
                    object_box_prob[:, indices[1]],
                    torch.tensor([0.0]).type_as(object_box_prob)
                ).max(dim=0).values

                image_box_prob = torch.sparse_coo_tensor(
                    indices.flip([0]), nonzero_box_prob,
                    size=(anchors_.bbox.size(0), self.num_classes)
                ).to_dense()
            # end
            box_prob.append(image_box_prob)

        # construct bags for objects
        match_quality_matrix = boxlist_iou(targets_, anchors_)
        _, matched = torch.topk(
            match_quality_matrix, self.pre_anchor_topk, dim=1, sorted=False
        )
        del match_quality_matrix

        # matched_cls_prob: P_{ij}^{cls}
        matched_cls_prob = torch.gather(
            cls_prob_[matched], 2,
            labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk, 1)
        ).squeeze(2)

        # matched_box_prob: P_{ij}^{loc}
        matched_object_targets = self.box_coder.encode(
            targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched]
        )
        retinanet_regression_loss = smooth_l1_loss(
            box_regression_[matched],
            matched_object_targets,
            *self.smooth_l1_loss_param
        )
        matched_box_prob = torch.exp(-retinanet_regression_loss)

        # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
        positive_numels += len(targets_)
        positive_losses.append(
            self.positive_bag_loss_func(matched_cls_prob * matched_box_prob, dim=1)
        )

    # positive_loss: \sum_{i}{ -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) } / ||B||
    positive_loss = torch.cat(positive_losses).sum() / max(1, positive_numels)

    # box_prob: P{a_{j} \in A_{+}}
    box_prob = torch.stack(box_prob, dim=0)

    # negative_loss: \sum_{j}{ FL( (1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}) ) } / n||B||
    negative_loss = self.negative_bag_loss_func(
        cls_prob * (1 - box_prob), self.focal_loss_gamma
    ) / max(1, positive_numels * self.pre_anchor_topk)

    return positive_loss * self.focal_loss_alpha, negative_loss * (1 - self.focal_loss_alpha)
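# positive_bag_loss_func and negative_bag_loss_func are injected from outside
# this snippet. A minimal sketch of what they could look like, following the
# Mean-max positive bag loss and focal-style negative loss described in the
# FreeAnchor paper; this is an assumption, not the original implementation:
import torch
import torch.nn.functional as F

EPS = 1e-12  # hypothetical stability constant

def positive_bag_loss(logits, dim):
    # Mean-max: the weighting behaves like a mean when all probabilities in
    # the bag are small, and approaches a hard max as they approach 1.
    weight = 1.0 / (1.0 - logits).clamp(min=EPS)
    weight = weight / weight.sum(dim=dim, keepdim=True)
    bag_prob = (weight * logits).sum(dim=dim)
    # -log( Mean-max(P) ), clamped for numerical stability
    return -bag_prob.clamp(min=EPS, max=1 - EPS).log()

def negative_bag_loss(prob, gamma):
    # Focal-style down-weighting of easy negatives: prob**gamma * BCE(prob, 0).
    return (prob ** gamma * F.binary_cross_entropy(
        prob, torch.zeros_like(prob), reduction='none')).sum()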
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        losses (dict[str, Tensor])
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    box_cls_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):
        N, A, H, W = box_cls_per_level.shape
        C = self.num_classes
        box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
        box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
        box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
        box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_cls_flattened.append(box_cls_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1)
    box_regression = cat(box_regression_flattened, dim=1)
    cls_prob = torch.sigmoid(box_cls)

    box_prob = []
    positive_numels = 0
    positive_losses = []
    for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
        zip(anchors, targets, cls_prob, box_regression)
    ):
        labels_ = targets_.get_field("labels").to(dtype=torch.long) - 1

        with torch.set_grad_enabled(False):
            box_localization = self.box_coder.decode(box_regression_, anchors_.bbox)
            object_box_iou = boxlist_iou(
                targets_, BoxList(box_localization, anchors_.size, mode='xyxy')
            )
            # saturation point of the per-object IoU normalization
            # (renamed from H to avoid shadowing the feature-map height above)
            iou_max = object_box_iou.max(dim=1, keepdim=True).values.clamp(
                min=self.bbox_threshold + 1e-12
            )
            object_box_prob = (
                (object_box_iou - self.bbox_threshold)
                / (iou_max - self.bbox_threshold)
            ).clamp(min=0, max=1)

            indices = torch.stack(
                [torch.arange(len(labels_)).type_as(labels_), labels_], dim=0
            )
            """
            to implement:
                image_box_prob = torch.sparse.max(
                    torch.sparse_coo_tensor(indices, object_box_prob), dim=0
                )
            """
            # start
            indices = torch.nonzero(
                torch.sparse.sum(
                    torch.sparse_coo_tensor(indices, object_box_prob), dim=0
                ).to_dense()
            ).t_()

            if indices.numel() == 0:
                image_box_prob = torch.zeros(
                    anchors_.bbox.size(0), self.num_classes
                ).type_as(object_box_prob)
            else:
                nonzero_box_prob = torch.where(
                    (labels_.unsqueeze(dim=-1) == indices[0]),
                    object_box_prob[:, indices[1]],
                    torch.tensor([0.0]).type_as(object_box_prob)
                ).max(dim=0).values
                image_box_prob = torch.sparse_coo_tensor(
                    indices.flip([0]), nonzero_box_prob,
                    size=(anchors_.bbox.size(0), self.num_classes)
                ).to_dense()
            # end
            box_prob.append(image_box_prob)

        match_quality_matrix = boxlist_iou(targets_, anchors_)
        _, matched = torch.topk(
            match_quality_matrix, self.pre_anchor_topk, dim=1, sorted=False
        )
        del match_quality_matrix

        matched_cls_prob = torch.gather(
            cls_prob_[matched], 2,
            labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk, 1)
        ).squeeze(2)
        matched_object_targets = self.box_coder.encode(
            targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched]
        )
        retinanet_regression_loss = smooth_l1_loss(
            box_regression_[matched], matched_object_targets,
            *self.smooth_l1_loss_param
        )
        matched_box_prob = torch.exp(-retinanet_regression_loss)

        positive_numels += len(targets_)
        positive_losses.append(
            self.positive_bag_loss_func(matched_cls_prob * matched_box_prob, dim=1)
        )

    positive_loss = torch.cat(positive_losses).sum() / max(1, positive_numels)
    box_prob = torch.stack(box_prob, dim=0)
    negative_loss = self.negative_bag_loss_func(
        cls_prob * (1 - box_prob), self.focal_loss_gamma
    ) / max(1, positive_numels * self.pre_anchor_topk)

    losses = {
        "loss_retina_positive": positive_loss * self.focal_loss_alpha,
        "loss_retina_negative": negative_loss * (1 - self.focal_loss_alpha),
    }
    return losses
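# Self-contained sanity check (plain PyTorch, no maskrcnn_benchmark required)
# that the "start"/"end" workaround used in both variants above computes a
# per-class max of object_box_prob over objects, i.e. the dense equivalent of
# the unavailable torch.sparse.max. Shapes and values here are illustrative:
import torch

i, j, c = 3, 5, 4                        # objects, anchors, classes
labels_ = torch.tensor([0, 2, 0])        # object class indices
object_box_prob = torch.rand(i, j)       # P{a_j -> b_i}

# --- the workaround, copied from the loss code ---
indices = torch.stack([torch.arange(i), labels_], dim=0)
indices = torch.nonzero(
    torch.sparse.sum(
        torch.sparse_coo_tensor(indices, object_box_prob), dim=0
    ).to_dense()
).t_()
nonzero_box_prob = torch.where(
    labels_.unsqueeze(dim=-1) == indices[0],
    object_box_prob[:, indices[1]],
    torch.tensor([0.0]),
).max(dim=0).values
image_box_prob = torch.sparse_coo_tensor(
    indices.flip([0]), nonzero_box_prob, size=(j, c)
).to_dense()

# --- dense reference: scatter each object's row into its class plane,
# then take the max over objects ---
dense = torch.zeros(i, c, j)
dense[torch.arange(i), labels_] = object_box_prob
assert torch.allclose(image_box_prob, dense.max(dim=0).values.t())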