def get_targets(self, anchor_list, responsible_flag_list, gt_bboxes_list, gt_labels_list, img_metas): """Compute target maps for anchors in multiple images. Args: anchor_list (list[list[Tensor]]): Multi level anchors of each image. The outer list indicates images, and the inner list corresponds to feature levels of the image. Each element of the inner list is a tensor of shape (num_total_anchors, 4). responsible_flag_list (list[list[Tensor]]): Multi level responsible flags of each image. Each element is a tensor of shape (num_total_anchors, ) gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. gt_labels_list (list[Tensor]): Ground truth labels of each box. Returns: tuple: Usually returns a tuple containing learning targets. - target_map_list (list[Tensor]): Target map of each level. - neg_map_list (list[Tensor]): Negative map of each level. """ num_imgs = len(anchor_list) # anchor number of multi levels 每一层anchor的个数,例如输出是10x10,那么10x10x3=300 num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] results = multi_apply(self._get_targets_single, anchor_list, responsible_flag_list, gt_bboxes_list, gt_labels_list, img_metas, num_level_anchors=num_level_anchors) all_target_maps, all_neg_maps = results assert num_imgs == len(all_target_maps) == len(all_neg_maps) # all_target_maps是图片级别list结构,假设有8张图片则len()=8,需要转化为fpn级别的输出,才方便计算Loss,假设有三层输出,则len()=3 target_maps_list = images_to_levels(all_target_maps, num_level_anchors) neg_maps_list = images_to_levels(all_neg_maps, num_level_anchors) return target_maps_list, neg_maps_list
def loss(self, cls_scores, bbox_preds, shape_preds, loc_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.approx_anchor_generator.num_levels device = cls_scores[0].device # get loc targets loc_targets, loc_weights, loc_avg_factor = self.ga_loc_targets( gt_bboxes, featmap_sizes) # anchor的shape预测分支所需要的anchor_list approxs_list, inside_flag_list = self.get_sampled_approxs( featmap_sizes, img_metas, device=device) # 利用anchor生成分支得到guided_anchors(稀疏值),用于后续训练原始分类和回归分支 # 但是作者设置use_loc_filter=False,也就是对loc分支预测值不过滤 # squares_list长度和guided_anchors_list一样,可能是为了使得focal loss的参数不需要改变才这么设置吧 squares_list, guided_anchors_list, _ = self.get_anchors(featmap_sizes, shape_preds, loc_preds, img_metas, device=device) # get shape targets shape_targets = self.ga_shape_targets(approxs_list, inside_flag_list, squares_list, gt_bboxes, img_metas) if shape_targets is None: return None # bbox_anchors_list就是squares_list (bbox_anchors_list, bbox_gts_list, anchor_weights_list, anchor_fg_num, anchor_bg_num) = shape_targets anchor_total_num = (anchor_fg_num if not self.ga_sampling else anchor_fg_num + anchor_bg_num) # get anchor targets label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 # 原始分支的target计算,比较简单 # 需要注意的是如果guided_anchors_list是稀疏的,而且很多iou应该大于0.5,也就是此时该分支训练时候很多都是正样本 # 不平衡问题应该没有原来严重了,但是这里设置依然是密集的 cls_reg_targets = self.get_targets( guided_anchors_list, inside_flag_list, gt_bboxes, img_metas, gt_bboxes_ignore_list=gt_bboxes_ignore, gt_labels_list=gt_labels, label_channels=label_channels) if cls_reg_targets is None: return None (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets num_total_samples = (num_total_pos + num_total_neg if self.sampling else num_total_pos) # anchor number of multi levels num_level_anchors = [ anchors.size(0) for anchors in guided_anchors_list[0] ] # concat all level anchors to a single tensor concat_anchor_list = [] for i in range(len(guided_anchors_list)): concat_anchor_list.append(torch.cat(guided_anchors_list[i])) all_anchor_list = images_to_levels(concat_anchor_list, num_level_anchors) # get classification and bbox regression losses # 原始分类和回归分支,和原来一样 losses_cls, losses_bbox = multi_apply( self.loss_single, cls_scores, bbox_preds, all_anchor_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_samples=num_total_samples) # get anchor location loss losses_loc = [] for i in range(len(loc_preds)): loss_loc = self.loss_loc_single(loc_preds[i], loc_targets[i], loc_weights[i], loc_avg_factor=loc_avg_factor) losses_loc.append(loss_loc) # get anchor shape loss losses_shape = [] for i in range(len(shape_preds)): loss_shape = self.loss_shape_single( shape_preds[i], bbox_anchors_list[i], bbox_gts_list[i], anchor_weights_list[i], anchor_total_num=anchor_total_num) losses_shape.append(loss_shape) return dict(loss_cls=losses_cls, loss_bbox=losses_bbox, loss_shape=losses_shape, loss_loc=losses_loc)
def ga_shape_targets(self, approx_list, inside_flag_list, square_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, unmap_outputs=True): """Compute guided anchoring targets. Args: approx_list (list[list]): Multi level approxs of each image. inside_flag_list (list[list]): Multi level inside flags of each image. square_list (list[list]): Multi level squares of each image. gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. img_metas (list[dict]): Meta info of each image. gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes. unmap_outputs (bool): unmap outputs or not. Returns: tuple """ num_imgs = len(img_metas) assert len(approx_list) == len(inside_flag_list) == len( square_list) == num_imgs # anchor number of multi levels num_level_squares = [squares.size(0) for squares in square_list[0]] # concat all level anchors and flags to a single tensor inside_flag_flat_list = [] approx_flat_list = [] square_flat_list = [] for i in range(num_imgs): assert len(square_list[i]) == len(inside_flag_list[i]) inside_flag_flat_list.append(torch.cat(inside_flag_list[i])) approx_flat_list.append(torch.cat(approx_list[i])) square_flat_list.append(torch.cat(square_list[i])) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] (all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(self._ga_shape_target_single, approx_flat_list, inside_flag_flat_list, square_flat_list, gt_bboxes_list, gt_bboxes_ignore_list, img_metas, unmap_outputs=unmap_outputs) # no valid anchors if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels bbox_anchors_list = images_to_levels(all_bbox_anchors, num_level_squares) bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_squares) return (bbox_anchors_list, bbox_gts_list, bbox_weights_list, num_total_pos, num_total_neg)
def get_targets(self, anchor_list, valid_flag_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, label_channels=1, unmap_outputs=True): """Get targets for ATSS head. This method is almost the same as `AnchorHead.get_targets()`. Besides returning the targets as the parent method does, it also returns the anchors as the first element of the returned tuple. """ num_imgs = len(img_metas) assert len(anchor_list) == len(valid_flag_list) == num_imgs # 调试anchor # imgs = [] # for an in anchor_list[0]: # img = show_pos_anchor(img_metas[0], an, gt_bboxes_list[0], is_show=False) # imgs.append(img) # cv_core.show_img(imgs) # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] num_level_anchors_list = [num_level_anchors] * num_imgs # concat all level anchors and flags to a single tensor for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) anchor_list[i] = torch.cat(anchor_list[i]) valid_flag_list[i] = torch.cat(valid_flag_list[i]) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] if gt_labels_list is None: gt_labels_list = [None for _ in range(num_imgs)] (all_anchors, all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(self._get_target_single, anchor_list, valid_flag_list, num_level_anchors_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, img_metas, label_channels=label_channels, unmap_outputs=unmap_outputs) # no valid anchors if any([labels is None for labels in all_labels]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels anchors_list = images_to_levels(all_anchors, num_level_anchors) labels_list = images_to_levels(all_labels, num_level_anchors) label_weights_list = images_to_levels(all_label_weights, num_level_anchors) bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors) return (anchors_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg)
def get_targets(self, proposals_list, valid_flag_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, stage='init', label_channels=1, unmap_outputs=True): """Compute corresponding GT box and classification targets for proposals. Args: proposals_list (list[list]): Multi level points/bboxes of each image. valid_flag_list (list[list]): Multi level valid flags of each image. gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. img_metas (list[dict]): Meta info of each image. gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be ignored. gt_bboxes_list (list[Tensor]): Ground truth labels of each box. stage (str): `init` or `refine`. Generate target for init stage or refine stage label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: - labels_list (list[Tensor]): Labels of each level. - label_weights_list (list[Tensor]): Label weights of each level. # noqa: E501 - bbox_gt_list (list[Tensor]): Ground truth bbox of each level. - proposal_list (list[Tensor]): Proposals(points/bboxes) of each level. # noqa: E501 - proposal_weights_list (list[Tensor]): Proposal weights of each level. # noqa: E501 - num_total_pos (int): Number of positive samples in all images. # noqa: E501 - num_total_neg (int): Number of negative samples in all images. # noqa: E501 """ assert stage in ['init', 'refine'] num_imgs = len(img_metas) assert len(proposals_list) == len(valid_flag_list) == num_imgs # points number of multi levels num_level_proposals = [points.size(0) for points in proposals_list[0]] # concat all level points and flags to a single tensor for i in range(num_imgs): assert len(proposals_list[i]) == len(valid_flag_list[i]) proposals_list[i] = torch.cat(proposals_list[i]) valid_flag_list[i] = torch.cat(valid_flag_list[i]) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] if gt_labels_list is None: gt_labels_list = [None for _ in range(num_imgs)] (all_labels, all_label_weights, all_bbox_gt, all_proposals, all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(self._point_target_single, proposals_list, valid_flag_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, stage=stage, label_channels=label_channels, unmap_outputs=unmap_outputs) # no valid points if any([labels is None for labels in all_labels]): return None # sampled points of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) labels_list = images_to_levels(all_labels, num_level_proposals) label_weights_list = images_to_levels(all_label_weights, num_level_proposals) bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals) proposals_list = images_to_levels(all_proposals, num_level_proposals) proposal_weights_list = images_to_levels(all_proposal_weights, num_level_proposals) return (labels_list, label_weights_list, bbox_gt_list, proposals_list, proposal_weights_list, num_total_pos, num_total_neg)
def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Compute losses of the head. Args: cls_scores (list[Tensor]): Box scores for each scale level Has shape (N, num_anchors * num_classes, H, W) bbox_preds (list[Tensor]): Box energies / deltas for each scale level with shape (N, num_anchors * 4, H, W) gt_bboxes (list[Tensor]): Ground truth bboxes for each image with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format. gt_labels (list[Tensor]): class indices corresponding to each box img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore (None | list[Tensor]): specify which bounding boxes can be ignored when computing the loss. Default: None Returns: dict[str, Tensor]: A dictionary of loss components. """ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] # 每个层的特征图size assert len(featmap_sizes) == self.anchor_generator.num_levels device = cls_scores[0].device anchor_list, valid_flag_list = self.get_anchors( featmap_sizes, img_metas, device=device) label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 cls_reg_targets = self.get_targets( anchor_list, valid_flag_list, gt_bboxes, img_metas, gt_bboxes_ignore_list=gt_bboxes_ignore, gt_labels_list=gt_labels, label_channels=label_channels) if cls_reg_targets is None: return None (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets num_total_samples = ( num_total_pos + num_total_neg if self.sampling else num_total_pos) # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] # concat all level anchors and flags to a single tensor concat_anchor_list = [] for i in range(len(anchor_list)): concat_anchor_list.append(torch.cat(anchor_list[i])) all_anchor_list = images_to_levels(concat_anchor_list, num_level_anchors) losses_cls, losses_bbox = multi_apply( self.loss_single, cls_scores, bbox_preds, all_anchor_list, labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_samples=num_total_samples) return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
def get_targets(self, anchor_list, valid_flag_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, label_channels=1, unmap_outputs=True, return_sampling_results=False): """Compute regression and classification targets for anchors in multiple images. Args: anchor_list (list[list[Tensor]]): Multi level anchors of each image. The outer list indicates images, and the inner list corresponds to feature levels of the image. Each element of the inner list is a tensor of shape (num_anchors, 4). valid_flag_list (list[list[Tensor]]): Multi level valid flags of each image. The outer list indicates images, and the inner list corresponds to feature levels of the image. Each element of the inner list is a tensor of shape (num_anchors, ) gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. img_metas (list[dict]): Meta info of each image. gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be ignored. gt_labels_list (list[Tensor]): Ground truth labels of each box. label_channels (int): Channel of label. unmap_outputs (bool): Whether to map outputs back to the original set of anchors. Returns: tuple: Usually returns a tuple containing learning targets. - labels_list (list[Tensor]): Labels of each level. - label_weights_list (list[Tensor]): Label weights of each \ level. - bbox_targets_list (list[Tensor]): BBox targets of each level. - bbox_weights_list (list[Tensor]): BBox weights of each level. - num_total_pos (int): Number of positive samples in all \ images. - num_total_neg (int): Number of negative samples in all \ images. additional_returns: This function enables user-defined returns from `self._get_targets_single`. These returns are currently refined to properties at each feature map (i.e. having HxW dimension). The results will be concatenated after the end """ num_imgs = len(img_metas) assert len(anchor_list) == len(valid_flag_list) == num_imgs # 将多层的anchor数据concat,变成一个大矩阵,方便后面计算;也就是相当于只有一个预测输出层 # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] # concat all level anchors to a single tensor concat_anchor_list = [] concat_valid_flag_list = [] for i in range(num_imgs): assert len(anchor_list[i]) == len(valid_flag_list[i]) concat_anchor_list.append(torch.cat(anchor_list[i])) concat_valid_flag_list.append(torch.cat(valid_flag_list[i])) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] if gt_labels_list is None: gt_labels_list = [None for _ in range(num_imgs)] results = multi_apply( self._get_targets_single, concat_anchor_list, concat_valid_flag_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, img_metas, label_channels=label_channels, unmap_outputs=unmap_outputs, num_level_anchors=num_level_anchors) (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, pos_inds_list, neg_inds_list, sampling_results_list) = results[:7] rest_results = list(results[7:]) # user-added return values # no valid anchors if any([labels is None for labels in all_labels]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels # 前面把多个层的target合并了,现在还原到各自的层上面 labels_list = images_to_levels(all_labels, num_level_anchors) label_weights_list = images_to_levels(all_label_weights, num_level_anchors) bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors) bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors) res = (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg) if return_sampling_results: res = res + (sampling_results_list,) for i, r in enumerate(rest_results): # user-added return values rest_results[i] = images_to_levels(r, num_level_anchors) return res + tuple(rest_results)
def get_target(self, approx_list, inside_flag_list, square_list, gt_bboxes_list, img_metas, gt_bboxes_ignore_list=None, gt_labels_list=None, label_channels=None, sampling=True, unmap_outputs=True): """Compute bucketing targets. Args: approx_list (list[list]): Multi level approxs of each image. inside_flag_list (list[list]): Multi level inside flags of each image. square_list (list[list]): Multi level squares of each image. gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image. img_metas (list[dict]): Meta info of each image. gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes. gt_bboxes_list (list[Tensor]): Gt bboxes of each image. label_channels (int): Channel of label. sampling (bool): Sample Anchors or not. unmap_outputs (bool): unmap outputs or not. Returns: tuple: Returns a tuple containing learning targets. - labels_list (list[Tensor]): Labels of each level. - label_weights_list (list[Tensor]): Label weights of each \ level. - bbox_cls_targets_list (list[Tensor]): BBox cls targets of \ each level. - bbox_cls_weights_list (list[Tensor]): BBox cls weights of \ each level. - bbox_reg_targets_list (list[Tensor]): BBox reg targets of \ each level. - bbox_reg_weights_list (list[Tensor]): BBox reg weights of \ each level. - num_total_pos (int): Number of positive samples in all \ images. - num_total_neg (int): Number of negative samples in all \ images. """ num_imgs = len(img_metas) assert len(approx_list) == len(inside_flag_list) == len( square_list) == num_imgs # anchor number of multi levels num_level_squares = [squares.size(0) for squares in square_list[0]] # concat all level anchors and flags to a single tensor inside_flag_flat_list = [] approx_flat_list = [] square_flat_list = [] for i in range(num_imgs): assert len(square_list[i]) == len(inside_flag_list[i]) inside_flag_flat_list.append(torch.cat(inside_flag_list[i])) approx_flat_list.append(torch.cat(approx_list[i])) square_flat_list.append(torch.cat(square_list[i])) # compute targets for each image if gt_bboxes_ignore_list is None: gt_bboxes_ignore_list = [None for _ in range(num_imgs)] if gt_labels_list is None: gt_labels_list = [None for _ in range(num_imgs)] (all_labels, all_label_weights, all_bbox_cls_targets, all_bbox_cls_weights, all_bbox_reg_targets, all_bbox_reg_weights, pos_inds_list, neg_inds_list) = multi_apply(self._get_target_single, approx_flat_list, inside_flag_flat_list, square_flat_list, gt_bboxes_list, gt_bboxes_ignore_list, gt_labels_list, img_metas, label_channels=label_channels, sampling=sampling, unmap_outputs=unmap_outputs) # no valid anchors if any([labels is None for labels in all_labels]): return None # sampled anchors of all images num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list]) num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list]) # split targets to a list w.r.t. multiple levels labels_list = images_to_levels(all_labels, num_level_squares) label_weights_list = images_to_levels(all_label_weights, num_level_squares) bbox_cls_targets_list = images_to_levels(all_bbox_cls_targets, num_level_squares) bbox_cls_weights_list = images_to_levels(all_bbox_cls_weights, num_level_squares) bbox_reg_targets_list = images_to_levels(all_bbox_reg_targets, num_level_squares) bbox_reg_weights_list = images_to_levels(all_bbox_reg_weights, num_level_squares) return (labels_list, label_weights_list, bbox_cls_targets_list, bbox_cls_weights_list, bbox_reg_targets_list, bbox_reg_weights_list, num_total_pos, num_total_neg)
def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Compute losses of the head. Args: cls_scores (list[Tensor]): Box scores for each scale level Has shape (N, num_anchors * num_classes, H, W) bbox_preds (list[Tensor]): Box energies / deltas for each scale level with shape (N, num_anchors * 4, H, W) gt_bboxes (list[Tensor]): Ground truth bboxes of each image with shape (num_obj, 4). gt_labels (list[Tensor]): Ground truth labels of each image with shape (num_obj, 4). img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image. Default: None. Returns: dict: Loss dict, comprise classification loss, regression loss and carl loss. """ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.anchor_generator.num_levels device = cls_scores[0].device anchor_list, valid_flag_list = self.get_anchors( featmap_sizes, img_metas, device=device) label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 cls_reg_targets = self.get_targets( anchor_list, valid_flag_list, gt_bboxes, img_metas, gt_bboxes_ignore_list=gt_bboxes_ignore, gt_labels_list=gt_labels, label_channels=label_channels, return_sampling_results=True) if cls_reg_targets is None: return None (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets num_total_samples = ( num_total_pos + num_total_neg if self.sampling else num_total_pos) # anchor number of multi levels num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] # concat all level anchors and flags to a single tensor concat_anchor_list = [] for i in range(len(anchor_list)): concat_anchor_list.append(torch.cat(anchor_list[i])) all_anchor_list = images_to_levels(concat_anchor_list, num_level_anchors) num_imgs = len(img_metas) flatten_cls_scores = [ cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, label_channels) for cls_score in cls_scores ] flatten_cls_scores = torch.cat( flatten_cls_scores, dim=1).reshape(-1, flatten_cls_scores[0].size(-1)) flatten_bbox_preds = [ bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) for bbox_pred in bbox_preds ] flatten_bbox_preds = torch.cat( flatten_bbox_preds, dim=1).view(-1, flatten_bbox_preds[0].size(-1)) flatten_labels = torch.cat(labels_list, dim=1).reshape(-1) flatten_label_weights = torch.cat( label_weights_list, dim=1).reshape(-1) flatten_anchors = torch.cat(all_anchor_list, dim=1).reshape(-1, 4) flatten_bbox_targets = torch.cat( bbox_targets_list, dim=1).reshape(-1, 4) flatten_bbox_weights = torch.cat( bbox_weights_list, dim=1).reshape(-1, 4) # Apply ISR-P isr_cfg = self.train_cfg.get('isr', None) if isr_cfg is not None: all_targets = (flatten_labels, flatten_label_weights, flatten_bbox_targets, flatten_bbox_weights) with torch.no_grad(): all_targets = isr_p( flatten_cls_scores, flatten_bbox_preds, all_targets, flatten_anchors, sampling_results_list, bbox_coder=self.bbox_coder, loss_cls=self.loss_cls, num_class=self.num_classes, **self.train_cfg.isr) (flatten_labels, flatten_label_weights, flatten_bbox_targets, flatten_bbox_weights) = all_targets # For convenience we compute loss once instead separating by fpn level, # so that we don't need to separate the weights by level again. # The result should be the same losses_cls = self.loss_cls( flatten_cls_scores, flatten_labels, flatten_label_weights, avg_factor=num_total_samples) losses_bbox = self.loss_bbox( flatten_bbox_preds, flatten_bbox_targets, flatten_bbox_weights, avg_factor=num_total_samples) loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox) # CARL Loss carl_cfg = self.train_cfg.get('carl', None) if carl_cfg is not None: loss_carl = carl_loss( flatten_cls_scores, flatten_labels, flatten_bbox_preds, flatten_bbox_targets, self.loss_bbox, **self.train_cfg.carl, avg_factor=num_total_pos, sigmoid=True, num_class=self.num_classes) loss_dict.update(loss_carl) return loss_dict