def get_targets(self, anchor_list, responsible_flag_list, gt_bboxes_list,
                gt_labels_list):
    """Compute target maps for anchors in multiple images.

    Args:
        anchor_list (list[list[Tensor]]): Multi level anchors of each
            image. The outer list indicates images, and the inner list
            corresponds to feature levels of the image. Each element of
            the inner list is a tensor of shape (num_total_anchors, 4).
        responsible_flag_list (list[list[Tensor]]): Multi level responsible
            flags of each image. Each element is a tensor of shape
            (num_total_anchors, )
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        gt_labels_list (list[Tensor]): Ground truth labels of each box.

    Returns:
        tuple: Usually returns a tuple containing learning targets.
            - target_map_list (list[Tensor]): Target map of each level.
            - neg_map_list (list[Tensor]): Negative map of each level.
    """
    num_imgs = len(anchor_list)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]

    results = multi_apply(self._get_targets_single, anchor_list,
                          responsible_flag_list, gt_bboxes_list,
                          gt_labels_list)

    all_target_maps, all_neg_maps = results
    assert num_imgs == len(all_target_maps) == len(all_neg_maps)

    target_maps_list = images_to_levels(all_target_maps, num_level_anchors)
    neg_maps_list = images_to_levels(all_neg_maps, num_level_anchors)

    return target_maps_list, neg_maps_list
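# ``images_to_levels`` is used throughout this file to regroup per-image flat
# tensors into per-level tensors. A minimal sketch consistent with how it is
# called here (``target`` is a list with one flat tensor per image;
# ``num_levels`` holds the anchor count of each pyramid level):
import torch


def images_to_levels(target, num_levels):
    """Convert targets by image to targets by feature level (sketch)."""
    target = torch.stack(target, 0)  # (num_imgs, total_anchors, ...)
    level_targets = []
    start = 0
    for n in num_levels:
        end = start + n
        level_targets.append(target[:, start:end])
        start = end
    return level_targets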
def region_targets(self,
                   anchor_list,
                   valid_flag_list,
                   gt_bboxes_list,
                   img_metas,
                   featmap_sizes,
                   gt_bboxes_ignore_list=None,
                   gt_labels_list=None,
                   label_channels=1,
                   unmap_outputs=True):
    """See :func:`StageCascadeRPNHead.get_targets`."""
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
     pos_inds_list, neg_inds_list) = multi_apply(
         self._region_targets_single,
         anchor_list,
         valid_flag_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         img_metas,
         featmap_sizes=featmap_sizes,
         label_channels=label_channels)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors)
    return (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, num_total_pos, num_total_neg)
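# The target functions above lean on ``multi_apply`` to run a per-image
# function over parallel lists and regroup the per-image results field by
# field. A minimal sketch matching the call pattern used in this file
# (``functools.partial`` binds the keyword arguments; ``zip(*...)``
# transposes image-major results into field-major tuples):
from functools import partial


def multi_apply(func, *args, **kwargs):
    """Apply ``func`` to each tuple of per-image arguments (sketch)."""
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))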
def loss(self,
         cls_scores,
         bbox_preds,
         gt_obboxes,
         gt_labels,
         prior_anchors,
         img_metas,
         gt_bboxes_ignore=None):
    if prior_anchors is None:
        assert not self.with_prior
        return super().loss(cls_scores, bbox_preds, gt_obboxes, gt_labels,
                            img_metas, gt_bboxes_ignore)
    else:
        assert self.with_prior
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_prior_anchors(
            featmap_sizes, prior_anchors, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_obboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg
            if self.sampling else num_total_pos)

        # anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i in range(len(anchor_list)):
            concat_anchor_list.append(torch.cat(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            num_total_samples=num_total_samples)
        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
def L_wave_min(self,
               y_f,
               y_f_r,
               y_head_cls,
               y_loc_img,
               y_cls_img,
               img_metas,
               y_loc_img_ignore=None):
    featmap_sizes = [featmap.size()[-2:] for featmap in y_f[0]]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = y_f[0][0].device
    x_i, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        x_i,
        valid_flag_list,
        y_loc_img,
        img_metas,
        y_loc_img_ignore_list=y_loc_img_ignore,
        y_cls_img_list=y_cls_img,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (y_cls, label_weights_list, y_loc, bbox_weights_list, num_total_pos,
     num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [x_i_single.size(0) for x_i_single in x_i[0]]
    # concat all level anchors and flags to a single tensor
    concat_x_i = []
    for i in range(len(x_i)):
        concat_x_i.append(torch.cat(x_i[i]))
    all_x_i = images_to_levels(concat_x_i, num_level_anchors)

    l_wave_dis, l_det_loc = multi_apply(
        self.l_wave_dis,
        y_f[0],
        y_f[1],
        y_head_cls,
        y_f_r,
        all_x_i,
        y_cls,
        label_weights_list,
        y_loc,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    l_det_cls1, l_det_loc1 = multi_apply(
        self.l_det,
        y_f[0],
        y_f_r,
        all_x_i,
        y_cls,
        label_weights_list,
        y_loc,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    l_det_cls2, l_det_loc2 = multi_apply(
        self.l_det,
        y_f[1],
        y_f_r,
        all_x_i,
        y_cls,
        label_weights_list,
        y_loc,
        bbox_weights_list,
        num_total_samples=num_total_samples)

    if y_loc_img[0][0][0] < 0:
        l_det_cls = list(
            map(lambda m, n: (m + n) * 0, l_det_cls1, l_det_cls2))
        l_det_loc = list(
            map(lambda m, n: (m + n) * 0, l_det_loc1, l_det_loc2))
        for i, value in enumerate(l_det_loc):
            if value.isnan():
                l_det_loc[i].data = torch.tensor(0.0, device=device)
        # compute mil loss
        y_head_cls_1level, y_pseudo = self.get_img_pseudolabel_score(
            y_f, y_head_cls)
        if (y_pseudo.sum(1) == 0).sum() > 0:  # ignore hard images
            l_imgcls = self.l_imgcls(y_head_cls_1level, y_pseudo) * 0
        else:
            l_imgcls = self.l_imgcls(y_head_cls_1level, y_pseudo)
    else:
        l_det_cls = list(
            map(lambda m, n: (m + n) / 2, l_det_cls1, l_det_cls2))
        l_det_loc = list(
            map(lambda m, n: (m + n) / 2, l_det_loc1, l_det_loc2))
        l_wave_dis = list(map(lambda m: m * 0.0, l_wave_dis))
        # compute mil loss
        y_head_cls_1level, y_cls_1level = self.get_img_gtlabel_score(
            y_cls_img, y_head_cls)
        l_imgcls = self.l_imgcls(y_head_cls_1level, y_cls_1level)

    return dict(
        l_det_cls=l_det_cls,
        l_det_loc=l_det_loc,
        l_wave_dis=l_wave_dis,
        l_imgcls=[l_imgcls])
def L_det(self,
          y_f,
          y_f_r,
          y_head_cls,
          y_loc_img,
          y_cls_img,
          img_metas,
          y_loc_img_ignore=None):
    """Compute losses of the head.

    Args:
        y_f (list[Tensor]): Box scores for each scale level with shape
            (n, N * C, H, W).
        y_f_r (list[Tensor]): Box energies / deltas for each scale level
            with shape (n, N * 4, H, W).
        y_head_cls (list[Tensor]): MIL classification scores used for the
            image-level classification loss.
        y_loc_img (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        y_cls_img (list[Tensor]): Class indices corresponding to each box.
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        y_loc_img_ignore (None | list[Tensor]): Specify which bounding
            boxes can be ignored when computing the loss. Default: None.

    Returns:
        dict[str, Tensor]: A dictionary of loss components.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in y_f]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = y_f[0].device
    x_i, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        x_i,
        valid_flag_list,
        y_loc_img,
        img_metas,
        y_loc_img_ignore_list=y_loc_img_ignore,
        y_cls_img_list=y_cls_img,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (y_cls, label_weights_list, y_loc, bbox_weights_list, num_total_pos,
     num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [x_i_single.size(0) for x_i_single in x_i[0]]
    # concat all level anchors and flags to a single tensor
    concat_x_i = []
    for i in range(len(x_i)):
        concat_x_i.append(torch.cat(x_i[i]))
    all_x_i = images_to_levels(concat_x_i, num_level_anchors)

    l_det_cls, l_det_loc = multi_apply(
        self.l_det,
        y_f,
        y_f_r,
        all_x_i,
        y_cls,
        label_weights_list,
        y_loc,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    # compute mil loss
    y_head_cls_1level, y_cls_1level = self.get_img_gtlabel_score(
        y_cls_img, y_head_cls)
    l_imgcls = self.l_imgcls(y_head_cls_1level, y_cls_1level)
    return dict(
        l_det_cls=l_det_cls, l_det_loc=l_det_loc, l_imgcls=[l_imgcls])
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """A combination of :func:`AnchorHead.loss` and :func:`SSDHead.loss`.

    When ``self.use_ohem == True``, it functions like ``SSDHead.loss``;
    otherwise, it follows ``AnchorHead.loss``. Besides, it additionally
    returns ``sampling_results``.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W)
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W)
        gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        gt_labels (list[Tensor]): Class indices corresponding to each box
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
            boxes can be ignored when computing the loss. Default: None

    Returns:
        tuple:
            dict[str, Tensor]: A dictionary of loss components.
            List[:obj:`SamplingResult`]: Sampler results for each image.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.prior_generator.num_levels

    device = cls_scores[0].device

    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        unmap_outputs=not self.use_ohem,
        return_sampling_results=True)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg, sampling_results) = cls_reg_targets

    if self.use_ohem:
        num_images = len(img_metas)
        all_cls_scores = torch.cat([
            s.permute(0, 2, 3, 1).reshape(
                num_images, -1, self.cls_out_channels) for s in cls_scores
        ], 1)
        all_labels = torch.cat(labels_list, -1).view(num_images, -1)
        all_label_weights = torch.cat(label_weights_list,
                                      -1).view(num_images, -1)
        all_bbox_preds = torch.cat([
            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)
            for b in bbox_preds
        ], -2)
        all_bbox_targets = torch.cat(bbox_targets_list,
                                     -2).view(num_images, -1, 4)
        all_bbox_weights = torch.cat(bbox_weights_list,
                                     -2).view(num_images, -1, 4)

        # concat all level anchors to a single tensor
        all_anchors = []
        for i in range(num_images):
            all_anchors.append(torch.cat(anchor_list[i]))

        # check NaN and Inf
        assert torch.isfinite(all_cls_scores).all().item(), \
            'classification scores become infinite or NaN!'
        assert torch.isfinite(all_bbox_preds).all().item(), \
            'bbox predictions become infinite or NaN!'

        losses_cls, losses_bbox = multi_apply(
            self.loss_single_OHEM,
            all_cls_scores,
            all_bbox_preds,
            all_anchors,
            all_labels,
            all_label_weights,
            all_bbox_targets,
            all_bbox_weights,
            num_total_samples=num_total_pos)
    else:
        num_total_samples = (
            num_total_pos + num_total_neg
            if self.sampling else num_total_pos)

        # anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i in range(len(anchor_list)):
            concat_anchor_list.append(torch.cat(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        losses_cls, losses_bbox = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            num_total_samples=num_total_samples)

    return dict(
        loss_cls=losses_cls, loss_bbox=losses_bbox), sampling_results
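# ``loss_single_OHEM`` is not shown in this file. A minimal sketch of the
# online hard example mining it implies, following the SSD-style recipe:
# keep all positives, keep only the ``neg_pos_ratio`` hardest negatives by
# classification loss. ``num_classes`` doubles as the background label index
# here, and ``neg_pos_ratio`` is an assumed config value, not taken from the
# original code:
import torch.nn.functional as F


def ohem_cls_loss(cls_score, labels, label_weights, num_classes,
                  num_total_samples, neg_pos_ratio=3):
    """Classification loss with hard negative mining (sketch)."""
    loss_all = F.cross_entropy(
        cls_score, labels, reduction='none') * label_weights
    pos_inds = ((labels >= 0) &
                (labels < num_classes)).nonzero(as_tuple=False).reshape(-1)
    neg_inds = (labels == num_classes).nonzero(as_tuple=False).reshape(-1)
    # keep at most ``neg_pos_ratio`` negatives per positive
    num_neg = min(neg_pos_ratio * pos_inds.numel(), neg_inds.numel())
    topk_neg_loss, _ = loss_all[neg_inds].topk(num_neg)
    return (loss_all[pos_inds].sum() + topk_neg_loss.sum()) / num_total_samples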
def loss(self,
         fam_cls_scores,
         fam_bbox_preds,
         refine_anchors,
         odm_cls_scores,
         odm_bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         cfg,
         gt_bboxes_ignore=None):
    featmap_sizes = [featmap.size()[-2:] for featmap in odm_cls_scores]
    assert len(featmap_sizes) == len(self.anchor_generators)

    # check for size zero boxes
    for img_nr in range(len(gt_bboxes)):
        zero_inds = gt_bboxes[img_nr][:, 2:4] == 0
        gt_bboxes[img_nr][:, 2:4][zero_inds] = 1

    device = odm_cls_scores[0].device

    anchor_list, valid_flag_list = self.get_init_anchors(
        featmap_sizes, img_metas, device=device)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    # Feature Alignment Module
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = anchor_target(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        self.target_means,
        self.target_stds,
        cfg.fam_cfg,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        sampling=self.sampling)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)
    losses_fam_cls, losses_fam_bbox = multi_apply(
        self.loss_fam_single,
        fam_cls_scores,
        fam_bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples,
        cfg=cfg.fam_cfg)

    # Oriented Detection Module targets
    refine_anchors_list, valid_flag_list = self.get_refine_anchors(
        featmap_sizes, refine_anchors, img_metas, device=device)

    # anchor number of multi levels
    num_level_anchors = [
        anchors.size(0) for anchors in refine_anchors_list[0]
    ]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(refine_anchors_list)):
        concat_anchor_list.append(torch.cat(refine_anchors_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = anchor_target(
        refine_anchors_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        self.target_means,
        self.target_stds,
        cfg.odm_cfg,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        sampling=self.sampling)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)
    losses_odm_cls, losses_odm_bbox = multi_apply(
        self.loss_odm_single,
        odm_cls_scores,
        odm_bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples,
        cfg=cfg.odm_cfg)

    self.last_vals = dict(
        gt_bboxes=gt_bboxes,
        gt_labels=gt_labels,
        img_metas=img_metas,
        fam_cls_scores=fam_cls_scores,
        fam_bbox_preds=fam_bbox_preds,
        refine_anchors=refine_anchors,
        odm_cls_scores=odm_cls_scores,
        odm_bbox_preds=odm_bbox_preds,
    )

    if sum(losses_fam_cls) > 1e10 or sum(losses_fam_bbox) > 1e10 or \
            sum(losses_odm_cls) > 1e10 or sum(losses_odm_bbox) > 1e10:
        print('bad loss')

    return dict(
        loss_fam_cls=losses_fam_cls,
        loss_fam_bbox=losses_fam_bbox,
        loss_odm_cls=losses_odm_cls,
        loss_odm_bbox=losses_odm_bbox)
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_masks,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """Compute losses of the head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W)
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W)
        gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        gt_masks (list): Ground truth masks of each image, each with a
            ``masks`` attribute.
        gt_labels (list[Tensor]): Class indices corresponding to each box
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
            boxes can be ignored when computing the loss. Default: None

    Returns:
        dict[str, Tensor]: A dictionary of loss components.
    """
    # filter out degenerate boxes (width or height <= 1 pixel) together
    # with their masks and labels; zero-sized boxes are first padded to
    # one pixel
    gt_masks = [gtm.masks for gtm in gt_masks]
    for i, gtb in enumerate(gt_bboxes):
        neg_inds = torch.logical_or(gtb[:, 2] - gtb[:, 0] <= 1,
                                    gtb[:, 3] - gtb[:, 1] <= 1)
        gtb[gtb[:, 2] - gtb[:, 0] <= 0, 2] += 1
        gtb[gtb[:, 3] - gtb[:, 1] <= 0, 3] += 1
        pos_inds = torch.logical_not(neg_inds)
        np_pos_inds = pos_inds.detach().cpu().numpy()
        gt_bboxes[i] = gtb[pos_inds]
        gt_masks[i] = gt_masks[i][np_pos_inds]
        gt_labels[i] = gt_labels[i][pos_inds]

    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = cls_scores[0].device
    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        gt_masks,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
def anchor_offset(self, anchor_list, anchor_strides, featmap_sizes):
    """Get offsets for deformable conv based on anchor shape.

    NOTE: currently support deformable kernel_size=3 and dilation=1

    Args:
        anchor_list (list[list[Tensor]]): [NI, NLVL, NA, 4] list of
            multi-level anchors.
        anchor_strides (list[int]): anchor stride of each level.
        featmap_sizes (list[tuple]): feature map size (h, w) of each level.

    Returns:
        offset_list (list[Tensor]): [NLVL, NA, 2 * ks**2]: offset of
            DeformConv kernel.
    """

    def _shape_offset(anchors, stride, ks=3, dilation=1):
        # currently support kernel_size=3 and dilation=1
        assert ks == 3 and dilation == 1
        pad = (ks - 1) // 2
        idx = torch.arange(-pad, pad + 1, dtype=dtype, device=device)
        yy, xx = torch.meshgrid(idx, idx)  # return order matters
        xx = xx.reshape(-1)
        yy = yy.reshape(-1)
        w = (anchors[:, 2] - anchors[:, 0]) / stride
        h = (anchors[:, 3] - anchors[:, 1]) / stride
        w = w / (ks - 1) - dilation
        h = h / (ks - 1) - dilation
        offset_x = w[:, None] * xx  # (NA, ks**2)
        offset_y = h[:, None] * yy  # (NA, ks**2)
        return offset_x, offset_y

    def _ctr_offset(anchors, stride, featmap_size):
        feat_h, feat_w = featmap_size
        assert len(anchors) == feat_h * feat_w

        x = (anchors[:, 0] + anchors[:, 2]) * 0.5
        y = (anchors[:, 1] + anchors[:, 3]) * 0.5
        # compute centers on feature map
        x = x / stride
        y = y / stride
        # compute predefined centers
        xx = torch.arange(0, feat_w, device=anchors.device)
        yy = torch.arange(0, feat_h, device=anchors.device)
        yy, xx = torch.meshgrid(yy, xx)
        xx = xx.reshape(-1).type_as(x)
        yy = yy.reshape(-1).type_as(y)

        offset_x = x - xx  # (NA, )
        offset_y = y - yy  # (NA, )
        return offset_x, offset_y

    num_imgs = len(anchor_list)
    num_lvls = len(anchor_list[0])
    dtype = anchor_list[0][0].dtype
    device = anchor_list[0][0].device
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]

    offset_list = []
    for i in range(num_imgs):
        mlvl_offset = []
        for lvl in range(num_lvls):
            c_offset_x, c_offset_y = _ctr_offset(anchor_list[i][lvl],
                                                 anchor_strides[lvl],
                                                 featmap_sizes[lvl])
            s_offset_x, s_offset_y = _shape_offset(anchor_list[i][lvl],
                                                   anchor_strides[lvl])

            # offset = ctr_offset + shape_offset
            offset_x = s_offset_x + c_offset_x[:, None]
            offset_y = s_offset_y + c_offset_y[:, None]

            # offset order (y0, x0, y1, x1, ..., y8, x8)
            offset = torch.stack([offset_y, offset_x], dim=-1)
            offset = offset.reshape(offset.size(0), -1)  # [NA, 2*ks**2]
            mlvl_offset.append(offset)
        offset_list.append(torch.cat(mlvl_offset))  # [totalNA, 2*ks**2]
    offset_list = images_to_levels(offset_list, num_level_anchors)
    return offset_list
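# A quick standalone check of the shape-offset math above for one 3x3 kernel
# (hypothetical numbers, not from the original code): an anchor of width
# 4 * stride stretches each kernel tap to 2 grid cells minus the unit
# dilation, so offset_x should be -1/0/+1 across the kernel columns.
import torch

stride, ks, dilation = 8, 3, 1
anchor = torch.tensor([[0., 0., 32., 32.]])  # (x1, y1, x2, y2), w = 4*stride
pad = (ks - 1) // 2
idx = torch.arange(-pad, pad + 1, dtype=torch.float32)
yy, xx = torch.meshgrid(idx, idx)
w = (anchor[:, 2] - anchor[:, 0]) / stride / (ks - 1) - dilation
offset_x = w[:, None] * xx.reshape(-1)  # (1, 9): [-1, 0, 1, -1, 0, 1, ...]
print(offset_x)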
def get_targets(self,
                proposals_list,
                valid_flag_list,
                gt_bboxes_list,
                img_metas,
                gt_bboxes_ignore_list=None,
                gt_labels_list=None,
                stage='init',
                label_channels=1,
                unmap_outputs=True):
    """Compute corresponding GT box and classification targets for
    proposals.

    Args:
        proposals_list (list[list]): Multi level points/bboxes of each
            image.
        valid_flag_list (list[list]): Multi level valid flags of each
            image.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
            ignored.
        gt_labels_list (list[Tensor]): Ground truth labels of each box.
        stage (str): 'init' or 'refine'. Generate target for init stage or
            refine stage.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple:
            - labels_list (list[Tensor]): Labels of each level.
            - label_weights_list (list[Tensor]): Label weights of each
              level.
            - bbox_gt_list (list[Tensor]): Ground truth bbox of each
              level.
            - proposal_list (list[Tensor]): Proposals (points/bboxes) of
              each level.
            - proposal_weights_list (list[Tensor]): Proposal weights of
              each level.
            - num_total_pos (int): Number of positive samples in all
              images.
            - num_total_neg (int): Number of negative samples in all
              images.
    """
    assert stage in ['init', 'refine']
    num_imgs = len(img_metas)
    assert len(proposals_list) == len(valid_flag_list) == num_imgs

    # points number of multi levels
    num_level_proposals = [points.size(0) for points in proposals_list[0]]

    # concat all level points and flags to a single tensor
    for i in range(num_imgs):
        assert len(proposals_list[i]) == len(valid_flag_list[i])
        proposals_list[i] = torch.cat(proposals_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_labels, all_label_weights, all_bbox_gt, all_proposals,
     all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(
         self._point_target_single,
         proposals_list,
         valid_flag_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         stage=stage,
         label_channels=label_channels,
         unmap_outputs=unmap_outputs)
    # no valid points
    if any([labels is None for labels in all_labels]):
        return None
    # sampled points of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    labels_list = images_to_levels(all_labels, num_level_proposals)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_proposals)
    bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals)
    proposals_list = images_to_levels(all_proposals, num_level_proposals)
    proposal_weights_list = images_to_levels(all_proposal_weights,
                                             num_level_proposals)
    return (labels_list, label_weights_list, bbox_gt_list, proposals_list,
            proposal_weights_list, num_total_pos, num_total_neg)
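# ``unmap_outputs`` above relies on an ``unmap`` helper (also called further
# below) to scatter targets computed on the valid subset back to the full
# anchor set. A minimal sketch consistent with that usage, where ``inds`` is
# a bool mask over the ``count`` original items and ``fill`` pads entries
# that were filtered out:
import torch


def unmap(data, count, inds, fill=0):
    """Unmap a subset of items back to the original set of items (sketch)."""
    if data.dim() == 1:
        ret = data.new_full((count, ), fill)
        ret[inds.type(torch.bool)] = data
    else:
        new_size = (count, ) + data.size()[1:]
        ret = data.new_full(new_size, fill)
        ret[inds.type(torch.bool), :] = data
    return ret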
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, '
                f'"imgs_per_gpu"={cfg.data.imgs_per_gpu} is used in this '
                'experiment')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiment')
        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    # runner.run(data_loaders, cfg.workflow)

    # instead of training, walk the dataset once and count how many anchors
    # the RPN assigner matches as positive at each pyramid level
    anchor_generator = build_anchor_generator(
        cfg.model.rpn_head.anchor_generator)
    assigner = build_assigner(cfg.model.train_cfg.rpn.assigner)
    total_num_targets = torch.tensor([0] * 5)
    for iteration, data in enumerate(data_loaders):
        for data_batch in data:
            img_metas = data_batch['img_metas']._data
            num_imgs = len(img_metas)
            images = data_batch['img']._data
            gt_bboxes = data_batch['gt_bboxes']._data

            # feature map sizes for FPN levels P2-P6 (strides 4 to 64)
            h, w = images[0].size()[-2:]
            features_shape = []
            for lvl in range(2, 7):
                f_shape = [int(h / (2**lvl)), int(w / (2**lvl))]
                features_shape.append(f_shape)
            multi_level_anchors = anchor_generator.grid_anchors(
                features_shape)
            anchor_list = [multi_level_anchors for _ in range(num_imgs)]

            # for each image, we compute valid flags of multi level anchors
            valid_flag_list = []
            for img_id, img_meta in enumerate(img_metas):
                multi_level_flags = anchor_generator.valid_flags(
                    features_shape, img_meta[0]['pad_shape'])
                valid_flag_list.append(multi_level_flags)

            assert len(anchor_list) == len(valid_flag_list) == num_imgs

            # anchor number of multi levels
            num_level_anchors = [
                anchors.size(0) for anchors in anchor_list[0]
            ]
            # concat all level anchors to a single tensor
            concat_anchor_list = []
            concat_valid_flag_list = []
            for i in range(num_imgs):
                assert len(anchor_list[i]) == len(valid_flag_list[i])
                concat_anchor_list.append(torch.cat(anchor_list[i]))
                concat_valid_flag_list.append(
                    torch.cat(valid_flag_list[i]))

            # compute targets for each image
            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]

            inside_flags = anchor_inside_flags(
                concat_anchor_list[0], concat_valid_flag_list[0],
                img_metas[0][0]['img_shape'][:2], 0)
            if not inside_flags.any():
                return (None, ) * 7
            # assign gt and sample anchors
            anchors = concat_anchor_list[0][inside_flags, :]
            assign_result = assigner.assign(anchors.cpu(), gt_bboxes[0][0],
                                            gt_bboxes_ignore_list[0], None)
            print(assign_result.pos_gt_bboxes)
            pos_inds = torch.nonzero(
                assign_result.gt_inds > 0, as_tuple=False)
            labels = anchors.new_full((anchors.shape[0], ),
                                      -1,
                                      dtype=torch.long)
            labels[pos_inds] = 1
            num_total_anchors = concat_anchor_list[0].size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags,
                fill=-1)  # fill bg label
            match_results = images_to_levels([labels], num_level_anchors)
            for idx, match_result in enumerate(match_results):
                num = torch.where(match_result == 1)[0].numel()
                total_num_targets[idx] += num
    print(total_num_targets)
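# ``anchor_inside_flags`` is used above but not defined in this file. A
# minimal sketch following the usual MMDetection convention: anchors are
# kept if they lie within the image plus an ``allowed_border`` margin (a
# negative margin disables the check entirely):
def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
                        allowed_border=0):
    """Flag anchors lying inside the image (sketch)."""
    img_h, img_w = img_shape[:2]
    if allowed_border >= 0:
        inside_flags = (valid_flags
                        & (flat_anchors[:, 0] >= -allowed_border)
                        & (flat_anchors[:, 1] >= -allowed_border)
                        & (flat_anchors[:, 2] < img_w + allowed_border)
                        & (flat_anchors[:, 3] < img_h + allowed_border))
    else:
        inside_flags = valid_flags
    return inside_flags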
def loss(self,
         cls_scores,
         bbox_preds,
         shape_preds,
         loc_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.approx_anchor_generator.num_levels

    device = cls_scores[0].device

    # get loc targets
    loc_targets, loc_weights, loc_avg_factor = self.ga_loc_targets(
        gt_bboxes, featmap_sizes)

    # get sampled approxes
    approxs_list, inside_flag_list = self.get_sampled_approxs(
        featmap_sizes, img_metas, device=device)
    # get squares and guided anchors
    squares_list, guided_anchors_list, _ = self.get_anchors(
        featmap_sizes, shape_preds, loc_preds, img_metas, device=device)

    # get shape targets
    shape_targets = self.ga_shape_targets(approxs_list, inside_flag_list,
                                          squares_list, gt_bboxes,
                                          img_metas)
    if shape_targets is None:
        return None
    (bbox_anchors_list, bbox_gts_list, anchor_weights_list, anchor_fg_num,
     anchor_bg_num) = shape_targets
    anchor_total_num = (
        anchor_fg_num if not self.ga_sampling else anchor_fg_num +
        anchor_bg_num)

    # get anchor targets
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        guided_anchors_list,
        inside_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [
        anchors.size(0) for anchors in guided_anchors_list[0]
    ]
    # concat all level anchors to a single tensor
    concat_anchor_list = []
    for i in range(len(guided_anchors_list)):
        concat_anchor_list.append(torch.cat(guided_anchors_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    # get classification and bbox regression losses
    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples)

    # get anchor location loss
    losses_loc = []
    for i in range(len(loc_preds)):
        loss_loc = self.loss_loc_single(
            loc_preds[i],
            loc_targets[i],
            loc_weights[i],
            loc_avg_factor=loc_avg_factor)
        losses_loc.append(loss_loc)
    # get anchor shape loss
    losses_shape = []
    for i in range(len(shape_preds)):
        loss_shape = self.loss_shape_single(
            shape_preds[i],
            bbox_anchors_list[i],
            bbox_gts_list[i],
            anchor_weights_list[i],
            anchor_total_num=anchor_total_num)
        losses_shape.append(loss_shape)
    return dict(
        loss_cls=losses_cls,
        loss_bbox=losses_bbox,
        loss_shape=losses_shape,
        loss_loc=losses_loc)
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """Compute losses of the head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level
            Has shape (N, num_anchors * num_classes, H, W)
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W)
        gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        gt_labels (list[Tensor]): class indices corresponding to each box
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (None | list[Tensor]): specify which bounding
            boxes can be ignored when computing the loss. Default: None

    Returns:
        dict[str, Tensor]: A dictionary of loss components.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = cls_scores[0].device
    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
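# ``loss_single`` is applied per feature level above. A minimal free-function
# sketch of what such a per-level loss conventionally computes; ``loss_cls``,
# ``loss_bbox`` and ``cls_out_channels`` stand in for the head's configured
# weighted loss modules and attribute (assumptions, not this repository's
# exact method):
def loss_single_sketch(cls_score, bbox_pred, labels, label_weights,
                       bbox_targets, bbox_weights, num_total_samples,
                       cls_out_channels, loss_cls, loss_bbox):
    """Per-level classification and regression loss (sketch)."""
    # flatten (N, A*C, H, W) scores to (N*H*W*A, C) to match flat targets
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(-1, cls_out_channels)
    l_cls = loss_cls(
        cls_score,
        labels.reshape(-1),
        label_weights.reshape(-1),
        avg_factor=num_total_samples)
    # flatten (N, A*4, H, W) deltas to (N*H*W*A, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    l_bbox = loss_bbox(
        bbox_pred,
        bbox_targets.reshape(-1, 4),
        bbox_weights.reshape(-1, 4),
        avg_factor=num_total_samples)
    return l_cls, l_bbox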
def loss(self,
         fam_cls_scores,
         fam_bbox_preds,
         refine_anchors,
         odm_cls_scores,
         odm_bbox_preds,
         bboxes,
         gt_bboxes,
         gt_labels,
         img_metas,
         cfg,
         gt_bboxes_ignore=None):
    featmap_sizes = [featmap.size()[-2:] for featmap in odm_cls_scores]
    assert len(featmap_sizes) == len(self.anchor_generators)

    device = odm_cls_scores[0].device
    anchors_list, valid_flag_list = self.get_init_anchors(
        featmap_sizes, img_metas, device=device)
    refine_anchors_list, valid_flag_list = self.get_refine_anchors(
        featmap_sizes, refine_anchors, img_metas, device=device)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchors_list[0]]

    # Feature Alignment Module
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = anchor_target(
        anchors_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        self.target_means,
        self.target_stds,
        cfg.fam_cfg,
        refine_anchors_list,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        sampling=self.sampling,
        reg_decoded_bbox=self.reg_decoded_bbox,
        use_vfl=False)
    if cls_reg_targets is None:
        return None
    # labels_list is a list whose length equals the number of feature map
    # levels (typically 5); each element is a tensor of shape
    # [bs, num_anchors]. The other lists are organized the same way.
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    all_anchor_list = images_to_levels(anchors_list, num_level_anchors)

    losses_fam_cls, losses_fam_bbox = multi_apply(
        self.loss_fam_single,
        fam_cls_scores,
        fam_bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples,
        cfg=cfg.fam_cfg)

    # Oriented Detection Module targets
    refine_anchors_list, valid_flag_list = self.get_refine_anchors(
        featmap_sizes, refine_anchors, img_metas, device=device)
    num_level_anchors = [
        anchors.size(0) for anchors in refine_anchors_list[0]
    ]
    output_bboxes, _ = self.get_refine_anchors(
        featmap_sizes, bboxes, img_metas, device=device)

    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = anchor_target(
        refine_anchors_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        self.target_means,
        self.target_stds,
        cfg.odm_cfg,
        output_bboxes,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        sampling=self.sampling,
        reg_decoded_bbox=self.reg_decoded_bbox,
        use_vfl=self.use_vfl)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    all_anchor_list = images_to_levels(refine_anchors_list,
                                       num_level_anchors)

    losses_odm_cls, losses_odm_bbox = multi_apply(
        self.loss_odm_single,
        odm_cls_scores,
        odm_bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples,
        cfg=cfg.odm_cfg)
    return dict(
        loss_fam_cls=losses_fam_cls,
        loss_fam_bbox=losses_fam_bbox,
        loss_odm_cls=losses_odm_cls,
        loss_odm_bbox=losses_odm_bbox)
def get_targets(self,
                cls_scores,
                bbox_preds,
                anchor_list,
                valid_flag_list,
                gt_bboxes_list,
                img_metas,
                gt_bboxes_ignore_list=None,
                gt_labels_list=None,
                label_channels=1,
                unmap_outputs=True):
    """Compute regression and classification targets for anchors in
    multiple images.

    Args:
        cls_scores (Tensor): Classification predictions of images,
            a 3D-Tensor with shape [num_imgs, num_priors, num_classes].
        bbox_preds (Tensor): Decoded bboxes predictions of one image,
            a 3D-Tensor with shape [num_imgs, num_priors, 4] in
            [tl_x, tl_y, br_x, br_y] format.
        anchor_list (list[list[Tensor]]): Multi level anchors of each
            image. The outer list indicates images, and the inner list
            corresponds to feature levels of the image. Each element of
            the inner list is a tensor of shape (num_anchors, 4).
        valid_flag_list (list[list[Tensor]]): Multi level valid flags of
            each image. The outer list indicates images, and the inner
            list corresponds to feature levels of the image. Each element
            of the inner list is a tensor of shape (num_anchors, )
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
            ignored.
        gt_labels_list (list[Tensor]): Ground truth labels of each box.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: a tuple containing learning targets.
            - anchors_list (list[list[Tensor]]): Anchors of each level.
            - labels_list (list[Tensor]): Labels of each level.
            - label_weights_list (list[Tensor]): Label weights of each
              level.
            - bbox_targets_list (list[Tensor]): BBox targets of each
              level.
            - norm_alignment_metrics_list (list[Tensor]): Normalized
              alignment metrics of each level.
    """
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    num_level_anchors_list = [num_level_anchors] * num_imgs

    # concat all level anchors and flags to a single tensor
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        anchor_list[i] = torch.cat(anchor_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    # anchor_list: list(b * [-1, 4])
    if self.epoch < self.initial_epoch:
        (all_anchors, all_labels, all_label_weights, all_bbox_targets,
         all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
             super()._get_target_single,
             anchor_list,
             valid_flag_list,
             num_level_anchors_list,
             gt_bboxes_list,
             gt_bboxes_ignore_list,
             gt_labels_list,
             img_metas,
             label_channels=label_channels,
             unmap_outputs=unmap_outputs)
        all_assign_metrics = [
            weight[..., 0] for weight in all_bbox_weights
        ]
    else:
        (all_anchors, all_labels, all_label_weights, all_bbox_targets,
         all_assign_metrics) = multi_apply(
             self._get_target_single,
             cls_scores,
             bbox_preds,
             anchor_list,
             valid_flag_list,
             gt_bboxes_list,
             gt_bboxes_ignore_list,
             gt_labels_list,
             img_metas,
             label_channels=label_channels,
             unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None

    # split targets to a list w.r.t. multiple levels
    anchors_list = images_to_levels(all_anchors, num_level_anchors)
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    norm_alignment_metrics_list = images_to_levels(all_assign_metrics,
                                                   num_level_anchors)

    return (anchors_list, labels_list, label_weights_list,
            bbox_targets_list, norm_alignment_metrics_list)
def anchor_target_3d(self,
                     anchor_list,
                     gt_bboxes_list,
                     input_metas,
                     gt_bboxes_ignore_list=None,
                     gt_labels_list=None,
                     label_channels=1,
                     num_classes=1,
                     sampling=True):
    """Compute regression and classification targets for anchors.

    Args:
        anchor_list (list[list]): Multi level anchors of each image.
        gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
            bboxes of each image.
        input_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes.
        gt_labels_list (list[torch.Tensor]): Gt labels of batches.
        label_channels (int): The channel of labels.
        num_classes (int): The number of classes.
        sampling (bool): Whether to sample anchors.

    Returns:
        tuple (list, list, list, list, list, list, int, int):
            Anchor targets, including labels, label weights,
            bbox targets, bbox weights, direction targets,
            direction weights, number of positive anchors and
            number of negative anchors.
    """
    num_imgs = len(input_metas)
    assert len(anchor_list) == num_imgs

    if isinstance(anchor_list[0][0], list):
        # sizes of anchors are different
        # anchor number of a single level
        num_level_anchors = [
            sum([anchor.size(0) for anchor in anchors])
            for anchors in anchor_list[0]
        ]
        for i in range(num_imgs):
            anchor_list[i] = anchor_list[i][0]
    else:
        # anchor number of multi levels
        num_level_anchors = [
            anchors.view(-1, self.box_code_size).size(0)
            for anchors in anchor_list[0]
        ]
        # concat all level anchors and flags to a single tensor
        for i in range(num_imgs):
            anchor_list[i] = torch.cat(anchor_list[i])

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]

    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
     all_dir_targets, all_dir_weights, pos_inds_list,
     neg_inds_list) = multi_apply(
         self.anchor_target_3d_single,
         anchor_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         input_metas,
         label_channels=label_channels,
         num_classes=num_classes,
         sampling=sampling)

    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors)
    dir_targets_list = images_to_levels(all_dir_targets, num_level_anchors)
    dir_weights_list = images_to_levels(all_dir_weights, num_level_anchors)
    return (labels_list, label_weights_list, bbox_targets_list,
            bbox_weights_list, dir_targets_list, dir_weights_list,
            num_total_pos, num_total_neg)
def loss(self,
         cls_scores,
         bbox_preds,
         bboxes,
         gt_bboxes,
         gt_labels,
         img_metas,
         cfg,
         gt_bboxes_ignore=None):
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == len(self.anchor_generators)

    device = cls_scores[0].device
    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    output_bboxes, _ = self.get_refine_anchors(
        featmap_sizes, bboxes, img_metas, device=device)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1

    # list[list]: the outer list has one entry per image in the batch, the
    # inner list one entry per feature level
    cls_scores_list = []
    for img_id, img_meta in enumerate(img_metas):
        mlvl_cls_score = []
        for i in range(len(featmap_sizes)):
            cls_score = cls_scores[i][img_id].permute(1, 2,
                                                      0).reshape(-1, 1)
            mlvl_cls_score.append(cls_score)
        cls_scores_list.append(mlvl_cls_score)

    cls_reg_targets = anchor_hrsc_target(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        self.target_means,
        self.target_stds,
        cfg,
        output_bboxes,
        cls_scores_list,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        sampling=self.sampling,
        reg_decoded_bbox=self.reg_decoded_bbox,
        use_vfl=self.use_vfl)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    all_anchor_list = images_to_levels(anchor_list, num_level_anchors)

    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples,
        cfg=cfg)
    return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
def get_reg_targets(self,
                    anchor_list,
                    valid_flag_list,
                    num_level_anchors_list,
                    cls_score_list,
                    bbox_pred_list,
                    gt_bboxes_list,
                    img_metas,
                    gt_bboxes_ignore_list=None,
                    gt_labels_list=None,
                    label_channels=1,
                    unmap_outputs=True):
    """Get reg targets for DDOD head.

    This method is almost the same as `AnchorHead.get_targets()` when
    is_cls_assigner is False. Besides returning the targets as the parent
    method does, it also returns the anchors as the first element of the
    returned tuple.

    Args:
        anchor_list (list[Tensor]): Anchors of each image.
        valid_flag_list (list[Tensor]): Valid flags of each image.
        num_level_anchors_list (list[list[int]]): Number of anchors of
            each scale level of each image.
        cls_score_list (list[Tensor]): Classification scores for all scale
            levels, each is a 4D-tensor, the channels number is
            num_base_priors * num_classes.
        bbox_pred_list (list[Tensor]): Box energies / deltas for all scale
            levels, each is a 4D-tensor, the channels number is
            num_base_priors * 4.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore_list (list[Tensor] | None): Specify which
            bounding boxes can be ignored when computing the loss.
        gt_labels_list (list[Tensor]): Class indices corresponding to
            each box.
        label_channels (int): Channel of label.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Return:
        tuple[Tensor]: A tuple of reg targets components.
    """
    (all_anchors, all_labels, all_label_weights, all_bbox_targets,
     all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
         self._get_target_single,
         anchor_list,
         valid_flag_list,
         cls_score_list,
         bbox_pred_list,
         num_level_anchors_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         img_metas,
         label_channels=label_channels,
         unmap_outputs=unmap_outputs,
         is_cls_assigner=False)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    anchors_list = images_to_levels(all_anchors, num_level_anchors_list[0])
    labels_list = images_to_levels(all_labels, num_level_anchors_list[0])
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors_list[0])
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors_list[0])
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors_list[0])
    return (anchors_list, labels_list, label_weights_list,
            bbox_targets_list, bbox_weights_list, num_total_pos,
            num_total_neg)
def ga_shape_targets(self,
                     approx_list,
                     inside_flag_list,
                     square_list,
                     gt_bboxes_list,
                     img_metas,
                     gt_bboxes_ignore_list=None,
                     unmap_outputs=True):
    """Compute guided anchoring targets.

    Args:
        approx_list (list[list]): Multi level approxs of each image.
        inside_flag_list (list[list]): Multi level inside flags of each
            image.
        square_list (list[list]): Multi level squares of each image.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
        unmap_outputs (bool): unmap outputs or not.

    Returns:
        tuple
    """
    num_imgs = len(img_metas)
    assert len(approx_list) == len(inside_flag_list) == len(
        square_list) == num_imgs

    # anchor number of multi levels
    num_level_squares = [squares.size(0) for squares in square_list[0]]

    # concat all level anchors and flags to a single tensor
    inside_flag_flat_list = []
    approx_flat_list = []
    square_flat_list = []
    for i in range(num_imgs):
        assert len(square_list[i]) == len(inside_flag_list[i])
        inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
        approx_flat_list.append(torch.cat(approx_list[i]))
        square_flat_list.append(torch.cat(square_list[i]))

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    (all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,
     neg_inds_list) = multi_apply(
         self._ga_shape_target_single,
         approx_flat_list,
         inside_flag_flat_list,
         square_flat_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         img_metas,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    bbox_anchors_list = images_to_levels(all_bbox_anchors,
                                         num_level_squares)
    bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_squares)
    return (bbox_anchors_list, bbox_gts_list, bbox_weights_list,
            num_total_pos, num_total_neg)
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """Compute losses of the head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level with
            shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        gt_bboxes (list[Tensor]): Ground truth bboxes of each image with
            shape (num_obj, 4).
        gt_labels (list[Tensor]): Ground truth labels of each image with
            shape (num_obj, ).
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image.
            Default: None.

    Returns:
        dict: Loss dict comprising the classification loss, the regression
            loss and, optionally, the CARL loss.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = cls_scores[0].device

    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        return_sampling_results=True)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    num_imgs = len(img_metas)
    flatten_cls_scores = [
        cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, label_channels)
        for cls_score in cls_scores
    ]
    flatten_cls_scores = torch.cat(
        flatten_cls_scores, dim=1).reshape(-1,
                                           flatten_cls_scores[0].size(-1))
    flatten_bbox_preds = [
        bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
        for bbox_pred in bbox_preds
    ]
    flatten_bbox_preds = torch.cat(
        flatten_bbox_preds, dim=1).view(-1, flatten_bbox_preds[0].size(-1))
    flatten_labels = torch.cat(labels_list, dim=1).reshape(-1)
    flatten_label_weights = torch.cat(label_weights_list, dim=1).reshape(-1)
    flatten_anchors = torch.cat(all_anchor_list, dim=1).reshape(-1, 4)
    flatten_bbox_targets = torch.cat(
        bbox_targets_list, dim=1).reshape(-1, 4)
    flatten_bbox_weights = torch.cat(
        bbox_weights_list, dim=1).reshape(-1, 4)

    # Apply ISR-P
    isr_cfg = self.train_cfg.get('isr', None)
    if isr_cfg is not None:
        all_targets = (flatten_labels, flatten_label_weights,
                       flatten_bbox_targets, flatten_bbox_weights)
        with torch.no_grad():
            all_targets = isr_p(
                flatten_cls_scores,
                flatten_bbox_preds,
                all_targets,
                flatten_anchors,
                sampling_results_list,
                bbox_coder=self.bbox_coder,
                loss_cls=self.loss_cls,
                num_class=self.num_classes,
                **self.train_cfg.isr)
        (flatten_labels, flatten_label_weights, flatten_bbox_targets,
         flatten_bbox_weights) = all_targets

    # For convenience we compute the loss once instead of separating it by
    # FPN level, so that we do not need to split the weights by level
    # again. The result should be the same.
    losses_cls = self.loss_cls(
        flatten_cls_scores,
        flatten_labels,
        flatten_label_weights,
        avg_factor=num_total_samples)
    losses_bbox = self.loss_bbox(
        flatten_bbox_preds,
        flatten_bbox_targets,
        flatten_bbox_weights,
        avg_factor=num_total_samples)
    loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox)

    # CARL Loss
    carl_cfg = self.train_cfg.get('carl', None)
    if carl_cfg is not None:
        loss_carl = carl_loss(
            flatten_cls_scores,
            flatten_labels,
            flatten_bbox_preds,
            flatten_bbox_targets,
            self.loss_bbox,
            **self.train_cfg.carl,
            avg_factor=num_total_pos,
            sigmoid=True,
            num_class=self.num_classes)
        loss_dict.update(loss_carl)
    return loss_dict
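# A minimal sketch of a train_cfg fragment that would enable the ISR-P and
# CARL branches above via self.train_cfg.get('isr') / .get('carl'). The inner
# keys (k, bias) are assumed re-weighting hyperparameters forwarded unchanged
# to isr_p / carl_loss; they are not guaranteed by this file.
train_cfg_sketch = dict(
    isr=dict(k=2., bias=0.),    # importance-based sample re-weighting (assumed keys)
    carl=dict(k=1., bias=0.2),  # classification-aware regression loss (assumed keys)
)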
def get_targets(self,
                anchor_list,
                valid_flag_list,
                gt_bboxes_list,
                img_metas,
                gt_bboxes_ignore_list=None,
                gt_labels_list=None,
                label_channels=1,
                unmap_outputs=True,
                return_sampling_results=False):
    """Compute regression and classification targets for anchors in
    multiple images.

    Args:
        anchor_list (list[list[Tensor]]): Multi level anchors of each
            image. The outer list indicates images, and the inner list
            corresponds to feature levels of the image. Each element of
            the inner list is a tensor of shape (num_anchors, 4).
        valid_flag_list (list[list[Tensor]]): Multi level valid flags of
            each image. The outer list indicates images, and the inner
            list corresponds to feature levels of the image. Each element
            of the inner list is a tensor of shape (num_anchors, ).
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
            ignored.
        gt_labels_list (list[Tensor]): Ground truth labels of each box.
        label_channels (int): Number of label channels.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.
        return_sampling_results (bool): Whether to also return the
            sampling results of each image.

    Returns:
        tuple: Usually returns a tuple containing learning targets.

            - labels_list (list[Tensor]): Labels of each level.
            - label_weights_list (list[Tensor]): Label weights of each
              level.
            - bbox_targets_list (list[Tensor]): BBox targets of each
              level.
            - bbox_weights_list (list[Tensor]): BBox weights of each
              level.
            - num_total_pos (int): Number of positive samples in all
              images.
            - num_total_neg (int): Number of negative samples in all
              images.

        additional_returns: This function enables user-defined returns
            from `self._get_targets_single`. These returns are currently
            refined to properties at each feature map (i.e. having HxW
            dimension). The results will be concatenated after the end of
            the returned tuple.
    """
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors to a single tensor
    concat_anchor_list = []
    concat_valid_flag_list = []
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        concat_anchor_list.append(torch.cat(anchor_list[i]))
        concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    results = multi_apply(
        self._get_targets_single,
        concat_anchor_list,
        concat_valid_flag_list,
        gt_bboxes_list,
        gt_bboxes_ignore_list,
        gt_labels_list,
        img_metas,
        label_channels=label_channels,
        unmap_outputs=unmap_outputs)
    (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
     pos_inds_list, neg_inds_list, sampling_results_list) = results[:7]
    rest_results = list(results[7:])  # user-added return values
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors)
    res = (labels_list, label_weights_list, bbox_targets_list,
           bbox_weights_list, num_total_pos, num_total_neg)
    if return_sampling_results:
        res = res + (sampling_results_list, )
    for i, r in enumerate(rest_results):  # user-added return values
        rest_results[i] = images_to_levels(r, num_level_anchors)
    return res + tuple(rest_results)
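# A minimal sketch, assuming `multi_apply` maps a per-image function over
# parallel argument lists and transposes the per-image result tuples into
# per-field lists; `multi_apply_sketch` is a hypothetical name, not the
# canonical helper.
from functools import partial


def multi_apply_sketch(func, *args, **kwargs):
    """E.g. [(a0, b0), (a1, b1)] -> ([a0, a1], [b0, b1])."""
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))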
def get_target(self,
               approx_list,
               inside_flag_list,
               square_list,
               gt_bboxes_list,
               img_metas,
               gt_bboxes_ignore_list=None,
               gt_labels_list=None,
               label_channels=None,
               sampling=True,
               unmap_outputs=True):
    """Compute bucketing targets.

    Args:
        approx_list (list[list]): Multi level approxs of each image.
        inside_flag_list (list[list]): Multi level inside flags of each
            image.
        square_list (list[list]): Multi level squares of each image.
        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
        img_metas (list[dict]): Meta info of each image.
        gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
            ignored.
        gt_labels_list (list[Tensor]): Ground truth labels of each box.
        label_channels (int): Number of label channels.
        sampling (bool): Whether to sample anchors.
        unmap_outputs (bool): Whether to map outputs back to the original
            set of anchors.

    Returns:
        tuple: Returns a tuple containing learning targets.

            - labels_list (list[Tensor]): Labels of each level.
            - label_weights_list (list[Tensor]): Label weights of each
              level.
            - bbox_cls_targets_list (list[Tensor]): BBox cls targets of
              each level.
            - bbox_cls_weights_list (list[Tensor]): BBox cls weights of
              each level.
            - bbox_reg_targets_list (list[Tensor]): BBox reg targets of
              each level.
            - bbox_reg_weights_list (list[Tensor]): BBox reg weights of
              each level.
            - num_total_pos (int): Number of positive samples in all
              images.
            - num_total_neg (int): Number of negative samples in all
              images.
    """
    num_imgs = len(img_metas)
    assert len(approx_list) == len(inside_flag_list) == len(
        square_list) == num_imgs
    # anchor number of multi levels
    num_level_squares = [squares.size(0) for squares in square_list[0]]
    # concat all level anchors and flags to a single tensor
    inside_flag_flat_list = []
    approx_flat_list = []
    square_flat_list = []
    for i in range(num_imgs):
        assert len(square_list[i]) == len(inside_flag_list[i])
        inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
        approx_flat_list.append(torch.cat(approx_list[i]))
        square_flat_list.append(torch.cat(square_list[i]))

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_labels, all_label_weights, all_bbox_cls_targets,
     all_bbox_cls_weights, all_bbox_reg_targets, all_bbox_reg_weights,
     pos_inds_list, neg_inds_list) = multi_apply(
         self._get_target_single,
         approx_flat_list,
         inside_flag_flat_list,
         square_flat_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         img_metas,
         label_channels=label_channels,
         sampling=sampling,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    labels_list = images_to_levels(all_labels, num_level_squares)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_squares)
    bbox_cls_targets_list = images_to_levels(all_bbox_cls_targets,
                                             num_level_squares)
    bbox_cls_weights_list = images_to_levels(all_bbox_cls_weights,
                                             num_level_squares)
    bbox_reg_targets_list = images_to_levels(all_bbox_reg_targets,
                                             num_level_squares)
    bbox_reg_weights_list = images_to_levels(all_bbox_reg_weights,
                                             num_level_squares)
    return (labels_list, label_weights_list, bbox_cls_targets_list,
            bbox_cls_weights_list, bbox_reg_targets_list,
            bbox_reg_weights_list, num_total_pos, num_total_neg)
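# The max(inds.numel(), 1) guard used above keeps the average factor derived
# from num_total_pos / num_total_neg away from zero when an image has no
# sampled anchors. A toy illustration with assumed index tensors:
import torch

pos_inds_demo = [torch.tensor([3, 7]), torch.empty(0, dtype=torch.long)]
num_total_pos_demo = sum(max(inds.numel(), 1) for inds in pos_inds_demo)
assert num_total_pos_demo == 3  # 2 real positives + 1 floor for the empty image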
def get_targets(self,
                anchor_list,
                valid_flag_list,
                gt_bboxes_list,
                img_metas,
                gt_bboxes_ignore_list=None,
                gt_labels_list=None,
                label_channels=1,
                unmap_outputs=True):
    """Get targets for ATSS head.

    This method is almost the same as `AnchorHead.get_targets()`. Besides
    returning the targets as the parent method does, it also returns the
    anchors as the first element of the returned tuple.
    """
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    num_level_anchors_list = [num_level_anchors] * num_imgs

    # concat all level anchors and flags to a single tensor
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        anchor_list[i] = torch.cat(anchor_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])

    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_anchors, all_labels, all_label_weights, all_bbox_targets,
     all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
         self._get_target_single,
         anchor_list,
         valid_flag_list,
         num_level_anchors_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         img_metas,
         label_channels=label_channels,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    anchors_list = images_to_levels(all_anchors, num_level_anchors)
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors)
    return (anchors_list, labels_list, label_weights_list,
            bbox_targets_list, bbox_weights_list, num_total_pos,
            num_total_neg)
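# Note that get_targets above concatenates each image's per-level anchors in
# place, so the caller's anchor_list is mutated. A toy illustration of the
# same pattern with assumed shapes:
import torch

anchor_list_demo = [[torch.zeros(4, 4), torch.zeros(2, 4)]]  # 1 image, 2 levels
anchor_list_demo[0] = torch.cat(anchor_list_demo[0])
assert anchor_list_demo[0].shape == (6, 4)  # levels merged into one tensor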
def gfl_target(self,
               anchor_list,
               valid_flag_list,
               gt_bboxes_list,
               img_metas,
               cfg,
               gt_bboxes_ignore_list=None,
               gt_labels_list=None,
               label_channels=1,
               unmap_outputs=True):
    """Almost the same as `anchor_target`, with a slight modification:
    here we also need to return the anchors."""
    num_imgs = len(img_metas)
    assert len(anchor_list) == len(valid_flag_list) == num_imgs
    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    num_level_anchors_list = [num_level_anchors] * num_imgs
    # concat all level anchors and flags to a single tensor
    for i in range(num_imgs):
        assert len(anchor_list[i]) == len(valid_flag_list[i])
        anchor_list[i] = torch.cat(anchor_list[i])
        valid_flag_list[i] = torch.cat(valid_flag_list[i])
    # compute targets for each image
    if gt_bboxes_ignore_list is None:
        gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
    if gt_labels_list is None:
        gt_labels_list = [None for _ in range(num_imgs)]
    (all_anchors, all_labels, all_label_weights, all_bbox_targets,
     all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
         self.gfl_target_single,
         anchor_list,
         valid_flag_list,
         num_level_anchors_list,
         gt_bboxes_list,
         gt_bboxes_ignore_list,
         gt_labels_list,
         img_metas,
         cfg=cfg,
         label_channels=label_channels,
         unmap_outputs=unmap_outputs)
    # no valid anchors
    if any([labels is None for labels in all_labels]):
        return None
    # sampled anchors of all images
    num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
    num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
    # split targets to a list w.r.t. multiple levels
    anchors_list = images_to_levels(all_anchors, num_level_anchors)
    labels_list = images_to_levels(all_labels, num_level_anchors)
    label_weights_list = images_to_levels(all_label_weights,
                                          num_level_anchors)
    bbox_targets_list = images_to_levels(all_bbox_targets,
                                         num_level_anchors)
    bbox_weights_list = images_to_levels(all_bbox_weights,
                                         num_level_anchors)
    return (anchors_list, labels_list, label_weights_list,
            bbox_targets_list, bbox_weights_list, num_total_pos,
            num_total_neg)
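# multi_apply iterates its positional list arguments in parallel, one entry
# per image, so replicating the shared level split per image (as above) lets
# the per-image target function recover per-level boundaries. Toy values with
# assumed sizes:
num_level_anchors_demo = [16, 4]  # anchors per level
num_imgs_demo = 3
num_level_anchors_list_demo = [num_level_anchors_demo] * num_imgs_demo
assert len(num_level_anchors_list_demo) == num_imgs_demo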
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None):
    """Compute the losses, then reweight them so that, for each gt, only
    the anchors at its best (minimum-loss) feature level are
    back-propagated."""
    for i in range(len(bbox_preds)):  # loop over fpn level
        # avoid 0 area of the predicted bbox
        bbox_preds[i] = bbox_preds[i].clamp(min=1e-4)
    # TODO: It may directly use the base-class loss function.
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels
    batch_size = len(gt_bboxes)
    device = cls_scores[0].device
    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     num_total_pos, num_total_neg,
     pos_assigned_gt_inds_list) = cls_reg_targets

    num_gts = np.array(list(map(len, gt_labels)))
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)
    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)
    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples)

    # `pos_assigned_gt_inds_list` (length: fpn_levels) stores the assigned
    # gt index of each anchor bbox in each fpn level.
    cum_num_gts = list(np.cumsum(num_gts))  # length of batch_size
    for i, assign in enumerate(pos_assigned_gt_inds_list):
        # loop over fpn levels
        for j in range(1, batch_size):
            # loop over batch size
            # Convert gt indices in each img to those in the batch
            assign[j][assign[j] >= 0] += int(cum_num_gts[j - 1])
        pos_assigned_gt_inds_list[i] = assign.flatten()
        labels_list[i] = labels_list[i].flatten()
    num_gts = sum(map(len, gt_labels))  # total number of gt in the batch
    # The unique label index of each gt in the batch
    label_sequence = torch.arange(num_gts, device=device)
    # Collect the average loss of each gt in each level
    with torch.no_grad():
        loss_levels, = multi_apply(
            self.collect_loss_level_single,
            losses_cls,
            losses_bbox,
            pos_assigned_gt_inds_list,
            labels_seq=label_sequence)
    # Shape: (fpn_levels, num_gts). Loss of each gt at each fpn level
    loss_levels = torch.stack(loss_levels, dim=0)
    # Locate the best fpn level for loss back-propagation
    if loss_levels.numel() == 0:  # zero gt
        argmin = loss_levels.new_empty((num_gts, ), dtype=torch.long)
    else:
        _, argmin = loss_levels.min(dim=0)

    # Reweight the loss of each (anchor, label) pair, so that only those
    # at the best gt level are back-propagated.
    losses_cls, losses_bbox, pos_inds = multi_apply(
        self.reweight_loss_single,
        losses_cls,
        losses_bbox,
        pos_assigned_gt_inds_list,
        labels_list,
        list(range(len(losses_cls))),
        min_levels=argmin)
    num_pos = torch.cat(pos_inds, 0).sum().float()
    acc = self.calculate_accuracy(cls_scores, labels_list, pos_inds)
    if num_pos == 0:  # No gt
        avg_factor = num_pos + float(num_total_neg)
    else:
        avg_factor = num_pos
    for i in range(len(losses_cls)):
        losses_cls[i] /= avg_factor
        losses_bbox[i] /= avg_factor
    return dict(
        loss_cls=losses_cls,
        loss_bbox=losses_bbox,
        num_pos=num_pos / batch_size,
        accuracy=acc)
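# A toy illustration of the level-selection step above: stack the per-gt
# losses across levels and pick, for each gt, the level with the minimum
# loss. The numbers are illustrative only.
import torch

loss_levels_demo = torch.tensor([[0.9, 0.2],   # level 0: loss for gt 0, gt 1
                                 [0.4, 0.7]])  # level 1
_, argmin_demo = loss_levels_demo.min(dim=0)
assert argmin_demo.tolist() == [1, 0]  # gt 0 -> level 1, gt 1 -> level 0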
def loss(self,
         cls_scores,
         bbox_preds,
         gt_bboxes,
         gt_labels,
         img_metas,
         gt_bboxes_ignore=None,
         batch_idx=0,
         analysis_scale=1.0):
    """Compute losses of the head.

    Args:
        cls_scores (list[Tensor]): Box scores for each scale level with
            shape (N, num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for each scale
            level with shape (N, num_anchors * 4, H, W).
        gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
            shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
        gt_labels (list[Tensor]): Class indices corresponding to each box.
        img_metas (list[dict]): Meta information of each image, e.g.,
            image size, scaling factor, etc.
        gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
            boxes can be ignored when computing the loss. Default: None.
        batch_idx (int): Index of the current batch; used to name the
            saved analysis images. Default: 0.
        analysis_scale (float): Scale tag appended to the saved file
            names. Default: 1.0.

    Returns:
        dict[str, Tensor]: A dictionary of loss components.
    """
    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
    assert len(featmap_sizes) == self.anchor_generator.num_levels

    device = cls_scores[0].device

    anchor_list, valid_flag_list = self.get_anchors(
        featmap_sizes, img_metas, device=device)
    label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
    cls_reg_targets = self.get_targets(
        anchor_list,
        valid_flag_list,
        gt_bboxes,
        img_metas,
        gt_bboxes_ignore_list=gt_bboxes_ignore,
        gt_labels_list=gt_labels,
        label_channels=label_channels,
        batch_idx=batch_idx,
        analysis_scale=analysis_scale)
    if cls_reg_targets is None:
        return None
    (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
     pos_anchor_flags_list, num_total_pos, num_total_neg) = cls_reg_targets
    num_total_samples = (
        num_total_pos + num_total_neg if self.sampling else num_total_pos)

    # Save a per-level heat map of positive anchor locations for analysis.
    for i, (pos_anchor_flags, featmap_size) in enumerate(
            zip(pos_anchor_flags_list, featmap_sizes)):
        pos_anchor_flags = pos_anchor_flags.view(-1, featmap_size[0],
                                                 featmap_size[1], 9)
        flatten_anchor_flags = torch.sum(
            pos_anchor_flags, dim=3, keepdim=True)
        flatten_anchor_flags = flatten_anchor_flags.view(
            -1, 1, featmap_size[0], featmap_size[1])
        save_image(
            flatten_anchor_flags,
            f'analysis_results/image_{batch_idx}_feature_{i}'
            f'_flatten_anchor_flags_scale_{analysis_scale}.png')

    # anchor number of multi levels
    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
    # concat all level anchors and flags to a single tensor
    concat_anchor_list = []
    for i in range(len(anchor_list)):
        concat_anchor_list.append(torch.cat(anchor_list[i]))
    all_anchor_list = images_to_levels(concat_anchor_list,
                                       num_level_anchors)

    losses_cls, losses_bbox = multi_apply(
        self.loss_single,
        cls_scores,
        bbox_preds,
        all_anchor_list,
        labels_list,
        label_weights_list,
        bbox_targets_list,
        bbox_weights_list,
        num_total_samples=num_total_samples)
    return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
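# A toy illustration of the heat-map step above: collapse the per-location
# anchor axis (9 anchors per cell is assumed to match the head) into a single
# channel suitable for save_image. Shapes are illustrative only.
import torch

H, W, A = 8, 8, 9
pos_anchor_flags_demo = torch.zeros(1, H, W, A)
pos_anchor_flags_demo[0, 2, 3, 5] = 1.  # one positive anchor at cell (2, 3)
heat = pos_anchor_flags_demo.sum(dim=3, keepdim=True).view(-1, 1, H, W)
assert heat[0, 0, 2, 3] == 1.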