def loss(self,
         cls_score,
         bbox_pred,
         labels,
         label_weights,
         bbox_targets,
         bbox_weights,
         reduce=True,
         img_meta_2=None,
         img_meta_3=None):
    """Classification/regression losses; keys are suffixed by which
    image-meta argument was supplied ('_3' over '_2' over none)."""
    losses = dict()
    # Resolve the key suffix once; original code checked img_meta_3 first
    # in both branches, so that precedence is preserved here.
    if img_meta_3 is not None:
        suffix = '_3'
    elif img_meta_2 is not None:
        suffix = '_2'
    else:
        suffix = ''
    if cls_score is not None:
        losses['loss_cls' + suffix] = weighted_cross_entropy(
            cls_score, labels, label_weights, reduce=reduce)
        losses['acc' + suffix] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        pos_inds = labels > 0
        num_rois = bbox_pred.size(0)
        if self.reg_class_agnostic:
            # one 6-parameter box per RoI
            pos_bbox_pred = bbox_pred.view(num_rois, 6)[pos_inds]
        else:
            # pick the 6 regression channels of each positive's gt class
            pos_bbox_pred = bbox_pred.view(
                num_rois, -1, 6)[pos_inds, labels[pos_inds]]
        losses['loss_reg' + suffix] = weighted_smoothl1(
            pos_bbox_pred,
            bbox_targets[pos_inds],
            bbox_weights[pos_inds],
            avg_factor=bbox_targets.size(0))
    return losses
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, reduce=True):
    """Compute classification and box-regression losses for the bbox head.

    Supports two label layouts: 1-D labels (one class index per sample)
    and 2-D labels (handled via torch.nonzero over per-class entries).
    Returns a dict with 'loss_cls', 'acc' and 'loss_reg' entries.
    """
    losses = dict()
    if cls_score is not None:
        # cls_loss_func is configured elsewhere on the head instance
        losses['loss_cls'] = self.cls_loss_func(cls_score, labels,
                                                label_weights, reduce=reduce)
        losses['acc'] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        if labels.dim() == 1:
            # labels[i] is the class index of sample i; 0 = background
            pos_inds = labels > 0
            if self.reg_class_agnostic:
                pos_bbox_pred = bbox_pred.view(bbox_pred.size(0), 4)[pos_inds]
            else:
                # select the 4 regression channels of the gt class
                pos_bbox_pred = bbox_pred.view(
                    bbox_pred.size(0), -1, 4)[pos_inds, labels[pos_inds]]
            loss = weighted_smoothl1(pos_bbox_pred, bbox_targets[pos_inds],
                                     bbox_weights[pos_inds],
                                     avg_factor=bbox_targets.size(0))
        else:
            # 2-D labels: rows of x are (sample_index, positive_class) pairs
            x = torch.nonzero(labels > 0)
            if x.numel() == 0:
                # no positives at all: regression contributes nothing
                loss = 0
            else:
                sampled_pos_inds_subset, labels_pos = x[:, 0], x[:, 1]
                if self.reg_class_agnostic:
                    map_inds = torch.tensor([0, 1, 2, 3],
                                            device=bbox_pred.device)
                else:
                    # columns [4c .. 4c+3] hold the box deltas for class c
                    map_inds = 4 * labels_pos[:, None] + torch.tensor(
                        [0, 1, 2, 3], device=bbox_pred.device)
                sampled_box_regression = bbox_pred[
                    sampled_pos_inds_subset[:, None], map_inds]
                sampled_box_target = bbox_targets[sampled_pos_inds_subset]
                sampled_bbox_weights = bbox_weights[
                    sampled_pos_inds_subset]
                loss = weighted_smoothl1(sampled_box_regression,
                                         sampled_box_target,
                                         sampled_bbox_weights,
                                         avg_factor=bbox_targets.size(0))
        losses['loss_reg'] = loss
    return losses
def loss(
    self,
    cls_score,
    bbox_pred,
    labels,
    label_weights,
    bbox_targets,
    bbox_weights,
    reduce=True,
):
    """Standard bbox-head loss: weighted cross-entropy + accuracy for
    classification and smooth-L1 on the positive samples' box deltas."""
    losses = {}
    if cls_score is not None:
        losses["loss_cls"] = weighted_cross_entropy(
            cls_score, labels, label_weights, reduce=reduce)
        losses["acc"] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        pos_inds = labels > 0
        num_rois = bbox_pred.size(0)
        if self.reg_class_agnostic:
            pos_pred = bbox_pred.view(num_rois, 4)[pos_inds]
        else:
            # per-class regression: take the 4 channels of the gt class
            pos_pred = bbox_pred.view(
                num_rois, -1, 4)[pos_inds, labels[pos_inds]]
        losses["loss_reg"] = weighted_smoothl1(
            pos_pred,
            bbox_targets[pos_inds],
            bbox_weights[pos_inds],
            avg_factor=bbox_targets.size(0),
        )
    return losses
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_pos_samples, cfg):
    """Per-level loss with optional GHM variants: GHM-C / focal loss for
    classification and GHM-R / smooth-L1 for regression, chosen by cfg."""
    # --- classification ---
    n_cls = self.cls_out_channels
    labels = labels.contiguous().view(-1, n_cls)
    label_weights = label_weights.contiguous().view(-1, n_cls)
    cls_score = cls_score.permute(0, 2, 3, 1).contiguous().view(-1, n_cls)
    if 'ghmc' in cfg:
        loss_cls = self.ghmc_loss.calc(cls_score, labels, label_weights)
    else:
        loss_cls = weighted_sigmoid_focal_loss(
            cls_score, labels, label_weights, cfg.gamma, cfg.alpha,
            avg_factor=num_pos_samples)
    # --- regression ---
    bbox_targets = bbox_targets.contiguous().view(-1, 4)
    bbox_weights = bbox_weights.contiguous().view(-1, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).contiguous().view(-1, 4)
    if 'ghmr' in cfg:
        loss_reg = self.ghmr_loss.calc(bbox_pred, bbox_targets, bbox_weights)
    else:
        loss_reg = weighted_smoothl1(
            bbox_pred, bbox_targets, bbox_weights,
            beta=cfg.smoothl1_beta, avg_factor=num_pos_samples)
    return loss_cls, loss_reg
def loss_single(self, rpn_cls_score, rpn_bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, coo_num, cfg):
    """RPN loss for one pyramid level: anchor classification plus smooth-L1
    regression over coo_num coordinates per anchor."""
    # Regression branch: flatten everything to (num_anchors, coo_num) rows.
    flat_targets = bbox_targets.contiguous().view(-1, coo_num)
    flat_box_weights = bbox_weights.contiguous().view(-1, coo_num)
    flat_pred = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view(
        -1, coo_num)
    loss_reg = weighted_smoothl1(
        flat_pred, flat_targets, flat_box_weights,
        beta=cfg.smoothl1_beta, avg_factor=num_total_samples)
    # Classification branch: a sigmoid head emits one logit per anchor,
    # a softmax head emits two.
    flat_labels = labels.contiguous().view(-1)
    flat_label_weights = label_weights.contiguous().view(-1)
    scores = rpn_cls_score.permute(0, 2, 3, 1).contiguous()
    if self.use_sigmoid_cls:
        loss_cls = weighted_binary_cross_entropy(
            scores.view(-1), flat_labels, flat_label_weights,
            avg_factor=num_total_samples)
    else:
        loss_cls = weighted_cross_entropy(
            scores.view(-1, 2), flat_labels, flat_label_weights,
            avg_factor=num_total_samples)
    return loss_cls, loss_reg
def loss_single(
    self,
    cls_score,
    bbox_pred,
    labels,
    label_weights,
    bbox_targets,
    bbox_weights,
    num_total_samples,
    cfg,
):
    """SSD-style loss for one image: cross-entropy with hard negative
    mining for classification, smooth-L1 for box regression."""
    per_anchor_cls_loss = (
        F.cross_entropy(cls_score, labels, reduction="none") * label_weights)
    pos_inds = (labels > 0).nonzero().view(-1)
    neg_inds = (labels == 0).nonzero().view(-1)
    # keep at most neg_pos_ratio negatives per positive (hard mining)
    num_neg = min(cfg.neg_pos_ratio * pos_inds.size(0), neg_inds.size(0))
    hardest_neg_loss, _ = per_anchor_cls_loss[neg_inds].topk(num_neg)
    loss_cls = (per_anchor_cls_loss[pos_inds].sum()
                + hardest_neg_loss.sum()) / num_total_samples
    loss_reg = weighted_smoothl1(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        beta=cfg.smoothl1_beta,
        avg_factor=num_total_samples,
    )
    # [None] promotes the 0-dim sum to a shape-[1] tensor
    return loss_cls[None], loss_reg
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, mix_inds=None, reduce=True):
    """Bbox-head loss; `mix_inds` is forwarded untouched to both the
    classification and regression criteria."""
    losses = {}
    if cls_score is not None:
        losses['loss_cls'] = weighted_cross_entropy(
            cls_score, labels, label_weights,
            mix_inds=mix_inds, reduce=reduce)
        losses['acc'] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        losses['loss_reg'] = weighted_smoothl1(
            bbox_pred, bbox_targets, bbox_weights,
            mix_inds=mix_inds, avg_factor=bbox_targets.size(0))
    return losses
def loss_single(
    self,
    cls_score,
    bbox_pred,
    labels,
    label_weights,
    bbox_targets,
    bbox_weights,
    num_total_samples,
    cfg,
):
    """Anchor-head loss for one feature level. Classification uses focal
    loss, BCE or CE depending on the head's flags; regression is smooth-L1
    over flattened (N, 4) deltas."""
    # flatten per-anchor tensors, channels last
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    if self.use_focal_loss:
        if not self.use_sigmoid_cls:
            # softmax focal loss is not supported
            raise NotImplementedError
        loss_cls = weighted_sigmoid_focal_loss(
            cls_score,
            labels,
            label_weights,
            gamma=cfg.gamma,
            alpha=cfg.alpha,
            avg_factor=num_total_samples,
        )
    else:
        criterion = (weighted_binary_cross_entropy
                     if self.use_sigmoid_cls else weighted_cross_entropy)
        loss_cls = criterion(
            cls_score, labels, label_weights, avg_factor=num_total_samples)
    # regression loss
    bbox_targets = bbox_targets.reshape(-1, 4)
    bbox_weights = bbox_weights.reshape(-1, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    loss_reg = weighted_smoothl1(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        beta=cfg.smoothl1_beta,
        avg_factor=num_total_samples,
    )
    return loss_cls, loss_reg
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, anchors, num_total_samples, cfg,
                level, gt_bboxes, iteration):
    """Single-level anchor-head loss for volumetric (3-D) inputs: 6 box
    regression parameters per anchor and a 5-D score tensor.

    Side effect: increments level['level'] so consecutive calls track which
    pyramid level is being processed.
    """
    # classification loss
    labels = labels.reshape(-1)
    label_weights = label_weights.reshape(-1)
    # 3D images: move the class channel last before flattening
    # (assumes a 5-D (N, C, ...) score layout — TODO confirm against head)
    cls_score = cls_score.permute(0, 3, 4, 2, 1).reshape(-1, self.cls_out_channels)
    # debug only...
    # self.visualize_anchors_across_levels(anchors, gt_bboxes, labels, label_weights, iteration=iteration, level=level['level'])
    # self.print_cls_scores(cls_score, labels, label_weights, num_total_samples)
    if self.use_sigmoid_cls:
        if self.use_focal_loss:
            cls_criterion = weighted_sigmoid_focal_loss
        else:
            cls_criterion = weighted_binary_cross_entropy
    else:
        if self.use_focal_loss:
            # softmax focal loss is not implemented
            raise NotImplementedError
        else:
            cls_criterion = weighted_cross_entropy
    if self.use_focal_loss:
        loss_cls = cls_criterion(cls_score, labels, label_weights,
                                 gamma=cfg.gamma, alpha=cfg.alpha,
                                 avg_factor=num_total_samples)
    else:
        loss_cls = cls_criterion(cls_score, labels, label_weights,
                                 avg_factor=num_total_samples)
    # regression loss: 6 parameters per 3-D box
    bbox_targets = bbox_targets.reshape(-1, 6)
    bbox_weights = bbox_weights.reshape(-1, 6)
    # 3D images: same channel-last permutation as the scores
    bbox_pred = bbox_pred.permute(0, 3, 4, 2, 1).reshape(-1, 6)
    loss_reg = weighted_smoothl1(bbox_pred, bbox_targets, bbox_weights,
                                 beta=cfg.smoothl1_beta,
                                 avg_factor=num_total_samples)
    # debug only...
    # print('level {} loss_cls: {}'.format(level['level'], loss_cls))
    # print('level {} loss_reg: {}'.format(level['level'], loss_reg))
    level['level'] += 1
    return loss_cls, loss_reg
def loss(self, bbox_pred, labels, label_weights, bbox_targets, bbox_weights,
         reduce=True, img_meta_2=None, img_meta_3=None):
    """Regression-only refinement loss; the result key is suffixed by which
    image-meta argument was supplied ('_2' takes precedence over '_3',
    matching the original check order)."""
    losses = dict()
    if bbox_pred is None:
        return losses
    pos_inds = labels > 0
    num_rois = bbox_pred.size(0)
    if self.reg_class_agnostic:
        pos_pred = bbox_pred.view(num_rois, 6)[pos_inds]
    else:
        # per-class regression: 6 channels of each positive's gt class
        pos_pred = bbox_pred.view(num_rois, -1, 6)[pos_inds, labels[pos_inds]]
    if img_meta_2 is not None:
        key = 'loss_refinement_reg_2'
    elif img_meta_3 is not None:
        key = 'loss_refinement_reg_3'
    else:
        key = 'loss_refinement_reg'
    losses[key] = weighted_smoothl1(
        pos_pred,
        bbox_targets[pos_inds],
        bbox_weights[pos_inds],
        avg_factor=bbox_targets.size(0))
    return losses
def loss_single(self, rpn_cls_score, rpn_bbox_pred, rpn_dir_pred, labels,
                label_weights, bbox_targets, bbox_weights, dir_labels,
                dir_weights, num_total_samples, cfg):
    """Per-level loss: focal/CE classification, smooth-L1 regression over 7
    box parameters with the angle encoded via add_sin_difference, and a
    2-way direction classification term."""
    # classification
    flat_labels = labels.contiguous().view(-1)
    flat_label_weights = label_weights.contiguous().view(-1)
    scores = rpn_cls_score.permute(0, 2, 3, 1).contiguous()
    if self.use_sigmoid_cls:
        # criterion = weighted_binary_cross_entropy
        loss_cls = weighted_sigmoid_focal_loss(
            scores.view(-1), flat_labels, flat_label_weights,
            avg_factor=num_total_samples)
    else:
        loss_cls = weighted_cross_entropy(
            scores.view(-1, 2), flat_labels, flat_label_weights,
            avg_factor=num_total_samples)
    # regression over 7 box parameters; the fixed *2 / *.2 factors rebalance
    # the three terms exactly as in the original implementation
    targets7 = bbox_targets.contiguous().view(-1, 7)
    weights7 = bbox_weights.contiguous().view(-1, 7)
    preds7 = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view(-1, 7)
    preds7, targets7 = add_sin_difference(preds7, targets7)
    loss_reg = weighted_smoothl1(
        preds7, targets7, weights7,
        beta=cfg.smoothl1_beta, avg_factor=num_total_samples) * 2
    # direction classification, down-weighted
    dir_logits = rpn_dir_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    loss_dir = weighted_cross_entropy(
        dir_logits, dir_labels, dir_weights,
        avg_factor=num_total_samples) * .2
    return loss_cls, loss_reg, loss_dir
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, iou_targets):
    """IoU-aware loss: classification targets are softened by the
    localization IoU before binary cross-entropy; regression uses the
    sin-difference angle encoding."""
    # map IoU linearly via 2*iou - 0.5 and clamp to [0, 1]
    soft_label = torch.clamp(2 * iou_targets - 0.5, 0, 1)
    labels = soft_label * labels.float()
    losses = {}
    if cls_score is not None:
        losses['loss_cls'] = weighted_binary_cross_entropy(
            cls_score.view(-1), labels, label_weights)
    if bbox_pred is not None:
        bbox_pred, bbox_targets = add_sin_difference(bbox_pred, bbox_targets)
        losses['loss_reg'] = weighted_smoothl1(
            bbox_pred, bbox_targets, bbox_weights,
            avg_factor=bbox_targets.size(0))
    return losses
def loss(self, cls_score, bbox_pred, A_pred, A_gt, labels, label_weights,
         bbox_targets, bbox_weights, reduce=True):
    """Standard cls/reg losses plus one MSE term per predicted adjacency
    matrix against its (detached) target."""
    losses = dict()
    if cls_score is not None:
        losses['loss_cls'] = weighted_cross_entropy(
            cls_score, labels, label_weights, reduce=reduce)
        losses['acc'] = accuracy(cls_score, labels)
    if bbox_pred is not None:
        losses['loss_reg'] = weighted_smoothl1(
            bbox_pred, bbox_targets, bbox_weights,
            avg_factor=bbox_targets.size(0))
    if A_pred:
        assert len(A_pred) == len(A_gt)
        assert A_pred[0].size() == A_gt[0].size()
        for idx, (pred_adj, gt_adj) in enumerate(zip(A_pred, A_gt)):
            # targets are detached so no gradient flows into the gt graph
            losses['loss_adj' + str(idx)] = F.mse_loss(pred_adj,
                                                       gt_adj.detach())
    return losses
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, cfg):
    """SSD loss for one image: cross-entropy with hard negative mining for
    classification plus smooth-L1 for box regression."""
    # F.cross_entropy: input is `(N, C)` or `(N, 8732, C)` where
    # `C = number of classes`; target is `(N)` or `(N, 8732)`.
    # reduction = 'none' | 'mean' | 'sum'
    #   'none': no reduction is applied
    #   'mean': sum of the output divided by the number of elements
    #   'sum' : all outputs are summed together
    # loss_cls_all: [B, 8732]
    loss_cls_all = F.cross_entropy(
        cls_score, labels, reduction='none') * label_weights
    # nonzero(): indices of the elements matching the condition
    pos_inds = (labels > 0).nonzero().view(-1)
    neg_inds = (labels == 0).nonzero().view(-1)
    num_pos_samples = pos_inds.size(0)
    # cfg.neg_pos_ratio = 3
    num_neg_samples = cfg.neg_pos_ratio * num_pos_samples
    if num_neg_samples > neg_inds.size(0):
        num_neg_samples = neg_inds.size(0)
    # topk(input, k, dim=None, largest=True, sorted=True, out=None)
    # if dim is not given, the last dimension of the input is used.
    # Hard negative mining: keep the k largest classification losses
    # among the negatives.
    topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)
    # loss_cls_pos: sum of the classification losses of all positives
    loss_cls_pos = loss_cls_all[pos_inds].sum()
    # loss_cls_neg: sum of the top-k negative classification losses
    loss_cls_neg = topk_loss_cls_neg.sum()
    loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples
    # the localization loss only affects positives (via bbox_weights)
    loss_reg = weighted_smoothl1(
        bbox_pred, bbox_targets, bbox_weights,
        beta=cfg.smoothl1_beta, avg_factor=num_total_samples)
    # .sum() yields a 0-dim tensor; [None] turns it into shape [1]
    # returns: loss_cls[None]: [1], loss_reg: [1]
    return loss_cls[None], loss_reg
def loss(self, cls_score, bbox_pred, labels, label_weights, bbox_targets,
         bbox_weights, alpha=0.25, num_level=4, reduce=True):
    """Per-level cls/reg losses, each scaled by alpha. Predictions for the
    levels are interleaved row-wise, so level i owns rows
    i, i + num_level, i + 2*num_level, ..."""
    losses = dict()
    for lvl in range(num_level):
        lvl_scores = cls_score[lvl::num_level, :]
        lvl_preds = bbox_pred[lvl::num_level, :]
        losses['loss_cls_level%d' % lvl] = alpha * weighted_cross_entropy(
            lvl_scores, labels, label_weights, reduce=reduce)
        losses['loss_reg_level%d' % lvl] = alpha * weighted_smoothl1(
            lvl_preds, bbox_targets, bbox_weights,
            avg_factor=bbox_targets.size(0))
    return losses
def loss_single(self, cls_score, bbox_pred, teacher_cls_scores,
                teacher_bbox_preds, labels, label_weights, bbox_targets,
                bbox_weights, num_total_samples, cfg):
    """Single-level loss with knowledge distillation: sigmoid focal
    classification loss, a KL-divergence term against the teacher's scores,
    and smooth-L1 box regression.

    Only the sigmoid + focal configuration is implemented; every other
    combination raises NotImplementedError. teacher_bbox_preds is accepted
    but not used in this function.
    """
    # classification loss
    if self.use_sigmoid_cls:
        labels = labels.reshape(-1, self.cls_out_channels)
        label_weights = label_weights.reshape(-1, self.cls_out_channels)
    else:
        raise NotImplementedError
    cls_score = cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    teacher_cls_scores = teacher_cls_scores.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    if self.use_sigmoid_cls:
        if self.use_focal_loss:
            cls_criterion = weighted_sigmoid_focal_loss
            #if cfg.teacher.kd_with_focal:
            #    kd_cls_criterion = weighted_sigmoid_kldiv_focal_loss
            #else:
            #    kd_cls_criterion = weighted_sigmoid_kldiv
        else:
            raise NotImplementedError
    else:
        if self.use_focal_loss:
            raise NotImplementedError
        else:
            raise NotImplementedError
    if self.use_focal_loss:
        if cfg.focal_loss == 0.:
            # focal term disabled by config: keep a zero placeholder so the
            # returned tuple keeps its shape
            loss_cls = torch.zeros([]).cuda()
        else:
            loss_cls = cls_criterion(
                cls_score,
                labels,
                label_weights,
                gamma=cfg.gamma,
                alpha=cfg.alpha,
                avg_factor=num_total_samples)
        # distillation term: KL divergence against the teacher's scores,
        # optionally focal-weighted
        if cfg.teacher.kd_with_focal:
            loss_kd_cls = weighted_sigmoid_kldiv_focal_loss(
                cls_score,
                teacher_cls_scores,
                label_weights,
                temperature=cfg.teacher.temperature,
                gamma=cfg.gamma,
                alpha=cfg.alpha,
                teacher_alpha=cfg.teacher.alpha,
                avg_factor=num_total_samples)
        else:
            loss_kd_cls = weighted_sigmoid_kldiv(
                cls_score,
                teacher_cls_scores,
                label_weights,
                temperature=cfg.teacher.temperature,
                teacher_alpha=cfg.teacher.alpha,
                avg_factor=num_total_samples)
    else:
        raise NotImplementedError
    # regression loss
    bbox_targets = bbox_targets.reshape(-1, 4)
    bbox_weights = bbox_weights.reshape(-1, 4)
    bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
    loss_reg = weighted_smoothl1(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        beta=cfg.smoothl1_beta,
        avg_factor=num_total_samples)
    return loss_cls, loss_kd_cls, loss_reg
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, anchors, num_total_samples, cfg):
    """
    Compute the losses for one image (SSD-style hard negative mining, with
    an optional IoU-balanced re-weighting that is currently disabled).

    :param cls_score: tensor of shape [num_total_examples, cls_out_channels]
    :param bbox_pred: tensor of shape [num_total_examples, 4] or
        [num_total_examples, 5]
    :param labels: tensor of shape [num_total_examples] storing gt labels
        such as 0, 1, 2, 80 for the corresponding class.
    :param label_weights: tensor of shape [num_total_examples]
    :param bbox_targets: tensor of shape [num_total_examples, 4]; stores the
        parametrized coordinates of targets for positives and 0 for
        negatives.
    :param bbox_weights: tensor of shape [num_total_examples, 4]; 1 for
        positives and 0 for negatives and neutrals.
    :param anchors: tensor of shape [num_total_examples, 4]
    :param num_total_samples: the number of positive examples.
    :param cfg: training config.
    :return: (loss_cls, loss_bbox)
    """
    loss_cls_all = F.cross_entropy(cls_score, labels,
                                   reduction='none') * label_weights
    # index tensor of shape (Pos, 1), each row is a index for a non-zero
    # element.
    pos_inds = (labels > 0).nonzero().view(-1)
    neg_inds = (labels == 0).nonzero().view(-1)  # (Neg, 1)
    # added by Shengkai Wu: experiment switches for the IoU-balanced
    # variants; both are hard-coded off, so the guarded blocks below are
    # currently dead code (cls_loss_weight is also unused).
    IoU_balanced_Cls = False
    IoU_balanced_Loc = False
    eta = 1.5
    delta = 1.5
    cls_loss_weight = 1.0
    bbox_loss_weight = 1.0
    if IoU_balanced_Cls or IoU_balanced_Loc:
        pred_box = delta2bbox(anchors, bbox_pred, self.target_means,
                              self.target_stds)
        # the negatives will stores the anchors information(x, y, w, h)
        target_box = delta2bbox(anchors, bbox_targets, self.target_means,
                                self.target_stds)
        # iou between the regressed positive example and the corresponding
        # ground truth box or iou between the regressed negative example
        # and the original negative example.
        iou = bbox_overlaps(target_box, pred_box, is_aligned=True)
    if IoU_balanced_Cls:
        target = iou.new_zeros(iou.size(0))
        target[pos_inds] = 1
        # target = target.type_as(cls_score)
        # scale positive classification losses by IoU**eta, then normalize
        # so the total positive loss mass stays unchanged.
        iou_weights = (1 - target) + (target * iou).pow(eta)
        raw2 = loss_cls_all * iou_weights
        normalizer = (loss_cls_all * target).sum() / (
            (raw2 * target).sum() + 1e-6)
        normalized_iou_weights = (
            1 - target) + (target * iou).pow(eta) * normalizer
        normalized_iou_weights = normalized_iou_weights.detach()
        loss_cls_all = loss_cls_all * normalized_iou_weights
    # hard negative mining: keep the neg_pos_ratio hardest negatives
    num_pos_samples = pos_inds.size(0)
    num_neg_samples = cfg.neg_pos_ratio * num_pos_samples
    if num_neg_samples > neg_inds.size(0):
        num_neg_samples = neg_inds.size(0)
    topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)
    loss_cls_pos = loss_cls_all[pos_inds].sum()
    loss_cls_neg = topk_loss_cls_neg.sum()
    loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples
    if IoU_balanced_Loc:
        loss_bbox = bbox_loss_weight * weighted_iou_balanced_smoothl1(
            bbox_pred,
            bbox_targets,
            iou,
            bbox_weights,
            beta=cfg.smoothl1_beta,
            delta=delta,
            avg_factor=num_total_samples)
    else:
        loss_bbox = weighted_smoothl1(bbox_pred,
                                      bbox_targets,
                                      bbox_weights,
                                      beta=cfg.smoothl1_beta,
                                      avg_factor=num_total_samples)
    return loss_cls[None], loss_bbox
def forward(self, pred, target, weight, *args, **kwargs):
    """Weighted smooth-L1 with the module's beta, scaled by loss_weight."""
    raw_loss = weighted_smoothl1(
        pred, target, weight, *args, beta=self.beta, **kwargs)
    return self.loss_weight * raw_loss
def forward_train(self, img, img_meta, gt_bboxes, gt_bboxes_8_coo, gt_labels,
                  gt_bboxes_ignore=None, gt_masks=None, proposals=None):
    """Training forward pass of a two-stage detector.

    Runs backbone feature extraction, the RPN (loss + proposals), optional
    injection of rescaled gt boxes into the proposal list, proposal
    assignment/sampling, the bbox head (with optional hard example mining
    on misclassified RoIs), and the mask head. Returns a dict of losses.
    """
    if gt_bboxes_ignore is None:
        gt_bboxes_ignore = [None for _ in range(len(gt_bboxes))]
    x = self.extract_feat(img)
    losses = dict()
    # RPN forward and loss
    if self.with_rpn:
        rpn_outs = self.rpn_head(x)
        rpn_loss_inputs = rpn_outs + (gt_bboxes, gt_bboxes_8_coo, img_meta,
                                      self.coo_num, self.train_cfg.rpn)
        rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
        losses.update(rpn_losses)
        proposal_inputs = rpn_outs + (img_meta, self.coo_num,
                                      self.test_cfg.rpn)
        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
    else:
        proposal_list = proposals
    # Optionally append gt boxes, rescaled around their centers by each
    # factor in gt_bboxes_scale and clamped to the image, to the proposals.
    if self.train_cfg.rpn.with_gt_bboxes:
        for i in range(len(gt_bboxes)):
            gt_bboxes_trans = []
            for j in self.train_cfg.rpn.gt_bboxes_scale:
                x_center = (gt_bboxes[i][:, 0:1] + gt_bboxes[i][:, 2:3]) / 2
                y_center = (gt_bboxes[i][:, 1:2] + gt_bboxes[i][:, 3:4]) / 2
                left = torch.clamp(
                    ((gt_bboxes[i][:, 0:1] - x_center) * j + x_center),
                    min=0)
                right = torch.clamp(
                    ((gt_bboxes[i][:, 2:3] - x_center) * j + x_center),
                    max=img_meta[i]['img_shape'][1])
                top = torch.clamp(
                    ((gt_bboxes[i][:, 1:2] - y_center) * j + y_center),
                    min=0)
                bottom = torch.clamp(
                    ((gt_bboxes[i][:, 3:4] - y_center) * j + y_center),
                    max=img_meta[i]['img_shape'][0])
                trans_gt_bboxes = torch.cat([left, top, right, bottom], 1)
                gt_bboxes_trans.append(trans_gt_bboxes)
            gt_bboxes_trans = torch.cat(gt_bboxes_trans, 0)
            n = gt_bboxes_trans.shape[0]
            # append a score column of ones so the format matches proposals
            gt_bboxes_trans = torch.cat(
                [gt_bboxes_trans,
                 torch.ones([n, 1], device=gt_bboxes[i].device)], 1)
            proposal_list[i] = torch.cat([proposal_list[i], gt_bboxes_trans],
                                         0)
    # assign gts and sample proposals
    _gt_bboxes_8_coo = [None for i in range(len(proposal_list))]
    if self.with_bbox or self.with_mask:
        assign_results, sampling_results = multi_apply(
            assign_and_sample, proposal_list, gt_bboxes, _gt_bboxes_8_coo,
            gt_bboxes_ignore, gt_labels, cfg=self.train_cfg.rcnn)
    # bbox head forward and loss
    if self.with_bbox:
        rois = bbox2roi([res.bboxes for res in sampling_results])
        # TODO: a more flexible way to decide which feature maps to use
        bbox_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs], rois, self.level,
            self.merge_mode)
        cls_score, bbox_pred = self.bbox_head(bbox_feats)
        bbox_targets = self.bbox_head.get_target(sampling_results, gt_bboxes,
                                                 gt_labels,
                                                 self.train_cfg.rcnn)
        loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
        # Hard example mining: re-run misclassified RoIs through the head
        # on detached (stop-gradient) features and add the extra losses.
        if self.train_cfg.rcnn.with_hard_example_mining:
            pred_label = torch.argmax(cls_score, 1)
            labels, label_weights, bbox_gt, bbox_weights = bbox_targets
            ind = pred_label != labels
            if torch.sum(ind).item() != 0:
                # .data detaches the features so the backbone gets no
                # gradient from the mining pass
                x_stop_grad = [feature.data for feature in x]
                bbox_feats_stop_grad = self.bbox_roi_extractor(
                    x_stop_grad[:self.bbox_roi_extractor.num_inputs], rois,
                    self.level, self.merge_mode)
                cls_score_stop_grad, bbox_pred_stop_grad = self.bbox_head(
                    bbox_feats_stop_grad)
                cls_score_stop_grad, labels, label_weights = (
                    cls_score_stop_grad[ind], labels[ind],
                    label_weights[ind])
                # normalize by the full RoI count, not just the mined subset
                num = cls_score.shape[0]
                loss_bbox['loss_cls'] = loss_bbox[
                    'loss_cls'] + weighted_cross_entropy(
                        cls_score_stop_grad, labels, label_weights,
                        avg_factor=num)
                if self.train_cfg.rcnn.with_reg:
                    bbox_pred_stop_grad, bbox_gt, bbox_weights = (
                        bbox_pred_stop_grad[ind], bbox_gt[ind],
                        bbox_weights[ind])
                    loss_bbox['loss_reg'] = loss_bbox[
                        'loss_reg'] + weighted_smoothl1(
                            bbox_pred_stop_grad, bbox_gt, bbox_weights,
                            avg_factor=num)
        losses.update(loss_bbox)
    # mask head forward and loss
    if self.with_mask:
        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])
        mask_feats = self.mask_roi_extractor(
            x[:self.mask_roi_extractor.num_inputs], pos_rois)
        mask_pred = self.mask_head(mask_feats)
        mask_targets = self.mask_head.get_target(sampling_results, gt_masks,
                                                 self.train_cfg.rcnn)
        pos_labels = torch.cat(
            [res.pos_gt_labels for res in sampling_results])
        loss_mask = self.mask_head.loss(mask_pred, mask_targets, pos_labels)
        losses.update(loss_mask)
    return losses