def test_iter(self, metrics=None):
    """Run one evaluation step and accumulate per-step losses.

    Puts the generator and hq-discriminator into eval mode, runs a
    forward pass without gradients, and appends sync / L1 / real /
    fake / perceptual losses to the running eval lists.  When
    ``self.eval_step`` reaches ``self.max_eval_steps`` the averages are
    printed, the lists are reset, and both nets are put back into
    train mode.

    Args:
        metrics: accepted for trainer-API compatibility; unused here.
    """
    self.eval_step += 1
    self.nets['netG'].eval()
    self.nets['netDH'].eval()
    with paddle.no_grad():
        self.forward()
        # Lip-sync expert loss between mel input and generated frames.
        sync_loss = get_sync_loss(self.mel, self.g, self.nets['netDS'])
        # Pixel-level reconstruction loss against ground truth.
        l1loss = self.recon_loss(self.g, self.y)

        # Discriminator scores: real frames should score 1, fakes 0.
        pred_real = self.nets['netDH'](self.y)
        pred_fake = self.nets['netDH'](self.g)
        disc_real_loss = F.binary_cross_entropy(
            pred_real, paddle.ones((len(pred_real), 1)))
        disc_fake_loss = F.binary_cross_entropy(
            pred_fake, paddle.zeros((len(pred_fake), 1)))

        self.eval_disc_fake_losses.append(disc_fake_loss.numpy().item())
        self.eval_disc_real_losses.append(disc_real_loss.numpy().item())
        self.eval_sync_losses.append(sync_loss.numpy().item())
        self.eval_recon_losses.append(l1loss.numpy().item())

        if self.disc_wt > 0.:
            # DataParallel wraps the real module in ._layers, so the
            # perceptual head must be reached through it when present.
            if isinstance(self.nets['netDH'], paddle.DataParallel
                          ):  #paddle.fluid.dygraph.parallel.DataParallel)
                perceptual_loss = self.nets[
                    'netDH']._layers.perceptual_forward(
                        self.g).numpy().item()
            else:
                perceptual_loss = self.nets['netDH'].perceptual_forward(
                    self.g).numpy().item()
        else:
            perceptual_loss = 0.
        self.eval_perceptual_losses.append(perceptual_loss)

        if self.eval_step == self.max_eval_steps:
            averaged_sync_loss = sum(self.eval_sync_losses) / len(
                self.eval_sync_losses)
            averaged_recon_loss = sum(self.eval_recon_losses) / len(
                self.eval_recon_losses)
            averaged_perceptual_loss = sum(self.eval_perceptual_losses) / len(
                self.eval_perceptual_losses)
            averaged_disc_fake_loss = sum(self.eval_disc_fake_losses) / len(
                self.eval_disc_fake_losses)
            averaged_disc_real_loss = sum(self.eval_disc_real_losses) / len(
                self.eval_disc_real_losses)

            # Once sync quality is good enough, enable the syncnet loss
            # term in training by raising its weight.
            if averaged_sync_loss < .75:
                self.syncnet_wt = 0.01

            print(
                'L1: {}, Sync loss: {}, Percep: {}, Fake: {}, Real: {}'.format(
                    averaged_recon_loss, averaged_sync_loss,
                    averaged_perceptual_loss, averaged_disc_fake_loss,
                    averaged_disc_real_loss))

            # Reset accumulators for the next evaluation round.
            self.eval_sync_losses, self.eval_recon_losses = [], []
            self.eval_disc_real_losses, self.eval_disc_fake_losses = [], []
            self.eval_perceptual_losses = []
            self.eval_step = 0

    self.nets['netG'].train()
    self.nets['netDH'].train()
def backward_D(self):
    """Discriminator backward pass: real frames are labelled 1 and
    generated frames 0; both BCE losses are recorded in ``self.losses``
    and back-propagated.
    """
    disc = self.nets['netDH']

    # Real branch.
    self.pred_real = disc(self.y)
    real_target = paddle.ones((len(self.pred_real), 1))
    self.disc_real_loss = F.binary_cross_entropy(self.pred_real, real_target)
    self.losses['disc_real_loss'] = self.disc_real_loss
    self.disc_real_loss.backward()

    # Fake branch; generator output is detached so only D receives grads.
    self.pred_fake = disc(self.g.detach())
    fake_target = paddle.zeros((len(self.pred_fake), 1))
    self.disc_fake_loss = F.binary_cross_entropy(self.pred_fake, fake_target)
    self.losses['disc_fake_loss'] = self.disc_fake_loss
    self.disc_fake_loss.backward()
def create_loss(self,
                click_pred,
                conversion_pred,
                click_label,
                conversion_label,
                constraint_weight=0.6):
    """Joint click/conversion loss with an ordering constraint.

    Two binary cross-entropy terms plus a penalty whenever the
    predicted conversion probability exceeds the predicted click
    probability (a conversion cannot happen without a click).

    Args:
        click_pred / conversion_pred: predicted probabilities.
        click_label / conversion_label: binary targets.
        constraint_weight: weight of the ordering penalty term.

    Returns:
        Scalar loss tensor.
    """
    ctr_loss = F.binary_cross_entropy(click_pred, click_label)
    cvr_loss = F.binary_cross_entropy(conversion_pred, conversion_label)

    # Penalize conversion_pred > click_pred; clamp at zero elsewhere.
    violation = paddle.maximum(conversion_pred - click_pred,
                               paddle.zeros_like(click_label))
    penalty = paddle.sum(violation)

    return ctr_loss + cvr_loss + constraint_weight * penalty
def forward(self, true_binary, rule_masks, raw_logits):
    """Compute the decoder loss selected by ``cmd_args.loss_type``.

    'binary'     — masked softmax over logits, BCE against the one-hot
                   targets, scaled by ``cmd_args.max_decode_steps``.
    'perplexity' — delegate to :class:`MyPerpLoss`.
    'vanilla'    — masked negative log-likelihood, normalised by the
                   number of sequences (``true_binary.shape[1]``).

    Raises:
        NotImplementedError: for any other ``loss_type``.
    """
    if cmd_args.loss_type == 'binary':
        masked_exp = paddle.exp(raw_logits) * rule_masks
        denom = paddle.sum(masked_exp, axis=2, keepdim=True)
        probs = paddle.divide(masked_exp, denom)
        return F.binary_cross_entropy(
            probs, true_binary) * cmd_args.max_decode_steps

    if cmd_args.loss_type == 'perplexity':
        return MyPerpLoss()(true_binary, rule_masks, raw_logits)

    if cmd_args.loss_type == 'vanilla':
        # Tiny epsilon keeps fully-masked rows from producing log(0).
        masked_exp = paddle.exp(raw_logits) * rule_masks + 1e-30
        denom = paddle.sum(masked_exp, 2, keepdim=True)
        probs = paddle.divide(masked_exp, denom)
        step_ll = paddle.abs(paddle.sum(true_binary * probs, 2))
        # Last mask channel marks padding steps; exclude them.
        pad_mask = 1 - rule_masks[:, :, -1]
        log_ll = pad_mask * paddle.log(step_ll)
        return -paddle.sum(log_ll) / true_binary.shape[1]

    print('unknown loss type %s' % cmd_args.loss_type)
    raise NotImplementedError
def forward(self, features, im_info, boxes=None):
    """FPN-RPN forward pass.

    Runs the shared RPN conv head over each FPN level, generates per-level
    anchors, and selects top proposals.  In training mode it additionally
    computes the RPN classification and localisation losses.

    Args:
        features: list of FPN feature maps (finest stride derived below).
        im_info: per-image size/scale info consumed by proposal selection.
        boxes: ground-truth boxes; required only when ``self.training``.

    Returns:
        Training: ``(rpn_rois, loss_dict)`` with keys ``loss_rpn_cls`` and
        ``loss_rpn_loc``.  Inference: ``rpn_rois`` only.
    """
    # prediction
    pred_cls_score_list = []
    pred_bbox_offsets_list = []
    for x in features:
        t = F.relu(self.rpn_conv(x))
        pred_cls_score_list.append(self.rpn_cls_score(t))
        pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
    # get anchors
    all_anchors_list = []
    # stride: 64,32,16,8,4 p6->p2
    base_stride = 4
    off_stride = 2**(len(features) - 1)  # 16
    for fm in features:
        layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
        # Halve the stride offset for each finer pyramid level.
        off_stride = off_stride // 2
        all_anchors_list.append(layer_anchors)
    # sample from the predictions
    rpn_rois = find_top_rpn_proposals(self.training, pred_bbox_offsets_list,
                                      pred_cls_score_list, all_anchors_list,
                                      im_info)
    rpn_rois = rpn_rois.cast('float32')
    if self.training:
        rpn_labels, rpn_bbox_targets = fpn_anchor_target(boxes, im_info,
                                                         all_anchors_list)
        #rpn_labels = rpn_labels.astype(np.int32)
        pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
            pred_cls_score_list, pred_bbox_offsets_list)
        # rpn loss
        # Labels: >=0 are valid anchors (0 = background, >0 = foreground).
        valid_masks = rpn_labels >= 0
        # objectness_loss = softmax_loss(
        #     torch.gather(pred_cls_score,torch.nonzero(valid_masks)),
        #     torch.gather(rpn_labels,torch.nonzero(valid_masks)))
        # NOTE(review): this file otherwise uses Paddle, but the calls
        # below are torch.* (gather/nonzero/eye) — presumably leftovers
        # from a PyTorch port; confirm a `torch` import actually exists.
        # BCE over softmaxed 2-class scores against one-hot rows of eye(2).
        objectness_loss = F.binary_cross_entropy(
            F.softmax(
                torch.gather(pred_cls_score, torch.nonzero(valid_masks))),
            torch.gather(
                torch.eye(2),
                torch.gather(rpn_labels, torch.nonzero(valid_masks))))
        pos_masks = rpn_labels > 0
        # localization_loss = smooth_l1_loss(
        #     pred_bbox_offsets[pos_masks],
        #     rpn_bbox_targets[pos_masks],
        #     config.rpn_smooth_l1_beta)
        # NOTE(review): live code uses config.rcnn_smooth_l1_beta while the
        # commented original used rpn_smooth_l1_beta — verify which beta
        # is intended for the RPN branch.
        localization_loss = \
            F.smooth_l1_loss(torch.gather(pred_bbox_offsets, torch.nonzero(pos_masks)),
                             torch.gather(rpn_bbox_targets, torch.nonzero(pos_masks)), delta=config.rcnn_smooth_l1_beta)
        # Normalise both losses by the number of valid anchors.
        normalizer = 1 / valid_masks.cast('float32').sum()
        loss_rpn_cls = objectness_loss.sum() * normalizer
        loss_rpn_loc = localization_loss.sum() * normalizer
        loss_dict = {}
        loss_dict['loss_rpn_cls'] = loss_rpn_cls
        loss_dict['loss_rpn_loc'] = loss_rpn_loc
        return rpn_rois, loss_dict
    else:
        return rpn_rois
def _focal_loss(score, label, alpha=0.25, gamma=2.0): weight = (score - label).pow(gamma) if alpha > 0: alpha_t = alpha * label + (1 - alpha) * (1 - label) weight *= alpha_t loss = F.binary_cross_entropy(score, label, weight=weight, reduction='sum') return loss
def perceptual_forward(self, false_face_sequences):
    """Perceptual (adversarial) loss for generated face sequences.

    Flattens the sequence to 2-D frames, keeps the lower half of each
    face, encodes it, and returns the BCE between the real/fake head's
    predictions and an all-ones target — i.e. how far the generator is
    from fooling this discriminator.
    """
    faces = self.to_2d(false_face_sequences)
    faces = self.get_lower_half(faces)

    feats = faces
    for encoder_block in self.face_encoder_blocks:
        feats = encoder_block(feats)

    scores = paddle.reshape(self.binary_pred(feats), (len(feats), -1))
    real_target = paddle.ones((len(feats), 1))
    return F.binary_cross_entropy(scores, real_target)
def varifocal_loss(pred,
                   target,
                   alpha=0.75,
                   gamma=2.0,
                   iou_weighted=True,
                   use_sigmoid=True):
    """`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_

    Args:
        pred (Tensor): The prediction with shape (N, C), C is the number
            of classes.
        target (Tensor): The learning target of the iou-aware
            classification score with shape (N, C), C is the number of
            classes.
        alpha (float, optional): A balance factor for the negative part of
            Varifocal Loss, which is different from the alpha of Focal
            Loss. Defaults to 0.75.
        gamma (float, optional): The gamma for calculating the modulating
            factor. Defaults to 2.0.
        iou_weighted (bool, optional): Whether to weight the loss of the
            positive example with the iou target. Defaults to True.
        use_sigmoid (bool, optional): Whether ``pred`` holds raw logits
            (sigmoid applied internally, BCE-with-logits used) or
            probabilities. Defaults to True.

    Returns:
        Tensor with shape (N,): per-row weighted loss summed over classes.
    """
    # pred and target should be of the same size
    assert pred.shape == target.shape

    pred_new = F.sigmoid(pred) if use_sigmoid else pred
    target = target.cast(pred.dtype)

    pos_mask = (target > 0.0).cast('float32')
    neg_mask = (target <= 0.0).cast('float32')
    # Negatives are always modulated by alpha * |p - t|^gamma.
    neg_term = alpha * (pred_new - target).abs().pow(gamma) * neg_mask
    if iou_weighted:
        # Positives weighted by their IoU-aware target score.
        focal_weight = target * pos_mask + neg_term
    else:
        focal_weight = pos_mask + neg_term

    if use_sigmoid:
        bce = F.binary_cross_entropy_with_logits(
            pred, target, reduction='none')
    else:
        bce = F.binary_cross_entropy(pred, target, reduction='none')

    loss = (bce * focal_weight).sum(axis=1)
    return loss
def forward(self, input, label, mask=None, weight=None, name=None):
    """Binary cross entropy between probability ``input`` and ``label``,
    reduced according to ``self.reduction``.

    Note: ``mask``, ``weight`` and ``name`` are accepted for loss-API
    compatibility but are not used by this implementation.
    """
    bce = F.binary_cross_entropy(input, label, reduction=self.reduction)
    return bce
def create_loss(self, pred, label):
    """Binary cross entropy between ``pred`` probabilities and ``label``
    (cast to float32 so integer labels are accepted)."""
    target = label.astype('float32')
    return F.binary_cross_entropy(pred, target)
def __call__(self,
             flatten_cls_pred_scores,
             flatten_center_and_stride,
             flatten_bboxes,
             gt_bboxes,
             gt_labels,
             eps=1e-7):
    """Assign gt to priors using SimOTA.

    Builds a cost matrix (classification + IoU, plus a huge penalty for
    priors outside gt boxes/centers), runs dynamic-k matching, and maps
    the matched priors back to full-resolution label/bbox targets.

    Args:
        flatten_cls_pred_scores: per-prior class scores, shape [A, C].
        flatten_center_and_stride: per-prior (cx, cy, stride, stride).
        flatten_bboxes: decoded predicted boxes, shape [A, 4].
        gt_bboxes: ground-truth boxes, shape [G, 4].
        gt_labels: ground-truth class ids, shape [G, 1].
        eps: numerical floor inside the IoU log-cost.

    Returns:
        Tuple ``(pos_num, label, label_weight, bbox_target)`` as numpy
        arrays sized to the full prior set.
    """
    num_gt = gt_bboxes.shape[0]
    num_bboxes = flatten_bboxes.shape[0]

    if num_gt == 0 or num_bboxes == 0:
        # No ground truth or boxes: everything is background.
        label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
        label_weight = np.ones([num_bboxes], dtype=np.float32)
        bbox_target = np.zeros_like(flatten_center_and_stride)
        return 0, label, label_weight, bbox_target

    is_in_gts_or_centers_all, is_in_gts_or_centers_all_inds, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(
        flatten_center_and_stride, gt_bboxes)

    # bboxes and scores to calculate matrix — only candidate priors
    # (inside some gt box or center region) take part in matching.
    valid_flatten_bboxes = flatten_bboxes[is_in_gts_or_centers_all_inds]
    valid_cls_pred_scores = flatten_cls_pred_scores[
        is_in_gts_or_centers_all_inds]
    num_valid_bboxes = valid_flatten_bboxes.shape[0]

    pairwise_ious = batch_bbox_overlaps(valid_flatten_bboxes,
                                        gt_bboxes)  # [num_points,num_gts]
    if self.use_vfl:
        # Varifocal-loss cost: targets carry the pairwise IoU at the
        # gt class index, zero elsewhere.
        gt_vfl_labels = gt_labels.squeeze(-1).unsqueeze(0).tile(
            [num_valid_bboxes, 1]).reshape([-1])
        valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
            [1, num_gt, 1]).reshape([-1, self.num_classes])
        vfl_score = np.zeros(valid_pred_scores.shape)
        vfl_score[np.arange(0, vfl_score.shape[0]),
                  gt_vfl_labels.numpy()] = pairwise_ious.reshape([-1])
        vfl_score = paddle.to_tensor(vfl_score)
        losses_vfl = varifocal_loss(
            valid_pred_scores, vfl_score,
            use_sigmoid=False).reshape([num_valid_bboxes, num_gt])
        losses_giou = batch_bbox_overlaps(
            valid_flatten_bboxes, gt_bboxes, mode='giou')
        cost_matrix = (
            losses_vfl * self.cls_weight + losses_giou * self.iou_weight +
            paddle.logical_not(is_in_boxes_and_center).cast('float32') *
            100000000)
    else:
        # Plain BCE classification cost + (-log IoU) localisation cost.
        iou_cost = -paddle.log(pairwise_ious + eps)
        gt_onehot_label = (F.one_hot(
            gt_labels.squeeze(-1).cast(paddle.int64),
            flatten_cls_pred_scores.shape[-1]).cast('float32').unsqueeze(0)
                           .tile([num_valid_bboxes, 1, 1]))
        valid_pred_scores = valid_cls_pred_scores.unsqueeze(1).tile(
            [1, num_gt, 1])
        cls_cost = F.binary_cross_entropy(
            valid_pred_scores, gt_onehot_label, reduction='none').sum(-1)
        cost_matrix = (
            cls_cost * self.cls_weight + iou_cost * self.iou_weight +
            paddle.logical_not(is_in_boxes_and_center).cast('float32') *
            100000000)

    match_gt_inds_to_fg, match_fg_mask_inmatrix = \
        self.dynamic_k_matching(cost_matrix, pairwise_ious, num_gt)

    # sample and assign results back onto the full prior set
    assigned_gt_inds = np.zeros([num_bboxes], dtype=np.int64)
    match_fg_mask_inall = np.zeros_like(assigned_gt_inds)
    match_fg_mask_inall[
        is_in_gts_or_centers_all.numpy()] = match_fg_mask_inmatrix
    # np.bool was removed in NumPy 1.24; the builtin bool is the
    # documented replacement for boolean astype.
    assigned_gt_inds[match_fg_mask_inall.astype(
        bool)] = match_gt_inds_to_fg + 1

    pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds \
        = self.get_sample(assigned_gt_inds, gt_bboxes.numpy())

    bbox_target = np.zeros_like(flatten_bboxes)
    bbox_weight = np.zeros_like(flatten_bboxes)
    # Default: every prior is background (label == num_classes).
    label = np.ones([num_bboxes], dtype=np.int64) * self.num_classes
    label_weight = np.zeros([num_bboxes], dtype=np.float32)

    if len(pos_inds) > 0:
        gt_labels = gt_labels.numpy()
        pos_bbox_targets = pos_gt_bboxes
        bbox_target[pos_inds, :] = pos_bbox_targets
        bbox_weight[pos_inds, :] = 1.0
        if not np.any(gt_labels):
            # All gt labels are class 0.
            label[pos_inds] = 0
        else:
            label[pos_inds] = gt_labels.squeeze(-1)[pos_assigned_gt_inds]

        label_weight[pos_inds] = 1.0
    if len(neg_inds) > 0:
        label_weight[neg_inds] = 1.0

    pos_num = max(pos_inds.size, 1)

    return pos_num, label, label_weight, bbox_target
def get_loss(self, head_outs, targets):
    """YOLOX head loss: per-image SimOTA assignment followed by
    objectness BCE, IoU loss, classification BCE and (after
    ``self.l1_epoch``) an L1 box loss, combined with ``self.loss_weight``.

    Args:
        head_outs: tuple of (pred_cls, pred_bboxes, pred_obj,
            anchor_points, stride_tensor, num_anchors_list).
        targets: dict with 'gt_class', 'gt_bbox' and 'epoch_id'.

    Returns:
        dict with 'loss' and the individual 'loss_cls' / 'loss_obj' /
        'loss_iou' / 'loss_l1' terms.
    """
    pred_cls, pred_bboxes, pred_obj,\
        anchor_points, stride_tensor, num_anchors_list = head_outs
    gt_labels = targets['gt_class']
    gt_bboxes = targets['gt_bbox']
    # Geometric mean of class and objectness scores used for assignment.
    pred_scores = (pred_cls * pred_obj).sqrt()
    # label assignment (per image; assigner works on numpy outputs)
    center_and_strides = paddle.concat(
        [anchor_points, stride_tensor, stride_tensor], axis=-1)
    pos_num_list, label_list, bbox_target_list = [], [], []
    for pred_score, pred_bbox, gt_box, gt_label in zip(
            pred_scores.detach(),
            pred_bboxes.detach() * stride_tensor, gt_bboxes, gt_labels):
        pos_num, label, _, bbox_target = self.assigner(
            pred_score, center_and_strides, pred_bbox, gt_box, gt_label)
        pos_num_list.append(pos_num)
        label_list.append(label)
        bbox_target_list.append(bbox_target)
    labels = paddle.to_tensor(np.stack(label_list, axis=0))
    bbox_targets = paddle.to_tensor(np.stack(bbox_target_list, axis=0))
    bbox_targets /= stride_tensor  # rescale bbox back to feature scale

    # 1. obj score loss — positives are priors not labelled background.
    mask_positive = (labels != self.num_classes)
    loss_obj = F.binary_cross_entropy(
        pred_obj,
        mask_positive.astype(pred_obj.dtype).unsqueeze(-1),
        reduction='sum')

    num_pos = sum(pos_num_list)

    if num_pos > 0:
        num_pos = paddle.to_tensor(num_pos, dtype=self._dtype).clip(min=1)
        loss_obj /= num_pos

        # 2. iou loss over positive priors only
        bbox_mask = mask_positive.unsqueeze(-1).tile([1, 1, 4])
        pred_bboxes_pos = paddle.masked_select(pred_bboxes,
                                               bbox_mask).reshape([-1, 4])
        assigned_bboxes_pos = paddle.masked_select(
            bbox_targets, bbox_mask).reshape([-1, 4])
        # diag: IoU of each positive with its own assigned target.
        bbox_iou = bbox_overlaps(pred_bboxes_pos, assigned_bboxes_pos)
        bbox_iou = paddle.diag(bbox_iou)

        loss_iou = self.iou_loss(
            pred_bboxes_pos.split(4, axis=-1),
            assigned_bboxes_pos.split(4, axis=-1))
        loss_iou = loss_iou.sum() / num_pos

        # 3. cls loss — one-hot targets scaled by the box IoU.
        cls_mask = mask_positive.unsqueeze(-1).tile(
            [1, 1, self.num_classes])
        pred_cls_pos = paddle.masked_select(
            pred_cls, cls_mask).reshape([-1, self.num_classes])
        assigned_cls_pos = paddle.masked_select(labels, mask_positive)
        # num_classes + 1 one-hot, dropping the background column.
        assigned_cls_pos = F.one_hot(assigned_cls_pos,
                                     self.num_classes + 1)[..., :-1]
        assigned_cls_pos *= bbox_iou.unsqueeze(-1)
        loss_cls = F.binary_cross_entropy(
            pred_cls_pos, assigned_cls_pos, reduction='sum')
        loss_cls /= num_pos

        # 4. l1 loss — only enabled in the final training epochs.
        if targets['epoch_id'] >= self.l1_epoch:
            loss_l1 = F.l1_loss(
                pred_bboxes_pos, assigned_bboxes_pos, reduction='sum')
            loss_l1 /= num_pos
        else:
            loss_l1 = paddle.zeros([1])
            loss_l1.stop_gradient = False
    else:
        # No positives: zero losses that still participate in the graph.
        loss_cls = paddle.zeros([1])
        loss_iou = paddle.zeros([1])
        loss_l1 = paddle.zeros([1])
        loss_cls.stop_gradient = False
        loss_iou.stop_gradient = False
        loss_l1.stop_gradient = False

    loss = self.loss_weight['obj'] * loss_obj + \
           self.loss_weight['cls'] * loss_cls + \
           self.loss_weight['iou'] * loss_iou

    if targets['epoch_id'] >= self.l1_epoch:
        loss += (self.loss_weight['l1'] * loss_l1)

    yolox_losses = {
        'loss': loss,
        'loss_cls': loss_cls,
        'loss_obj': loss_obj,
        'loss_iou': loss_iou,
        'loss_l1': loss_l1,
    }
    return yolox_losses
def yolov3_loss(self, p, t, gt_box, anchor, downsample, scale=1., eps=1e-10):
    """Per-level YOLOv3 loss.

    Args:
        p: raw predictions for one level; reshaped below to
            [b, na, h, w, 5 + num_classes] (an extra leading ``na``
            channel group holds IoU-aware scores when
            ``self.iou_aware_loss`` is set).
        t: encoded targets, layout [b, na, 6 + num_classes, h, w]
            (tx, ty, tw, th, tscale, tobj, tcls...).
        gt_box: ground-truth boxes for the objectness loss.
        anchor: anchors for this level.
        downsample: stride of this level.
        scale: xy scale factor ("scale_x_y" trick); 1. means plain BCE.
        eps: tolerance when testing ``scale == 1``.

    Returns:
        dict of individual loss terms for this level.
    """
    na = len(anchor)
    b, c, h, w = p.shape
    if self.iou_aware_loss:
        # First na channels are the IoU-aware objectness scores.
        ioup, p = p[:, 0:na, :, :], p[:, na:, :, :]
        ioup = ioup.unsqueeze(-1)
    p = p.reshape((b, na, -1, h, w)).transpose((0, 1, 3, 4, 2))
    x, y = p[:, :, :, :, 0:1], p[:, :, :, :, 1:2]
    # NOTE(review): w, h here intentionally shadow the spatial dims
    # unpacked from p.shape above — from this point on they are the
    # predicted box width/height channels.
    w, h = p[:, :, :, :, 2:3], p[:, :, :, :, 3:4]
    obj, pcls = p[:, :, :, :, 4:5], p[:, :, :, :, 5:]
    self.distill_pairs.append([x, y, w, h, obj, pcls])

    t = t.transpose((0, 1, 3, 4, 2))
    tx, ty = t[:, :, :, :, 0:1], t[:, :, :, :, 1:2]
    tw, th = t[:, :, :, :, 2:3], t[:, :, :, :, 3:4]
    tscale = t[:, :, :, :, 4:5]
    tobj, tcls = t[:, :, :, :, 5:6], t[:, :, :, :, 6:]

    # Per-cell weight: box-size scale, gated by objectness.
    tscale_obj = tscale * tobj
    loss = dict()

    x = scale * F.sigmoid(x) - 0.5 * (scale - 1.)
    y = scale * F.sigmoid(y) - 0.5 * (scale - 1.)

    if abs(scale - 1.) < eps:
        # No scale_x_y: BCE against the encoded center targets.
        loss_x = F.binary_cross_entropy(x, tx, reduction='none')
        loss_y = F.binary_cross_entropy(y, ty, reduction='none')
        loss_xy = tscale_obj * (loss_x + loss_y)
    else:
        # With scale_x_y the centers leave [0, 1], so use L1 instead.
        loss_x = paddle.abs(x - tx)
        loss_y = paddle.abs(y - ty)
        loss_xy = tscale_obj * (loss_x + loss_y)

    loss_xy = loss_xy.sum([1, 2, 3, 4]).mean()

    loss_w = paddle.abs(w - tw)
    loss_h = paddle.abs(h - th)
    loss_wh = tscale_obj * (loss_w + loss_h)
    loss_wh = loss_wh.sum([1, 2, 3, 4]).mean()

    loss['loss_xy'] = loss_xy
    loss['loss_wh'] = loss_wh

    if self.iou_loss is not None:
        # warn: do not modify x, y, w, h in place
        box, tbox = [x, y, w, h], [tx, ty, tw, th]
        pbox = bbox_transform(box, anchor, downsample)
        gbox = bbox_transform(tbox, anchor, downsample)
        loss_iou = self.iou_loss(pbox, gbox)
        loss_iou = loss_iou * tscale_obj
        loss_iou = loss_iou.sum([1, 2, 3, 4]).mean()
        loss['loss_iou'] = loss_iou

    if self.iou_aware_loss is not None:
        box, tbox = [x, y, w, h], [tx, ty, tw, th]
        pbox = bbox_transform(box, anchor, downsample)
        gbox = bbox_transform(tbox, anchor, downsample)
        loss_iou_aware = self.iou_aware_loss(ioup, pbox, gbox)
        loss_iou_aware = loss_iou_aware * tobj
        loss_iou_aware = loss_iou_aware.sum([1, 2, 3, 4]).mean()
        loss['loss_iou_aware'] = loss_iou_aware

    box = [x, y, w, h]
    loss_obj = self.obj_loss(box, gt_box, obj, tobj, anchor, downsample)
    loss_obj = loss_obj.sum(-1).mean()
    loss['loss_obj'] = loss_obj

    # Classification loss only on object cells (gated by tobj).
    loss_cls = self.cls_loss(pcls, tcls) * tobj
    loss_cls = loss_cls.sum([1, 2, 3, 4]).mean()
    loss['loss_cls'] = loss_cls
    return loss
def get_assignments(
        self,
        N,
        A,
        G,
        gt_bboxes,
        gt_classes,
        bbox_preds,
        expanded_strides,
        x_shifts,
        y_shifts,
        cls_preds,
        obj_preds,
        is_gt,
):
    """SimOTA label assignment (step 4-2): decide positive/negative
    samples; no tensor here needs gradients.

    Args:
        N: batch size; A: number of anchors/grid cells; G: max gts/image.
        gt_bboxes [N, G, 4], gt_classes, bbox_preds [N, A, 4],
        cls_preds [N, A, C], obj_preds [N, A, 1]; is_gt marks real
        (vs. padded) gts.
    """
    # 4-2-1. Find candidate positives.
    # is_in_boxes_or_center [N, A]: whether each cell lies inside ANY gt
    # box or ANY gt's center region (not necessarily the same gt) — cells
    # with value 1 are the "candidate positives".
    # is_in_boxes_and_center [N, G, A]: whether each cell lies inside a
    # gt box AND that SAME gt's center region.  If any of a cell's G
    # values is 1, the cell is necessarily also a candidate positive.
    # is_in_boxes_and_center helps promote high-quality candidates to
    # final positives: a cell inside both a gt and its center region is
    # the one that should learn that gt.
    is_in_boxes_or_center, is_in_boxes_and_center = self.get_in_boxes_info(
        gt_bboxes,
        expanded_strides,
        x_shifts,
        y_shifts,
        A,
        G, )
    '''
    gt_bboxes    [N, G, 4]
    bbox_preds   [N, A, 4]
    '''
    # 4-2-2. Pairwise IoU cost between every gt and every prediction:
    # higher IoU -> lower cost -> more likely to become a final positive.
    pair_wise_ious = bboxes_iou_batch(gt_bboxes, bbox_preds,
                                      False)  # [N, G, A] pairwise IoU
    # Zero the IoU of padded (fake) gts with every prediction.
    pair_wise_ious *= is_gt.unsqueeze(2)
    # Zero the IoU of non-candidate cells with every gt — only candidate
    # positives may become final positives.
    pair_wise_ious *= is_in_boxes_or_center.unsqueeze(1)
    pair_wise_ious_loss = -paddle.log(
        pair_wise_ious + 1e-8)  # [N, G, A] negative log IoU
    # Inflate the IoU cost of padded gts with every prediction.
    pair_wise_ious_loss += (1.0 - is_gt.unsqueeze(2)) * 100000.0
    # Inflate the IoU cost of non-candidate cells with every gt.
    pair_wise_ious_loss += (
        1.0 - is_in_boxes_or_center.unsqueeze(1)) * 100000.0

    # 4-2-3. Pairwise classification cost between every gt and every
    # prediction: lower cost -> more likely to become a final positive.
    p1 = cls_preds.unsqueeze(1)  # [N, 1, A, 80]
    p2 = obj_preds.unsqueeze(1)  # [N, 1, A, 1]
    p = F.sigmoid(p1) * F.sigmoid(p2)  # [N, 1, A, 80] class scores
    p = paddle.tile(p, [1, G, 1, 1])  # [N, G, A, 80] class scores
    p = paddle.sqrt(p)  # [N, G, A, 80] sqrt = geometric mean of cls*obj

    # One-hot class vector for each of the N*G gts, tiled per cell.
    gt_classes = paddle.reshape(gt_classes, (N * G, ))  # [N*G, ]
    gt_classes = paddle.cast(gt_classes, 'int32')  # [N*G, ]
    one_hots = F.one_hot(gt_classes,
                         num_classes=self.num_classes)  # [N*G, 80]
    one_hots = paddle.reshape(
        one_hots, (N, G, 1, self.num_classes))  # [N, G, 1, 80]
    one_hots = paddle.tile(one_hots, [1, 1, A, 1])  # [N, G, A, 80]
    gt_clss = one_hots

    # Binary cross-entropy (manual form kept for reference):
    # pos_loss = gt_clss * (0 - paddle.log(p + 1e-9))              # [N, G, A, 80]
    # neg_loss = (1.0 - gt_clss) * (0 - paddle.log(1 - p + 1e-9))  # [N, G, A, 80]
    # pair_wise_cls_loss = pos_loss + neg_loss                     # [N, G, A, 80]
    # del pos_loss, neg_loss, p, gt_clss, one_hots

    # Binary cross-entropy
    pair_wise_cls_loss = F.binary_cross_entropy(
        p, gt_clss, reduction='none')  # [N, G, A, 80]
    del p, gt_clss, one_hots
    pair_wise_cls_loss = pair_wise_cls_loss.sum(
        -1)  # [N, G, A] lower cost -> more likely final positive
    # Inflate the cls cost of padded gts with every prediction.
    pair_wise_cls_loss += (1.0 - is_gt.unsqueeze(2)) * 100000.0
    # Inflate the cls cost of non-candidate cells with every gt.
    pair_wise_cls_loss += (
        1.0 - is_in_boxes_or_center.unsqueeze(1)) * 100000.0

    # 4-2-4. Total pairwise cost; lower -> more likely final positive.
    # is_in_boxes_and_center promotes high-quality candidates: where it
    # is 1 the cost stays low (the cell should learn that gt); where it
    # is 0 the huge penalty makes the cell very unlikely to be chosen.
    cost = (pair_wise_cls_loss + 3.0 * pair_wise_ious_loss + 100000.0 *
            (1.0 - is_in_boxes_and_center))  # [N, G, A]

    # 4-2-5. Pick final positives from the candidates by cost.
    (
        num_fg,
        gt_matched_classes,
        pred_ious_this_matching,
        matched_gt_inds,
        fg_mask,
    ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, N, G, A,
                                is_in_boxes_or_center, is_gt)
    del cost, pair_wise_cls_loss, pair_wise_ious_loss, is_in_boxes_and_center
    return (
        gt_matched_classes,
        fg_mask,
        pred_ious_this_matching,
        matched_gt_inds,
        num_fg,
    )