def losses(self, gt_classes, gt_shifts_deltas, pred_class_logits, pred_shift_deltas):
    """
    Compute the FCOS classification and box-regression losses.

    Args:
        For `gt_classes` and `gt_shifts_deltas` parameters, see
        :meth:`FCOS.get_ground_truth`. Their shapes are (N, R) and (N, R, 4),
        respectively, where R is the total number of shifts across levels,
        i.e. sum(Hi x Wi).
        For `pred_class_logits` and `pred_shift_deltas`, see
        :meth:`FCOSHead.forward`.

    Returns:
        dict[str: Tensor]:
            mapping from a named loss to a scalar tensor storing the loss.
            Used during training only. The dict keys are:
            "loss_cls" and "loss_box_reg".
    """
    # Flatten per-level predictions to (N x R, K) and (N x R, 4).
    flat_logits, flat_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
        pred_class_logits, pred_shift_deltas, self.num_classes
    )
    flat_gt_classes = gt_classes.flatten()
    flat_gt_deltas = gt_shifts_deltas.view(-1, 4)

    # Valid positions exclude ignored ones (label -1); foreground positions
    # additionally exclude the background class (label == num_classes).
    valid_mask = flat_gt_classes >= 0
    fg_mask = valid_mask & (flat_gt_classes != self.num_classes)

    # One-hot classification targets, hot only at foreground positions.
    cls_targets = torch.zeros_like(flat_logits)
    cls_targets[fg_mask, flat_gt_classes[fg_mask]] = 1

    # Average the foreground count across workers so every GPU normalizes
    # by the same value.
    num_fg = comm.all_reduce(fg_mask.sum()) / float(comm.get_world_size())
    normalizer = max(1.0, num_fg)

    # Focal classification loss over all valid (non-ignored) positions.
    loss_cls = sigmoid_focal_loss_jit(
        flat_logits[valid_mask],
        cls_targets[valid_mask],
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / normalizer

    # IoU-based regression loss over foreground positions only.
    loss_box_reg = iou_loss(
        flat_deltas[fg_mask],
        flat_gt_deltas[fg_mask],
        box_mode="ltrb",
        loss_type=self.iou_loss_type,
        reduction="sum",
    ) / normalizer * self.reg_weight

    return {
        "loss_cls": loss_cls,
        "loss_box_reg": loss_box_reg,
    }
def losses(self, gt_classes, gt_anchors_deltas, pred_class_logits, pred_anchor_deltas):
    """
    Compute the EfficientDet classification and box-regression losses.

    Args:
        For `gt_classes` and `gt_anchors_deltas` parameters, see
        :meth:`EfficientDet.get_ground_truth`. Their shapes are (N, R) and
        (N, R, 4), respectively, where R is the total number of anchors
        across levels, i.e. sum(Hi x Wi x A).
        For `pred_class_logits` and `pred_anchor_deltas`, see
        :meth:`EfficientDetHead.forward`.

    Returns:
        dict[str: Tensor]:
            mapping from a named loss to a scalar tensor storing the loss.
            Used during training only. The dict keys are:
            "loss_cls" and "loss_box_reg".
    """
    # Flatten per-level predictions to (N x R, K) and (N x R, 4).
    flat_logits, flat_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
        pred_class_logits, pred_anchor_deltas, self.num_classes
    )
    flat_gt_classes = gt_classes.flatten()
    flat_gt_deltas = gt_anchors_deltas.view(-1, 4)

    # Valid anchors exclude ignored ones (label -1); foreground anchors
    # additionally exclude the background class (label == num_classes).
    valid_mask = flat_gt_classes >= 0
    fg_mask = valid_mask & (flat_gt_classes != self.num_classes)
    num_fg = fg_mask.sum()

    # One-hot classification targets, hot only at foreground anchors.
    cls_targets = torch.zeros_like(flat_logits)
    cls_targets[fg_mask, flat_gt_classes[fg_mask]] = 1

    # Focal classification loss over all valid (non-ignored) anchors.
    loss_cls = sigmoid_focal_loss_jit(
        flat_logits[valid_mask],
        cls_targets[valid_mask],
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / max(1, num_fg)

    # Smooth-L1 regression loss, scaled to match the official release.
    # See: https://github.com/google/automl/blob/master/efficientdet/det_model_fn.py
    loss_box_reg = self.box_loss_weight * self.smooth_l1_loss_beta * smooth_l1_loss(
        flat_deltas[fg_mask],
        flat_gt_deltas[fg_mask],
        beta=self.smooth_l1_loss_beta,
        reduction="sum",
    ) / max(1, num_fg * self.regress_norm)

    return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
def losses(self, gt_classes, gt_anchors_deltas, pred_class_logits, pred_anchor_deltas):
    """
    Compute the RetinaNet classification and box-regression losses.

    Args:
        For `gt_classes` and `gt_anchors_deltas` parameters, see
        :meth:`RetinaNet.get_ground_truth`. Their shapes are (N, R) and
        (N, R, 4), respectively, where R is the total number of anchors
        across levels, i.e. sum(Hi x Wi x A).
        For `pred_class_logits` and `pred_anchor_deltas`, see
        :meth:`RetinaNetHead.forward`.

    Returns:
        dict[str: Tensor]:
            mapping from a named loss to a scalar tensor storing the loss.
            Used during training only. The dict keys are:
            "loss_cls" and "loss_box_reg".
    """
    # Flatten per-level predictions to (N x R, K) and (N x R, 4).
    flat_logits, flat_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
        pred_class_logits, pred_anchor_deltas, self.num_classes
    )
    flat_gt_classes = gt_classes.flatten()
    flat_gt_deltas = gt_anchors_deltas.view(-1, 4)

    # Foreground anchors: assigned a real class (not ignored, not background).
    fg_mask = (flat_gt_classes >= 0) & (flat_gt_classes != self.num_classes)
    pos_inds = torch.nonzero(fg_mask).squeeze(1)

    # Smooth-L1 regression loss over foreground anchors only.
    retinanet_regression_loss = smooth_l1_loss(
        flat_deltas[pos_inds],
        flat_gt_deltas[pos_inds],
        beta=self.smooth_l1_loss_beta,
        reduction="sum",
    ) / max(1, pos_inds.numel() * self.regress_norm)

    # Remap labels for the classification loss:
    #   foreground 0..K-1 -> 1..K, background (== num_classes) -> 0,
    #   ignored (-1) stays -1.
    labels = torch.ones_like(flat_gt_classes)
    labels[pos_inds] += flat_gt_classes[pos_inds]
    ignored = flat_gt_classes == -1
    labels[ignored] = flat_gt_classes[ignored]
    labels[flat_gt_classes == self.num_classes] = 0
    labels = labels.int()

    retinanet_cls_loss = self.box_cls_loss_func(flat_logits, labels)

    return {
        "loss_cls": retinanet_cls_loss,
        "loss_box_reg": retinanet_regression_loss
    }
def losses(
    self,
    gt_class_info,
    gt_delta_info,
    gt_mask_info,
    num_fg,
    pred_logits,
    pred_deltas,
    pred_masks,
):
    """
    Compute the TensorMask classification, box-regression and (optionally)
    mask losses.

    Args:
        For `gt_class_info`, `gt_delta_info`, `gt_mask_info` and `num_fg`
        parameters, see :meth:`TensorMask.get_ground_truth`.
        For `pred_logits`, `pred_deltas` and `pred_masks`, see
        :meth:`TensorMaskHead.forward`.

    Returns:
        losses (dict[str: Tensor]): mapping from a named loss to a scalar
            tensor storing the loss. Used during training only. The
            potential dict keys are: "loss_cls", "loss_box_reg" and
            "loss_mask" ("loss_mask" only when `self.mask_on`).
    """
    # Unpack the (target, index) pairs produced by get_ground_truth.
    gt_classes_target, gt_valid_inds = gt_class_info
    gt_deltas, gt_fg_inds = gt_delta_info
    gt_masks, gt_mask_inds = gt_mask_info
    # Clamp the normalizer to >= 1 so all losses stay finite when the batch
    # contains no foreground samples.
    loss_normalizer = torch.tensor(max(1, num_fg),
                                   dtype=torch.float32,
                                   device=self.device)

    # classification and regression: flatten to (N x R, K) and (N x R, 4).
    pred_logits, pred_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
        pred_logits, pred_deltas, self.num_classes)
    # Focal-loss* classification over valid (non-ignored) positions.
    loss_cls = (sigmoid_focal_loss_star_jit(
        pred_logits[gt_valid_inds],
        gt_classes_target[gt_valid_inds],
        alpha=self.focal_loss_alpha,
        gamma=self.focal_loss_gamma,
        reduction="sum",
    ) / loss_normalizer)

    if num_fg == 0:
        # No foreground: emit a zero loss that still references the
        # predictions so the autograd graph stays connected.
        loss_box_reg = pred_deltas.sum() * 0
    else:
        # Pure L1 (beta=0.0) over foreground positions.
        loss_box_reg = (smooth_l1_loss(
            pred_deltas[gt_fg_inds], gt_deltas, beta=0.0, reduction="sum") /
                        loss_normalizer)
    losses = {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}

    # mask prediction
    if self.mask_on:
        loss_mask = 0
        for lvl in range(self.num_levels):
            # With the bipyramid, mask resolution doubles per level.
            cur_level_factor = 2**lvl if self.bipyramid_on else 1
            for anc in range(self.num_anchors):
                cur_gt_mask_inds = gt_mask_inds[lvl][anc]
                if cur_gt_mask_inds is None:
                    # No ground-truth masks for this (level, anchor): add a
                    # zero term that touches the prediction tensor to keep
                    # its gradients defined.
                    loss_mask += pred_masks[lvl][anc][0, 0, 0, 0] * 0
                else:
                    cur_mask_size = self.mask_sizes[anc] * cur_level_factor
                    # TODO maybe there are numerical issues when mask sizes are large
                    # Per-pixel weight so each mask contributes equally
                    # regardless of its resolution.
                    cur_size_divider = torch.tensor(
                        self.mask_loss_weight / (cur_mask_size**2),
                        dtype=torch.float32,
                        device=self.device,
                    )

                    # Gather the predicted mask logits at the ground-truth
                    # (image, y, x) positions; the second axis keeps the
                    # flattened V x U mask channels.
                    cur_pred_masks = pred_masks[lvl][anc][
                        cur_gt_mask_inds[:, 0],  # N
                        :,  # V x U
                        cur_gt_mask_inds[:, 1],  # H
                        cur_gt_mask_inds[:, 2],  # W
                    ]
                    loss_mask += F.binary_cross_entropy_with_logits(
                        # reshape flattened channels back to (V, U)
                        cur_pred_masks.view(-1, cur_mask_size, cur_mask_size),
                        gt_masks[lvl][anc].to(dtype=torch.float32),
                        reduction="sum",
                        weight=cur_size_divider,
                        pos_weight=self.mask_pos_weight,
                    )
        losses["loss_mask"] = loss_mask / loss_normalizer
    return losses