Example #1
    def get_pred_iou_lossv1(self):
        '''
        Use the IoU between the predicted bboxes and the gt bboxes as the target.
        :return: scalar loss for the predicted-IoU branch
        '''
        with tf.name_scope("get_pred_iouv1_loss"):
            gt_proposal_deltas = wmlt.batch_gather(
                self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
            batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
                gt_proposal_deltas)
            gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                            [batch_size * box_nr, box_dim])
            proposal_bboxes = tf.reshape(self.proposals.boxes,
                                         [batch_size * box_nr, box_dim])
            cls_agnostic_bbox_reg = self.pred_proposal_deltas.get_shape(
            ).as_list()[-1] == box_dim
            num_classes = self.pred_class_logits.get_shape().as_list()[-1]
            fg_num_classes = num_classes - 1
            pred_iou_logits = self.pred_iou_logits

            fg_inds = tf.greater(self.gt_classes, 0)
            gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
            pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                                   fg_inds)
            proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
            gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
            pred_iou_logits_pos = tf.reshape(
                tf.boolean_mask(pred_iou_logits, fg_inds), [-1])
            pred_iou_logits_neg = tf.reshape(
                tf.boolean_mask(pred_iou_logits, tf.logical_not(fg_inds)),
                [-1])
            if not cls_agnostic_bbox_reg:
                pred_proposal_deltas = tf.reshape(
                    pred_proposal_deltas, [-1, fg_num_classes, box_dim])
                pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                    pred_proposal_deltas, gt_logits_i - 1)

            pred_bboxes = self.box2box_transform.apply_deltas(
                pred_proposal_deltas, boxes=proposal_bboxes)
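            # GIoU between the decoded predictions and the gt boxes; the gradient
            # is stopped below so it only acts as a soft target for the IoU branch.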
            loss_box_reg = odl.giou(pred_bboxes, gt_proposal_deltas)
            loss_box_reg = tf.stop_gradient(loss_box_reg)
            loss_pos = wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=loss_box_reg, logits=pred_iou_logits_pos)
            loss_pos = tf.reduce_mean(loss_pos)
            loss_neg = wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=tf.zeros_like(pred_iou_logits_neg),
                logits=pred_iou_logits_neg)
            loss_neg = tf.reduce_mean(loss_neg) * 0.5
            tf.summary.scalar("iou_pos_loss", loss_pos)
            tf.summary.scalar("iou_neg_loss", loss_neg)

            loss = loss_pos + loss_neg

        return loss
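
Every example on this page calls odl.giou on element-wise matched predictions and ground truth. For reference, GIoU (Rezatofighi et al., 2019) extends IoU with a penalty derived from the smallest enclosing box C; Examples #2-#5 use 1 - GIoU as the box regression loss, while Example #1 above uses the GIoU value itself (with the gradient stopped) as the soft target for an IoU-prediction head:

\[
\mathrm{GIoU}(B_p, B_g) = \mathrm{IoU}(B_p, B_g) - \frac{|C \setminus (B_p \cup B_g)|}{|C|},
\qquad
L_{\mathrm{GIoU}} = 1 - \mathrm{GIoU}(B_p, B_g)
\]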
Example #2
def rpn_losses_giou(
    gt_objectness_logits,
    gt_anchor_deltas,
    pred_objectness_logits,
    pred_anchor_deltas,
):
    reg_loss_sum = 1.0 - odl.giou(pred_anchor_deltas, gt_anchor_deltas)
    localization_loss = tf.reduce_sum(reg_loss_sum)

    objectness_loss = tf.losses.sigmoid_cross_entropy(
        logits=tf.expand_dims(pred_objectness_logits, 1),
        multi_class_labels=tf.cast(
            tf.expand_dims(gt_objectness_logits, axis=1), tf.float32),
        reduction=tf.losses.Reduction.SUM,
        loss_collection=None
    )
    return objectness_loss, localization_loss
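
As a point of reference only, here is a minimal, self-contained sketch of an element-wise GIoU computation in TensorFlow 1.x. It assumes corner-encoded boxes in [ymin, xmin, ymax, xmax] order and a small epsilon for numerical safety; the real odl.giou may use a different box layout or signature.

import tensorflow as tf

def giou_sketch(boxes1, boxes2, eps=1e-8):
    # Element-wise GIoU between matched box pairs of shape [..., 4],
    # with boxes assumed to be encoded as [ymin, xmin, ymax, xmax].
    ymin1, xmin1, ymax1, xmax1 = tf.unstack(boxes1, axis=-1)
    ymin2, xmin2, ymax2, xmax2 = tf.unstack(boxes2, axis=-1)

    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
    area2 = (ymax2 - ymin2) * (xmax2 - xmin2)

    # Intersection area (clamped at zero for disjoint boxes).
    inter_h = tf.maximum(tf.minimum(ymax1, ymax2) - tf.maximum(ymin1, ymin2), 0.0)
    inter_w = tf.maximum(tf.minimum(xmax1, xmax2) - tf.maximum(xmin1, xmin2), 0.0)
    inter = inter_h * inter_w

    union = area1 + area2 - inter
    iou = inter / tf.maximum(union, eps)

    # Smallest axis-aligned box enclosing both inputs.
    enc_h = tf.maximum(ymax1, ymax2) - tf.minimum(ymin1, ymin2)
    enc_w = tf.maximum(xmax1, xmax2) - tf.minimum(xmin1, xmin2)
    enc_area = enc_h * enc_w

    return iou - (enc_area - union) / tf.maximum(enc_area, eps)

With such a helper, the regression term in the examples below would be tf.reduce_sum(1.0 - giou_sketch(pred_boxes, gt_boxes)) before normalization.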
Example #3
    def regression_losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`RetinaNetGIou.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`RetinaNetGIouHead.forward`.

        Returns:
            Tensor:
                a scalar tensor storing the GIoU box regression loss.
                Used during training only.
        """

        assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"
        assert len(self.pred_anchor_deltas[0].get_shape()) == 4, "error anchors dim"

        gt_classes, gt_anchors_deltas = self._get_regression_ground_truth()
        pred_class_logits, pred_anchor_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
            self.pred_logits, self.pred_anchor_deltas, self.num_classes
        )  # Shapes: (N, R, K) and (N, R, 4), respectively.

        foreground_idxs = (gt_classes > 0)
        num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))

        # regression loss
        pred_anchor_deltas = tf.boolean_mask(pred_anchor_deltas, foreground_idxs)
        gt_anchors_deltas = tf.boolean_mask(gt_anchors_deltas, foreground_idxs)
        B, X = wmlt.combined_static_and_dynamic_shape(foreground_idxs)
        anchors = tf.tile(self.anchors, [B, 1, 1])
        anchors = tf.boolean_mask(anchors, foreground_idxs)
        box = self.box2box_transform.apply_deltas(pred_anchor_deltas, anchors)
        reg_loss_sum = 1.0 - odl.giou(box, gt_anchors_deltas)
        loss_box_reg = tf.reduce_sum(reg_loss_sum) / tf.cast(tf.maximum(1, num_foreground), tf.float32)
        loss_box_reg = loss_box_reg * self.cfg.BOX_REG_LOSS_SCALE

        return loss_box_reg
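
Examples #3 and #5 normalize the summed GIoU loss by the number of foreground anchors (clamped to at least one) and then apply a configurable scale factor, where \(\hat{b}_i\) are the decoded predicted boxes and \(b_i\) the matched ground-truth boxes:

\[
L_{\mathrm{box}} = \frac{\mathrm{BOX\_REG\_LOSS\_SCALE}}{\max(1, N_{\mathrm{fg}})}
\sum_{i \in \mathrm{fg}} \big(1 - \mathrm{GIoU}(\hat{b}_i, b_i)\big)
\]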
Example #4
    def losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`FCOSGIou.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`FCOSGIouHead.forward`.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "fcos_loss_cls", "fcos_loss_center_ness" and "fcos_loss_box_reg"
        """

        assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"

        gt_results = self._get_ground_truth()
        loss_cls_list = []
        loss_regression_list = []
        loss_center_ness_list = []
        total_num_foreground = []

        img_size = tf.shape(self.batched_inputs[IMAGE])[1:3]

        for i, gt_results_item in enumerate(gt_results):
            gt_classes = gt_results_item['g_classes']
            gt_boxes = gt_results_item['g_boxes']
            g_center_ness = gt_results_item['g_center_ness']
            pred_class_logits = self.pred_logits[i]
            pred_regression = self.pred_regression[i]
            pred_center_ness = self.pred_center_ness[i]

            foreground_idxs = (gt_classes > 0)
            num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))
            total_num_foreground.append(num_foreground)

            gt_classes_target = tf.one_hot(gt_classes,
                                           depth=self.num_classes + 1)
            gt_classes_target = gt_classes_target[..., 1:]

            #
            pred_center_ness = tf.expand_dims(pred_center_ness, axis=-1)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness")
            # logits loss
            loss_cls = tf.reduce_sum(
                wnn.sigmoid_cross_entropy_with_logits_FL(
                    labels=gt_classes_target,
                    logits=pred_class_logits,
                    alpha=self.focal_loss_alpha,
                    gamma=self.focal_loss_gamma))

            # regression loss
            pred_boxes = self.box2box_transform.apply_deltas(
                regression=pred_regression, img_size=img_size)
            if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG and gt_classes.get_shape(
            ).as_list()[0] > 1:
                log_boxes = self.box2box_transform.apply_deltas(
                    regression=gt_results_item['g_regression'],
                    img_size=img_size)
                log_boxes = odbox.tfabsolutely_boxes_to_relative_boxes(
                    log_boxes, width=img_size[1], height=img_size[0])
                boxes1 = tf.reshape(log_boxes[1:2], [1, -1, 4])
                wsummary.detection_image_summary(
                    images=self.batched_inputs[IMAGE][1:2],
                    boxes=boxes1,
                    name="FCOSGIou_decode_test")
            pred_center_ness = tf.boolean_mask(pred_center_ness,
                                               foreground_idxs)
            g_center_ness = tf.boolean_mask(g_center_ness, foreground_idxs)
            pred_boxes = tf.boolean_mask(pred_boxes, foreground_idxs)
            gt_boxes = tf.boolean_mask(gt_boxes, foreground_idxs)
            wsummary.histogram_or_scalar(pred_center_ness, "center_ness_pos")
            reg_loss_sum = (1.0 - odl.giou(pred_boxes, gt_boxes))
            wmlt.variable_summaries_v2(reg_loss_sum, f"giou_loss{i}")
            pred_center_ness = tf.squeeze(pred_center_ness, axis=-1)
            reg_norm = tf.reduce_sum(g_center_ness) + 1e-5
            reg_loss_sum = reg_loss_sum * g_center_ness
            wmlt.variable_summaries_v2(reg_loss_sum, f"loss_sum{i}")
            loss_box_reg = tf.reduce_sum(reg_loss_sum) * 300 / reg_norm
            wmlt.variable_summaries_v2(loss_box_reg, f"box_reg_loss_{i}")

            loss_center_ness = 0.5 * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=g_center_ness, logits=pred_center_ness)
            loss_center_ness = tf.reduce_sum(loss_center_ness) * 0.1
            wmlt.variable_summaries_v2(loss_center_ness,
                                       f"center_ness_loss{i}")

            loss_cls_list.append(loss_cls)
            loss_regression_list.append(loss_box_reg)
            loss_center_ness_list.append(loss_center_ness)

        total_num_foreground = tf.to_float(
            tf.maximum(tf.add_n(total_num_foreground), 1))
        return {
            "fcos_loss_cls":
            tf.add_n(loss_cls_list) / total_num_foreground,
            "fcos_loss_center_ness":
            tf.add_n(loss_center_ness_list) / total_num_foreground,
            "fcos_loss_box_reg":
            tf.add_n(loss_regression_list) / total_num_foreground
        }
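
In the FCOS variant above, the per-level GIoU loss is weighted by the ground-truth centerness \(c_i\) and normalized by the sum of the centerness values rather than by the foreground count (the constant 300 and the 1e-5 epsilon come directly from the code); the per-level losses are then summed and divided by the total number of foreground locations when the dict is returned:

\[
L_{\mathrm{box}}^{(l)} = \frac{300 \sum_{i \in \mathrm{fg}_l} c_i \,\big(1 - \mathrm{GIoU}(\hat{b}_i, b_i)\big)}{\sum_{i \in \mathrm{fg}_l} c_i + 10^{-5}}
\]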
Example #5
    def losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`RetinaNetGIou.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`RetinaNetGIouHead.forward`.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "loss_cls" and "loss_box_reg"
        """

        assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"
        assert len(
            self.pred_anchor_deltas[0].get_shape()) == 4, "error anchors dim"

        gt_classes, gt_anchors_deltas = self._get_ground_truth()
        pred_class_logits, pred_anchor_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
            self.pred_logits, self.pred_anchor_deltas,
            self.num_classes)  # Shapes: (N, R, K) and (N, R, 4), respectively.

        valid_idxs = gt_classes >= 0
        foreground_idxs = (gt_classes > 0)
        num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))

        gt_classes_target = tf.boolean_mask(gt_classes, valid_idxs)
        gt_classes_target = tf.one_hot(gt_classes_target,
                                       depth=self.num_classes + 1)
        gt_classes_target = gt_classes_target[:, 1:]  # RetinaNetGIou has no background class: background is index 0, so it is dropped after the one-hot encoding.
        pred_class_logits = tf.boolean_mask(pred_class_logits, valid_idxs)

        # logits loss
        loss_cls = tf.reduce_sum(
            wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=gt_classes_target,
                logits=pred_class_logits,
                alpha=self.focal_loss_alpha,
                gamma=self.focal_loss_gamma,
            )) / tf.cast(tf.maximum(1, num_foreground), tf.float32)

        # regression loss
        pred_anchor_deltas = tf.boolean_mask(pred_anchor_deltas,
                                             foreground_idxs)
        gt_anchors_deltas = tf.boolean_mask(gt_anchors_deltas, foreground_idxs)
        B, X = wmlt.combined_static_and_dynamic_shape(foreground_idxs)
        anchors = tf.tile(self.anchors, [B, 1, 1])
        anchors = tf.boolean_mask(anchors, foreground_idxs)
        box = self.box2box_transform.apply_deltas(pred_anchor_deltas, anchors)
        reg_loss_sum = 1.0 - odl.giou(box, gt_anchors_deltas)
        loss_box_reg = tf.reduce_sum(reg_loss_sum) / tf.cast(
            tf.maximum(1, num_foreground), tf.float32)
        loss_cls = loss_cls * self.cfg.BOX_CLS_LOSS_SCALE
        loss_box_reg = loss_box_reg * self.cfg.BOX_REG_LOSS_SCALE

        return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
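
Example #5 also divides the sigmoid focal classification loss, computed over all anchors with a non-negative class label, by the same clamped foreground count before scaling it with cfg.BOX_CLS_LOSS_SCALE:

\[
L_{\mathrm{cls}} = \frac{\mathrm{BOX\_CLS\_LOSS\_SCALE}}{\max(1, N_{\mathrm{fg}})}
\sum_{i \in \mathrm{valid}} \mathrm{FL}_{\alpha,\gamma}(p_i, y_i)
\]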