Exemplo n.º 1
0
    def __loss_per_scale(self, name, conv, pred, label, bboxes, anchors,
                         stride):
        """
        Build the loss of one detection scale: xy + wh + confidence + class terms,
        summed over every grid cell / anchor / channel and averaged over the batch.

        :param name: name of the loss; used as the TensorFlow name scope
        :param conv: raw output of the yolo convolution layer,
        shape (batch_size, output_size, output_size, anchor_per_scale * (5 + num_class))
        :param pred: predicted bbox information decoded from the yolo output (x, y, w, h, conf, prob),
        where (x, y, w, h) is relative to input_size, e.g. input_size=416, (x, y, w, h) = (120, 200, 50, 70);
        shape (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class)
        :param label: shape (batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes);
        only the position of the best anchor holds (x, y, w, h, 1, classes), and (x, y, w, h) is the
        original size of the bbox after correction
        :param bboxes: shape (batch_size, max_bbox_per_scale, 4),
        stored as (x, y, w, h) in the original size of the bbox after correction;
        used to compute the IOU between this detector's predicted boxes and all the bboxes
        this detector is responsible for
        :param anchors: anchors of this detector
        :param stride: stride of this detector
        :return: scalar loss tensor of this scale
        """
        with tf.name_scope(name):
            raw_shape = tf.shape(conv)
            n_batch = raw_shape[0]
            grid_size = raw_shape[1]
            input_size = tf.cast(stride * grid_size, tf.float32)

            # Split the channel axis per anchor so every field can be sliced out.
            raw = tf.reshape(
                conv, (n_batch, grid_size, grid_size,
                       self.__anchor_per_scale, 5 + self.__num_classes))
            raw_dxdy = raw[..., 0:2]
            raw_dwdh = raw[..., 2:4]
            raw_conf = raw[..., 4:5]
            raw_prob = raw[..., 5:]

            pred_xywh = pred[..., 0:4]
            pred_conf = pred[..., 4:5]

            label_xy = label[..., 0:2]
            label_wh = label[..., 2:4]
            respond_bbox = label[..., 4:5]
            label_prob = label[..., 5:]

            # (1) xywh loss
            # Grid of cell offsets: the last axis holds (column index, row index).
            cell_index = tf.range(grid_size, dtype=tf.int32)
            row_index = tf.tile(cell_index[:, tf.newaxis], [1, grid_size])
            col_index = tf.tile(cell_index[tf.newaxis, :], [grid_size, 1])
            xy_grid = tf.stack([col_index, row_index], axis=-1)
            xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :],
                              [n_batch, 1, 1, self.__anchor_per_scale, 1])
            xy_grid = tf.cast(xy_grid, tf.float32)

            # Regression targets expressed in the same space as the raw conv output.
            label_txty = 1.0 * label_xy / stride - xy_grid
            label_raw_twth = tf.log((1.0 * label_wh / stride) / anchors)
            # Infinite log values (e.g. from a zero width/height) are replaced by 0.
            label_raw_twth = tf.where(tf.is_inf(label_raw_twth),
                                      tf.zeros_like(label_raw_twth),
                                      label_raw_twth)

            # Weight in (1, 2]: the smaller the labelled box area, the larger the weight.
            label_w = label_wh[..., 0:1]
            label_h = label_wh[..., 1:2]
            bbox_loss_scale = 2.0 - 1.0 * label_w * label_h / (input_size ** 2)

            xy_loss = respond_bbox * bbox_loss_scale * \
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=label_txty, logits=raw_dxdy)
            wh_loss = 0.5 * respond_bbox * bbox_loss_scale * tf.square(
                label_raw_twth - raw_dwdh)

            # (2) confidence loss
            # IOU of every predicted box against every bbox of the same image.
            iou = utils.iou_calc4(
                pred_xywh[..., np.newaxis, :],
                bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
            max_iou = tf.reduce_max(iou, axis=-1)[..., np.newaxis]
            # Background = not responsible for a bbox AND best IOU below the threshold.
            respond_bgd = (1.0 - respond_bbox) * tf.cast(
                max_iou < self.__iou_loss_thresh, tf.float32)

            conf_focal = self.__focal(respond_bbox, pred_conf)

            # The same cross-entropy feeds both the object and the background term.
            conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=respond_bbox, logits=raw_conf)
            conf_loss = conf_focal * (
                respond_bbox * conf_ce + respond_bgd * conf_ce)

            # (3) classes loss
            prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=label_prob, logits=raw_prob)

            # Sum all terms per sample, then average over the batch.
            per_element = tf.concat(
                [xy_loss, wh_loss, conf_loss, prob_loss], axis=-1)
            return tf.reduce_mean(
                tf.reduce_sum(per_element, axis=[1, 2, 3, 4]))
Exemplo n.º 2
0
    def __loss_per_scale(self, name, conv, pred, label, bboxes, anchors,
                         stride):
        """
        Build the loss of one detection scale: GIOU + confidence + class terms,
        summed over every grid cell / anchor / channel and averaged over the batch.

        :param name: name of the loss; used as the TensorFlow name scope
        :param conv: raw output of the yolo convolution layer,
        shape (batch_size, output_size, output_size, anchor_per_scale * (5 + num_class))
        :param pred: predicted bbox information decoded from the yolo output (x, y, w, h, conf, prob),
        where (x, y, w, h) is relative to input_size, e.g. input_size=416, (x, y, w, h) = (120, 200, 50, 70);
        shape (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class)
        :param label: shape (batch_size, output_size, output_size, anchor_per_scale, 5 + num_classes);
        only the position of the best anchor holds (x, y, w, h, 1, classes), and (x, y, w, h) is the
        original size of the bbox after correction
        :param bboxes: shape (batch_size, max_bbox_per_scale, 4),
        stored as (x, y, w, h) in the original size of the bbox after correction;
        used to compute the IOU between this detector's predicted boxes and all the bboxes
        this detector is responsible for
        :param anchors: anchors of this detector (not used by this variant of the loss)
        :param stride: stride of this detector
        :return: scalar loss tensor of this scale
        """
        with tf.name_scope(name):
            raw_shape = tf.shape(conv)
            n_batch = raw_shape[0]
            grid_size = raw_shape[1]
            input_size = tf.cast(stride * grid_size, tf.float32)

            # Split the channel axis per anchor so every field can be sliced out.
            raw = tf.reshape(
                conv, (n_batch, grid_size, grid_size,
                       self.__anchor_per_scale, 5 + self.__num_classes))
            raw_conf = raw[..., 4:5]
            raw_prob = raw[..., 5:]

            pred_xywh = pred[..., 0:4]
            pred_conf = pred[..., 4:5]

            label_xywh = label[..., 0:4]
            respond_bbox = label[..., 4:5]
            label_prob = label[..., 5:]

            # (1) GIOU loss
            giou = utils.GIOU(pred_xywh, label_xywh)
            giou = giou[..., np.newaxis]
            # Weight in (1, 2]: the smaller the labelled box area, the larger the weight.
            label_w = label_xywh[..., 2:3]
            label_h = label_xywh[..., 3:4]
            bbox_loss_scale = 2.0 - 1.0 * label_w * label_h / (input_size ** 2)
            giou_loss = respond_bbox * bbox_loss_scale * (1.0 - giou)

            # (2) confidence loss
            # IOU of every predicted box against every bbox of the same image.
            iou = utils.iou_calc4(
                pred_xywh[..., np.newaxis, :],
                bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
            max_iou = tf.reduce_max(iou, axis=-1)[..., np.newaxis]
            # Background = not responsible for a bbox AND best IOU below the threshold.
            respond_bgd = (1.0 - respond_bbox) * tf.cast(
                max_iou < self.__iou_loss_thresh, tf.float32)

            conf_focal = self.__focal(respond_bbox, pred_conf)

            # The same cross-entropy feeds both the object and the background term.
            conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=respond_bbox, logits=raw_conf)
            conf_loss = conf_focal * (
                respond_bbox * conf_ce + respond_bgd * conf_ce)

            # (3) classes loss
            prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(
                labels=label_prob, logits=raw_prob)

            # Sum all terms per sample, then average over the batch.
            per_element = tf.concat(
                [giou_loss, conf_loss, prob_loss], axis=-1)
            return tf.reduce_mean(
                tf.reduce_sum(per_element, axis=[1, 2, 3, 4]))