    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        Compute the LaneNet model loss
        :param input_tensor: input image tensor
        :param binary_label: binary segmentation label
        :param instance_label: instance segmentation label
        :param name: variable scope name
        :return: dict with the total loss and its components
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')
            # compute the binary segmentation loss
            decode_logits = inference_ret['logits']
            binary_segmenatation_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=decode_logits,
                labels=tf.squeeze(binary_label, axis=[3]),
                name='entropy_loss')
            binary_segmenatation_loss = tf.reduce_mean(
                binary_segmenatation_loss)
            # compute the discriminative loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv,
                                        out_channel=3,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')
            # compute the discriminative loss
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 3, image_shape, 0.5, 1.5, 1.0, 1.0, 0.001)

            # combine the losses
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

            return ret
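
# A small shape demo (illustrative only, not part of the model code) of the sparse softmax cross
# entropy contract used above: the logits keep a trailing class dimension while the labels do not,
# which is why the label tensor is squeezed on axis 3 before the call.
import tensorflow as tf

demo_logits = tf.random_normal([2, 4, 4, 2])             # (B, H, W, num_classes)
demo_labels = tf.zeros([2, 4, 4, 1], dtype=tf.int32)     # (B, H, W, 1) as loaded from disk
demo_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=demo_logits,
    labels=tf.squeeze(demo_labels, axis=[3]))            # labels must be (B, H, W)
with tf.Session() as sess:
    print(sess.run(tf.reduce_mean(demo_loss)))
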
    def compute_loss(self, input_tensor, label, name):
        """
        计算损失函数
        :param input_tensor:
        :param label: 1D label image with different n lane with pix value from [1] to [n],
                      background pix value is [0]
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self.build_model(input_tensor=input_tensor,
                                             name='inference')
            # compute the loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding; for grayscale images this 3 should probably be changed to 1
            pix_embedding = self.conv2d(inputdata=decode_deconv,
                                        out_channel=3,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')
            # compute the discriminative loss; for grayscale images this 3 should probably be changed to 1
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, label, 3, image_shape, 0.5, 1.5, 1.0, 1.0, 0.001)

            ret = {
                'total_loss': disc_loss,
                'loss_var': l_var,
                'loss_dist': l_dist,
                'loss_reg': l_reg,
                'binary_seg_logits': decode_deconv,
                'embedding': pix_embedding
            }

            return ret
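
# A minimal NumPy sketch (an assumption, not the repository implementation) of the per-image
# discriminative loss that lanenet_discriminative_loss.discriminative_loss is expected to compute,
# following De Brabandere et al., "Semantic Instance Segmentation with a Discriminative Loss
# Function". The 2*delta_d margin and the parameter order mirror the calls above but are not taken
# from the actual module.
import numpy as np

def discriminative_loss_single_image(embedding, instance_label, delta_v=0.5, delta_d=3.0,
                                     param_var=1.0, param_dist=1.0, param_reg=0.001):
    """embedding: (H, W, D) float array; instance_label: (H, W) int array with 0 = background."""
    emb = embedding.reshape(-1, embedding.shape[-1])
    lbl = instance_label.reshape(-1)
    instance_ids = [i for i in np.unique(lbl) if i != 0]
    if not instance_ids:
        return 0.0, 0.0, 0.0, 0.0
    means = np.stack([emb[lbl == i].mean(axis=0) for i in instance_ids])
    # variance term: pull each pixel to within delta_v of its instance mean
    l_var = np.mean([
        np.mean(np.maximum(0.0, np.linalg.norm(emb[lbl == i] - mu, axis=1) - delta_v) ** 2)
        for i, mu in zip(instance_ids, means)])
    # distance term: push instance means at least 2 * delta_d apart
    l_dist = 0.0
    if len(instance_ids) > 1:
        pair_terms = [np.maximum(0.0, 2.0 * delta_d - np.linalg.norm(means[a] - means[b])) ** 2
                      for a in range(len(means)) for b in range(len(means)) if a != b]
        l_dist = np.mean(pair_terms)
    # regularisation term: keep the instance means close to the origin
    l_reg = np.mean(np.linalg.norm(means, axis=1))
    total = param_var * l_var + param_dist * l_dist + param_reg * l_reg
    return total, l_var, l_dist, l_reg
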
    def compute_loss(self, binary_seg_logits, binary_label,
                     instance_seg_logits, instance_label, name, reuse):
        """
        compute lanenet loss
        :param binary_seg_logits:
        :param binary_label:
        :param instance_seg_logits:
        :param instance_label:
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # calculate class weighted binary seg loss
            with tf.variable_scope(name_or_scope='binary_seg'):
                binary_label_onehot = tf.one_hot(tf.reshape(
                    tf.cast(binary_label, tf.int32),
                    shape=[
                        binary_label.get_shape().as_list()[0],
                        binary_label.get_shape().as_list()[1],
                        binary_label.get_shape().as_list()[2]
                    ]),
                                                 depth=CFG.TRAIN.CLASSES_NUMS,
                                                 axis=-1)

                binary_label_plain = tf.reshape(
                    binary_label,
                    shape=[
                        binary_label.get_shape().as_list()[0] *
                        binary_label.get_shape().as_list()[1] *
                        binary_label.get_shape().as_list()[2] *
                        binary_label.get_shape().as_list()[3]
                    ])
                unique_labels, unique_id, counts = tf.unique_with_counts(
                    binary_label_plain)
                counts = tf.cast(counts, tf.float32)
                inverse_weights = tf.divide(
                    1.0,
                    tf.log(
                        tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                               tf.constant(1.02))))

                binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)

            # calculate class weighted instance seg loss
            with tf.variable_scope(name_or_scope='instance_seg'):

                pix_bn = tf.layers.batch_normalization(
                    inputs=instance_seg_logits,
                    training=self._is_training,
                    name='pix_bn')
                pix_relu = tf.nn.relu(pix_bn, name='pix_relu')
                pix_embedding = tf.layers.conv2d(
                    inputs=pix_relu,
                    filters=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    padding='SAME',
                    kernel_size=1,
                    use_bias=False,
                    name='pix_embedding_conv')
                pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                                   pix_embedding.get_shape().as_list()[2])
                instance_segmentation_loss, l_var, l_dist, l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                        pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                    )

            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'gn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': instance_segmentation_loss
            }

        return ret
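
# A hedged sketch (an assumption; the helper is not shown in these snippets) of what
# _compute_class_weighted_cross_entropy_loss could look like: a softmax cross entropy in which
# every pixel is weighted by the weight of its ground-truth class.
import tensorflow as tf

def _compute_class_weighted_cross_entropy_loss(onehot_labels, logits, classes_weights):
    """onehot_labels, logits: (B, H, W, C); classes_weights: length-C tensor or list of weights."""
    weights = tf.convert_to_tensor(classes_weights, dtype=tf.float32)
    # per-pixel weight = weight of that pixel's ground-truth class
    pixel_weights = tf.reduce_sum(tf.multiply(onehot_labels, weights), axis=-1)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits,
                                           weights=pixel_weights)
    return loss
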
    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        计算LaneNet模型损失函数
        :param input_tensor:
        :param binary_label:
        :param instance_label:
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')
            decode_logits = inference_ret['logits']
            decode_deconv = inference_ret['deconv']

            # compute the binary segmentation loss
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[
                    binary_label.get_shape().as_list()[0] *
                    binary_label.get_shape().as_list()[1] *
                    binary_label.get_shape().as_list()[2]
                ])
            # add class weights
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            inverse_weights = tf.divide(
                1.0,
                tf.log(
                    tf.add(tf.divide(tf.constant(1.0), counts),
                           tf.constant(1.02))))
            inverse_weights = tf.concat(
                [tf.constant([5.]), inverse_weights[1:]], axis=0)
            inverse_weights = tf.gather(inverse_weights, binary_label)
            binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label,
                logits=decode_logits,
                weights=inverse_weights)
            binary_segmenatation_loss = tf.reduce_mean(
                binary_segmenatation_loss)

            # compute the discriminative loss
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv,
                                        out_channel=4,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')
            # compute the discriminative loss
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape, 0.5, 3.0, 1.0, 1.0, 0.001)

            # combine the losses
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'batchnorm' in vv.name \
                        or ('batch_norm' in vv.name and 'alpha' in vv.name):
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss
            # total_loss = binary_segmenatation_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

            return ret
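
# A small NumPy check (illustrative only; the pixel counts are placeholders) of the two class
# weighting variants that appear in these snippets. The form used just above,
# 1 / log(1/counts + 1.02), is nearly constant (about 1 / log(1.02) ~ 50.5) once per-class counts
# are large, which is presumably why the background weight is then overridden with 5.0. The other
# form, 1 / log(counts/total + 1.02), is the bounded inverse-frequency weighting that up-weights
# the rarer class.
import numpy as np

counts = np.array([120000.0, 11072.0])      # e.g. background pixels vs. lane pixels
freq = counts / counts.sum()
print(1.0 / np.log(1.0 / counts + 1.02))    # ~[50.5, 50.3]: almost no re-weighting
print(1.0 / np.log(freq + 1.02))            # ~[1.5, 10.1]: the rare lane class is up-weighted
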
    def compute_loss(self, binary_seg_logits, binary_label,
                     instance_seg_logits, instance_label, name, reuse):
        """
        compute lanenet loss
        :param binary_seg_logits:
        :param binary_label:
        :param instance_seg_logits:
        :param instance_label:
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # calculate class weighted binary seg loss
            with tf.variable_scope(name_or_scope='binary_seg'):
                binary_label_onehot = tf.one_hot(tf.reshape(
                    tf.cast(binary_label, tf.int32),
                    shape=[
                        binary_label.get_shape().as_list()[0],
                        binary_label.get_shape().as_list()[1],
                        binary_label.get_shape().as_list()[2]
                    ]),
                                                 depth=self._class_nums,
                                                 axis=-1)

                binary_label_plain = tf.reshape(
                    binary_label,
                    shape=[
                        binary_label.get_shape().as_list()[0] *
                        binary_label.get_shape().as_list()[1] *
                        binary_label.get_shape().as_list()[2] *
                        binary_label.get_shape().as_list()[3]
                    ])
                unique_labels, unique_id, counts = tf.unique_with_counts(
                    binary_label_plain)
                counts = tf.cast(counts, tf.float32)
                inverse_weights = tf.divide(
                    1.0,
                    tf.log(
                        tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                               tf.constant(1.02))))
                if self._binary_loss_type == 'cross_entropy':
                    binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                        onehot_labels=binary_label_onehot,
                        logits=binary_seg_logits,
                        classes_weights=inverse_weights)
                elif self._binary_loss_type == 'focal':
                    binary_segmenatation_loss = self._multi_category_focal_loss(
                        onehot_labels=binary_label_onehot,
                        logits=binary_seg_logits,
                        classes_weights=inverse_weights)
                else:
                    raise NotImplementedError

            # calculate class weighted instance seg loss
            with tf.variable_scope(name_or_scope='instance_seg'):

                pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                      is_training=self._is_training,
                                      name='pix_bn')
                pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
                pix_embedding = self.conv2d(inputdata=pix_relu,
                                            out_channel=self._embedding_dims,
                                            kernel_size=1,
                                            use_bias=False,
                                            name='pix_embedding_conv')
                pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                                   pix_embedding.get_shape().as_list()[2])
                instance_segmentation_loss, l_var, l_dist, l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, self._embedding_dims,
                        pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                    )

            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'gn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': instance_segmentation_loss
            }

        return ret
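
# A hedged sketch (an assumption; _multi_category_focal_loss is not shown in these snippets) of a
# class-weighted multi-category focal loss matching the call above:
# FL = -alpha_c * (1 - p_c)^gamma * log(p_c), summed over classes with one-hot labels. The gamma
# default is the usual focal-loss choice, not a value taken from the source.
import tensorflow as tf

def _multi_category_focal_loss(onehot_labels, logits, classes_weights, gamma=2.0):
    """onehot_labels, logits: (B, H, W, C); classes_weights: length-C per-class weights (alpha)."""
    epsilon = 1e-7
    probs = tf.clip_by_value(tf.nn.softmax(logits, axis=-1), epsilon, 1.0 - epsilon)
    alpha = tf.convert_to_tensor(classes_weights, dtype=tf.float32)
    focal_weight = tf.pow(1.0 - probs, gamma)  # down-weight already well-classified pixels
    per_pixel_loss = -tf.reduce_sum(alpha * focal_weight * onehot_labels * tf.log(probs), axis=-1)
    return tf.reduce_mean(per_pixel_loss)
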
    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        计算LaneNet模型损失函数
        :param input_tensor:
        :param binary_label:
        :param instance_label:
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor, name='inference')
            # compute the binary segmentation loss
            decode_logits = inference_ret['logits']
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2]])
            # add class weights
            unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            inverse_weights = tf.divide(1.0,
                                        tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                                                      tf.constant(1.02))))
            inverse_weights = tf.gather(inverse_weights, binary_label)
            binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label, logits=decode_logits, weights=inverse_weights)
            binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)

            # compute the discriminative loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4, kernel_size=1,
                                        use_bias=False, name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding, name='pix_embedding_relu')
            # compute the discriminative loss
            image_shape = (pix_embedding.get_shape().as_list()[1], pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape, 0.5, 3.0, 1.0, 1.0, 0.001)

            # combine the losses
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

            return ret
    def compute_loss(self,
                     binary_seg_logits,
                     binary_label,
                     instance_seg_logits,
                     instance_label,
                     name,
                     reuse,
                     need_layer_norm=True):
        """
        compute lanenet loss
        :param binary_seg_logits: 256x512x2
        :param binary_label: 256x512x1
        :param instance_seg_logits: 256x512x64
        :param instance_label: # 256x512x1
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # calculate class weighted binary seg loss
            with tf.variable_scope(name_or_scope='binary_seg'):
                # binary_label_onehot = tf.one_hot(
                #     tf.reshape(
                #         tf.cast(binary_label, tf.int32),
                #         shape=[binary_label.get_shape().as_list()[0],
                #                binary_label.get_shape().as_list()[1],
                #                binary_label.get_shape().as_list()[2]]),
                #     depth=CFG.TRAIN.CLASSES_NUMS,
                #     axis=-1
                # ) # 256x512x1 -> 256x512x2(one-hot)

                binary_label_onehot = tf.one_hot(
                    tf.cast(binary_label, tf.int32)[:, :, :, 0],
                    depth=CFG.TRAIN.CLASSES_NUMS,
                    axis=-1)  # 256x512x1 -> 256x512x2(one-hot)

                # binary_label_plain = tf.reshape(
                #     binary_label,
                #     shape=[binary_label.get_shape().as_list()[0] *
                #            binary_label.get_shape().as_list()[1] *
                #            binary_label.get_shape().as_list()[2] *
                #            binary_label.get_shape().as_list()[3]])

                binary_label_plain = tf.reshape(binary_label, shape=[
                    -1,
                ])  # flatten to a 1-D vector
                unique_labels, unique_id, counts = tf.unique_with_counts(
                    binary_label_plain)
                counts = tf.cast(counts, tf.float32)  # number of pixels per class
                inverse_weights = tf.divide(
                    1.0,
                    tf.log(
                        tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                               tf.constant(
                                   1.02))))  # 1/log(counts/all_counts + 1.02)

                binary_segmentation_loss = self._compute_class_weighted_cross_entropy_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)

            # calculate class weighted instance seg loss
            with tf.variable_scope(name_or_scope='instance_seg'):
                if need_layer_norm:
                    instance_seg_logits = self.layerbn(
                        inputdata=instance_seg_logits,
                        is_training=self._is_training,
                        name='pix_bn')
                pix_bn = instance_seg_logits
                pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
                pix_embedding = self.conv2d(
                    inputdata=pix_relu,
                    out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    kernel_size=1,
                    use_bias=False,
                    name='pix_embedding_conv')

                instance_segmentation_loss, l_var, l_dist, l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                        delta_v=0.5, delta_d=3.0, param_var=1.0, param_dist=1.0, param_reg=0.001
                    )

            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'batchnorm' in vv.name or 'batch_norm' in vv.name \
                        or 'batch_normalization' in vv.name or 'gn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = binary_segmentation_loss + instance_segmentation_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmentation_loss,
                'discriminative_loss': instance_segmentation_loss,
                'l2_reg_loss': l2_reg_loss
            }

        return ret
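
# An equivalent, more concise way (a sketch, not from the source) of building the manual L2 weight
# decay used in these snippets: collect tf.nn.l2_loss over the trainable variables that are not
# normalisation parameters, then sum them once with tf.add_n. The keyword list is an assumption.
import tensorflow as tf

def l2_regularization_loss(weight_decay=0.001,
                           skip_keywords=('bn', 'gn', 'batch_norm', 'batch_normalization')):
    l2_terms = [tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if not any(keyword in v.name for keyword in skip_keywords)]
    if not l2_terms:
        return tf.constant(0.0, tf.float32)
    return weight_decay * tf.add_n(l2_terms)
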
    def compute_loss(self, binary_seg_logits, binary_label,
                     instance_seg_logits, instance_label, name, reuse):
        """
        compute lanenet loss
        :param binary_seg_logits:
        :param binary_label:
        :param instance_seg_logits:
        :param instance_label:
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # calculate class weighted binary seg loss
            with tf.variable_scope(name_or_scope='binary_seg'):
                binary_label_onehot = tf.one_hot(tf.reshape(
                    tf.cast(binary_label, tf.int32),
                    shape=[
                        binary_label.get_shape().as_list()[0],
                        binary_label.get_shape().as_list()[1],
                        binary_label.get_shape().as_list()[2]
                    ]),
                                                 depth=2,
                                                 axis=-1)

                classes_weights = [1.4506131276238088, 21.525424601474068]
                binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=classes_weights)

            # calculate class weighted instance seg loss
            with tf.variable_scope(name_or_scope='instance_seg'):

                pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                      is_training=self._is_training,
                                      name='pix_bn')
                pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
                pix_embedding = self.conv2d(
                    inputdata=pix_relu,
                    out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    kernel_size=1,
                    use_bias=False,
                    name='pix_embedding_conv')
                pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                                   pix_embedding.get_shape().as_list()[2])
                disc_loss, l_var, l_dist, l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                        pix_image_shape, 0.5, 3.5, 1.0, 1.0, 0.001
                    )

            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'gn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

        return ret
    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        计算LaneNet模型损失函数
        :param input_tensor:原图[256,512,3]
        :param binary_label:[256,512,1]
        :param instance_label:[256,512]
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor, name='inference')
            # compute the binary segmentation loss
            decode_logits = inference_ret['logits']  # 256, 512, 2
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2]])  # flatten into a 1-D 0/1 vector of length 256x512x1
            # add class weights
            # tf.unique_with_counts over a 1-D tensor X returns: (the set Y of all distinct values in X,
            # the index into Y of every element of X, and the number of times each element of Y occurs in X)
            unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
            counts = tf.cast(counts, tf.float32)  # number of pixels per class
            inverse_weights = tf.divide(1.0,
                                        tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                                                      tf.constant(1.02))))
            # 1 / log(1/counts + 1.02)
            # tf.gather picks out the entries of a tensor at the positions given by a 1-D index array, e.g.:
            #     b = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
            #     index_b = tf.Variable([2, 4, 6, 8])
            #     sess.run(tf.gather(b, index_b))  # [3 5 7 9]
            inverse_weights = tf.gather(inverse_weights, binary_label)
            # weights: the loss coefficients; must be a scalar or broadcastable to labels
            # (i.e. the same rank, with every dimension either 1 or matching)
            binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label, logits=decode_logits, weights=inverse_weights)
            binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)

            # compute the discriminative loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4, kernel_size=1,
                                        use_bias=False, name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding, name='pix_embedding_relu')
            # compute the discriminative loss; see lanenet_discriminative_loss.py for details
            image_shape = (pix_embedding.get_shape().as_list()[1], pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape, 0.5, 3.0, 1.0, 1.0, 0.001)

            # combine the losses
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

            return ret
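
# A tiny demo (illustrative only) of the tf.gather step used above: indexing a vector of per-class
# weights with the label map yields a per-pixel weight map of the same shape as the labels.
import tensorflow as tf

class_weights = tf.constant([1.5, 10.0])                       # weight for class 0, class 1
label_map = tf.constant([[0, 1, 1], [0, 0, 1]], dtype=tf.int32)
pixel_weights = tf.gather(class_weights, label_map)            # shape (2, 3)
with tf.Session() as sess:
    print(sess.run(pixel_weights))                             # [[1.5 10. 10.] [1.5 1.5 10.]]
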
    def compute_loss(self, binary_seg_logits, binary_label,
                     instance_seg_logits, instance_label, name, reuse):
        """
        compute lanenet loss
        :param binary_seg_logits:
        :param binary_label:
        :param instance_seg_logits:
        :param instance_label:
        :param name:
        :param reuse:
        :return:
        """
        with tf.variable_scope(name_or_scope=name, reuse=reuse):
            # calculate class weighted binary seg loss
            with tf.variable_scope(name_or_scope='binary_seg'):
                binary_label_onehot = tf.one_hot(
                    tf.reshape(tf.cast(binary_label, tf.int32),
                               shape=[
                                   binary_label.get_shape().as_list()[0],
                                   binary_label.get_shape().as_list()[1],
                                   binary_label.get_shape().as_list()[2]
                               ]),
                    depth=CFG.TRAIN.CLASSES_NUMS,  # 2
                    axis=-1)
                """
                indices是一个列表,指定张量中独热向量的独热位置,或者说indeces是非负整数表示的标签列表。len(indices)就是分类的类别数。
                tf.one_hot返回的张量的阶数为indeces的阶数+1。
                当indices的某个分量取-1时,即对应的向量没有独热值。
                depth是每个独热向量的维度
                on_value是独热值
                off_value是非独热值
                axis指定第几阶为depth维独热向量,默认为-1,即,指定张量的最后一维为独热向量
                example:
                    labels = [0, 2, -1, 1]
                    # labels是shape=(4,)的张量。则返回的targets是shape=(len(labels), depth)张量。
                    # 且这种情况下,axis=-1等价于axis=1
                    targets = tf.one_hot(indices=labels, depth=5, on_value=1.0, off_value=0.0, axis=-1)
                    with tf.Session() as sess:
                        print(sess.run(targets))
                    [[ 1.  0.  0.  0.  0.]
                     [ 0.  0.  1.  0.  0.]
                     [ 0.  0.  0.  0.  0.]
                     [ 0.  1.  0.  0.  0.]]
                """

                binary_label_plain = tf.reshape(
                    binary_label,
                    shape=[
                        binary_label.get_shape().as_list()[0] *
                        binary_label.get_shape().as_list()[1] *
                        binary_label.get_shape().as_list()[2] *
                        binary_label.get_shape().as_list()[3]
                    ])  # flatten to 1-D
                unique_labels, unique_id, counts = tf.unique_with_counts(
                    binary_label_plain)
                """返回值
                一个张量 y,该张量包含出现在 x 中的以相同顺序排序的 x 的所有的唯一元素.
                一个与 x 具有相同大小的张量 idx,包含唯一的输出 y 中 x 的每个值的索引.
                一个张量 count,其中包含 x 中 y 的每个元素的计数
                """
                counts = tf.cast(counts, tf.float32)
                inverse_weights = tf.divide(
                    1.0,
                    tf.log(
                        tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                               tf.constant(
                                   1.02))))  # bounded inverse class weight
                # 1/log(counts/all_counts + 1.02)

                #  the loss uses cross entropy with class weights to handle the imbalanced sample
                #  distribution (far fewer lane pixels than background pixels)
                binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)

            # calculate class weighted instance seg loss
            with tf.variable_scope(name_or_scope='instance_seg'):

                pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                      is_training=self._is_training,
                                      name='pix_bn')
                pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
                pix_embedding = self.conv2d(
                    inputdata=pix_relu,
                    out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,  # 4
                    kernel_size=1,
                    use_bias=False,
                    name='pix_embedding_conv')
                pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                                   pix_embedding.get_shape().as_list()[2])
                instance_segmentation_loss, l_var, l_dist, l_reg = \
                    lanenet_discriminative_loss.discriminative_loss(
                        pix_embedding, instance_label, CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                        pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                    )

            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name or 'gn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': binary_seg_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': instance_segmentation_loss
            }

        return ret
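
# A tiny demo (illustrative only) of tf.unique_with_counts as used above to count the pixels of
# each class in the flattened binary label.
import tensorflow as tf

flat_labels = tf.constant([0, 0, 1, 0, 1, 1, 1, 0])
y, idx, count = tf.unique_with_counts(flat_labels)
with tf.Session() as sess:
    print(sess.run([y, idx, count]))   # [0, 1], [0, 0, 1, 0, 1, 1, 1, 0], [4, 4]
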
    def compute_loss(self, input_tensor, binary_label, instance_label,
                     ignore_label, name):
        """
        计算LaneNet模型损失函数
        :param input_tensor:
        :param binary_label:
        :param instance_label:
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')

            # compute the discriminative loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv,
                                        out_channel=4,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')

            # compute the binary segmentation loss
            decode_logits = inference_ret['logits']
            # decode_logits = tf.concat([pix_embedding, decode_deconv], axis=-1)
            # decode_logits = tf.concat([pix_embedding, decode_deconv], axis=-1)
            # decode_logits = self.conv2d(inputdata=decode_logits, out_channel=2,
            #                             kernel_size=1, use_bias=False, name='score_final')

            zeros = tf.zeros(tf.shape(binary_label))
            # binary_label = tf.cast(binary_label, tf.int64)
            zeros = tf.cast(zeros, tf.int64)
            binary_label_f = tf.where(tf.equal(binary_label, ignore_label),
                                      zeros, binary_label)

            binary_label_plain = tf.reshape(
                binary_label_f,
                shape=[
                    binary_label_f.get_shape().as_list()[0] *
                    binary_label_f.get_shape().as_list()[1] *
                    binary_label_f.get_shape().as_list()[2]
                ])
            # add class weights
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            inverse_weights = tf.divide(
                1.0,
                tf.log(
                    tf.add(tf.divide(tf.constant(1.0), counts),
                           tf.constant(1.02))))

            inverse_weights = tf.gather(inverse_weights, binary_label_f)
            zeros = tf.zeros(tf.shape(inverse_weights))
            inverse_weights = tf.where(tf.equal(binary_label, ignore_label), zeros,
                                       inverse_weights)

            binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label_f,
                logits=decode_logits,
                weights=inverse_weights)
            # binary_segmenatation_loss = tf.Print(binary_segmenatation_loss, [binary_segmenatation_loss], summarize=10,
            #                                      message="binary losses: ")
            binary_segmenatation_loss = tf.reduce_mean(
                binary_segmenatation_loss)

            # compute the discriminative loss
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape, 0.5, 3.0, 1.0, 1.0, 0.001)

            # asd = tf.Print(disc_loss, [disc_loss, l_var, l_dist, l_reg],
            #                      message="disc_loss, l_var, l_dist, l_reg: ")
            # asd *= 0
            # tf.losses.add_loss(asd, "")

            # combine the losses
            if self._net_flag != "mobilenet":
                # bad way to do reg loss
                l2_reg_loss = tf.constant(0.0, tf.float32)
                for vv in tf.trainable_variables():
                    if 'bn' in vv.name:
                        continue
                    else:
                        l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
                l2_reg_loss *= 0.001
                total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss

            elif self._net_flag == "mobilenet":
                reg_losses = tf.contrib.slim.losses.get_regularization_losses()
                reg_loss_encode = tf.add_n(reg_losses, name="reg_loss_encode")

                decode_var_list = []
                for decode_var in tf.trainable_variables():
                    if 'decode' in decode_var.name:
                        decode_var_list.append(tf.nn.l2_loss(decode_var))
                reg_loss_decode = tf.add_n(decode_var_list)

                reg_loss = tf.add(reg_loss_encode,
                                  reg_loss_decode,
                                  name="reg_loss")

                tf.losses.add_loss(binary_segmenatation_loss,
                                   "binary_segmenatation_loss")
                tf.losses.add_loss(disc_loss, "disc_loss")
                tf.losses.add_loss(reg_loss, "reg_loss")

                total_loss = 0.6 * binary_segmenatation_loss + 0.4 * disc_loss + reg_loss * 0.001
                tf.losses.add_loss(total_loss, "total_loss")

            # tf.Print(total_loss, [tf.shape(total_loss)], message="total_loss: ")

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmenatation_loss,
                'discriminative_loss': disc_loss
            }

            return ret
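
# A small illustrative sketch (not from the source) of the ignore-label handling above: pixels
# equal to ignore_label are remapped to class 0 and given a zero loss weight, so they contribute
# nothing to the weighted cross entropy. The label values here are placeholders.
import tensorflow as tf

labels = tf.constant([[0, 1, 255], [1, 255, 0]], dtype=tf.int64)   # 255 plays the ignore_label role
ignore_mask = tf.equal(labels, tf.constant(255, dtype=tf.int64))
safe_labels = tf.where(ignore_mask, tf.zeros_like(labels), labels)
loss_weights = tf.where(ignore_mask,
                        tf.zeros_like(labels, dtype=tf.float32),
                        tf.ones_like(labels, dtype=tf.float32))
with tf.Session() as sess:
    print(sess.run([safe_labels, loss_weights]))
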
    def compute_loss(self, input_tensor, binary_label, instance_label, name):
        """
        计算LaneNet模型损失函数
        :param input_tensor:
        :param binary_label:
        :param instance_label:
        :param name:
        :return:
        """
        with tf.variable_scope(name):
            # forward pass to obtain the logits
            inference_ret = self._build_model(input_tensor=input_tensor,
                                              name='inference')
            # compute the binary segmentation loss
            decode_logits = inference_ret['logits']
            binary_label_plain = tf.reshape(  # flatten the binary label into a 1-D tensor
                binary_label,
                shape=[
                    binary_label.get_shape().as_list()[0] *
                    binary_label.get_shape().as_list()[1] *
                    binary_label.get_shape().as_list()[2]
                ])
            # add class weights
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)

            # original inv_weights
            # inverse_weights = tf.divide(1.0,
            #                             tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
            #                                           tf.constant(1.02))))
            # inverse_weight = 1 / log(1/counts + 1.02)  # for large pixel counts this is nearly
            # constant (about 1/log(1.02)), so it barely re-weights the classes

            # modified inv_weights
            counts_sum = tf.reduce_sum(counts)
            weights = tf.divide(counts, counts_sum)
            inverse_weights = tf.multiply(tf.constant(1.0),
                                          tf.divide(1, weights))  # 25, 6.25,

            inverse_weights = tf.gather(inverse_weights, binary_label)

            inverse_weights = tf.divide(
                25.0,  # 1.0
                tf.log(
                    tf.add(tf.divide(tf.constant(1.0), inverse_weights),
                           tf.constant(1.02))))

            binary_segmentation_loss = tf.losses.sparse_softmax_cross_entropy(
                labels=binary_label,
                logits=decode_logits,
                weights=inverse_weights)
            binary_segmentation_loss = tf.reduce_mean(binary_segmentation_loss)

            # compute the discriminative loss
            decode_deconv = inference_ret['deconv']
            # pixel embedding
            pix_embedding = self.conv2d(inputdata=decode_deconv,
                                        out_channel=4,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_embedding = self.relu(inputdata=pix_embedding,
                                      name='pix_embedding_relu')
            # compute the discriminative loss
            image_shape = (pix_embedding.get_shape().as_list()[1],
                           pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, 4, image_shape, 0.5, 3.0, 1.0, 1.0, 0.001)

            # combine the losses
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name:
                    continue  # batch norm parameters are not regularized
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001  # lambda=0.001
            total_loss = 0.5 * binary_segmentation_loss + 0.5 * disc_loss + l2_reg_loss

            ret = {
                'total_loss': total_loss,
                'binary_seg_logits': decode_logits,
                'instance_seg_logits': pix_embedding,
                'binary_seg_loss': binary_segmentation_loss,
                'discriminative_loss': disc_loss,
            }

            return ret
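
# A hedged simplification sketch (an assumption, not from the source): the chain above — take
# 1/freq per class, gather it per pixel, then apply 25 / log(1/x + 1.02) — reduces algebraically to
# 25 / log(freq_c + 1.02) for each pixel's ground-truth class c, i.e. a scaled bounded
# inverse-frequency weight.
import tensorflow as tf

def modified_pixel_weights(counts, binary_label, scale=25.0):
    """counts: float per-class pixel counts from tf.unique_with_counts; binary_label: integer label map."""
    freq = tf.divide(counts, tf.reduce_sum(counts))
    class_weights = tf.divide(scale, tf.log(tf.add(freq, 1.02)))
    return tf.gather(class_weights, binary_label)
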