def compute_loss(self, input_tensor, binary_label, instance_label, name):
    """
    Compute the LaneNet loss: equally-weighted sum of the binary-segmentation
    cross-entropy and the instance-embedding discriminative loss.

    :param input_tensor: input image tensor fed to the network
    :param binary_label: binary segmentation label map
                         (assumed NHW1 integer tensor — the trailing channel
                         dim is squeezed below; TODO confirm against caller)
    :param instance_label: instance segmentation label map
    :param name: variable-scope name for this loss graph
    :return: dict with 'total_loss', 'binary_seg_logits',
             'instance_seg_logits', 'binary_seg_loss', 'discriminative_loss'
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')

        # --- binary segmentation branch: per-pixel cross entropy ---
        decode_logits = inference_ret['logits']
        binary_segmenatation_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=decode_logits,
            # FIX: `squeeze_dims` is the deprecated alias of `axis`
            # (removed in newer TF releases); use `axis` instead.
            labels=tf.squeeze(binary_label, axis=[3]),
            name='entropy_loss')
        binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)

        # --- instance segmentation branch: discriminative loss ---
        decode_deconv = inference_ret['deconv']
        # Project decoder features to a 3-dim per-pixel embedding.
        pix_embedding = self.conv2d(inputdata=decode_deconv,
                                    out_channel=3,
                                    kernel_size=1,
                                    use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding,
                                  name='pix_embedding_relu')
        # Spatial (H, W) of the embedding map, taken from the static shape.
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 3, image_shape,
                0.5, 1.5, 1.0, 1.0, 0.001)

        # Combine both branches with equal weight.
        total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss

        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }

    return ret
def compute_loss(self, input_tensor, label, name):
    """
    Compute the (discriminative-only) loss for this model variant.

    :param input_tensor: input image tensor fed to the network
    :param label: 1D label image with different n lane with pix value
                  from [1] to [n], background pix value is [0]
    :param name: variable-scope name for this loss graph
    :return: dict with 'total_loss' (the discriminative loss), its three
             components, the raw decoder output and the pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder output.
        inference_ret = self.build_model(input_tensor=input_tensor,
                                         name='inference')
        # Compute the loss from the decoder feature map.
        decode_deconv = inference_ret['deconv']
        # Pixel embedding projection.
        # NOTE (from original author): for grayscale images the 3 here
        # should probably be changed to 1 — unverified.
        pix_embedding = self.conv2d(inputdata=decode_deconv,
                                    out_channel=3,
                                    kernel_size=1,
                                    use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding,
                                  name='pix_embedding_relu')
        # Discriminative loss over the embedding.
        # NOTE (from original author): same grayscale caveat applies to
        # the embedding dimension 3 passed below.
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, label, 3, image_shape,
                0.5, 1.5, 1.0, 1.0, 0.001)
        ret = {
            'total_loss': disc_loss,
            'loss_var': l_var,
            'loss_dist': l_dist,
            'loss_reg': l_reg,
            'binary_seg_logits': decode_deconv,
            'embedding': pix_embedding
        }
    return ret
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label,
                 name, reuse):
    """
    Compute lanenet loss: class-weighted binary cross entropy plus the
    instance-embedding discriminative loss plus a small L2 penalty.

    :param binary_seg_logits: binary-segmentation logits
    :param binary_label: binary label map (rank-4, last dim squeezed into
                         the one-hot reshape below)
    :param instance_seg_logits: decoder features for the instance branch
    :param instance_label: instance label map
    :param name: variable-scope name
    :param reuse: whether to reuse variables in that scope
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            # Drop the trailing channel dim and one-hot encode the labels.
            binary_label_onehot = tf.one_hot(tf.reshape(
                tf.cast(binary_label, tf.int32),
                shape=[binary_label.get_shape().as_list()[0],
                       binary_label.get_shape().as_list()[1],
                       binary_label.get_shape().as_list()[2]]),
                depth=CFG.TRAIN.CLASSES_NUMS,
                axis=-1)
            # Flatten the label map to count per-class pixel frequencies.
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2] *
                       binary_label.get_shape().as_list()[3]])
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            # Bounded inverse class weight: 1 / log(freq + 1.02).
            # The 1.02 offset keeps the log positive and the weight bounded.
            inverse_weights = tf.divide(
                1.0,
                tf.log(tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                              tf.constant(1.02))))
            binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                onehot_labels=binary_label_onehot,
                logits=binary_seg_logits,
                classes_weights=inverse_weights)
        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            # BN -> ReLU -> 1x1 conv projects features to the embedding space.
            pix_bn = tf.layers.batch_normalization(
                inputs=instance_seg_logits,
                training=self._is_training,
                name='pix_bn')
            pix_relu = tf.nn.relu(pix_bn, name='pix_relu')
            pix_embedding = tf.layers.conv2d(
                inputs=pix_relu,
                filters=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                padding='SAME',
                kernel_size=1,
                use_bias=False,
                name='pix_embedding_conv')
            pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                               pix_embedding.get_shape().as_list()[2])
            instance_segmentation_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label,
                    CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                )
        # L2 regularization over all trainable vars except norm-layer params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'gn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': instance_segmentation_loss
        }
    return ret
def compute_loss(self, input_tensor, binary_label, instance_label, name):
    """
    Compute the LaneNet loss: weighted binary cross entropy plus the
    discriminative loss plus a small L2 penalty.

    :param input_tensor: input image tensor fed to the network
    :param binary_label: binary segmentation label map
    :param instance_label: instance segmentation label map
    :param name: variable-scope name
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')
        decode_logits = inference_ret['logits']
        decode_deconv = inference_ret['deconv']
        # --- binary segmentation cross-entropy loss ---
        # Flatten the label map so per-class pixel counts can be taken.
        binary_label_plain = tf.reshape(
            binary_label,
            shape=[binary_label.get_shape().as_list()[0] *
                   binary_label.get_shape().as_list()[1] *
                   binary_label.get_shape().as_list()[2]])
        # Class weighting from inverse pixel frequency.
        unique_labels, unique_id, counts = tf.unique_with_counts(
            binary_label_plain)
        counts = tf.cast(counts, tf.float32)
        # Weight = 1 / log(1/counts + 1.02).
        # NOTE(review): sibling variants use counts / sum(counts) inside the
        # log (bounded inverse class weighting); this one uses 1/counts —
        # possibly intentional, but worth confirming.
        inverse_weights = tf.divide(
            1.0,
            tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                          tf.constant(1.02))))
        # Override the weight of class 0 with a fixed value of 5.
        inverse_weights = tf.concat(
            [tf.constant([5.]), inverse_weights[1:]], axis=0)
        # Expand per-class weights to a per-pixel weight map.
        inverse_weights = tf.gather(inverse_weights, binary_label)
        binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
            labels=binary_label,
            logits=decode_logits,
            weights=inverse_weights)
        binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)
        # --- discriminative loss over the pixel embedding ---
        pix_embedding = self.conv2d(inputdata=decode_deconv,
                                    out_channel=4,
                                    kernel_size=1,
                                    use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding,
                                  name='pix_embedding_relu')
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 4, image_shape,
                0.5, 3.0, 1.0, 1.0, 0.001)
        # --- combine losses ---
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            # NOTE(review): Python precedence makes this condition
            # A or B or (C and D) — i.e. 'batch_norm' vars are skipped only
            # when their name also contains 'alpha'. If the intent was
            # (A or B or C) and D, parentheses are missing — confirm intent
            # before changing.
            if 'bn' in vv.name or 'batchnorm' in vv.name or 'batch_norm' in vv.name\
                    and 'alpha' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = 0.5 * binary_segmenatation_loss + \
            0.5 * disc_loss + l2_reg_loss
        # total_loss = binary_segmenatation_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }
    return ret
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label,
                 name, reuse):
    """
    Compute lanenet loss: configurable binary segmentation loss (cross
    entropy or focal) plus the discriminative loss plus an L2 penalty.

    :param binary_seg_logits: binary-segmentation logits
    :param binary_label: binary label map (rank-4)
    :param instance_seg_logits: decoder features for the instance branch
    :param instance_label: instance label map
    :param name: variable-scope name
    :param reuse: whether to reuse variables in that scope
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            # Drop the trailing channel dim and one-hot encode the labels.
            binary_label_onehot = tf.one_hot(tf.reshape(
                tf.cast(binary_label, tf.int32),
                shape=[binary_label.get_shape().as_list()[0],
                       binary_label.get_shape().as_list()[1],
                       binary_label.get_shape().as_list()[2]]),
                depth=self._class_nums,
                axis=-1)
            # Flatten the labels to count per-class pixel frequencies.
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2] *
                       binary_label.get_shape().as_list()[3]])
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            # Bounded inverse class weight: 1 / log(freq + 1.02).
            inverse_weights = tf.divide(
                1.0,
                tf.log(tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                              tf.constant(1.02))))
            # Dispatch on the configured binary loss type.
            if self._binary_loss_type == 'cross_entropy':
                binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)
            elif self._binary_loss_type == 'focal':
                binary_segmenatation_loss = self._multi_category_focal_loss(
                    onehot_labels=binary_label_onehot,
                    logits=binary_seg_logits,
                    classes_weights=inverse_weights)
            else:
                raise NotImplementedError
        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            # BN -> ReLU -> 1x1 conv projects features to the embedding space.
            pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                  is_training=self._is_training,
                                  name='pix_bn')
            pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
            pix_embedding = self.conv2d(inputdata=pix_relu,
                                        out_channel=self._embedding_dims,
                                        kernel_size=1,
                                        use_bias=False,
                                        name='pix_embedding_conv')
            pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                               pix_embedding.get_shape().as_list()[2])
            instance_segmentation_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label, self._embedding_dims,
                    pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                )
        # L2 regularization over all trainable vars except norm-layer params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'gn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': instance_segmentation_loss
        }
    return ret
def compute_loss(self, input_tensor, binary_label, instance_label, name):
    """
    Compute the LaneNet loss: weighted binary cross entropy plus the
    discriminative loss plus a small L2 penalty.

    :param input_tensor: input image tensor fed to the network
    :param binary_label: binary segmentation label map
    :param instance_label: instance segmentation label map
    :param name: variable-scope name
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')
        # --- binary segmentation cross-entropy loss ---
        decode_logits = inference_ret['logits']
        # Flatten the label map so per-class pixel counts can be taken.
        binary_label_plain = tf.reshape(
            binary_label,
            shape=[binary_label.get_shape().as_list()[0] *
                   binary_label.get_shape().as_list()[1] *
                   binary_label.get_shape().as_list()[2]])
        # Class weighting from inverse pixel frequency.
        unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
        counts = tf.cast(counts, tf.float32)
        # Weight = 1 / log(1/counts + 1.02).
        # NOTE(review): sibling variants use counts / sum(counts) inside the
        # log (bounded inverse class weighting); confirm this 1/counts form
        # is intentional.
        inverse_weights = tf.divide(1.0,
                                    tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                                                  tf.constant(1.02))))
        # Expand per-class weights to a per-pixel weight map.
        inverse_weights = tf.gather(inverse_weights, binary_label)
        binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
            labels=binary_label, logits=decode_logits, weights=inverse_weights)
        binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)
        # --- discriminative loss over the pixel embedding ---
        decode_deconv = inference_ret['deconv']
        # 1x1 conv projects decoder features to a 4-dim embedding.
        pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4,
                                    kernel_size=1, use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding, name='pix_embedding_relu')
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 4, image_shape,
                0.5, 3.0, 1.0, 1.0, 0.001)
        # --- combine losses ---
        # L2 regularization over all trainable vars except batch-norm params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }
    return ret
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label,
                 name, reuse, need_layer_norm=True):
    """
    Compute lanenet loss.

    :param binary_seg_logits: 256x512x2
    :param binary_label: 256x512x1
    :param instance_seg_logits: 256x512x64
    :param instance_label: 256x512x1
    :param name: variable-scope name
    :param reuse: whether to reuse variables in that scope
    :param need_layer_norm: apply BN to the instance features before the
                            embedding projection (default True)
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            # 256x512x1 -> 256x512x2 (one-hot). Indexing [:, :, :, 0] drops
            # the trailing channel dim before one-hot encoding.
            binary_label_onehot = tf.one_hot(
                tf.cast(binary_label, tf.int32)[:, :, :, 0],
                depth=CFG.TRAIN.CLASSES_NUMS,
                axis=-1)
            # Flatten labels to count per-class pixel frequencies.
            binary_label_plain = tf.reshape(binary_label, shape=[
                -1,
            ])
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)  # pixel count per class
            # Bounded inverse class weight: 1 / log(counts/all_counts + 1.02)
            inverse_weights = tf.divide(
                1.0,
                tf.log(
                    tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                           tf.constant(1.02))))
            binary_segmentation_loss = self._compute_class_weighted_cross_entropy_loss(
                onehot_labels=binary_label_onehot,
                logits=binary_seg_logits,
                classes_weights=inverse_weights)
        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            # Optional BN on the instance features, then ReLU + 1x1 conv to
            # project into the embedding space.
            if need_layer_norm:
                instance_seg_logits = self.layerbn(
                    inputdata=instance_seg_logits,
                    is_training=self._is_training,
                    name='pix_bn')
            pix_bn = instance_seg_logits
            pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
            pix_embedding = self.conv2d(
                inputdata=pix_relu,
                out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                kernel_size=1,
                use_bias=False,
                name='pix_embedding_conv')
            # NOTE(review): unlike sibling variants, this call passes no
            # image_shape positional — presumably this project's
            # discriminative_loss has a different signature; confirm.
            instance_segmentation_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label,
                    CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    delta_v=0.5, delta_d=3.0,
                    param_var=1.0, param_dist=1.0, param_reg=0.001
                )
        # L2 regularization over all trainable vars except norm-layer params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'batchnorm' in vv.name or 'batch_norm' in vv.name \
                    or 'batch_normalization' in vv.name or 'gn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = binary_segmentation_loss + instance_segmentation_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmentation_loss,
            'discriminative_loss': instance_segmentation_loss,
            'l2_reg_loss': l2_reg_loss
        }
    return ret
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label,
                 name, reuse):
    """
    Compute lanenet loss with fixed (precomputed) binary class weights.

    :param binary_seg_logits: binary-segmentation logits
    :param binary_label: binary label map
    :param instance_seg_logits: decoder features for the instance branch
    :param instance_label: instance label map
    :param name: variable-scope name
    :param reuse: whether to reuse variables in that scope
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            # Drop the trailing channel dim and one-hot encode (2 classes).
            binary_label_onehot = tf.one_hot(tf.reshape(
                tf.cast(binary_label, tf.int32),
                shape=[binary_label.get_shape().as_list()[0],
                       binary_label.get_shape().as_list()[1],
                       binary_label.get_shape().as_list()[2]]),
                depth=2,
                axis=-1)
            # Fixed class weights (background, lane) — presumably
            # precomputed offline from the training-set pixel statistics;
            # TODO confirm their provenance.
            classes_weights = [1.4506131276238088, 21.525424601474068]
            binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                onehot_labels=binary_label_onehot,
                logits=binary_seg_logits,
                classes_weights=classes_weights)
        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            # BN -> ReLU -> 1x1 conv projects features to the embedding space.
            pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                  is_training=self._is_training,
                                  name='pix_bn')
            pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
            pix_embedding = self.conv2d(
                inputdata=pix_relu,
                out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                kernel_size=1,
                use_bias=False,
                name='pix_embedding_conv')
            pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                               pix_embedding.get_shape().as_list()[2])
            disc_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label,
                    CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    pix_image_shape, 0.5, 3.5, 1.0, 1.0, 0.001
                )
        # L2 regularization over all trainable vars except norm-layer params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'gn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }
    return ret
def compute_loss(self, input_tensor, binary_label, instance_label, name):
    """
    Compute the LaneNet loss.

    :param input_tensor: source image, [256, 512, 3]
    :param binary_label: binary label map, [256, 512, 1]
    :param instance_label: instance label map, [256, 512]
    :param name: variable-scope name
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')
        # --- binary segmentation cross-entropy loss ---
        decode_logits = inference_ret['logits']  # [256, 512, 2]
        # Flatten the 0/1 label map (256x512x1) into a 1-D vector.
        binary_label_plain = tf.reshape(
            binary_label,
            shape=[binary_label.get_shape().as_list()[0] *
                   binary_label.get_shape().as_list()[1] *
                   binary_label.get_shape().as_list()[2]])
        # Class weighting.
        # tf.unique_with_counts on a 1-D tensor X returns:
        # (the unique values Y of X, the index of each element of X in Y,
        #  the occurrence count of each value of Y in X)
        unique_labels, unique_id, counts = tf.unique_with_counts(binary_label_plain)
        counts = tf.cast(counts, tf.float32)  # pixel count per class
        # Weight = 1 / log(1/counts + 1.02).
        inverse_weights = tf.divide(1.0,
                                    tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                                                  tf.constant(1.02))))
        # tf.gather picks the vectors of a tensor at the given 1-D indices:
        #   b = tf.Variable([1,2,3,4,5,6,7,8,9,10])
        #   index_b = tf.Variable([2,4,6,8])
        #   sess.run(tf.gather(b, index_b))  # [3 5 7 9]
        # Here it expands per-class weights into a per-pixel weight map.
        inverse_weights = tf.gather(inverse_weights, binary_label)
        # weights: loss coefficient — must be a scalar or broadcastable
        # to `labels` (same rank, each dim either 1 or matching).
        binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
            labels=binary_label, logits=decode_logits, weights=inverse_weights)
        binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)
        # --- discriminative loss ---
        decode_deconv = inference_ret['deconv']
        # Pixel embedding: 1x1 conv to a 4-dim per-pixel feature.
        pix_embedding = self.conv2d(inputdata=decode_deconv, out_channel=4,
                                    kernel_size=1, use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding, name='pix_embedding_relu')
        # Discriminative loss — see lanenet_discriminative_loss.py for details.
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 4, image_shape,
                0.5, 3.0, 1.0, 1.0, 0.001)
        # --- combine losses ---
        # L2 regularization over all trainable vars except batch-norm params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }
    return ret
def compute_loss(self, binary_seg_logits, binary_label,
                 instance_seg_logits, instance_label,
                 name, reuse):
    """
    Compute lanenet loss: class-weighted binary cross entropy plus the
    discriminative loss plus a small L2 penalty.

    :param binary_seg_logits: binary-segmentation logits
    :param binary_label: binary label map (rank-4)
    :param instance_seg_logits: decoder features for the instance branch
    :param instance_label: instance label map
    :param name: variable-scope name
    :param reuse: whether to reuse variables in that scope
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        # calculate class weighted binary seg loss
        with tf.variable_scope(name_or_scope='binary_seg'):
            # Drop the trailing channel dim and one-hot encode the labels.
            # tf.one_hot: `indices` holds the hot positions (the class
            # labels); the result gains one extra axis of size `depth`
            # placed at `axis` (default -1, i.e. last). An index of -1
            # produces an all-off vector.
            binary_label_onehot = tf.one_hot(
                tf.reshape(tf.cast(binary_label, tf.int32),
                           shape=[binary_label.get_shape().as_list()[0],
                                  binary_label.get_shape().as_list()[1],
                                  binary_label.get_shape().as_list()[2]]),
                depth=CFG.TRAIN.CLASSES_NUMS,  # 2
                axis=-1)
            # Flatten to 1-D so per-class pixel counts can be taken.
            binary_label_plain = tf.reshape(
                binary_label,
                shape=[binary_label.get_shape().as_list()[0] *
                       binary_label.get_shape().as_list()[1] *
                       binary_label.get_shape().as_list()[2] *
                       binary_label.get_shape().as_list()[3]])
            # tf.unique_with_counts returns: the unique values y of x in
            # encounter order, an index tensor idx the same size as x
            # mapping each element of x into y, and a count tensor giving
            # each y element's number of occurrences in x.
            unique_labels, unique_id, counts = tf.unique_with_counts(
                binary_label_plain)
            counts = tf.cast(counts, tf.float32)
            # Bounded inverse class weight: 1 / log(counts/all_counts + 1.02).
            # Weighted cross entropy counters the class imbalance (lane
            # pixels are far fewer than background pixels).
            inverse_weights = tf.divide(
                1.0,
                tf.log(
                    tf.add(tf.divide(counts, tf.reduce_sum(counts)),
                           tf.constant(1.02))))
            binary_segmenatation_loss = self._compute_class_weighted_cross_entropy_loss(
                onehot_labels=binary_label_onehot,
                logits=binary_seg_logits,
                classes_weights=inverse_weights)
        # calculate class weighted instance seg loss
        with tf.variable_scope(name_or_scope='instance_seg'):
            # BN -> ReLU -> 1x1 conv projects features to the embedding space.
            pix_bn = self.layerbn(inputdata=instance_seg_logits,
                                  is_training=self._is_training,
                                  name='pix_bn')
            pix_relu = self.relu(inputdata=pix_bn, name='pix_relu')
            pix_embedding = self.conv2d(
                inputdata=pix_relu,
                out_channel=CFG.TRAIN.EMBEDDING_FEATS_DIMS,  # 4
                kernel_size=1,
                use_bias=False,
                name='pix_embedding_conv')
            pix_image_shape = (pix_embedding.get_shape().as_list()[1],
                               pix_embedding.get_shape().as_list()[2])
            instance_segmentation_loss, l_var, l_dist, l_reg = \
                lanenet_discriminative_loss.discriminative_loss(
                    pix_embedding, instance_label,
                    CFG.TRAIN.EMBEDDING_FEATS_DIMS,
                    pix_image_shape, 0.5, 3.0, 1.0, 1.0, 0.001
                )
        # L2 regularization over all trainable vars except norm-layer params.
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name or 'gn' in vv.name:
                continue
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001
        total_loss = binary_segmenatation_loss + instance_segmentation_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': binary_seg_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': instance_segmentation_loss
        }
    return ret
def compute_loss(self, input_tensor, binary_label, instance_label, ignore_label, name):
    """
    Compute the LaneNet loss with support for an ignore label: pixels
    equal to `ignore_label` are remapped to class 0 and given zero weight
    in the binary loss.

    :param input_tensor: input image tensor fed to the network
    :param binary_label: binary segmentation label map (int64 — it is
                         compared against an int64 zeros tensor below)
    :param instance_label: instance segmentation label map
    :param ignore_label: label value to exclude from the binary loss
    :param name: variable-scope name
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')
        # --- instance embedding branch ---
        decode_deconv = inference_ret['deconv']
        # 1x1 conv projects decoder features to a 4-dim per-pixel embedding.
        pix_embedding = self.conv2d(inputdata=decode_deconv,
                                    out_channel=4,
                                    kernel_size=1,
                                    use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding,
                                  name='pix_embedding_relu')
        # --- binary segmentation branch ---
        decode_logits = inference_ret['logits']
        # Remap ignore-labelled pixels to class 0.
        zeros = tf.zeros(tf.shape(binary_label))
        zeros = tf.cast(zeros, tf.int64)
        binary_label_f = tf.where(tf.equal(binary_label, ignore_label),
                                  zeros, binary_label)
        # Flatten so per-class pixel counts can be taken.
        binary_label_plain = tf.reshape(
            binary_label_f,
            shape=[binary_label_f.get_shape().as_list()[0] *
                   binary_label_f.get_shape().as_list()[1] *
                   binary_label_f.get_shape().as_list()[2]])
        # Class weighting from inverse pixel frequency:
        # weight = 1 / log(1/counts + 1.02).
        unique_labels, unique_id, counts = tf.unique_with_counts(
            binary_label_plain)
        counts = tf.cast(counts, tf.float32)
        inverse_weights = tf.divide(
            1.0,
            tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
                          tf.constant(1.02))))
        # Expand per-class weights to a per-pixel weight map.
        inverse_weights = tf.gather(inverse_weights, binary_label_f)
        # Zero the weight of ignore-labelled pixels so they contribute
        # nothing to the loss.
        # BUG FIX: the original used Python `==` on tensors
        # (`binary_label == ignore_label`), which compares object identity
        # and evaluates to a plain Python False — the ignore mask was never
        # applied. tf.equal builds the intended element-wise comparison,
        # matching the tf.equal call used for binary_label_f above.
        zeros = tf.zeros(tf.shape(inverse_weights))
        inverse_weights = tf.where(tf.equal(binary_label, ignore_label),
                                   zeros, inverse_weights)
        binary_segmenatation_loss = tf.losses.sparse_softmax_cross_entropy(
            labels=binary_label_f,
            logits=decode_logits,
            weights=inverse_weights)
        binary_segmenatation_loss = tf.reduce_mean(binary_segmenatation_loss)
        # --- discriminative loss ---
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 4, image_shape,
                0.5, 3.0, 1.0, 1.0, 0.001)
        # --- combine losses (regularization depends on the backbone) ---
        if self._net_flag != "mobilenet":
            # Manual L2 over all trainable vars except batch-norm params.
            l2_reg_loss = tf.constant(0.0, tf.float32)
            for vv in tf.trainable_variables():
                if 'bn' in vv.name:
                    continue
                else:
                    l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
            l2_reg_loss *= 0.001
            total_loss = 0.5 * binary_segmenatation_loss + 0.5 * disc_loss + l2_reg_loss
        else:
            # MobileNet backbone: use slim's collected regularization losses
            # for the encoder plus manual L2 over the decoder variables.
            reg_losses = tf.contrib.slim.losses.get_regularization_losses()
            reg_loss_encode = tf.add_n(reg_losses, name="reg_loss_encode")
            decode_var_list = []
            for decode_var in tf.trainable_variables():
                if 'decode' in decode_var.name:
                    decode_var_list.append(tf.nn.l2_loss(decode_var))
            reg_loss_decode = tf.add_n(decode_var_list)
            reg_loss = tf.add(reg_loss_encode, reg_loss_decode,
                              name="reg_loss")
            tf.losses.add_loss(binary_segmenatation_loss,
                               "binary_segmenatation_loss")
            tf.losses.add_loss(disc_loss, "disc_loss")
            tf.losses.add_loss(reg_loss, "reg_loss")
            total_loss = 0.6 * binary_segmenatation_loss + 0.4 * disc_loss + reg_loss * 0.001
            tf.losses.add_loss(total_loss, "total_loss")
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmenatation_loss,
            'discriminative_loss': disc_loss
        }
    return ret
def compute_loss(self, input_tensor, binary_label, instance_label, name):
    """
    Compute the LaneNet loss with an experimental re-weighting of the
    binary cross entropy, plus the discriminative loss and an L2 penalty.

    :param input_tensor: input image tensor fed to the network
    :param binary_label: binary segmentation label map
    :param instance_label: instance segmentation label map
    :param name: variable-scope name
    :return: dict with total/partial losses, logits and pixel embedding
    """
    with tf.variable_scope(name):
        # Forward pass to obtain the decoder outputs.
        inference_ret = self._build_model(input_tensor=input_tensor,
                                          name='inference')
        # --- binary segmentation cross-entropy loss ---
        decode_logits = inference_ret['logits']
        # Expand the binary label into a 1-D tensor.
        binary_label_plain = tf.reshape(
            binary_label,
            shape=[binary_label.get_shape().as_list()[0] *
                   binary_label.get_shape().as_list()[1] *
                   binary_label.get_shape().as_list()[2]])
        # Class weighting.
        unique_labels, unique_id, counts = tf.unique_with_counts(
            binary_label_plain)
        counts = tf.cast(counts, tf.float32)
        # Original inverse-weight formula (kept for reference):
        # inverse_weights = tf.divide(1.0,
        #     tf.log(tf.add(tf.divide(tf.constant(1.0), counts),
        #                   tf.constant(1.02))))
        # i.e. inverse_weight = 1 / log(1/counts + 1.02); the original
        # author suspected a problem with that formula.
        # Modified inverse weights: per-class inverse frequency
        # (sum/counts), gathered per pixel, then squashed through
        # 25 / log(1/w + 1.02).
        # NOTE(review): this is experimental re-weighting; the magic
        # constants (25, the reference values "25, 6.25") are unexplained —
        # confirm intent before reuse.
        sum = tf.reduce_sum(counts)
        weights = tf.divide(counts, sum)
        inverse_weights = tf.multiply(tf.constant(1.0),
                                      tf.divide(1, weights))
        # 25, 6.25,
        inverse_weights = tf.gather(inverse_weights, binary_label)
        inverse_weights = tf.divide(
            25.0,  # 1.0
            tf.log(
                tf.add(tf.divide(tf.constant(1.0), inverse_weights),
                       tf.constant(1.02))))
        binary_segmentation_loss = tf.losses.sparse_softmax_cross_entropy(
            labels=binary_label,
            logits=decode_logits,
            weights=inverse_weights)
        binary_segmentation_loss = tf.reduce_mean(binary_segmentation_loss)
        # --- discriminative loss over the pixel embedding ---
        decode_deconv = inference_ret['deconv']
        # 1x1 conv projects decoder features to a 4-dim embedding.
        pix_embedding = self.conv2d(inputdata=decode_deconv,
                                    out_channel=4,
                                    kernel_size=1,
                                    use_bias=False,
                                    name='pix_embedding_conv')
        pix_embedding = self.relu(inputdata=pix_embedding,
                                  name='pix_embedding_relu')
        image_shape = (pix_embedding.get_shape().as_list()[1],
                       pix_embedding.get_shape().as_list()[2])
        disc_loss, l_var, l_dist, l_reg = \
            lanenet_discriminative_loss.discriminative_loss(
                pix_embedding, instance_label, 4, image_shape,
                0.5, 3.0, 1.0, 1.0, 0.001)
        # --- combine losses ---
        l2_reg_loss = tf.constant(0.0, tf.float32)
        for vv in tf.trainable_variables():
            if 'bn' in vv.name:
                continue  # batch-norm params aren't regularized
            else:
                l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
        l2_reg_loss *= 0.001  # lambda = 0.001
        total_loss = 0.5 * binary_segmentation_loss + 0.5 * disc_loss + l2_reg_loss
        ret = {
            'total_loss': total_loss,
            'binary_seg_logits': decode_logits,
            'instance_seg_logits': pix_embedding,
            'binary_seg_loss': binary_segmentation_loss,
            'discriminative_loss': disc_loss,
        }
    return ret