def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
    """Builds a softmax cross entropy loss per scale and returns their sum.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore. Pixels carrying it get weight 0.
      loss_weight: Float, multiplier applied to every non-ignored pixel.
      upsample_logits: Boolean. If True the logits are bilinearly resized to
        the label size; otherwise the labels are resized (nearest neighbor)
        to the logits size.
      scope: String, prefix of the per-scale loss scope ('<scope>_<scale>').

    Returns:
      The sum over all scales of the tensors returned by
      `tf.losses.softmax_cross_entropy` (the integer 0 if the map is empty).

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    summed_loss = 0
    for scale_name, scale_logits in six.iteritems(scales_to_logits):
        scoped_name = '%s_%s' % (scope, scale_name) if scope else None

        if upsample_logits:
            # Keep the labels at full resolution and bring the logits up.
            label_size = preprocess_utils.resolve_shape(labels, 4)[1:3]
            scale_logits = tf.image.resize_bilinear(
                scale_logits, label_size, align_corners=True)
            resized_labels = labels
        else:
            # Bring the labels down to the resolution of the logits.
            logits_size = preprocess_utils.resolve_shape(scale_logits, 4)[1:3]
            resized_labels = tf.image.resize_nearest_neighbor(
                labels, logits_size, align_corners=True)

        flat_labels = tf.reshape(resized_labels, shape=[-1])
        is_valid_pixel = tf.not_equal(flat_labels, ignore_label)
        pixel_weights = tf.to_float(is_valid_pixel) * loss_weight
        targets = slim.one_hot_encoding(
            flat_labels, num_classes, on_value=1.0, off_value=0.0)
        flat_logits = tf.reshape(scale_logits, shape=[-1, num_classes])

        summed_loss += tf.losses.softmax_cross_entropy(
            targets, flat_logits, weights=pixel_weights, scope=scoped_name)

    return summed_loss
def add_focal_loss_for_each_scale(scales_to_logits,
                                  labels,
                                  num_classes,
                                  ignore_label,
                                  alpha=1,
                                  gamma=2,
                                  loss_weight=1.0,
                                  upsample_logits=True,
                                  scope=None):
    """Adds a softmax focal loss for the logits of each scale.

    Focal loss implementation reference:
    https://github.com/kornia/kornia/blob/master/kornia/losses/focal.py

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits are reshaped to [-1, num_classes].
      labels: Groundtruth labels; flattened and one-hot encoded, so they are
        expected to hold integer class ids.
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore. Pixels carrying it get weight 0.
      alpha: Scalar multiplier applied to the focal loss.
      gamma: Exponent of the (1 - p) modulating factor.
      loss_weight: Float, multiplier applied to every non-ignored pixel.
      upsample_logits: Boolean. If True the logits are bilinearly resized to
        the label size; otherwise the labels are resized (nearest neighbor)
        to the logits size.
      scope: String, prefix of the per-scale loss scope ('<scope>_<scale>').

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for focal loss.')

    # Keeps log() finite when a class probability underflows to zero.
    epsilon = 1.e-9

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        # Zero for ignored pixels, `loss_weight` everywhere else.
        weights = tf.to_float(
            tf.not_equal(scaled_labels, ignore_label)) * loss_weight
        one_hot_labels = tf.one_hot(
            scaled_labels, num_classes, on_value=1.0, off_value=0.0)
        logits = tf.reshape(logits, shape=[-1, num_classes])

        with tf.name_scope(loss_scope, 'softmax_focal_loss',
                           [logits, one_hot_labels, weights]):
            one_hot_labels = tf.stop_gradient(
                one_hot_labels, name='labels_stop_gradient')

            # Compute softmax over class axis.
            logits_soft = tf.nn.softmax(
                logits, axis=-1, name="softmax_before_focal_loss") + epsilon

            # Compute the actual focal loss: -alpha * (1 - p)^gamma * log(p)
            # evaluated at the groundtruth class of every pixel.
            modulating_factor = tf.pow(tf.subtract(1., logits_soft), gamma)
            fl = -alpha * tf.multiply(
                one_hot_labels,
                tf.multiply(modulating_factor, tf.log(logits_soft)))
            fl = tf.reduce_sum(fl, axis=-1)

            # Apply the per-pixel weights. Previously they were computed but
            # never used, so `loss_weight` had no effect and an ignore label
            # inside [0, num_classes) still contributed to the loss.
            fl = tf.multiply(fl, weights)

            # The mean is taken over all pixels, ignored ones included.
            total_loss = tf.reduce_mean(fl)
            tf.losses.add_loss(total_loss)
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
    """Registers one softmax cross entropy loss per scale of logits.

    The losses are created with `tf.losses.softmax_cross_entropy`; nothing is
    returned.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore. Pixels carrying it get weight 0.
      loss_weight: Float, multiplier applied to every non-ignored pixel.
      upsample_logits: Boolean, resize the logits to the labels (True) or the
        labels to the logits (False).
      scope: String, prefix of the per-scale loss scope ('<scope>_<scale>').

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale_name, scale_logits in six.iteritems(scales_to_logits):
        if scope:
            scoped_name = '%s_%s' % (scope, scale_name)
        else:
            scoped_name = None

        if not upsample_logits:
            # Shrink the labels so they line up with the logits.
            matched_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(scale_logits, 4)[1:3],
                align_corners=True)
        else:
            # Grow the logits so they line up with the untouched labels.
            scale_logits = tf.image.resize_bilinear(
                scale_logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            matched_labels = labels

        label_vector = tf.reshape(matched_labels, shape=[-1])
        keep = tf.to_float(tf.not_equal(label_vector, ignore_label))
        pixel_weights = keep * loss_weight
        one_hot_targets = slim.one_hot_encoding(
            label_vector, num_classes, on_value=1.0, off_value=0.0)
        logit_matrix = tf.reshape(scale_logits, shape=[-1, num_classes])

        tf.losses.softmax_cross_entropy(
            one_hot_targets,
            logit_matrix,
            weights=pixel_weights,
            scope=scoped_name)
def add_l1_loss_to_each_dimension(scales_to_logits,
                                  labels,
                                  upsample_logits=True,
                                  top_k_percent_pixels=1.0,
                                  scope=None):
    """Adds L1 loss to each channel logits of each scale.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits.
      labels: Groundtruth tensor that is split into three equal parts along
        axis 3; only the first two parts are used as regression targets.
      upsample_logits: Boolean. If True the logits are bilinearly resized to
        the label size; otherwise the labels are resized (nearest neighbor)
        to the logits size.
      top_k_percent_pixels: Unused; kept for interface compatibility.
      scope: String, prefix of the per-scale loss scope ('<scope>_<scale>').

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for offset regression loss.')

    # Drop the third label channel group; only the first two are regressed.
    label1, label2, _ = tf.split(labels, num_or_size_splits=3, axis=3)
    labels = tf.concat([label1, label2], 3)

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits. Without this
            # branch the labels would never match the logits resolution.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        with tf.name_scope(loss_scope, 'instance_offset_l1_loss',
                           [logits, scaled_labels]):
            center_loss = tf.abs(
                tf.subtract(logits, tf.stop_gradient(scaled_labels)))
            # Average over axis 0, then normalise the summed error by the
            # number of positions whose averaged error is non-zero.
            center_loss = tf.reduce_mean(center_loss, 0)
            num_present = tf.reduce_sum(
                tf.cast(tf.not_equal(center_loss, 0), tf.float32))
            loss = tf.reduce_sum(center_loss)
            # NOTE(review): _div_maybe_zero is defined elsewhere; presumably
            # it guards against num_present == 0 - confirm.
            loss = _div_maybe_zero(loss, num_present)
            tf.losses.add_loss(loss)
def scale_logits_to_labels(logits, labels, upsample_logits):
    """Brings logits and labels to the same spatial scale.

    Args:
      logits: Logits tensor; resized bilinearly when `upsample_logits` is True.
      labels: Label tensor; resized with nearest neighbor when
        `upsample_logits` is False.
      upsample_logits: Boolean selecting which of the two tensors is resized.

    Returns:
      A tuple (scaled_logits, scaled_labels).

    Raises:
      AssertionError: If the static spatial shapes (axes 1 and 2) of the two
        results are known and differ.
    """
    if upsample_logits:
        # Label is not downsampled, and instead we upsample logits.
        scaled_logits = tf.image.resize_bilinear(
            logits,
            preprocess_utils.resolve_shape(labels, 4)[1:3],
            align_corners=True)
        scaled_labels = labels
    else:
        # Label is downsampled to the same size as logits.
        scaled_labels = tf.image.resize_nearest_neighbor(
            labels,
            preprocess_utils.resolve_shape(logits, 4)[1:3],
            align_corners=True)
        scaled_logits = logits

    # Compare with is_compatible_with rather than ==: equality between
    # unknown static dimensions is not truthy, which made the check fail
    # spuriously whenever the spatial size is only known at run time.
    labels_size = scaled_labels.get_shape()[1:3]
    logits_size = scaled_logits.get_shape()[1:3]
    assert labels_size.is_compatible_with(logits_size), (
        'The potentially reshaped logits and labels should match in shapes!')
    return scaled_logits, scaled_labels
def add_heatmap_regression(scales_to_logits,
                           labels,
                           upsample_logits=True,
                           top_k_percent_pixels=1.0,
                           scope=None):
    """Adds heatmap regression loss for logits of each scale.

    The loss is a squared difference between logits and labels.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits.
      labels: Groundtruth heatmap tensor used as the regression target.
      upsample_logits: Boolean. If True the logits are bilinearly resized to
        the label size; otherwise the labels are resized (nearest neighbor)
        to the logits size.
      top_k_percent_pixels: Unused; kept for interface compatibility.
      scope: String, prefix of the per-scale loss scope ('<scope>_<scale>').

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for heatmap regression loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits. Without this
            # branch the labels would never match the logits resolution.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        with tf.name_scope(loss_scope, 'instance_center_l2_loss',
                           [logits, scaled_labels]):
            center_loss = tf.squared_difference(
                logits, tf.stop_gradient(scaled_labels))
            # Average over axis 0, then normalise the summed error by the
            # number of positions whose averaged error is non-zero.
            center_loss = tf.reduce_mean(center_loss, 0)
            num_present = tf.reduce_sum(
                tf.cast(tf.not_equal(center_loss, 0), tf.float32))
            loss = tf.reduce_sum(center_loss)
            # NOTE(review): _div_maybe_zero is defined elsewhere; presumably
            # it guards against num_present == 0 - confirm.
            loss = _div_maybe_zero(loss, num_present)
            tf.losses.add_loss(loss)
def _log_summaries(input_image, label, num_of_classes, output, ignore_label):
    """Logs the summaries for the model.

    Writes a histogram per model variable, scalar summaries for pixel
    accuracy and mean IoU, and (when FLAGS.save_summaries_images is set) an
    image summary of input, colored label and colored prediction side by side.

    Args:
      input_image: Input image of the model. Its shape is
        [batch_size, height, width, channel].
      label: Label of the image. It is squeezed on axis 3 here, so a trailing
        singleton axis is expected.
      num_of_classes: The number of classes of the dataset.
      output: Output of the model; it is resized to the label size and
        arg-maxed over axis 3.
      ignore_label: Label value excluded from the accuracy and mIoU metrics.
    """
    # Histograms of all model variables.
    for variable in tf.model_variables():
        tf.summary.histogram(variable.op.name, variable)

    label_size = preprocess_utils.resolve_shape(label, 4)[1:3]
    resized_output = tf.image.resize_bilinear(output, label_size)
    pred = tf.argmax(resized_output, 3)
    label = tf.squeeze(label, [3])

    # Ignored pixels get weight 0 and are remapped to class 0 so that the
    # metric ops never see an out-of-range class id.
    metric_weights = tf.to_float(tf.not_equal(label, ignore_label))
    metric_labels = tf.where(
        tf.equal(label, ignore_label), tf.zeros_like(label), label)

    _, accuracy_update = tf.metrics.accuracy(
        metric_labels, pred, weights=metric_weights)
    miou, miou_update = tf.metrics.mean_iou(
        metric_labels, pred, num_of_classes, weights=metric_weights)
    tf.add_to_collections(tf.GraphKeys.UPDATE_OPS, miou_update)

    miou = tf.Print(miou, [miou], 'mIoU :')
    acc = tf.Print(accuracy_update, [accuracy_update], 'ACC is :')
    tf.summary.scalar('px_accuracy/train_px_accuracy', acc)
    tf.summary.scalar('mean_iou/train_mean_iou', miou)

    if not FLAGS.save_summaries_images:
        return

    # Image, colored label and colored prediction, concatenated along axis 2.
    image_uint8 = tf.cast(input_image, tf.uint8)
    colored_label = get_dataset_colormap.label_to_color2(label, FLAGS.dataset)
    colored_pred = get_dataset_colormap.label_to_color2(pred, FLAGS.dataset)
    panel = tf.concat(
        axis=2, values=[image_uint8, colored_label, colored_pred])
    tf.summary.image('samples', panel, max_outputs=6)
def _log_summaries(input_image, label, num_of_classes, output):
    """Logs the summaries for the model.

    Writes a histogram per model variable. When FLAGS.save_summaries_images
    is set it also writes image summaries of the input, the rescaled label,
    the rescaled prediction, and one combined panel decoded through
    train_utils.

    Args:
      input_image: Input image of the model. Its shape is
        [batch_size, height, width, channel].
      label: Label of the image. It is squeezed on axis 3 here, so a trailing
        singleton axis is expected.
      num_of_classes: The number of classes of the dataset.
      output: Output of the model; it is arg-maxed over axis 3.
    """
    # Histograms of all model variables.
    for variable in tf.model_variables():
        tf.summary.histogram(variable.op.name, variable)

    if not FLAGS.save_summaries_images:
        return

    print("adding summaries")
    tf.summary.image('samples/%s' % common.IMAGE, input_image)

    # Stretch class ids over the uint8 range so they are visible.
    pixel_scaling = max(1, 255 // num_of_classes)
    tf.summary.image('samples/%s' % common.LABEL,
                     tf.cast(label * pixel_scaling, tf.uint8))

    raw_predictions = tf.expand_dims(tf.argmax(output, 3), -1)
    tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                     tf.cast(raw_predictions * pixel_scaling, tf.uint8))

    # Combined panel: image | decoded label | decoded prediction.
    first_label = tf.squeeze(label, [3])[0]
    images_summary = tf.py_func(
        train_utils.inv_preprocess, [input_image, 1], tf.uint8)
    labels_summary = tf.py_func(
        train_utils.decode_labels, [first_label, 1], tf.uint8)

    label_size = preprocess_utils.resolve_shape(label, 4)[1:3]
    resized_output = tf.image.resize_bilinear(
        output, label_size, align_corners=True)
    preds = tf.expand_dims(tf.argmax(resized_output, 3), -1)
    first_pred = tf.squeeze(preds, [3])[0]
    preds_summary = tf.py_func(
        train_utils.decode_labels, [first_pred, 1], tf.uint8)

    # Concatenate along axis 2; the decoded label and prediction get a
    # leading axis added first so their rank matches the image summary.
    panel = tf.concat(
        [images_summary,
         tf.expand_dims(labels_summary, 0),
         tf.expand_dims(preds_summary, 0)],
        axis=2)
    tf.summary.image('images', panel, max_outputs=2)
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  gt_is_matting_map=False,
                                                  scope=None):
    """Adds softmax cross entropy loss for logits of each scale.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore.
      loss_weight: A float or a list of loss weights. If it is a float, it
        means all the labels have the same weight. If it is a list of
        weights, then each element in the list represents the weight for the
        label of its index, for example, loss_weight = [0.1, 0.5] means the
        weight for label 0 is 0.1 and the weight for label 1 is 0.5.
      upsample_logits: Boolean, upsample logits or not.
      hard_example_mining_step: An integer, the training step in which the
        hard exampling mining kicks off. Note that we gradually reduce the
        mining percent to the top_k_percent_pixels. For example, if
        hard_example_mining_step = 100K and top_k_percent_pixels = 0.25, then
        mining percent will gradually reduce from 100% to 25% until 100K
        steps after which we only mine top 25% pixels.
      top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its
        value < 1.0, only compute the loss for the top k percent pixels
        (e.g., the top 20% pixels). This is useful for hard pixel mining.
      gt_is_matting_map: If true, the groundtruth is a matting map of
        confidence score. If false, the groundtruth is an integer valued
        class mask.
      scope: String, the scope for the loss.

    Raises:
      ValueError: Label or logits is None, or groundtruth is matting map
        while label is not floating value, or `loss_weight` differs from 1.0
        while groundtruth is a matting map.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    # If input groundtruth is a matting map of confidence, check if the input
    # labels are floating point values.
    if gt_is_matting_map and not labels.dtype.is_floating:
        raise ValueError(
            'Labels must be floats if groundtruth is a matting map.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            # When gt_is_matting_map = true, label downsampling with nearest
            # neighbor method may introduce artifacts. However, to avoid
            # ignore_label from being interpolated with other labels, we
            # still perform nearest neighbor interpolation.
            # TODO(huizhongc): Change to bilinear interpolation by processing
            # padded and non-padded label separately.
            if gt_is_matting_map:
                tf.logging.warning(
                    'Label downsampling with nearest neighbor may introduce '
                    'artifacts.')
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])
        # Per-pixel weights come from `loss_weight` only. A hard-coded
        # 0/1/5/25 mask used to be built here but was never consumed, so it
        # has been removed; pass a list as `loss_weight` for class weights.
        weights = utils.get_label_weight_mask(
            scaled_labels, ignore_label, num_classes,
            label_weights=loss_weight)
        # Dimension of keep_mask is equal to the total number of pixels.
        keep_mask = tf.cast(
            tf.not_equal(scaled_labels, ignore_label), dtype=tf.float32)

        logits = tf.reshape(logits, shape=[-1, num_classes])

        if gt_is_matting_map:
            # When the groundtruth is integer label mask, we can assign class
            # dependent label weights to the loss. When the groundtruth is
            # image matting confidence, we do not apply class-dependent label
            # weight (i.e., label_weight = 1.0).
            if loss_weight != 1.0:
                raise ValueError(
                    'loss_weight must equal to 1 if groundtruth is matting '
                    'map.')

            # Assign label value 0 to ignore pixels. The exact label value of
            # ignore pixel does not matter, because those ignore_value pixel
            # losses will be multiplied to 0 weight.
            train_labels = scaled_labels * keep_mask
            train_labels = tf.expand_dims(train_labels, 1)
            train_labels = tf.concat([1 - train_labels, train_labels], axis=1)
        else:
            train_labels = tf.one_hot(
                scaled_labels, num_classes, on_value=1.0, off_value=0.0)

        default_loss_scope = ('softmax_all_pixel_loss'
                              if top_k_percent_pixels == 1.0 else
                              'softmax_hard_example_mining')
        with tf.name_scope(loss_scope, default_loss_scope,
                           [logits, train_labels, weights]):
            # Compute the loss for all pixels.
            pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf.stop_gradient(
                    train_labels, name='train_labels_stop_gradient'),
                logits=logits,
                name='pixel_losses')
            weighted_pixel_losses = tf.multiply(pixel_losses, weights)

            if top_k_percent_pixels == 1.0:
                total_loss = tf.reduce_sum(weighted_pixel_losses)
                num_present = tf.reduce_sum(keep_mask)
                loss = _div_maybe_zero(total_loss, num_present)
                tf.losses.add_loss(loss)
            else:
                num_pixels = tf.to_float(tf.shape(logits)[0])
                # Compute the top_k_percent pixels based on current training
                # step.
                if hard_example_mining_step == 0:
                    # Directly focus on the top_k pixels.
                    top_k_pixels = tf.to_int32(
                        top_k_percent_pixels * num_pixels)
                else:
                    # Gradually reduce the mining percent to
                    # top_k_percent_pixels.
                    global_step = tf.to_float(
                        tf.train.get_or_create_global_step())
                    ratio = tf.minimum(
                        1.0, global_step / hard_example_mining_step)
                    top_k_pixels = tf.to_int32(
                        (ratio * top_k_percent_pixels + (1.0 - ratio)) *
                        num_pixels)
                top_k_losses, _ = tf.nn.top_k(
                    weighted_pixel_losses,
                    k=top_k_pixels,
                    sorted=True,
                    name='top_k_percent_pixels')
                total_loss = tf.reduce_sum(top_k_losses)
                num_present = tf.reduce_sum(
                    tf.to_float(tf.not_equal(top_k_losses, 0.0)))
                loss = _div_maybe_zero(total_loss, num_present)
                tf.losses.add_loss(loss)
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  scope=None):
    """Adds a class-weighted softmax cross entropy loss for each scale.

    Pixel weights are fixed inside this function: label 0 gets weight 1,
    label 1 gets weight 5, and `ignore_label` gets weight 0. Any other label
    value therefore also ends up with weight 0.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore.
      loss_weight: Float, loss weight. Not used by this implementation; the
        fixed per-label weights above are applied instead.
      upsample_logits: Boolean, upsample logits or not.
      hard_example_mining_step: An integer, the training step in which the
        hard exampling mining kicks off. The mining percent is reduced
        gradually from 100% to top_k_percent_pixels until this step; 0 means
        the top-k percentage is applied directly.
      top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its
        value < 1.0, only compute the loss for the top k percent pixels
        (e.g., the top 20% pixels). This is useful for hard pixel mining.
      scope: String, the scope for the loss.

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    # Fixed weights for label 0, label 1 and the ignore label.
    weight_label0 = 1
    weight_label1 = 5
    weight_ignored = 0

    for scale_name, scale_logits in six.iteritems(scales_to_logits):
        scoped_name = '%s_%s' % (scope, scale_name) if scope else None

        if upsample_logits:
            # Keep the labels at full resolution and bring the logits up.
            label_size = preprocess_utils.resolve_shape(labels, 4)[1:3]
            scale_logits = tf.image.resize_bilinear(
                scale_logits, label_size, align_corners=True)
            resized_labels = labels
        else:
            # Bring the labels down to the resolution of the logits.
            logits_size = preprocess_utils.resolve_shape(scale_logits, 4)[1:3]
            resized_labels = tf.image.resize_nearest_neighbor(
                labels, logits_size, align_corners=True)

        flat_labels = tf.reshape(resized_labels, shape=[-1])
        pixel_weights = (
            tf.to_float(tf.equal(flat_labels, 0)) * weight_label0
            + tf.to_float(tf.equal(flat_labels, 1)) * weight_label1
            + tf.to_float(tf.equal(flat_labels, ignore_label)) * weight_ignored)
        targets = tf.one_hot(
            flat_labels, num_classes, on_value=1.0, off_value=0.0)
        flat_logits = tf.reshape(scale_logits, shape=[-1, num_classes])

        if top_k_percent_pixels == 1.0:
            # Every pixel contributes; let tf.losses do the reduction.
            tf.losses.softmax_cross_entropy(
                targets, flat_logits, weights=pixel_weights,
                scope=scoped_name)
            continue

        with tf.name_scope(scoped_name, 'softmax_hard_example_mining',
                           [flat_logits, targets, pixel_weights]):
            targets = tf.stop_gradient(targets, name='labels_stop_gradient')
            per_pixel = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=targets, logits=flat_logits, name='pixel_losses')
            weighted = tf.multiply(per_pixel, pixel_weights)
            pixel_count = tf.to_float(tf.shape(flat_logits)[0])

            if hard_example_mining_step == 0:
                # Mine the requested fraction right away.
                keep_fraction = top_k_percent_pixels
            else:
                # Anneal from all pixels down to the requested fraction.
                step = tf.to_float(tf.train.get_or_create_global_step())
                ratio = tf.minimum(1.0, step / hard_example_mining_step)
                keep_fraction = (
                    ratio * top_k_percent_pixels + (1.0 - ratio))
            top_k_pixels = tf.to_int32(keep_fraction * pixel_count)

            hardest, _ = tf.nn.top_k(
                weighted, k=top_k_pixels, sorted=True,
                name='top_k_percent_pixels')
            hardest_sum = tf.reduce_sum(hardest)
            nonzero_count = tf.reduce_sum(
                tf.to_float(tf.not_equal(hardest, 0.0)))
            tf.losses.add_loss(_div_maybe_zero(hardest_sum, nonzero_count))
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  scope=None):
    """Adds softmax cross entropy loss, optionally with hard pixel mining.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore. Pixels carrying it get weight 0.
      loss_weight: Float, multiplier applied to every non-ignored pixel.
      upsample_logits: Boolean, upsample logits or not.
      hard_example_mining_step: An integer, the training step in which the
        hard exampling mining kicks off. The mining percent is reduced
        gradually from 100% to top_k_percent_pixels until this step; 0 means
        the top-k percentage is applied directly.
      top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its
        value < 1.0, only compute the loss for the top k percent pixels
        (e.g., the top 20% pixels). This is useful for hard pixel mining.
      scope: String, the scope for the loss.

    Raises:
      ValueError: If `labels` is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale_name, scale_logits in six.iteritems(scales_to_logits):
        scoped_name = '%s_%s' % (scope, scale_name) if scope else None

        if upsample_logits:
            # Keep the labels at full resolution and bring the logits up.
            label_size = preprocess_utils.resolve_shape(labels, 4)[1:3]
            scale_logits = tf.image.resize_bilinear(
                scale_logits, label_size, align_corners=True)
            resized_labels = labels
        else:
            # Bring the labels down to the resolution of the logits.
            logits_size = preprocess_utils.resolve_shape(scale_logits, 4)[1:3]
            resized_labels = tf.image.resize_nearest_neighbor(
                labels, logits_size, align_corners=True)

        flat_labels = tf.reshape(resized_labels, shape=[-1])
        pixel_weights = tf.to_float(
            tf.not_equal(flat_labels, ignore_label)) * loss_weight
        targets = tf.one_hot(
            flat_labels, num_classes, on_value=1.0, off_value=0.0)
        flat_logits = tf.reshape(scale_logits, shape=[-1, num_classes])

        if top_k_percent_pixels == 1.0:
            # Every pixel contributes; let tf.losses do the reduction.
            tf.losses.softmax_cross_entropy(
                targets, flat_logits, weights=pixel_weights,
                scope=scoped_name)
            continue

        with tf.name_scope(scoped_name, 'softmax_hard_example_mining',
                           [flat_logits, targets, pixel_weights]):
            targets = tf.stop_gradient(targets, name='labels_stop_gradient')
            per_pixel = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=targets, logits=flat_logits, name='pixel_losses')
            weighted = tf.multiply(per_pixel, pixel_weights)
            pixel_count = tf.to_float(tf.shape(flat_logits)[0])

            if hard_example_mining_step == 0:
                # Mine the requested fraction right away.
                keep_fraction = top_k_percent_pixels
            else:
                # Anneal from all pixels down to the requested fraction.
                step = tf.to_float(tf.train.get_or_create_global_step())
                ratio = tf.minimum(1.0, step / hard_example_mining_step)
                keep_fraction = (
                    ratio * top_k_percent_pixels + (1.0 - ratio))
            top_k_pixels = tf.to_int32(keep_fraction * pixel_count)

            hardest, _ = tf.nn.top_k(
                weighted, k=top_k_pixels, sorted=True,
                name='top_k_percent_pixels')
            hardest_sum = tf.reduce_sum(hardest)
            nonzero_count = tf.reduce_sum(
                tf.to_float(tf.not_equal(hardest, 0.0)))
            tf.losses.add_loss(_div_maybe_zero(hardest_sum, nonzero_count))
def _filter_each_channel(images, kernel):
    """Convolves every channel of `images` independently with `kernel`.

    Args:
      images: Tensor that is transposed with a rank-4 permutation, i.e.
        [batch, height, width, channels].
      kernel: Convolution filter with one input and one output channel
        (shape [3, 3, 1, 1] for the kernels built in this file).

    Returns:
      A tensor laid out like `images` holding the filtered channels.
    """
    def filter_single_channel(channel):
        # [batch, height, width] -> [batch, height, width, 1] and back.
        filtered = tf.nn.conv2d(input=tf.expand_dims(channel, -1),
                                filter=kernel,
                                strides=[1, 1, 1, 1],
                                padding='SAME')
        return tf.squeeze(filtered, -1)

    channels_first = tf.transpose(images, (3, 0, 1, 2))
    filtered_channels = tf.map_fn(filter_single_channel, channels_first)
    return tf.transpose(filtered_channels, (1, 2, 3, 0))


def add_edge_loss_for_each_scale(scales_to_logits,
                                 labels,
                                 num_classes,
                                 ignore_label,
                                 loss_weight=1.0,
                                 upsample_logits=True,
                                 scope=None,
                                 edge_filters=(),
                                 norm='l2',
                                 smoothing=False):
    """Adds edge loss for logits of each scale.

    The sigmoid of the logits and the one-hot labels are both filtered with
    the selected edge kernel, and the element-wise p-th power of their
    absolute difference is added to the tf.GraphKeys.LOSSES collection.

    Args:
      scales_to_logits: A map from logits names for different scales to
        logits. The logits have shape
        [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore.
      loss_weight: Float, loss weight.
      upsample_logits: Boolean, upsample logits or not.
      scope: String, the scope for the loss.
      edge_filters: Sequence holding exactly one of 'sobel-x', 'sobel-y' or
        'laplace'. Several filters are not supported because the filter axis
        of the convolution result is squeezed away.
      norm: One of 'l1' ... 'l5'; selects the exponent p of the loss.
      smoothing: Boolean, blur the one-hot groundtruth with a 3x3 Gaussian
        before extracting its edges.

    Returns:
      The weighted loss tensor of the last processed scale, or None if
      `scales_to_logits` is empty.

    Raises:
      ValueError: If `labels` is None, `norm` is not supported, or
        `edge_filters` does not hold exactly one filter name.
      KeyError: If the filter name is unknown.
    """
    if labels is None:
        raise ValueError('No label for edge loss.')

    lp_norm_map = {"l1": 1, "l2": 2, "l3": 3, "l4": 4, "l5": 5}
    if norm not in lp_norm_map:
        raise ValueError(
            "The `norm` '{0}' is not supported. "
            "Supported values are: [l1...l5]".format(norm))

    filter_names = list(edge_filters)
    if len(filter_names) != 1:
        raise ValueError(
            'Exactly one edge filter must be given, got %d.'
            % len(filter_names))

    # The kernels do not depend on the scale, so build them once.
    sobel_x_kernel = tf.reshape(
        tf.constant([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype=tf.float32),
        shape=[3, 3, 1, 1], name='sobel_x_kernel')
    sobel_y_kernel = tf.reshape(
        tf.constant([[1, 0, -1], [2, 0, -2], [1, 0, -1]], dtype=tf.float32),
        shape=[3, 3, 1, 1], name='sobel_y_kernel')
    laplacian_kernel = tf.reshape(
        tf.constant([[1, 1, 1], [1, -8, 1], [1, 1, 1]], dtype=tf.float32),
        shape=[3, 3, 1, 1], name='laplacian_kernel')
    # Symmetric 3x3 Gaussian (one entry used to read 0.1233179).
    gaussian_kernel = tf.reshape(
        tf.constant([[0.077847, 0.123317, 0.077847],
                     [0.123317, 0.195346, 0.123317],
                     [0.077847, 0.123317, 0.077847]], dtype=tf.float32),
        shape=[3, 3, 1, 1], name='gaussian_kernel')
    filter_map = {
        "sobel-x": sobel_x_kernel,
        "sobel-y": sobel_y_kernel,
        "laplace": laplacian_kernel,
    }
    # Bound to its own name so `edge_filters` is not overwritten by a tensor.
    edge_kernel = filter_map[filter_names[0]]

    error = None
    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = 'edge_loss_%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        y_pred = tf.sigmoid(logits)
        # Computed before flattening so it keeps the label layout and
        # broadcasts against the per-class tensors below.
        not_ignore_mask = tf.to_float(
            tf.not_equal(scaled_labels, ignore_label)) * loss_weight

        flat_labels = tf.reshape(scaled_labels, shape=[-1])
        one_hot_labels = slim.one_hot_encoding(
            flat_labels, num_classes, on_value=1.0, off_value=0.0)
        y_true = tf.reshape(one_hot_labels, y_pred.shape)

        # Only the groundtruth class channel of each pixel is penalised.
        relevant_mask = tf.reshape(
            tf.to_float(tf.not_equal(one_hot_labels, 0)), y_pred.shape)
        weights = not_ignore_mask * relevant_mask

        y_pred_edges = _filter_each_channel(y_pred, edge_kernel)
        if smoothing:
            # First filter with gaussian to smooth edges of groundtruth.
            # Applied per channel: the kernel has a single input channel
            # while y_true has num_classes channels.
            y_true = _filter_each_channel(y_true, gaussian_kernel)
        y_true_edges = _filter_each_channel(y_true, edge_kernel)

        # Bound to a fresh name so the `scope` argument survives for the
        # next scale.
        with tf.name_scope(loss_scope, "mean_squared_error",
                           (y_pred_edges, y_true_edges, weights)) as op_scope:
            # Calculate the edge agreement loss per pixel.
            pixel_wise_edge_loss = tf.pow(
                tf.abs(y_pred_edges - y_true_edges), lp_norm_map[norm])
            error = tf.losses.compute_weighted_loss(
                pixel_wise_edge_loss,
                weights=weights,
                scope=op_scope,
                loss_collection=tf.GraphKeys.LOSSES,
                reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)

    return error
def add_dynamic_softmax_cross_entropy_loss_for_each_scale(
        scales_to_logits,
        labels,
        ignore_label,
        loss_weight=1.0,
        upsample_logits=True,
        scope=None,
        top_k_percent_pixels=1.0,
        hard_example_mining_step=100000):
    """Adds softmax cross entropy loss per scale for logits with varying classes.

    Also adds a scalar `miou` summary for every example of every scale. The
    function returns nothing; losses are created through
    `tf.losses.softmax_cross_entropy`.

    Args:
      scales_to_logits: A map from logits names for different scales to logits.
        The logits are a list of length batch_size of tensors of shape
        [time, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape
        [batch_size * time, image_height, image_width, 1].
      ignore_label: Integer, label to ignore.
      loss_weight: Float, loss weight.
      upsample_logits: Boolean, upsample logits or not.
      scope: String, the scope for the loss.
      top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its
        value < 1.0, only compute the loss for the top k percent pixels (e.g.,
        the top 20% pixels). This is useful for hard pixel mining.
      hard_example_mining_step: An integer, the training step in which the hard
        exampling mining kicks off. Note that we gradually reduce the mining
        percent to the top_k_percent_pixels. For example, if
        hard_example_mining_step=100K and top_k_percent_pixels=0.25, then
        mining percent will gradually reduce from 100% to 25% until 100K steps
        after which we only mine top 25% pixels.

    Raises:
      ValueError: If `labels` is None or `top_k_percent_pixels` lies outside
        [0, 1].
      AssertionError: If the logits of a scale are not a sequence.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')
    if top_k_percent_pixels < 0 or top_k_percent_pixels > 1:
        raise ValueError('Unexpected value of top_k_percent_pixels.')

    # `collections.Sequence` was removed in Python 3.10; the ABCs live in
    # `collections.abc`. Fall back to `collections` for Python 2.
    try:
        from collections import abc as collections_abc
    except ImportError:
        import collections as collections_abc

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        assert isinstance(logits, collections_abc.Sequence), (
            'The logits of each scale must be a sequence of tensors.')
        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            label_size = preprocess_utils.resolve_shape(labels, 4)[1:3]
            logits = [
                tf.image.resize_bilinear(x, label_size, align_corners=True)
                for x in logits
            ]
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits[0], 4)[1:3],
                align_corners=True)

        batch_size = len(logits)
        num_time = preprocess_utils.resolve_shape(logits[0])[0]
        # Split the leading [batch_size * time] axis so that
        # reshaped_labels[n] holds the labels of the n-th example.
        reshaped_labels = tf.reshape(
            scaled_labels,
            [batch_size, num_time] +
            preprocess_utils.resolve_shape(scaled_labels)[1:])

        for n, logits_n in enumerate(logits):
            # Labels of example n at the same spatial size as logits_n.
            labels_n = reshaped_labels[n]
            flat_labels_n = tf.reshape(labels_n, shape=[-1])
            not_ignore_mask = tf.to_float(
                tf.not_equal(flat_labels_n, ignore_label)) * loss_weight
            # The number of classes may differ per example, so it is read
            # from the logits at graph execution time.
            num_classes_n = tf.shape(logits_n)[-1]
            one_hot_labels = slim.one_hot_encoding(
                flat_labels_n, num_classes_n, on_value=1.0, off_value=0.0)
            logits_n_flat = tf.reshape(logits_n, shape=[-1, num_classes_n])

            if top_k_percent_pixels == 1.0:
                tf.losses.softmax_cross_entropy(
                    one_hot_labels,
                    logits_n_flat,
                    weights=not_ignore_mask,
                    scope=loss_scope)
            else:
                # Only compute the loss for top k percent pixels.
                # First, compute the loss for all pixels. Note we do not put
                # the loss to loss_collection and set reduction = None to keep
                # the shape.
                num_pixels = tf.to_float(tf.shape(logits_n_flat)[0])
                pixel_losses = tf.losses.softmax_cross_entropy(
                    one_hot_labels,
                    logits_n_flat,
                    weights=not_ignore_mask,
                    scope='pixel_losses',
                    loss_collection=None,
                    reduction=tf.losses.Reduction.NONE)
                # Compute the top_k_percent pixels based on current training
                # step.
                if hard_example_mining_step == 0:
                    # Directly focus on the top_k pixels.
                    top_k_pixels = tf.to_int32(
                        top_k_percent_pixels * num_pixels)
                else:
                    # Gradually reduce the mining percent to
                    # top_k_percent_pixels.
                    global_step = tf.to_float(
                        tf.train.get_or_create_global_step())
                    ratio = tf.minimum(
                        1.0, global_step / hard_example_mining_step)
                    top_k_pixels = tf.to_int32(
                        (ratio * top_k_percent_pixels + (1.0 - ratio)) *
                        num_pixels)
                _, top_k_indices = tf.nn.top_k(
                    pixel_losses,
                    k=top_k_pixels,
                    sorted=True,
                    name='top_k_percent_pixels')
                # Compute the loss for the top k percent pixels.
                tf.losses.softmax_cross_entropy(
                    tf.gather(one_hot_labels, top_k_indices),
                    tf.gather(logits_n_flat, top_k_indices),
                    weights=tf.gather(not_ignore_mask, top_k_indices),
                    scope=loss_scope)

            pred_n = tf.argmax(
                logits_n, axis=-1, output_type=tf.int32)[..., tf.newaxis]
            # Compare predictions with labels of the same spatial size. With
            # upsample_logits=True these are the original labels; otherwise
            # they are the downsampled ones, so shapes match pred_n.
            miou = eval_utils.calculate_multi_object_miou_tf(pred_n, labels_n)
            tf.summary.scalar('miou', miou)
# Per-class loss weights, indexed by label id. Labels whose weight is 0 and
# labels outside this table do not contribute to the loss.
_CLASS_LOSS_WEIGHTS = (
    1.5, 2.3, 2.5, 2, 2, 2, 2, 2, 4.5, 0,  # labels 0-9
    2, 0, 1.5, 0, 2, 2, 2, 0, 2.5, 5,  # labels 10-19
    5, 10, 5, 0, 3, 10, 10, 10, 0, 0,  # labels 20-29
    0, 20, 4, 12, 4, 4,  # labels 30-35
)


def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
    """Adds class-weighted softmax cross entropy loss for logits of each scale.

    Every pixel is weighted by the entry of `_CLASS_LOSS_WEIGHTS` that matches
    its label; pixels labelled `ignore_label` or with a label outside the table
    get weight 0. The function returns nothing; one loss per scale is created
    through `tf.losses.softmax_cross_entropy`.

    Args:
      scales_to_logits: A map from logits names for different scales to logits.
        The logits have shape [batch, logits_height, logits_width, num_classes].
      labels: Groundtruth labels with shape [batch, image_height, image_width, 1].
      num_classes: Integer, number of target classes.
      ignore_label: Integer, label to ignore.
      loss_weight: Float, loss weight. Currently NOT applied: the per-class
        weight table is used instead. Kept for interface compatibility.
      upsample_logits: Boolean, upsample logits or not.
      scope: String, the scope for the loss.

    Raises:
      ValueError: Label or logits is None.
    """
    if labels is None:
        raise ValueError('No label for softmax cross entropy loss.')

    for scale, logits in six.iteritems(scales_to_logits):
        loss_scope = '%s_%s' % (scope, scale) if scope else None

        if upsample_logits:
            # Label is not downsampled, and instead we upsample logits.
            logits = tf.image.resize_bilinear(
                logits,
                preprocess_utils.resolve_shape(labels, 4)[1:3],
                align_corners=True)
            scaled_labels = labels
        else:
            # Label is downsampled to the same size as logits.
            scaled_labels = tf.image.resize_nearest_neighbor(
                labels,
                preprocess_utils.resolve_shape(logits, 4)[1:3],
                align_corners=True)

        scaled_labels = tf.reshape(scaled_labels, shape=[-1])

        # Each pixel matches at most one class id, so the sum simply selects
        # the weight of that pixel's class (or 0 when no class matches).
        pixel_weights = tf.add_n([
            tf.to_float(tf.equal(scaled_labels, class_id)) * class_weight
            for class_id, class_weight in enumerate(_CLASS_LOSS_WEIGHTS)
        ])
        # Explicitly zero out ignored pixels so they are excluded even when
        # ignore_label coincides with one of the class ids in the table.
        pixel_weights *= tf.to_float(
            tf.not_equal(scaled_labels, ignore_label))

        one_hot_labels = slim.one_hot_encoding(
            scaled_labels, num_classes, on_value=1.0, off_value=0.0)
        tf.losses.softmax_cross_entropy(
            one_hot_labels,
            tf.reshape(logits, shape=[-1, num_classes]),
            weights=pixel_weights,
            scope=loss_scope)