def add_sigmoid_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
  """Adds sigmoid cross entropy loss for logits of each scale.

  Implemented based on deeplab's add_softmax_cross_entropy_loss_for_each_scale
  in deeplab/utils/train_utils.py.

  Args:
    scales_to_logits: A map from logits names for different scales to logits.
      The logits have shape [batch, logits_height, logits_width, num_classes].
    labels: Groundtruth labels with shape
      [batch, image_height, image_width, 1].
    ignore_label: Integer, label to ignore.
    loss_weight: Float, loss weight.
    upsample_logits: Boolean, upsample logits or not.
    scope: String, the scope for the loss.

  Raises:
    ValueError: Label or logits is None.
  """
  if labels is None:
    raise ValueError('No label for sigmoid cross entropy loss.')

  for scale, logits in six.iteritems(scales_to_logits):
    loss_scope = None
    if scope:
      loss_scope = '%s_%s' % (scope, scale)

    if upsample_logits:
      # Label is not downsampled, and instead we upsample logits.
      logits = tf.image.resize_bilinear(
          logits,
          preprocess_utils.resolve_shape(labels, 4)[1:3],
          align_corners=True)
      scaled_labels = labels
    else:
      # Label is downsampled to the same size as logits.
      scaled_labels = tf.image.resize_nearest_neighbor(
          labels,
          preprocess_utils.resolve_shape(logits, 4)[1:3],
          align_corners=True)

    # Use channel 1 of the logits as the binary (foreground) score.
    logits = logits[:, :, :, 1]
    scaled_labels = tf.to_float(scaled_labels)
    scaled_labels = tf.squeeze(scaled_labels)
    not_ignore_mask = tf.to_float(
        tf.not_equal(scaled_labels, ignore_label)) * loss_weight
    losses = tf.nn.weighted_cross_entropy_with_logits(
        scaled_labels, logits, FLAGS.sigmoid_recall_weight)
    # Loss added later in model_fn by tf.losses.get_total_loss().
    tf.losses.compute_weighted_loss(
        losses, weights=not_ignore_mask, scope=loss_scope)
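
# Example usage (sketch, not from this module): adding the binary sigmoid loss
# on the model's multi-scale logits. `outputs_to_scales_to_logits` and
# `samples` are hypothetical names for the model outputs and the input
# pipeline; FLAGS.sigmoid_recall_weight must be defined as a flag elsewhere in
# the binary.
#
#   add_sigmoid_cross_entropy_loss_for_each_scale(
#       outputs_to_scales_to_logits['semantic'],
#       samples['labels'],
#       ignore_label=255,
#       loss_weight=1.0,
#       upsample_logits=True,
#       scope='sigmoid_loss')
#   total_loss = tf.losses.get_total_loss()
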
def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  scope=None):
  """Adds softmax cross entropy loss for logits of each scale.

  Args:
    scales_to_logits: A map from logits names for different scales to logits.
      The logits have shape [batch, logits_height, logits_width, num_classes].
    labels: Groundtruth labels with shape
      [batch, image_height, image_width, 1].
    num_classes: Integer, number of target classes.
    ignore_label: Integer, label to ignore.
    loss_weight: Float, loss weight.
    upsample_logits: Boolean, upsample logits or not.
    scope: String, the scope for the loss.

  Raises:
    ValueError: Label or logits is None.
  """
  if labels is None:
    raise ValueError('No label for softmax cross entropy loss.')

  for scale, logits in six.iteritems(scales_to_logits):
    loss_scope = None
    if scope:
      loss_scope = '%s_%s' % (scope, scale)

    if upsample_logits:
      # Label is not downsampled, and instead we upsample logits.
      logits = tf.image.resize_bilinear(
          logits,
          preprocess_utils.resolve_shape(labels, 4)[1:3],
          align_corners=True)
      scaled_labels = labels
    else:
      # Label is downsampled to the same size as logits.
      scaled_labels = tf.image.resize_nearest_neighbor(
          labels,
          preprocess_utils.resolve_shape(logits, 4)[1:3],
          align_corners=True)

    scaled_labels = tf.reshape(scaled_labels, shape=[-1])
    not_ignore_mask = tf.to_float(
        tf.not_equal(scaled_labels, ignore_label)) * loss_weight
    one_hot_labels = slim.one_hot_encoding(
        scaled_labels, num_classes, on_value=1.0, off_value=0.0)
    tf.losses.softmax_cross_entropy(
        one_hot_labels,
        tf.reshape(logits, shape=[-1, num_classes]),
        weights=not_ignore_mask,
        scope=loss_scope)
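
# Example usage (sketch): the softmax variant mirrors deeplab's train_utils.
# `scales_to_logits` would typically hold the model's merged multi-scale
# logits; `samples` and `dataset` are hypothetical names for the input batch
# and a dataset descriptor carrying num_classes / ignore_label.
#
#   add_softmax_cross_entropy_loss_for_each_scale(
#       scales_to_logits={'merged_logits': logits},
#       labels=samples['labels'],
#       num_classes=dataset.num_classes,
#       ignore_label=dataset.ignore_label,
#       loss_weight=1.0,
#       upsample_logits=True,
#       scope='softmax_loss')
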
def resize_im(image, image_size, pad_val, channels, elements_boxes=None):
  """Resizes and pads the image.

  Args:
    image: Image to resize.
    image_size: The desired max image size.
    pad_val: The value to pad with.
    channels: The number of channels in the image.
    elements_boxes: The boxes from elements to resize.

  Returns:
    Resized image with possible padded regions, and possibly the resized
    elements boxes.
  """
  [width, height, got_channels] = preprocess_utils.resolve_shape(image, rank=3)
  new_width, new_height = get_resize_dim(width, height, image_size)
  image = tf.reshape(image, [width, height, -1])
  # Convert grayscale input to RGB when three channels are expected.
  image = tf.cond(
      tf.logical_and(channels == 3, tf.equal(got_channels, 1)),
      true_fn=lambda: tf.image.grayscale_to_rgb(image),
      false_fn=lambda: image,
  )
  image = tf.image.resize_images(image, [new_width, new_height])
  image = preprocess_utils.pad_to_bounding_box(image, 0, 0, image_size,
                                               image_size, pad_val)
  if elements_boxes is not None:
    return image, elements_boxes / tf.to_float(tf.maximum(width, height))
  return image
def resize_im(image, image_size, pad_val, channels, features=None):
  """Resizes and pads the image.

  Args:
    image: Image to resize.
    image_size: The desired max image size.
    pad_val: The value to pad with.
    channels: The number of channels in the image.
    features: Optional dict of other features (element boxes, groundtruth
      coordinates) to normalize along with the image.

  Returns:
    Resized image with possible padded regions. Element boxes and groundtruth
    coordinates in `features`, if provided, are normalized in place.
  """
  [height, width, got_channels] = preprocess_utils.resolve_shape(image, rank=3)
  new_height, new_width = get_resize_dim(height, width, image_size)
  image = tf.reshape(image, [height, width, -1])
  # Convert grayscale input to RGB when three channels are expected.
  image = tf.cond(
      tf.logical_and(channels == 3, tf.equal(got_channels, 1)),
      true_fn=lambda: tf.image.grayscale_to_rgb(image),
      false_fn=lambda: image,
  )
  image = tf.image.resize_images(image, [new_height, new_width])
  image = preprocess_utils.pad_to_bounding_box(image, 0, 0, image_size,
                                               image_size, pad_val)
  if features is not None:
    width, height = tf.to_float(width), tf.to_float(height)
    max_dim = tf.to_float(tf.maximum(width, height))
    # Normalize coordinates by the longer image side so that they stay
    # aligned with the padded square image.
    features[ELEMENTS_BOX_ID] = features[ELEMENTS_BOX_ID] / max_dim
    if GROUNDTRUTH_XMIN_ID in features:
      features[GROUNDTRUTH_XMIN_ID] *= width / max_dim
      features[GROUNDTRUTH_XMAX_ID] *= width / max_dim
      features[GROUNDTRUTH_YMIN_ID] *= height / max_dim
      features[GROUNDTRUTH_YMAX_ID] *= height / max_dim
  return image
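
# Example usage (sketch): resizing a decoded screenshot and normalizing the
# element boxes stored in the parsed feature dict. The pad value, image size,
# and the assumption that boxes arrive in pixel coordinates are illustrative;
# in the real pipeline they come from the dataset definition.
#
#   features = {ELEMENTS_BOX_ID: boxes}  # Boxes in pixel coordinates.
#   image = resize_im(
#       decoded_image, image_size=512, pad_val=0, channels=3,
#       features=features)
#   # After the call, features[ELEMENTS_BOX_ID] is divided by the longer
#   # image side, matching the padded square image.
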