def true_positive_rate_at_false_positive_rate_loss(
        labels,
        logits,
        target_rate,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the true-positive-rate-at-false-positive-rate loss.

    The surrogate objective has the form

        wt * loss(+) + lambdas * (wt * loss(-) - r * (1 - pi))

    where loss(+) and loss(-) are the losses on the positive and negative
    examples, wt is a scalar or tensor of per-example weights, r is the
    target rate and pi is the label priors.

    The per-example weights alter both the coefficient of each training
    example and the way examples count toward the constraint. When
    `label_priors` is supplied it MUST already account for `weights`:

        label_priors = P / (P + N)

    with P = sum_i (wt_i on positives) and N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      target_rate: The false positive rate at which the loss is computed.
        Either a float between 0 and 1 (one rate for all labels) or a
        `Tensor` of shape [num_labels] with one rate per label.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). If None, the priors are computed from `labels` with a
        moving average. Only set this with good reason, and see the note on
        `weights` above.
      surrogate_type: 'xent' (cross-entropy) or 'hinge'; selects the upper
        bound used in place of indicator functions.
      lambdas_initializer: An initializer op for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, also add the variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` of the same shape as `logits` holding the
        component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [num_labels] with the Lagrange
          multipliers.
        label_priors: A Tensor of shape [num_labels] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope, 'tpr_at_fpr', [labels, logits, label_priors], reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric inputs to the dtype of the logits.
        target_rate = util.convert_and_cast(
            target_rate, 'target_rate', float_type)
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # One Lagrange multiplier per label.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[label_count],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Falls back to priors derived from the labels when none are given.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)

        # Negatives are weighted by the multipliers, positives by one.
        surrogate = util.weighted_surrogate_loss(
            labels,
            logits,
            surrogate_type=surrogate_type,
            positive_weights=1.0,
            negative_weights=lambdas)
        weighted_loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * target_rate * (1.0 - label_priors) * bound_correction)
        loss = tf.reshape(weighted_loss - lambda_term, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return loss, other_outputs
def precision_recall_auc_loss(
        labels,
        logits,
        precision_range=(0.0, 1.0),
        num_anchors=20,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the precision-recall AUC loss.

    The loss sums recall-at-precision losses over a grid of precision values
    (anchor points). That sum is a Riemann sum approximating the area under
    the precision-recall curve.

    The per-example `weights` alter both the coefficient of each training
    example and the way examples count toward the constraint. When
    `label_priors` is supplied it MUST already account for `weights`:

        label_priors = P / (P + N)

    with P = sum_i (wt_i on positives) and N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      precision_range: A length-two tuple giving the range of precision
        values over which AUC is computed. Entries must be nonnegative,
        increasing, and at most 1.0.
      num_anchors: Number of grid points used to approximate the Riemann sum.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). If None, the priors are computed from `labels` with a
        moving average. Only set this with good reason, and see the note on
        `weights` above.
      surrogate_type: 'xent' or 'hinge'; selects the upper bound used in
        place of indicator functions.
      lambdas_initializer: An initializer for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, also add the variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` of the same shape as `logits` holding the
        component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [1, num_labels, num_anchors] with the
          Lagrange multipliers.
        biases: A Tensor of shape [1, num_labels, num_anchors] with the
          learned bias term for each.
        label_priors: A Tensor of shape [1, num_labels, 1] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope,
            'precision_recall_auc',
            [labels, logits, label_priors],
            reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric input to the dtype of the logits.
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # Anchor points along the precision axis and the spacing between them.
        precision_values, delta = _range_to_anchors_and_delta(
            precision_range, num_anchors, float_type)

        # Lagrange multipliers, one per (label, anchor) pair.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Bias terms, one per (label, anchor) pair, starting at zero.
        biases = tf.contrib.framework.model_variable(
            name='biases',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=tf.zeros_initializer(),
            collections=variables_collections,
            trainable=trainable)

        # Falls back to priors derived from the labels when none are given.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)
        label_priors = tf.reshape(label_priors, [1, label_count, 1])

        # Add a trailing axis so logits, labels and weights have shape
        # [batch_size, num_labels, 1] and broadcast against the anchors.
        logits = tf.expand_dims(logits, 2)
        labels = tf.expand_dims(labels, 2)
        weights = tf.expand_dims(weights, 2)

        # Weighted surrogate loss evaluated at every anchor.
        shifted_logits = logits + biases
        positive_weights = 1.0 + lambdas * (1.0 - precision_values)
        negative_weights = lambdas * precision_values
        surrogate = util.weighted_surrogate_loss(
            labels,
            shifted_logits,
            surrogate_type=surrogate_type,
            positive_weights=positive_weights,
            negative_weights=negative_weights)
        loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * (1.0 - precision_values) * label_priors
            * bound_correction)
        per_anchor_loss = loss - lambda_term
        anchor_total = tf.reduce_sum(per_anchor_loss, 2)
        per_label_loss = delta * anchor_total

        # Normalize so that a perfect score function has AUC 1.0. The range
        # is discretized into num_anchors + 1 intervals but the Riemann sum
        # has only num_anchors terms, so the effective integration interval
        # is `delta` shorter than precision_range.
        range_width = precision_range[1] - precision_range[0]
        effective_width = range_width - delta
        scaled_loss = tf.div(
            per_label_loss, effective_width, name='AUC_Normalize')
        scaled_loss = tf.reshape(scaled_loss, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'biases': biases,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return scaled_loss, other_outputs
def true_positive_rate_at_false_positive_rate_loss(
        labels,
        logits,
        target_rate,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the true-positive-rate-at-false-positive-rate loss.

    The surrogate objective has the form

        wt * loss(+) + lambdas * (wt * loss(-) - r * (1 - pi))

    where loss(+) and loss(-) are the losses on the positive and negative
    examples, wt is a scalar or tensor of per-example weights, r is the
    target rate and pi is the label priors.

    The per-example weights alter both the coefficient of each training
    example and the way examples count toward the constraint. When
    `label_priors` is supplied it MUST already account for `weights`:

        label_priors = P / (P + N)

    with P = sum_i (wt_i on positives) and N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      target_rate: The false positive rate at which the loss is computed.
        Either a float between 0 and 1 (one rate for all labels) or a
        `Tensor` of shape [num_labels] with one rate per label.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). If None, the priors are computed from `labels` with a
        moving average. Only set this with good reason, and see the note on
        `weights` above.
      surrogate_type: 'xent' (cross-entropy) or 'hinge'; selects the upper
        bound used in place of indicator functions.
      lambdas_initializer: An initializer op for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, also add the variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` of the same shape as `logits` holding the
        component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [num_labels] with the Lagrange
          multipliers.
        label_priors: A Tensor of shape [num_labels] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope, 'tpr_at_fpr', [labels, logits, label_priors], reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric inputs to the dtype of the logits.
        target_rate = util.convert_and_cast(
            target_rate, 'target_rate', float_type)
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # One Lagrange multiplier per label.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[label_count],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Falls back to priors derived from the labels when none are given.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)

        # Negatives are weighted by the multipliers, positives by one.
        surrogate = util.weighted_surrogate_loss(
            labels,
            logits,
            surrogate_type=surrogate_type,
            positive_weights=1.0,
            negative_weights=lambdas)
        weighted_loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * target_rate * (1.0 - label_priors) * bound_correction)
        loss = tf.reshape(weighted_loss - lambda_term, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return loss, other_outputs
def precision_recall_auc_loss(
        labels,
        logits,
        precision_range=(0.0, 1.0),
        num_anchors=20,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the precision-recall AUC loss.

    The loss sums recall-at-precision losses over a grid of precision values
    (anchor points). That sum is a Riemann sum approximating the area under
    the precision-recall curve.

    The per-example `weights` alter both the coefficient of each training
    example and the way examples count toward the constraint. When
    `label_priors` is supplied it MUST already account for `weights`:

        label_priors = P / (P + N)

    with P = sum_i (wt_i on positives) and N = sum_i (wt_i on negatives).

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      precision_range: A length-two tuple giving the range of precision
        values over which AUC is computed. Entries must be nonnegative,
        increasing, and at most 1.0.
      num_anchors: Number of grid points used to approximate the Riemann sum.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label (the fraction of training data that
        is positive). If None, the priors are computed from `labels` with a
        moving average. Only set this with good reason, and see the note on
        `weights` above.
      surrogate_type: 'xent' or 'hinge'; selects the upper bound used in
        place of indicator functions.
      lambdas_initializer: An initializer for the Lagrange multipliers.
      reuse: Whether the layer and its variables should be reused. Reuse
        requires `scope` to be given.
      variables_collections: Optional list of collections for the variables.
      trainable: If `True`, also add the variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      scope: Optional scope for `variable_scope`.

    Returns:
      loss: A `Tensor` of the same shape as `logits` holding the
        component-wise loss.
      other_outputs: A dictionary of internal quantities useful for
        debugging (see http://arxiv.org/pdf/1608.04802.pdf):
        lambdas: A Tensor of shape [1, num_labels, num_anchors] with the
          Lagrange multipliers.
        biases: A Tensor of shape [1, num_labels, num_anchors] with the
          learned bias term for each.
        label_priors: A Tensor of shape [1, num_labels, 1] with the prior
          probability of each label learned by the loss, if not provided.
        true_positives_lower_bound: Lower bound on the number of true
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.
        false_positives_upper_bound: Upper bound on the number of false
          positives given `labels` and `logits`; the same bound used in the
          optimized loss expression.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope,
            'precision_recall_auc',
            [labels, logits, label_priors],
            reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric input to the dtype of the logits.
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # Anchor points along the precision axis and the spacing between them.
        precision_values, delta = _range_to_anchors_and_delta(
            precision_range, num_anchors, float_type)

        # Lagrange multipliers, one per (label, anchor) pair.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Bias terms, one per (label, anchor) pair, starting at zero.
        biases = tf.contrib.framework.model_variable(
            name='biases',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=tf.zeros_initializer(),
            collections=variables_collections,
            trainable=trainable)

        # Falls back to priors derived from the labels when none are given.
        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)
        label_priors = tf.reshape(label_priors, [1, label_count, 1])

        # Add a trailing axis so logits, labels and weights have shape
        # [batch_size, num_labels, 1] and broadcast against the anchors.
        logits = tf.expand_dims(logits, 2)
        labels = tf.expand_dims(labels, 2)
        weights = tf.expand_dims(weights, 2)

        # Weighted surrogate loss evaluated at every anchor.
        shifted_logits = logits + biases
        positive_weights = 1.0 + lambdas * (1.0 - precision_values)
        negative_weights = lambdas * precision_values
        surrogate = util.weighted_surrogate_loss(
            labels,
            shifted_logits,
            surrogate_type=surrogate_type,
            positive_weights=positive_weights,
            negative_weights=negative_weights)
        loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * (1.0 - precision_values) * label_priors
            * bound_correction)
        per_anchor_loss = loss - lambda_term
        anchor_total = tf.reduce_sum(per_anchor_loss, 2)
        per_label_loss = delta * anchor_total

        # Normalize so that a perfect score function has AUC 1.0. The range
        # is discretized into num_anchors + 1 intervals but the Riemann sum
        # has only num_anchors terms, so the effective integration interval
        # is `delta` shorter than precision_range.
        range_width = precision_range[1] - precision_range[0]
        effective_width = range_width - delta
        scaled_loss = tf.div(
            per_label_loss, effective_width, name='AUC_Normalize')
        scaled_loss = tf.reshape(scaled_loss, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'biases': biases,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return scaled_loss, other_outputs
def precision_recall_auc_loss(
        labels,
        logits,
        precision_range=(0.0, 1.0),
        num_anchors=20,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the precision-recall AUC loss.

    Per-anchor losses over a grid of precision values are summed and scaled
    by the anchor spacing, giving a Riemann-sum approximation that is then
    normalized by the effective width of `precision_range`.

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      precision_range: A length-two tuple giving the range of precision
        values over which AUC is computed.
      num_anchors: Number of grid points used to approximate the Riemann sum.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label. It is passed through
        `maybe_create_label_priors` together with `labels` and `weights`.
      surrogate_type: 'xent' or 'hinge'; selects the surrogate loss. Only
        'xent' receives the log(2.0) correction.
      lambdas_initializer: An initializer for the Lagrange multipliers.
      reuse: Forwarded to `tf.variable_scope` as `reuse`.
      variables_collections: Optional list of collections for the variables.
      trainable: Forwarded as `trainable` when creating the multipliers and
        the biases.
      scope: Optional scope for `variable_scope`.

    Returns:
      A `(scaled_loss, other_outputs)` tuple. `scaled_loss` is reshaped to
      the original shape reported by `_prepare_labels_logits_weights`.
      `other_outputs` is a dictionary with the keys 'lambdas', 'biases',
      'label_priors' (reshaped to [1, num_labels, 1]),
      'true_positives_lower_bound' and 'false_positives_upper_bound'.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope,
            'precision_recall_auc',
            [labels, logits, label_priors],
            reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric input to the dtype of the logits.
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # Anchor points along the precision axis and the spacing between them.
        precision_values, delta = _range_to_anchors_and_delta(
            precision_range, num_anchors, float_type)

        # Lagrange multipliers, one per (label, anchor) pair.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        # Bias terms, one per (label, anchor) pair, starting at zero.
        biases = tf.contrib.framework.model_variable(
            name='biases',
            shape=[1, label_count, num_anchors],
            dtype=float_type,
            initializer=tf.zeros_initializer(),
            collections=variables_collections,
            trainable=trainable)

        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)
        label_priors = tf.reshape(label_priors, [1, label_count, 1])

        # Add a trailing axis so logits, labels and weights have shape
        # [batch_size, num_labels, 1] and broadcast against the anchors.
        logits = tf.expand_dims(logits, 2)
        labels = tf.expand_dims(labels, 2)
        weights = tf.expand_dims(weights, 2)

        # Weighted surrogate loss evaluated at every anchor.
        shifted_logits = logits + biases
        positive_weights = 1.0 + lambdas * (1.0 - precision_values)
        negative_weights = lambdas * precision_values
        surrogate = util.weighted_surrogate_loss(
            labels,
            shifted_logits,
            surrogate_type=surrogate_type,
            positive_weights=positive_weights,
            negative_weights=negative_weights)
        loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * (1.0 - precision_values) * label_priors
            * bound_correction)
        per_anchor_loss = loss - lambda_term
        anchor_total = tf.reduce_sum(per_anchor_loss, 2)
        per_label_loss = delta * anchor_total

        # Normalize so that a perfect score function has AUC 1.0. The range
        # is discretized into num_anchors + 1 intervals but the Riemann sum
        # has only num_anchors terms, so the effective integration interval
        # is `delta` shorter than precision_range.
        range_width = precision_range[1] - precision_range[0]
        effective_width = range_width - delta
        scaled_loss = tf.div(
            per_label_loss, effective_width, name='AUC_Normalize')
        scaled_loss = tf.reshape(scaled_loss, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'biases': biases,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return scaled_loss, other_outputs
def true_positive_rate_at_false_positive_rate_loss(
        labels,
        logits,
        target_rate,
        weights=1.0,
        dual_rate_factor=0.1,
        label_priors=None,
        surrogate_type='xent',
        lambdas_initializer=tf.constant_initializer(1.0),
        reuse=None,
        variables_collections=None,
        trainable=True,
        scope=None):
    """Builds the true-positive-rate-at-false-positive-rate loss.

    The returned loss is the weighted surrogate loss (positives weighted by
    1.0, negatives by the Lagrange multipliers) minus the term
    `lambdas * target_rate * (1 - label_priors)`, which is scaled by log(2.0)
    when `surrogate_type` is 'xent'.

    Args:
      labels: A `Tensor` of shape [batch_size] or [batch_size, num_labels].
      logits: A `Tensor` with the same shape as `labels`.
      target_rate: The false positive rate at which the loss is computed;
        converted to the dtype of `logits`.
      weights: Loss coefficients. A scalar, or a `Tensor` of shape
        [batch_size] or [batch_size, num_labels].
      dual_rate_factor: A float controlling the step size of the Lagrange
        multipliers.
      label_priors: None, or a float `Tensor` of shape [num_labels] with the
        prior probability of each label. It is passed through
        `maybe_create_label_priors` together with `labels` and `weights`.
      surrogate_type: 'xent' or 'hinge'; selects the surrogate loss.
      lambdas_initializer: An initializer for the Lagrange multipliers.
      reuse: Forwarded to `tf.variable_scope` as `reuse`.
      variables_collections: Optional list of collections for the variables.
      trainable: Forwarded as `trainable` when creating the multipliers.
      scope: Optional scope for `variable_scope`.

    Returns:
      A `(loss, other_outputs)` tuple. `loss` is reshaped to the original
      shape reported by `_prepare_labels_logits_weights`. `other_outputs` is
      a dictionary with the keys 'lambdas', 'label_priors',
      'true_positives_lower_bound' and 'false_positives_upper_bound'.

    Raises:
      ValueError: If `surrogate_type` is not `xent` or `hinge`.
    """
    with tf.variable_scope(
            scope, 'tpr_at_fpr', [labels, logits, label_priors], reuse=reuse):
        prepared = _prepare_labels_logits_weights(labels, logits, weights)
        labels, logits, weights, original_shape = prepared
        label_count = util.get_num_labels(logits)
        float_type = logits.dtype

        # Bring the remaining numeric inputs to the dtype of the logits.
        target_rate = util.convert_and_cast(
            target_rate, 'target_rate', float_type)
        dual_rate_factor = util.convert_and_cast(
            dual_rate_factor, 'dual_rate_factor', float_type)

        # One Lagrange multiplier per label.
        lambdas, lambdas_variable = _create_dual_variable(
            'lambdas',
            shape=[label_count],
            dtype=float_type,
            initializer=lambdas_initializer,
            collections=variables_collections,
            trainable=trainable,
            dual_rate_factor=dual_rate_factor)

        label_priors = maybe_create_label_priors(
            label_priors, labels, weights, variables_collections)

        # Negatives are weighted by the multipliers, positives by one.
        surrogate = util.weighted_surrogate_loss(
            labels,
            logits,
            surrogate_type=surrogate_type,
            positive_weights=1.0,
            negative_weights=lambdas)
        weighted_loss = weights * surrogate

        # log(2.0) compensates for logloss not being an upper bound on the
        # indicator function; other surrogates need no correction.
        if surrogate_type == 'xent':
            bound_correction = tf.log(2.0)
        else:
            bound_correction = 1.0
        bound_correction = tf.cast(bound_correction, logits.dtype.base_dtype)

        lambda_term = (
            lambdas * target_rate * (1.0 - label_priors) * bound_correction)
        loss = tf.reshape(weighted_loss - lambda_term, original_shape)

        tp_lower = true_positives_lower_bound(
            labels, logits, weights, surrogate_type)
        fp_upper = false_positives_upper_bound(
            labels, logits, weights, surrogate_type)
        other_outputs = {
            'lambdas': lambdas_variable,
            'label_priors': label_priors,
            'true_positives_lower_bound': tp_lower,
            'false_positives_upper_bound': fp_upper,
        }

        return loss, other_outputs