def test_without_logits():
    # Predictions represented as probabilities
    prediction_tensor = tf.constant(
        [[0.97], [0.91], [0.73], [0.27], [0.09], [0.03]], tf.float32
    )
    # Ground truth
    target_tensor = tf.constant([[1], [1], [1], [0], [0], [0]], tf.float32)

    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor, y_pred=prediction_tensor, alpha=None, gamma=None
    )
    bce = tf.reduce_sum(
        K.binary_crossentropy(target_tensor, prediction_tensor), axis=-1
    )

    # When alpha and gamma are None, focal loss should be equal to BCE
    assert np.allclose(fl, bce)

    # When gamma == 2.0, check the order of magnitude of BCE / FL,
    # i.e. 10 ** floor(log10(bce / fl)), against the expected values.
    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor, y_pred=prediction_tensor, alpha=None, gamma=2.0
    )
    order_of_ratio = tf.pow(10.0, tf.math.floor(log10(bce / fl)))
    pow_values = tf.constant([1000, 100, 10, 10, 100, 1000])
    assert np.allclose(order_of_ratio, pow_values)
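# The test above calls a log10 helper that is not shown in this snippet and
# that tf.math does not provide directly. A minimal sketch, assuming the
# usual change-of-base definition:
def log10(x):
    """Base-10 logarithm via change of base: log10(x) = ln(x) / ln(10)."""
    return tf.math.log(x) / tf.math.log(10.0)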
def test_with_logits(self):
    # Predictions represented as logits
    prediction_tensor = tf.constant(
        [
            [self.to_logit(0.97)],
            [self.to_logit(0.91)],
            [self.to_logit(0.73)],
            [self.to_logit(0.27)],
            [self.to_logit(0.09)],
            [self.to_logit(0.03)],
        ],
        tf.float32,
    )
    # Ground truth
    target_tensor = tf.constant([[1], [1], [1], [0], [0], [0]], tf.float32)

    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor,
        y_pred=prediction_tensor,
        from_logits=True,
        alpha=None,
        gamma=None,
    )
    bce = K.binary_crossentropy(target_tensor, prediction_tensor, from_logits=True)

    # When alpha and gamma are None, focal loss should be equal to BCE
    self.assertAllClose(fl, bce)

    # When gamma == 2.0, check the order of magnitude of BCE / FL,
    # i.e. 10 ** floor(log10(bce / fl)), against the expected values.
    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor,
        y_pred=prediction_tensor,
        from_logits=True,
        alpha=None,
        gamma=2.0,
    )
    order_of_ratio = tf.pow(10.0, tf.math.floor(self.log10(bce / fl)))
    pow_values = tf.constant([[1000], [100], [10], [10], [100], [1000]])
    self.assertAllClose(order_of_ratio, pow_values)
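# Both logit-based tests convert probabilities to logits with a to_logit
# helper (a method on the test case above, a free function further below).
# Its body is not shown; a minimal sketch, assuming it is the inverse sigmoid:
def to_logit(probability):
    """Inverse sigmoid: logit(p) = ln(p / (1 - p)) for p in (0, 1)."""
    return np.log(probability / (1.0 - probability))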
def test_focal_loss(self):
    from tensorflow_addons.losses import sigmoid_focal_crossentropy

    y_true = tf.constant([1.0, 0.0, 0.0])

    # A poor prediction for the positive class yields a large loss.
    y_pred = tf.constant([0.1, 0.8, 0.1])
    loss = sigmoid_focal_crossentropy(y_true=y_true, y_pred=y_pred)
    print(loss)

    # A mostly correct prediction yields a much smaller loss.
    y_pred = tf.constant([0.7, 0.1, 0.1])
    loss = sigmoid_focal_crossentropy(y_true=y_true, y_pred=y_pred)
    print(loss)
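# What test_focal_loss probes is the focal modulation FL = (1 - p_t)^gamma * BCE:
# well-classified examples are down-weighted far more aggressively than
# mistakes. A small standalone sketch (using tensorflow_addons' defaults
# alpha=0.25, gamma=2.0) making that comparison explicit:
import tensorflow as tf
from tensorflow_addons.losses import sigmoid_focal_crossentropy

y_true = tf.constant([[1.0]])
for p in (0.1, 0.7, 0.99):
    y_pred = tf.constant([[p]])
    fl = sigmoid_focal_crossentropy(y_true=y_true, y_pred=y_pred)
    print(f"p={p}: focal loss={fl.numpy()[0]:.5f}")
# The loss shrinks rapidly as the prediction for the positive example improves.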
def test_with_logits():
    # Predictions represented as logits
    prediction_tensor = tf.constant(
        [
            [to_logit(0.97)],
            [to_logit(0.91)],
            [to_logit(0.73)],
            [to_logit(0.27)],
            [to_logit(0.09)],
            [to_logit(0.03)],
        ],
        tf.float32,
    )
    # Ground truth
    target_tensor = tf.constant([[1], [1], [1], [0], [0], [0]], tf.float32)

    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor,
        y_pred=prediction_tensor,
        from_logits=True,
        alpha=None,
        gamma=None,
    )
    bce = tf.reduce_sum(
        K.binary_crossentropy(target_tensor, prediction_tensor, from_logits=True),
        axis=-1,
    )

    # When alpha and gamma are None, focal loss should be equal to BCE
    np.testing.assert_allclose(fl, bce)

    # When gamma == 2.0, check the order of magnitude of BCE / FL,
    # i.e. 10 ** floor(log10(bce / fl)), against the expected values.
    fl = sigmoid_focal_crossentropy(
        y_true=target_tensor,
        y_pred=prediction_tensor,
        from_logits=True,
        alpha=None,
        gamma=2.0,
    )
    order_of_ratio = tf.pow(10.0, tf.math.floor(log10(bce / fl)))
    pow_values = tf.constant([1000, 100, 10, 10, 100, 1000])
    np.testing.assert_allclose(order_of_ratio, pow_values)
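# Why pow_values is [1000, 100, 10, 10, 100, 1000]: with alpha=None and
# gamma=2.0, FL = (1 - p_t)^2 * BCE, so BCE / FL = 1 / (1 - p_t)^2. For the
# best-classified examples p_t = 0.97, giving 1 / 0.03^2 ≈ 1.1e3 (order 10^3);
# for p_t = 0.73, 1 / 0.27^2 ≈ 13.7 (order 10^1). A quick numeric check:
import numpy as np

# p_t is the probability assigned to the true class: p for positives,
# 1 - p for negatives.
p_t = np.array([0.97, 0.91, 0.73, 0.73, 0.91, 0.97])
ratio = 1.0 / (1.0 - p_t) ** 2
print(10.0 ** np.floor(np.log10(ratio)))  # [1000. 100. 10. 10. 100. 1000.]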
def step_fn(inputs):
    """Per-Replica StepFn."""
    features, labels, _ = utils.create_feature_and_label(inputs)

    with tf.GradientTape() as tape:
        logits = model(features, training=True)
        if isinstance(logits, (list, tuple)):
            # If model returns a tuple of (logits, covmat), extract logits
            logits, _ = logits
        if FLAGS.use_bfloat16:
            logits = tf.cast(logits, tf.float32)

        loss_logits = tf.squeeze(logits, axis=1)
        if FLAGS.loss_type == 'cross_entropy':
            logging.info('Using cross entropy loss')
            negative_log_likelihood = tf.nn.sigmoid_cross_entropy_with_logits(
                labels, loss_logits)
        elif FLAGS.loss_type == 'focal_cross_entropy':
            logging.info('Using focal cross entropy loss')
            negative_log_likelihood = tfa_losses.sigmoid_focal_crossentropy(
                labels, loss_logits, alpha=FLAGS.focal_loss_alpha,
                gamma=FLAGS.focal_loss_gamma, from_logits=True)
        elif FLAGS.loss_type == 'mse':
            logging.info('Using mean squared error loss')
            loss_probs = tf.nn.sigmoid(loss_logits)
            negative_log_likelihood = tf.keras.losses.mean_squared_error(
                labels, loss_probs)
        elif FLAGS.loss_type == 'mae':
            logging.info('Using mean absolute error loss')
            loss_probs = tf.nn.sigmoid(loss_logits)
            negative_log_likelihood = tf.keras.losses.mean_absolute_error(
                labels, loss_probs)
        else:
            # Guard against silently leaving negative_log_likelihood undefined.
            raise ValueError('Unsupported loss_type: {}'.format(FLAGS.loss_type))

        negative_log_likelihood = tf.reduce_mean(negative_log_likelihood)
        l2_loss = sum(model.losses)
        loss = negative_log_likelihood + l2_loss
        # Scale the loss given the TPUStrategy will reduce sum all gradients.
        scaled_loss = loss / strategy.num_replicas_in_sync

    grads = tape.gradient(scaled_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    probs = tf.nn.sigmoid(logits)
    # Cast labels to discrete for ECE computation.
    ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32)
    one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32), depth=num_classes)
    ece_probs = tf.concat([1. - probs, probs], axis=1)
    auc_probs = tf.squeeze(probs, axis=1)
    pred_labels = tf.math.argmax(ece_probs, axis=-1)

    sample_weight = generate_sample_weight(
        labels, class_weight['train/{}'.format(dataset_name)],
        FLAGS.ece_label_threshold)

    metrics['train/negative_log_likelihood'].update_state(negative_log_likelihood)
    metrics['train/accuracy'].update_state(labels, pred_labels)
    metrics['train/accuracy_weighted'].update_state(
        ece_labels, pred_labels, sample_weight=sample_weight)
    metrics['train/auroc'].update_state(labels, auc_probs)
    metrics['train/loss'].update_state(loss)
    metrics['train/ece'].add_batch(ece_probs, label=ece_labels)
    metrics['train/precision'].update_state(ece_labels, pred_labels)
    metrics['train/recall'].update_state(ece_labels, pred_labels)
    metrics['train/f1'].update_state(one_hot_labels, ece_probs)
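# step_fn above is a per-replica function and does not run on its own. A
# minimal sketch of the surrounding driver loop, assuming a tf.distribute
# strategy and an iterator over the distributed dataset (names hypothetical):
@tf.function
def train_step(iterator, steps_per_epoch):
    for _ in tf.range(steps_per_epoch):
        strategy.run(step_fn, args=(next(iterator),))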
def loss(y_true, y_pred, reduction="mean"):
    # gamma is captured from the enclosing scope.
    fl = sigmoid_focal_crossentropy(y_true, y_pred, gamma=gamma)
    return apply_reduction(fl, reduction=reduction)
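# apply_reduction is not defined in this snippet; a minimal sketch, assuming
# it collapses the per-example focal losses according to the requested mode:
def apply_reduction(values, reduction="mean"):
    if reduction == "mean":
        return tf.reduce_mean(values)
    if reduction == "sum":
        return tf.reduce_sum(values)
    return values  # "none": keep per-example losses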