def test_correct_distance(self):
    """Compare against a NumPy calculation."""
    tf_embeddings = tf.constant([[0.5, 0.5], [1.0, 1.0]])
    expected_distance = np.array([[0, np.sqrt(2) / 2], [np.sqrt(2) / 2, 0]])
    distances = pairwise_distance(tf_embeddings, squared=False)
    self.assertAllClose(expected_distance, distances)
def test_correct_distance_squared(self):
    """Compare against a NumPy calculation for squared distances."""
    tf_embeddings = tf.constant([[0.5, 0.5], [1.0, 1.0]])
    expected_distance = np.array([[0, 0.5], [0.5, 0]])
    distances = pairwise_distance(tf_embeddings, squared=True)
    self.assertAllClose(expected_distance, distances)
def test_positive_distances(self):
    """Test that the pairwise distances are always non-negative."""
    # Create embeddings very close to each other, in [1.0, 1.0 + 2e-7].
    # This encourages rounding errors in the computation.
    embeddings = 1.0 + 2e-7 * tf.random.uniform([64, 6], dtype=tf.float32)
    distances = pairwise_distance(embeddings, squared=False)
    self.assertAllGreaterEqual(distances, 0)
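# The tests above exercise `pairwise_distance`, which is not defined in this
# section. A minimal sketch of such a function, assuming it mirrors the
# behaviour of tfa's `metric_learning.pairwise_distance` (pairwise Euclidean
# distances with optional squaring and clamping at zero); this is an
# illustration, not the library implementation.
import tensorflow as tf


def pairwise_distance_sketch(feature, squared=False):
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed for all pairs at once.
    sq_norms = tf.reduce_sum(tf.square(feature), axis=1, keepdims=True)
    dist_sq = (
        sq_norms
        - 2.0 * tf.matmul(feature, feature, transpose_b=True)
        + tf.transpose(sq_norms)
    )
    # Clamp small negative values caused by floating-point error; this is what
    # the positivity test above is probing.
    dist_sq = tf.maximum(dist_sq, 0.0)
    if squared:
        return dist_sq
    # Note: the real implementation also masks exact zeros before the sqrt to
    # keep gradients finite; omitted here for brevity.
    return tf.sqrt(dist_sq)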
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
) -> tf.Tensor:
    """Computes the triplet loss with hard negative and hard positive mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
    """
    labels, embeddings = y_true, y_pred
    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)
    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin, 0.0)

    # Get final mean triplet loss.
    triplet_loss = tf.reduce_mean(triplet_loss)

    return triplet_loss
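# The losses in this section rely on `_masked_minimum` and `_masked_maximum`,
# which are not shown. A minimal sketch of these helpers, assuming they behave
# like the tfa versions: reduce over one axis while only considering entries
# where `mask` is 1, by shifting the data so masked-out entries can never win.
def _masked_maximum(data, mask, dim=1):
    # Largest masked entry of `data` along `dim`; shape [batch_size, 1] for dim=1.
    axis_minimums = tf.math.reduce_min(data, dim, keepdims=True)
    masked_maximums = (
        tf.math.reduce_max(tf.math.multiply(data - axis_minimums, mask), dim, keepdims=True)
        + axis_minimums
    )
    return masked_maximums


def _masked_minimum(data, mask, dim=1):
    # Smallest masked entry of `data` along `dim`; shape [batch_size, 1] for dim=1.
    axis_maximums = tf.math.reduce_max(data, dim, keepdims=True)
    masked_minimums = (
        tf.math.reduce_min(tf.math.multiply(data - axis_maximums, mask), dim, keepdims=True)
        + axis_maximums
    )
    return masked_minimums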
def Quadruplet_loss(y_true, y_pred):
    """Computes a quadruplet-style loss from the hardest positive and the two
    hardest negatives per anchor, with a fixed margin of 0.2."""
    labels = tf.convert_to_tensor(y_true, name="labels")
    embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=False)
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    # Cast to float32.
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    # Mask out the hardest negative so the second-hardest one can be mined.
    adjacency_not2 = tf.math.equal(pdist_matrix, hard_negatives)
    adjacency_not2 = tf.math.logical_not(adjacency_not2)
    adjacency_not2 = tf.cast(adjacency_not2, dtype=tf.dtypes.float32)
    adjacency_not2_2 = tf.math.multiply(adjacency_not, adjacency_not2)

    # Second-hardest negatives: smallest remaining D_an.
    hard_negatives2 = _masked_minimum(pdist_matrix, adjacency_not2_2)

    # Batch size of the training batch.
    batch_size = tf.size(labels)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    quadruplet_loss = tf.maximum(
        (hard_positives * 2) - hard_negatives - hard_negatives2 + 0.2, 0.0
    )

    return quadruplet_loss
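# A minimal usage sketch for the quadruplet loss above, assuming the
# `_masked_minimum`/`_masked_maximum` helpers sketched earlier. Shapes and
# values are illustrative only; note that labels are passed already shaped
# [batch_size, 1] and that the per-anchor output is reduced by the caller.
labels = tf.constant([[0], [0], [1], [1]])                     # [batch_size, 1]
embeddings = tf.math.l2_normalize(tf.random.normal([4, 8]), axis=1)
per_anchor = Quadruplet_loss(labels, embeddings)               # one value per anchor
loss = tf.reduce_mean(per_anchor)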
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

    Args:
      labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
        multiclass integer labels.
      embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
        not be l2 normalized.
      margin: Float, margin term in the loss definition.

    Returns:
      lifted_loss: tf.float32 scalar.
    """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = tf.shape(labels)
    assert lshape.shape == 1
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = metric_learning.pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    diff = margin - pairwise_distances
    mask = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    # Safe maximum: Temporarily shift negative distances
    # above zero before taking max.
    # This is to take the max only among negatives.
    row_minimums = tf.math.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = (
        tf.math.reduce_max(tf.math.multiply(diff - row_minimums, mask), 1, keepdims=True)
        + row_minimums
    )

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    # where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    # https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = tf.math.maximum(
        row_negative_maximums, tf.transpose(row_negative_maximums)
    )
    diff_tiled = tf.tile(diff, [batch_size, 1])
    mask_tiled = tf.tile(mask, [batch_size, 1])
    max_elements_vect = tf.reshape(tf.transpose(max_elements), [-1, 1])

    loss_exp_left = tf.reshape(
        tf.math.reduce_sum(
            tf.math.multiply(tf.math.exp(diff_tiled - max_elements_vect), mask_tiled),
            1,
            keepdims=True,
        ),
        [batch_size, batch_size],
    )

    loss_mat = max_elements + tf.math.log(loss_exp_left + tf.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = tf.cast(adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
        tf.ones([batch_size])
    )

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = tf.math.reduce_sum(mask_positives) / 2.0

    lifted_loss = tf.math.truediv(
        0.25
        * tf.math.reduce_sum(
            tf.math.square(
                tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)
            )
        ),
        num_positives,
    )
    return lifted_loss
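# A minimal usage sketch for `lifted_struct_loss`; the label values and
# embedding dimensionality are illustrative. Per the docstring above, the
# embeddings are intentionally not l2 normalized.
labels = tf.constant([0, 0, 1, 1], dtype=tf.int32)
embeddings = tf.random.normal([4, 16])
loss = lifted_struct_loss(labels, embeddings, margin=1.0)   # tf.float32 scalar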
def call(self, y_true, y_pred):
    from tensorflow_addons.losses import metric_learning

    self.sd.update_state(y_true, y_pred)

    labels = tf.cast(
        tf.convert_to_tensor(y_true, name="labels"), dtype=tf.dtypes.float32
    )
    if len(labels.shape) == 1:
        labels = tf.reshape(labels, (1, -1))

    embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Build pairwise distance matrix.
    distance_metric = self.distance_metric
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Fetch pairwise labels as adjacency matrix.
    adjacency = self.response_diffs(labels)
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask: negatives that are farther from the anchor than the
    # positive pair.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(
            pdist_matrix_tile, tf.reshape(tf.transpose(pdist_matrix), [-1, 1])
        ),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True
            ),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]
    )
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]
    )
    semi_hard_negatives = tf.where(mask_final, negatives_outside, negatives_inside)

    loss_mat = tf.math.add(self.margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
        tf.ones([batch_size])
    )

    # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
    # in semi-hard, they take all positive pairs except the diagonal.
    # max(n, 1) is needed to avoid a NaN loss, which would halt training;
    # clamping to 1 simply yields zero loss, since everything else is 0.
    num_positives = tf.math.maximum(tf.math.reduce_sum(mask_positives), 1.0)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)
        ),
        num_positives,
    )

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
def call(self, y_true, y_pred):
    from tensorflow_addons.losses import metric_learning

    self.sd.update_state(y_true, y_pred)

    labels = tf.cast(
        tf.convert_to_tensor(y_true, name="labels"), dtype=tf.dtypes.float32
    )
    if len(labels.shape) == 1:
        labels = tf.reshape(labels, (1, -1))

    embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Build pairwise distance matrix.
    distance_metric = self.distance_metric
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Fetch pairwise labels as adjacency matrix.
    adjacency = self.response_diffs(labels)
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if self.soft:
        triplet_loss = tf.math.log1p(tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + self.margin, 0.0)

    # Get final mean triplet loss.
    triplet_loss = tf.reduce_mean(triplet_loss)

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
def triplet_semihard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    """Computes the triplet loss with semi-hard negative mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      distance_metric: str or function, determines distance metric:
        "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        "angular" for cosine similarity.
        A custom function returning a 2-D pairwise distance matrix can also
        be passed here, e.g.:

            def custom_distance(batch):
                batch = 1 - batch @ batch.T
                return batch

            triplet_semihard_loss(labels, batch, distance_metric=custom_distance)

    Returns:
      triplet_loss: float scalar with dtype of y_pred.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(
            pdist_matrix_tile, tf.reshape(tf.transpose(pdist_matrix), [-1, 1])
        ),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True
            ),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]
    )
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]
    )
    semi_hard_negatives = tf.where(mask_final, negatives_outside, negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
        tf.ones([batch_size])
    )

    # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
    # in semi-hard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)
        ),
        num_positives,
    )

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
def call(self, y_true, y_pred):
    from tensorflow_addons.losses import metric_learning

    self.sd.update_state(y_true, y_pred)

    labels = tf.cast(
        tf.convert_to_tensor(y_true, name="labels"), dtype=tf.dtypes.float32
    )
    if len(labels.shape) == 1:
        labels = tf.reshape(labels, (1, -1))

    batch_size = tf.shape(labels)[0]

    embeddings = tf.convert_to_tensor(y_pred, name="embeddings")

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Build pairwise distance matrix.
    distance_metric = self.distance_metric
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Fetch pairwise labels as adjacency matrix.
    adjacency = self.response_diffs(labels)
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    # Per-anchor neighbourhood radius: mean distance minus half a standard
    # deviation.
    radii = (
        tf.reduce_mean(pdist_matrix, axis=1)
        - tf.math.reduce_std(pdist_matrix, axis=1) / 2.0
    )
    neighbors = tf.math.less(pdist_matrix, tf.reshape(radii, (-1, 1)))

    # Hits: same-label neighbours (excluding self); misses: other-label neighbours.
    hits = (
        tf.cast(tf.math.logical_and(neighbors, adjacency), tf.dtypes.float32)
        - tf.linalg.diag(tf.ones([batch_size]))
    )
    misses = tf.cast(
        tf.math.logical_and(neighbors, adjacency_not), tf.dtypes.float32
    )

    nhits = tf.reduce_sum(hits)
    nmisses = tf.reduce_sum(misses)
    n = tf.cast(batch_size, tf.dtypes.float32)

    hits_dists = tf.multiply(pdist_matrix, hits)
    hits_dists = tf.math.divide_no_nan(hits_dists, tf.math.multiply(n, nhits))

    misses_dists = tf.multiply(pdist_matrix, misses)
    misses_dists = tf.math.divide_no_nan(misses_dists, tf.math.multiply(n, nmisses))

    loss = tf.subtract(misses_dists, hits_dists)
    loss = tf.reduce_sum(loss, axis=1)

    if convert_to_float32:
        return tf.cast(loss, embeddings.dtype)
    else:
        return loss
def triplet_semihard_loss(y_true, y_pred, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
    """
    labels, embeddings = y_true, y_pred
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = tf.shape(labels)
    assert lshape.shape == 1
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = metric_learning.pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(
            pdist_matrix_tile, tf.reshape(tf.transpose(pdist_matrix), [-1, 1])
        ),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True
            ),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]
    )
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]
    )
    semi_hard_negatives = tf.where(mask_final, negatives_outside, negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
        tf.ones([batch_size])
    )

    # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
    # in semi-hard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)
        ),
        num_positives,
    )

    return triplet_loss
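# A minimal usage sketch for the functional semi-hard loss above. The labels
# and embedding shapes are illustrative; per the docstring, embeddings are
# l2 normalized before the loss is computed.
labels = tf.constant([0, 0, 1, 1])
embeddings = tf.math.l2_normalize(tf.random.normal([4, 32]), axis=1)
loss = triplet_semihard_loss(labels, embeddings, margin=1.0)   # scalar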
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    """Computes the triplet loss with hard negative and hard positive mining.

    Args:
      y_true: 1-D integer `Tensor` with shape [batch_size] of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
      distance_metric: str or function, determines distance metric:
        "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        "angular" for cosine similarity.
        A custom function returning a 2-D pairwise distance matrix can also
        be passed here, e.g.:

            def custom_distance(batch):
                batch = 1 - batch @ batch.T
                return batch

            triplet_hard_loss(labels, batch, distance_metric=custom_distance)

    Returns:
      triplet_loss: float scalar with dtype of y_pred.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)
    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin, 0.0)

    # Get final mean triplet loss.
    triplet_loss = tf.reduce_mean(triplet_loss)

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
def test_zero_distance():
    """Test that equal embeddings have a pairwise distance of 0."""
    equal_embeddings = tf.constant([[1.0, 0.5], [1.0, 0.5]])
    distances = pairwise_distance(equal_embeddings, squared=False)
    np.testing.assert_allclose(tf.math.reduce_sum(distances), 0, rtol=1e-6, atol=1e-6)
def call(self, inputs, **kwargs):
    x_source, x_driving = inputs
    kp_source_value, kp_source_jacobian = self.kp_extractor(x_source)
    kp_driving_value, kp_driving_jacobian = self.kp_extractor(x_driving)

    generated = {}
    kp_driving_jacobian_inv = tf.linalg.inv(kp_driving_jacobian)
    generated_prediction = self.generator(
        (x_source, kp_driving_value, kp_driving_jacobian_inv,
         kp_source_value, kp_source_jacobian))
    generated.update({
        'kp_source_value': kp_source_value,
        'kp_driving_value': kp_driving_value,
        'prediction': generated_prediction,
    })

    loss_values = {}

    pyramide_real = self.pyramid(x_driving)
    pyramide_generated = self.pyramid(generated_prediction)

    # Keypoint detector normalization loss: penalize keypoints whose nearest
    # neighbour lies closer than kp_loss_koef.
    if self.use_kp_loss:
        kp_source_loss = 0.
        kp_driving_loss = 0.
        kp_loss_koef = 0.7
        for kp in kp_source_value:
            distances = metric_learning.pairwise_distance(kp)
            v, idx = tf.nn.top_k(-distances, 2)
            mins = -v[:, 1]
            kp_source_loss += tf.reduce_sum(kp_loss_koef - mins)
        for kp in kp_driving_value:
            distances = metric_learning.pairwise_distance(kp)
            v, idx = tf.nn.top_k(-distances, 2)
            mins = -v[:, 1]
            kp_driving_loss += tf.reduce_sum(kp_loss_koef - mins)
        kp_loss = (kp_source_loss + kp_driving_loss) / self.bs
        loss_values['kp_loss'] = kp_loss * self.kp_loss_weight

    if sum(self.loss_weights['perceptual']) != 0:
        value_total = 0
        for scale in self.scales:
            x_vgg = self.vgg(pyramide_generated['prediction_' + str(scale)])
            y_vgg = self.vgg(pyramide_real['prediction_' + str(scale)])
            for i, weight in enumerate(self.loss_weights['perceptual']):
                value = tf.reduce_mean(tf.abs(x_vgg[i] - tf.stop_gradient(y_vgg[i])))
                value_total += self.loss_weights['perceptual'][i] * value
        loss_values['perceptual'] = value_total

    if self.loss_weights['generator_gan'] != 0:
        discriminator_maps_generated = self.discriminator(
            (pyramide_generated, tf.stop_gradient(kp_driving_value)))
        discriminator_maps_real = self.discriminator(
            (pyramide_real, tf.stop_gradient(kp_driving_value)))
        value_total = 0
        for scale in self.disc_scales:
            key = f'prediction_map_{scale}'
            value = tf.reduce_mean((1 - discriminator_maps_generated[key]) ** 2)
            value_total += self.loss_weights['generator_gan'] * value
        loss_values['gen_gan'] = value_total

        if sum(self.loss_weights['feature_matching']) != 0:
            value_total = 0
            for scale in self.disc_scales:
                key = f'feature_maps_{scale}'
                for i, (a, b) in enumerate(
                        zip(discriminator_maps_real[key],
                            discriminator_maps_generated[key])):
                    if self.loss_weights['feature_matching'][i] == 0:
                        continue
                    value = tf.reduce_mean(tf.abs(a - b))
                    value_total += self.loss_weights['feature_matching'][i] * value
            loss_values['feature_matching'] = value_total

    if (self.loss_weights['equivariance_value']
            + self.loss_weights['equivariance_jacobian']) != 0:
        transform = Transform(self.train_params['batch_size'],
                              **self.train_params['transform_params'])
        transformed_frame = transform.transform_frame(x_driving)
        transformed_kp_value, transformed_kp_jacobian = self.kp_extractor(
            transformed_frame)

        # Value loss part.
        if self.loss_weights['equivariance_value'] != 0:
            value = tf.reduce_mean(
                tf.abs(kp_driving_value
                       - transform.warp_coordinates(transformed_kp_value)))
            loss_values['equivariance_value'] = (
                self.loss_weights['equivariance_value'] * value)

        # Jacobian loss part.
        if self.loss_weights['equivariance_jacobian'] != 0:
            jacobian_transformed = tf.matmul(
                transform.jacobian(transformed_kp_value, self.grad_tape),
                transformed_kp_jacobian)
            normed_driving = tf.linalg.inv(kp_driving_jacobian)
            normed_transformed = jacobian_transformed
            value = tf.matmul(normed_driving, normed_transformed)
            eye = tf.reshape(tf.eye(2), [1, 1, 2, 2])
            value = tf.reduce_mean(tf.abs(eye - value))
            loss_values['equivariance_jacobian'] = (
                self.loss_weights['equivariance_jacobian'] * value)

    return loss_values, generated
def test_zero_distance(self):
    """Test that equal embeddings have a pairwise distance of 0."""
    equal_embeddings = tf.constant([[1.0, 0.5], [1.0, 0.5]])
    distances = pairwise_distance(equal_embeddings, squared=False)
    self.assertAllClose(tf.math.reduce_sum(distances), 0)
def triplet_semihard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    r"""Computes the triplet loss with semi-hard negative mining.

    Usage:

    >>> y_true = tf.convert_to_tensor([0, 0])
    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric="L2")
    <tf.Tensor: shape=(), dtype=float32, numpy=2.4142137>

    >>> # Calling with callable `distance_metric`
    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
    >>> tfa.losses.triplet_semihard_loss(y_true, y_pred, distance_metric=distance_metric)
    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

    Args:
      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      distance_metric: `str` or a `Callable` that determines distance metric.
        Valid strings are "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        and "angular" for cosine similarity.
        A `Callable` should take a batch of embeddings as input and
        return the pairwise distance matrix.

    Returns:
      triplet_loss: float scalar with dtype of `y_pred`.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)

    batch_size = tf.size(labels)

    # Compute the mask.
    pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
    mask = tf.math.logical_and(
        tf.tile(adjacency_not, [batch_size, 1]),
        tf.math.greater(
            pdist_matrix_tile, tf.reshape(tf.transpose(pdist_matrix), [-1, 1])
        ),
    )
    mask_final = tf.reshape(
        tf.math.greater(
            tf.math.reduce_sum(
                tf.cast(mask, dtype=tf.dtypes.float32), 1, keepdims=True
            ),
            0.0,
        ),
        [batch_size, batch_size],
    )
    mask_final = tf.transpose(mask_final)

    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)
    mask = tf.cast(mask, dtype=tf.dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = tf.reshape(
        _masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size]
    )
    negatives_outside = tf.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = tf.tile(
        _masked_maximum(pdist_matrix, adjacency_not), [1, batch_size]
    )
    semi_hard_negatives = tf.where(mask_final, negatives_outside, negatives_inside)

    loss_mat = tf.math.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = tf.cast(adjacency, dtype=tf.dtypes.float32) - tf.linalg.diag(
        tf.ones([batch_size])
    )

    # In lifted-struct, the authors multiply 0.5 for upper triangular;
    # in semihard, they take all positive pairs except the diagonal.
    num_positives = tf.math.reduce_sum(mask_positives)

    triplet_loss = tf.math.truediv(
        tf.math.reduce_sum(
            tf.math.maximum(tf.math.multiply(loss_mat, mask_positives), 0.0)
        ),
        num_positives,
    )

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
def triplet_hard_loss(
    y_true: TensorLike,
    y_pred: TensorLike,
    margin: FloatTensorLike = 1.0,
    soft: bool = False,
    distance_metric: Union[str, Callable] = "L2",
) -> tf.Tensor:
    r"""Computes the triplet loss with hard negative and hard positive mining.

    Usage:

    >>> y_true = tf.convert_to_tensor([0, 0])
    >>> y_pred = tf.convert_to_tensor([[0.0, 1.0], [1.0, 0.0]])
    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric="L2")
    <tf.Tensor: shape=(), dtype=float32, numpy=1.0>

    >>> # Calling with callable `distance_metric`
    >>> distance_metric = lambda x: tf.linalg.matmul(x, x, transpose_b=True)
    >>> tfa.losses.triplet_hard_loss(y_true, y_pred, distance_metric=distance_metric)
    <tf.Tensor: shape=(), dtype=float32, numpy=0.0>

    Args:
      y_true: 1-D integer `Tensor` with shape `[batch_size]` of
        multiclass integer labels.
      y_pred: 2-D float `Tensor` of embedding vectors. Embeddings should
        be l2 normalized.
      margin: Float, margin term in the loss definition.
      soft: Boolean, if set, use the soft margin version.
      distance_metric: `str` or a `Callable` that determines distance metric.
        Valid strings are "L2" for l2-norm distance,
        "squared-L2" for squared l2-norm distance,
        and "angular" for cosine similarity.
        A `Callable` should take a batch of embeddings as input and
        return the pairwise distance matrix.

    Returns:
      triplet_loss: float scalar with dtype of `y_pred`.
    """
    labels, embeddings = y_true, y_pred

    convert_to_float32 = (
        embeddings.dtype == tf.dtypes.float16
        or embeddings.dtype == tf.dtypes.bfloat16
    )
    precise_embeddings = (
        tf.cast(embeddings, tf.dtypes.float32) if convert_to_float32 else embeddings
    )

    # Reshape label tensor to [batch_size, 1].
    lshape = tf.shape(labels)
    labels = tf.reshape(labels, [lshape[0], 1])

    # Build pairwise distance matrix.
    if distance_metric == "L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=False
        )
    elif distance_metric == "squared-L2":
        pdist_matrix = metric_learning.pairwise_distance(
            precise_embeddings, squared=True
        )
    elif distance_metric == "angular":
        pdist_matrix = metric_learning.angular_distance(precise_embeddings)
    else:
        pdist_matrix = distance_metric(precise_embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = tf.math.equal(labels, tf.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = tf.math.logical_not(adjacency)
    adjacency_not = tf.cast(adjacency_not, dtype=tf.dtypes.float32)

    # hard negatives: smallest D_an.
    hard_negatives = _masked_minimum(pdist_matrix, adjacency_not)

    batch_size = tf.size(labels)

    adjacency = tf.cast(adjacency, dtype=tf.dtypes.float32)
    mask_positives = adjacency - tf.linalg.diag(tf.ones([batch_size]))

    # hard positives: largest D_ap.
    hard_positives = _masked_maximum(pdist_matrix, mask_positives)

    if soft:
        triplet_loss = tf.math.log1p(tf.math.exp(hard_positives - hard_negatives))
    else:
        triplet_loss = tf.maximum(hard_positives - hard_negatives + margin, 0.0)

    # Get final mean triplet loss.
    triplet_loss = tf.reduce_mean(triplet_loss)

    if convert_to_float32:
        return tf.cast(triplet_loss, embeddings.dtype)
    else:
        return triplet_loss
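# A minimal end-to-end sketch of how the hard-mining loss above is typically
# used with Keras via tensorflow_addons. The encoder architecture and dataset
# are placeholders; the key points are that the model emits l2-normalized
# embeddings and that `fit` receives integer class labels.
import tensorflow as tf
import tensorflow_addons as tfa

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(64, activation=None),                      # embedding head
    tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)),
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tfa.losses.TripletHardLoss(margin=1.0, soft=False),
)
# model.fit(train_images, train_labels, batch_size=32, epochs=5)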