def _midn_loss_mine_hardest_negative(self, labels, losses):
    """Hardest negative mining of the MIDN loss.

    Selects every positive class plus, for each example, the one negative
    class picked by `utils.masked_argmax` over `losses` (restricted to the
    entries where `labels` is 0).

    Args:
        labels: A [batch, num_classes] float tensor, where `1` denotes the
            presence of a class.
        losses: A [batch, num_classes] float tensor, the losses predicted by
            the model.

    Returns:
        mask: A [batch, num_classes] float tensor where `1` denotes the
            selected entry.
    """
    _, num_classes = utils.get_tensor_shape(labels)

    # Per-example index of the chosen negative class, shape = [batch].
    hardest_negative = utils.masked_argmax(
        data=losses, mask=1.0 - labels, dim=1)

    # One-hot encoding of the chosen index, shape = [batch, num_classes].
    negative_masks = tf.one_hot(
        hardest_negative, depth=num_classes, dtype=tf.float32)

    # Take the union instead of the sum: if an example has no negative class
    # at all, the chosen index necessarily falls on a positive entry, and
    # adding would yield 2.0 there instead of a binary mask value.
    return tf.maximum(labels, negative_masks)
def _calc_oicr_loss(self, labels, num_proposals, proposals, scores_0,
                    scores_1, scope, iou_threshold=0.5):
    """Calculates the OICR loss at refinement stage `i`.

    The scores of the `k-th` refinement are used to build soft targets that
    supervise the scores of the `(k+1)-th` refinement.

    Args:
        labels: A [batch, num_classes] float tensor; a value greater than 0
            marks the class as present in the image.
        num_proposals: A [batch] int tensor.
        proposals: A [batch, max_num_proposals, 4] float tensor.
        scores_0: A [batch, max_num_proposal, 1 + num_classes] float tensor,
            representing the proposal score at `k-th` refinement.
        scores_1: A [batch, max_num_proposal, 1 + num_classes] float tensor,
            representing the proposal score at `(k+1)-th` refinement.
        scope: Name passed to `tf.name_scope` for the ops created here.
        iou_threshold: Minimum IoU with a class's top-scoring proposal for a
            proposal to receive that class as a target.

    Returns:
        oicr_cross_entropy_loss: a scalar float tensor.
    """
    with tf.name_scope(scope):
        batch, max_num_proposals, _ = utils.get_tensor_shape(scores_0)

        # 1.0 for real proposals, 0.0 for padding.
        # valid_mask shape = [batch, max_num_proposals].
        valid_mask = tf.sequence_mask(
            num_proposals, maxlen=max_num_proposals, dtype=tf.float32)

        # Per class, the index of the top-scoring valid proposal (the
        # background column 0 is dropped before the search).
        # top_proposal_ind shape = [batch, num_classes].
        class_probs = tf.nn.softmax(scores_0, axis=-1)[:, :, 1:]
        top_proposal_ind = utils.masked_argmax(
            class_probs, tf.expand_dims(valid_mask, axis=-1), dim=1)

        batch_ind = tf.range(batch, dtype=tf.int64)
        flat_proposals = tf.reshape(proposals, [-1, 4])

        def _class_targets(top_ind, class_label):
            """Builds the [batch, max_num_proposals] targets of one class."""
            # Box of the top-scoring proposal, shape = [batch, 4].
            seed_box = tf.gather_nd(
                proposals, tf.stack([batch_ind, top_ind], axis=-1))

            # IoU between every proposal and the seed box of its image.
            # overlaps shape = [batch, max_num_proposals].
            seed_box_tiled = tf.tile(
                tf.expand_dims(seed_box, axis=1), [1, max_num_proposals, 1])
            overlaps = box_utils.iou(
                flat_proposals, tf.reshape(seed_box_tiled, [-1, 4]))
            overlaps = tf.reshape(overlaps, [batch, max_num_proposals])

            # Zero the targets of images that do not contain the class.
            hits = tf.to_float(tf.greater_equal(overlaps, iou_threshold))
            return tf.where(class_label > 0, x=hits, y=tf.zeros_like(hits))

        # foreground shape = [batch, max_num_proposals, num_classes].
        foreground = tf.stack(
            [
                _class_targets(top_ind, class_label)
                for top_ind, class_label in zip(
                    tf.unstack(top_proposal_ind, axis=-1),
                    tf.unstack(labels, axis=-1))
            ],
            axis=-1)

        # A proposal with no foreground target becomes background; the
        # targets are then normalized so that each proposal sums to 1.0.
        # targets shape = [batch, max_num_proposals, 1 + num_classes].
        is_background = tf.logical_not(
            tf.reduce_sum(foreground, axis=-1) > 0)
        targets = tf.concat(
            [tf.expand_dims(tf.to_float(is_background), axis=-1), foreground],
            axis=-1)
        targets = tf.div(
            targets, tf.reduce_sum(targets, axis=-1, keepdims=True))

        sums_to_one = tf.reduce_all(
            tf.abs(tf.reduce_sum(targets, axis=-1) - 1) < 1e-6)
        assert_op = tf.Assert(
            sums_to_one, ["Probabilities not sum to ONE", targets])

        # Cross entropy against the fixed targets, averaged over the valid
        # proposals of each image and then over the batch.
        with tf.control_dependencies([assert_op]):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                labels=tf.stop_gradient(targets), logits=scores_1)
            oicr_cross_entropy_loss = tf.reduce_mean(
                utils.masked_avg(data=losses, mask=valid_mask, dim=1))

    return oicr_cross_entropy_loss