Exemplo n.º 1
0
    def _midn_loss_mine_hardest_negative(self, labels, losses):
        """Hardest negative mining of the MIDN loss.

        For every example, `utils.masked_argmax` is run over `losses`
        restricted to the negative classes (mask `1.0 - labels`). The
        returned column is marked with `1` and the resulting one-hot mask is
        added to `labels`, so the output selects the positive classes plus
        one mined negative per example.

        Args:
          labels: A [batch, num_classes] float tensor, where `1` denotes the
            presence of a class.
          losses: A [batch, num_classes] float tensor, the losses predicted
            by the model.

        Returns:
          mask: A [batch, num_classes] float tensor where `1` denotes the
            selected entry.
        """
        _, num_classes = utils.get_tensor_shape(labels)

        # Column index mined for each row; only entries whose label is 0
        # take part because the mask is `1.0 - labels`.
        hardest_negative = utils.masked_argmax(data=losses,
                                               mask=1.0 - labels,
                                               dim=1)

        # One `1.0` per row at the mined column. `tf.one_hot` builds the same
        # [batch, num_classes] float32 mask that scattering (row, column)
        # pairs through the deprecated `tf.sparse_to_dense` would produce.
        negative_masks = tf.one_hot(hardest_negative,
                                    depth=num_classes,
                                    dtype=tf.float32)

        # NOTE(review): for a row without any negative class the mined column
        # may coincide with a positive label, making the sum exceed 1 there.
        # This depends on how `utils.masked_argmax` treats an all-zero mask;
        # confirm against its implementation.
        return tf.add(labels, negative_masks)
Exemplo n.º 2
0
    def _calc_oicr_loss(self,
                        labels,
                        num_proposals,
                        proposals,
                        scores_0,
                        scores_1,
                        scope,
                        iou_threshold=0.5):
        """Calculates the OICR loss at refinement stage `k`.

        The scores of the `k-th` refinement (`scores_0`) are used to build
        soft pseudo labels for every proposal, and the scores of the
        `(k+1)-th` refinement (`scores_1`) are trained against them with a
        softmax cross entropy. Gradients do not flow into the pseudo labels.

        Args:
          labels: A [batch, num_classes] float tensor.
          num_proposals: A [batch] int tensor.
          proposals: A [batch, max_num_proposals, 4] float tensor.
          scores_0: A [batch, max_num_proposals, 1 + num_classes] float
            tensor, representing the proposal score at `k-th` refinement.
          scores_1: A [batch, max_num_proposals, 1 + num_classes] float
            tensor, representing the proposal score at `(k+1)-th` refinement.
          scope: Name passed to `tf.name_scope` for the ops created here.
          iou_threshold: A proposal becomes a positive target for a class
            when its IoU with that class's most confident proposal is
            greater than or equal to this value.

        Returns:
          oicr_cross_entropy_loss: a scalar float tensor.
        """
        with tf.name_scope(scope):
            batch, max_num_proposals, _ = utils.get_tensor_shape(scores_0)

            # For each class, look for the most confident proposal.
            #   Column 0 of the scores is the background and is dropped.
            #   Padded proposals are excluded through `proposal_mask`.
            #   proposal_ind shape = [batch, num_classes].

            proposal_mask = tf.sequence_mask(num_proposals,
                                             maxlen=max_num_proposals,
                                             dtype=tf.float32)
            foreground_probs = tf.nn.softmax(scores_0, axis=-1)[:, :, 1:]
            proposal_ind = utils.masked_argmax(
                foreground_probs,
                tf.expand_dims(proposal_mask, axis=-1),
                dim=1)

            # The flattened proposals are the same for every class, so the
            # reshape is built once instead of once per loop iteration.

            proposals_flat = tf.reshape(proposals, [-1, 4])

            # Deal with the most confident proposal per each class.
            #   Unstack the `proposal_ind`, `labels`.
            #   proposal_labels shape = [batch, max_num_proposals, num_classes].

            proposal_labels = []
            indices_0 = tf.range(batch, dtype=tf.int64)
            for indices_1, label_per_class in zip(
                    tf.unstack(proposal_ind, axis=-1),
                    tf.unstack(labels, axis=-1)):

                # Gather the most confident proposal for the class.
                #   confident_proposal shape = [batch, 4].

                indices = tf.stack([indices_0, indices_1], axis=-1)
                confident_proposal = tf.gather_nd(proposals, indices)

                # Get the IoU from all the proposals to the most confident
                # proposal.
                #   iou shape = [batch, max_num_proposals].

                confident_proposal_tiled = tf.tile(
                    tf.expand_dims(confident_proposal, axis=1),
                    [1, max_num_proposals, 1])
                iou = box_utils.iou(
                    proposals_flat,
                    tf.reshape(confident_proposal_tiled, [-1, 4]))
                iou = tf.reshape(iou, [batch, max_num_proposals])

                # Filter out irrelevant predictions using image-level label.
                #   The condition is [batch] while `target` is
                #   [batch, max_num_proposals]; this relies on the TF 1.x
                #   `tf.where` behaviour of selecting whole rows for a
                #   rank-1 condition.

                target = tf.cast(tf.greater_equal(iou, iou_threshold),
                                 tf.float32)
                target = tf.where(label_per_class > 0,
                                  x=target,
                                  y=tf.zeros_like(target))
                proposal_labels.append(target)

            proposal_labels = tf.stack(proposal_labels, axis=-1)

            # Add background targets, and normalize the sum value to 1.0.
            #   A proposal is background exactly when it has no foreground
            #   target, so every row sums to at least 1 before the division.
            #   proposal_labels shape = [batch, max_num_proposals, 1 + num_classes].

            bkg = tf.logical_not(tf.reduce_sum(proposal_labels, axis=-1) > 0)
            proposal_labels = tf.concat(
                [tf.expand_dims(tf.cast(bkg, tf.float32), axis=-1),
                 proposal_labels],
                axis=-1)

            proposal_labels = tf.divide(
                proposal_labels,
                tf.reduce_sum(proposal_labels, axis=-1, keepdims=True))

            assert_op = tf.Assert(
                tf.reduce_all(
                    tf.abs(tf.reduce_sum(proposal_labels, axis=-1) -
                           1) < 1e-6),
                ["Probabilities not sum to ONE", proposal_labels])

            # Compute the loss.
            #   The pseudo labels are treated as constants, and padded
            #   proposals are excluded from the per-image average through
            #   `proposal_mask`.

            with tf.control_dependencies([assert_op]):
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    labels=tf.stop_gradient(proposal_labels), logits=scores_1)
                oicr_cross_entropy_loss = tf.reduce_mean(
                    utils.masked_avg(data=losses, mask=proposal_mask, dim=1))

        return oicr_cross_entropy_loss