# Example #1
    def test_masked_sum(self):
        """Checks utils.masked_sum: masked-out entries are ignored in the sum."""
        tf.reset_default_graph()

        data = tf.placeholder(tf.float32, shape=[None, None])
        mask = tf.placeholder(tf.float32, shape=[None, None])
        masked_sums = utils.masked_sum(data, mask)

        # Same data tensor, two complementary masks.
        cases = [
            ([[1, 0, 1], [0, 1, 0]], [[4], [5]]),
            ([[0, 1, 0], [1, 0, 1]], [[2], [10]]),
        ]
        with self.test_session() as sess:
            for mask_value, expected in cases:
                result = sess.run(masked_sums,
                                  feed_dict={
                                      data: [[1, 2, 3], [4, 5, 6]],
                                      mask: mask_value,
                                  })
                self.assertAllClose(result, expected)
# Example #2
    def _build_latent_network(self,
                              num_proposals,
                              proposal_features,
                              num_classes=20,
                              num_latent_factors=20,
                              proba_h_given_c=None,
                              proba_h_use_sigmoid=False):
        """Builds the Multiple Instance Detection Network with latent factors.

    MIDN: An attention network. Unlike the plain MIDN, the per-class
    attention logits are factorized through a low-rank latent space h:
    logits_r_given_c = logits_r_given_h x proba_h_given_c.

    Args:
      num_proposals: A [batch] int tensor, number of valid proposals per image.
      proposal_features: A [batch, max_num_proposals, features_dims] 
        float tensor.
      num_classes: Number of classes.
      num_latent_factors: Number of latent factors h.
      proba_h_given_c: Optional [num_latent_factors, num_classes] float
        tensor. If None, it is learned inside this function (see the
        'proba_h_given_c' variable scope below); otherwise the given
        tensor is used as-is, e.g. to share it across networks.
      proba_h_use_sigmoid: If True, normalize logits_h_given_c with a
        sigmoid instead of a softmax over the latent dimension. Only used
        when proba_h_given_c is None.

    Returns:
      class_logits: A [batch, num_classes] float tensor.
      proposal_scores: A [batch, max_num_proposals, num_classes] float tensor.
      proba_r_given_c: A [batch, max_num_proposals, num_classes] float tensor.
      proba_h_given_c: A [num_latent_factors, num_classes] float tensor.
    """
        # NOTE(review): assert is stripped under `python -O`; an explicit check
        # would be more robust, kept as-is here.
        if proba_h_given_c is not None:
            assert proba_h_given_c.get_shape()[0].value == num_latent_factors

        batch, max_num_proposals, _ = utils.get_tensor_shape(proposal_features)
        # mask shape = [batch, max_num_proposals, 1]; 1.0 for valid proposals,
        # 0.0 for padding beyond num_proposals.
        mask = tf.sequence_mask(num_proposals,
                                maxlen=max_num_proposals,
                                dtype=tf.float32)
        mask = tf.expand_dims(mask, axis=-1)

        # Calculates the values of following tensors:
        #   logits_c_given_r shape = [batch, max_num_proposals, num_classes].
        #   logits_r_given_h shape = [batch, max_num_proposals, num_latent_factors].
        #   logits_h_given_c shape = [num_latent_factors, num_classes].

        with tf.variable_scope('midn'):
            logits_c_given_r = slim.fully_connected(proposal_features,
                                                    num_outputs=num_classes,
                                                    activation_fn=None,
                                                    scope='proba_c_given_r')
            logits_r_given_h = slim.fully_connected(
                proposal_features,
                num_outputs=num_latent_factors,
                activation_fn=None,
                scope='proba_r_given_h')

            if proba_h_given_c is None:
                # Feeding an identity matrix through a fully-connected layer
                # materializes its [num_classes, num_latent_factors] weight
                # matrix; transposed to [num_latent_factors, num_classes].
                logits_h_given_c = slim.fully_connected(
                    tf.diag(tf.ones([num_classes])),
                    num_outputs=num_latent_factors,
                    activation_fn=None,
                    scope='proba_h_given_c')
                logits_h_given_c = tf.transpose(logits_h_given_c)
                tf.summary.histogram('logits_h_given_c', logits_h_given_c)

                if proba_h_use_sigmoid:
                    proba_h_given_c = tf.nn.sigmoid(logits_h_given_c)
                else:
                    # Softmax over the latent dimension (axis=0).
                    proba_h_given_c = tf.nn.softmax(logits_h_given_c, axis=0)

        # Project latent attention logits back to class space:
        # [batch * max_num_proposals, num_latent_factors] x
        # [num_latent_factors, num_classes] -> reshaped to
        # [batch, max_num_proposals, num_classes].
        logits_r_given_c = tf.matmul(
            tf.reshape(logits_r_given_h, [-1, num_latent_factors]),
            proba_h_given_c)
        logits_r_given_c = tf.reshape(logits_r_given_c,
                                      [batch, max_num_proposals, num_classes])

        # Per-class attention over proposals (softmax along dim 1), re-masked
        # so padded proposals contribute exactly zero.
        proba_r_given_c = utils.masked_softmax(data=logits_r_given_c,
                                               mask=mask,
                                               dim=1)
        proba_r_given_c = tf.multiply(mask, proba_r_given_c)

        # Aggregates the logits.

        class_logits = tf.multiply(logits_c_given_r, proba_r_given_c)
        # class_logits shape = [batch, 1, num_classes] after the masked sum.
        class_logits = utils.masked_sum(data=class_logits, mask=mask, dim=1)

        proposal_scores = tf.multiply(tf.nn.sigmoid(class_logits),
                                      proba_r_given_c)
        #proposal_scores = tf.multiply(
        #    tf.nn.softmax(class_logits), proba_r_given_c)

        tf.summary.histogram('midn/logits_c_given_r', logits_c_given_r)
        tf.summary.histogram('midn/logits_r_given_h', logits_r_given_h)
        tf.summary.histogram('midn/class_logits', class_logits)

        # Squeeze drops the reduced proposal dim: [batch, num_classes].
        return (tf.squeeze(class_logits, axis=1), proposal_scores,
                proba_r_given_c, proba_h_given_c)
# Example #3
    def _build_midn_network(self,
                            num_proposals,
                            proposal_features,
                            num_classes=20):
        """Builds the Multiple Instance Detection Network.

    MIDN: An attention network.

    Args:
      num_proposals: A [batch] int tensor.
      proposal_features: A [batch, max_num_proposals, features_dims] 
        float tensor.
      num_classes: Number of classes.

    Returns:
      logits: A [batch, num_classes] float tensor.
      proposal_scores: A [batch, max_num_proposals, num_classes] float tensor.
      proba_r_given_c: A [batch, max_num_proposals, num_classes] float tensor.
    """
        with tf.name_scope('multi_instance_detection'):

            _, max_num_proposals, _ = utils.get_tensor_shape(proposal_features)

            # valid_mask shape = [batch, max_num_proposals, 1]; 1.0 for real
            # proposals, 0.0 for padding.
            valid_mask = tf.expand_dims(
                tf.sequence_mask(num_proposals,
                                 maxlen=max_num_proposals,
                                 dtype=tf.float32),
                axis=-1)

            # Two parallel linear heads over the proposal features, each of
            # shape [batch, max_num_proposals, num_classes]:
            #   logits_r_given_c - attention branch.
            #   logits_c_given_r - classification branch.
            with tf.variable_scope('midn'):
                logits_r_given_c = slim.fully_connected(
                    proposal_features,
                    num_outputs=num_classes,
                    activation_fn=None,
                    scope='proba_r_given_c')
                logits_c_given_r = slim.fully_connected(
                    proposal_features,
                    num_outputs=num_classes,
                    activation_fn=None,
                    scope='proba_c_given_r')

            # Per-class attention over proposals: softmax along the proposal
            # dimension, then re-masked so padded slots contribute zero.
            proba_r_given_c = utils.masked_softmax(
                data=tf.multiply(valid_mask, logits_r_given_c),
                mask=valid_mask,
                dim=1)
            proba_r_given_c = tf.multiply(valid_mask, proba_r_given_c)

            # Attention-weighted aggregation of the classification logits,
            # yielding shape [batch, 1, num_classes].
            class_logits = utils.masked_sum(
                data=tf.multiply(logits_c_given_r, proba_r_given_c),
                mask=valid_mask,
                dim=1)

            proposal_scores = tf.multiply(tf.nn.sigmoid(class_logits),
                                          proba_r_given_c)
            #proposal_scores = tf.multiply(
            #    tf.nn.softmax(class_logits), proba_r_given_c)

            tf.summary.histogram('midn/logits_r_given_c', logits_r_given_c)
            tf.summary.histogram('midn/logits_c_given_r', logits_c_given_r)
            tf.summary.histogram('midn/proposal_scores', proposal_scores)
            tf.summary.histogram('midn/class_logits', class_logits)

        return tf.squeeze(class_logits,
                          axis=1), proposal_scores, proba_r_given_c