Example #1
def test_ffnn(self):
    with tf.Graph().as_default():
        input_emb = tf.random_uniform([3, 5, 8])
        output_emb = common_layers.ffnn(input_emb=input_emb,
                                        hidden_sizes=[7, 9],
                                        dropout_ratio=0.2,
                                        mode=tf.estimator.ModeKeys.TRAIN)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            actual_output_emb = sess.run(output_emb)
        self.assertAllEqual(actual_output_emb.shape, [3, 5, 9])
def decomposable_attention(emb1,
                           len1,
                           emb2,
                           len2,
                           hidden_size,
                           hidden_layers,
                           dropout_ratio,
                           mode,
                           epsilon=1e-8):
    """See https://arxiv.org/abs/1606.01933.

  Args:
    emb1: A Tensor with shape [batch_size, max_len1, emb_size] representing the
        first input sequence.
    len1: A Tensor with shape [batch_size], indicating the true sequence length
        of `emb1`. This is required due to padding.
    emb2: A Tensor with shape [batch_size, max_len2, emb_size] representing the
        second input sequence.
    len2: A Tensor with shape [batch_size], indicating the true sequence length
        of `emb1`. This is required due to padding.
    hidden_size: An integer indicating the size of each hidden layer in the
        feed-forward neural networks.
    hidden_layers: An integer indicating the number of hidden layers in the
        feed-forward neural networks.
    dropout_ratio: The probability of dropping out each unit in the activation.
        This can be None, and is only applied during training.
    mode: One of the keys from tf.estimator.ModeKeys.
    epsilon: A small positive constant to add to masks for numerical stability.

  Returns:
    final_emb: A Tensor with shape [batch_size, hidden_size].
  """
    # [batch_size, maxlen1]
    mask1 = tf.sequence_mask(len1,
                             tensor_utils.shape(emb1, 1),
                             dtype=tf.float32)

    # [batch_size, maxlen2]
    mask2 = tf.sequence_mask(len2,
                             tensor_utils.shape(emb2, 1),
                             dtype=tf.float32)

    with tf.variable_scope("attend"):
        projected_emb1 = common_layers.ffnn(emb1,
                                            [hidden_size] * hidden_layers,
                                            dropout_ratio, mode)
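    # Reuse the "attend" parameters so both sequences are projected by the same
    # feed-forward network before computing attention scores.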
    with tf.variable_scope("attend", reuse=True):
        projected_emb2 = common_layers.ffnn(emb2,
                                            [hidden_size] * hidden_layers,
                                            dropout_ratio, mode)

    # [batch_size, maxlen1, maxlen2]
    attention_scores = tf.matmul(projected_emb1,
                                 projected_emb2,
                                 transpose_b=True)
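    # Mask out padded positions before the softmax: log(mask + epsilon) is ~0
    # for valid tokens and a large negative number for padding, so padded
    # tokens receive (near-)zero attention weight. epsilon avoids log(0).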
    attention_weights1 = tf.nn.softmax(
        attention_scores + tf.log(tf.expand_dims(mask2, 1) + epsilon), 2)
    attention_weights2 = tf.nn.softmax(
        attention_scores + tf.log(tf.expand_dims(mask1, 2) + epsilon), 1)

    # [batch_size, maxlen1, emb_size]
    attended_emb1 = tf.matmul(attention_weights1, emb2)

    # [batch_size, maxlen2, emb_size]
    attended_emb2 = tf.matmul(attention_weights2, emb1, transpose_a=True)

    with tf.variable_scope("compare"):
        compared_emb1 = common_layers.ffnn(
            tf.concat([emb1, attended_emb1], -1),
            [hidden_size] * hidden_layers, dropout_ratio, mode)
    with tf.variable_scope("compare", reuse=True):
        compared_emb2 = common_layers.ffnn(
            tf.concat([emb2, attended_emb2], -1),
            [hidden_size] * hidden_layers, dropout_ratio, mode)

    # Zero out padded positions so they do not contribute to the sum-pooling
    # below.
    compared_emb1 *= tf.expand_dims(mask1, -1)
    compared_emb2 *= tf.expand_dims(mask2, -1)

    # [batch_size, hidden_size]
    aggregated_emb1 = tf.reduce_sum(compared_emb1, 1)
    aggregated_emb2 = tf.reduce_sum(compared_emb2, 1)
    with tf.variable_scope("aggregate"):
        final_emb = common_layers.ffnn(
            tf.concat([aggregated_emb1, aggregated_emb2], -1),
            [hidden_size] * hidden_layers, dropout_ratio, mode)
    return final_emb
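

# Minimal usage sketch (not from the original source): a test in the style of
# test_ffnn above. It assumes `common_layers.ffnn` and `tensor_utils.shape`
# behave as in the snippets above and that this method lives in a
# tf.test.TestCase subclass.
def test_decomposable_attention(self):
    with tf.Graph().as_default():
        emb1 = tf.random_uniform([3, 5, 8])
        emb2 = tf.random_uniform([3, 7, 8])
        len1 = tf.constant([5, 4, 2])
        len2 = tf.constant([7, 1, 6])
        final_emb = decomposable_attention(emb1=emb1,
                                           len1=len1,
                                           emb2=emb2,
                                           len2=len2,
                                           hidden_size=9,
                                           hidden_layers=2,
                                           dropout_ratio=0.2,
                                           mode=tf.estimator.ModeKeys.TRAIN)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            actual_final_emb = sess.run(final_emb)
        # The aggregated representation has shape [batch_size, hidden_size].
        self.assertAllEqual(actual_final_emb.shape, [3, 9])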