Example #1
import tensorflow as tf

# `attention` refers to a project-local module providing the bilinear, dot,
# diagonal-bilinear and MLP attention variants used below; it is assumed to be
# imported at module level.
def attention_matching_layer(seq1,
                             seq1_length,
                             seq2,
                             seq2_length,
                             attn_type='diagonal_bilinear',
                             scaled=True,
                             with_sentinel=False):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        _, _, attn_states = attention.bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'dot':
        _, _, attn_states = attention.dot_attention(seq1, seq2, seq2_length,
                                                    scaled, with_sentinel)
    elif attn_type == 'diagonal_bilinear':
        _, _, attn_states = attention.diagonal_bilinear_attention(
            seq1, seq2, seq2_length, scaled, with_sentinel)
    elif attn_type == 'mlp':
        _, _, attn_states = attention.mlp_attention(seq1.get_shape()[-1].value,
                                                    tf.nn.relu, seq1, seq2,
                                                    seq2_length, with_sentinel)
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
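
The four `attn_type` branches differ only in how the raw scores between seq1 and seq2 positions are computed; the rest (length masking, optional scaling, softmax, weighted sum over seq2) is shared. Below is a minimal NumPy sketch of the 'dot' variant for illustration only: the call sites above show that the attended states are the third returned element, but the exact shapes and scaling behaviour of the project's attention module are assumptions here, not its implementation.

import numpy as np

def dot_attention_sketch(seq1, seq2, seq2_length, scaled=True):
    """Word-by-word dot attention: every seq1 position attends over seq2.

    seq1: [batch, len1, dim], seq2: [batch, len2, dim], seq2_length: [batch].
    Returns attended seq2 states of shape [batch, len1, dim].
    """
    scores = np.einsum('bid,bjd->bij', seq1, seq2)        # [batch, len1, len2]
    if scaled:
        scores = scores / np.sqrt(seq1.shape[-1])         # temper the logits
    # Mask padded seq2 positions before the softmax.
    pad = np.arange(seq2.shape[1])[None, :] >= seq2_length[:, None]
    scores = np.where(pad[:, None, :], -1e9, scores)
    weights = np.exp(scores - scores.max(-1, keepdims=True))
    weights = weights / weights.sum(-1, keepdims=True)    # softmax over seq2
    return np.einsum('bij,bjd->bid', weights, seq2)

# Two sequences of length 3 attending over supports with true lengths 4 and 2.
attended = dot_attention_sketch(np.random.rand(2, 3, 8),
                                np.random.rand(2, 5, 8), np.array([4, 2]))
print(attended.shape)  # (2, 3, 8)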
Example #2
import tensorflow as tf

# As in Example #1, the project-local `attention` module is assumed to be in scope.
def self_attention(inputs,
                   lengths,
                   attn_type='bilinear',
                   scaled=True,
                   repr_dim=None,
                   activation=None,
                   with_sentinel=False,
                   name='self_attention',
                   reuse=False):
    # Pass `reuse` as a keyword argument; positionally it would be interpreted
    # as `default_name` and silently ignored.
    with tf.variable_scope(name, reuse=reuse):
        if attn_type == 'bilinear':
            attn_states = attention.bilinear_attention(inputs, inputs, lengths,
                                                       scaled,
                                                       with_sentinel)[2]
        elif attn_type == 'dot':
            attn_states = attention.dot_attention(inputs, inputs, lengths,
                                                  scaled, with_sentinel)[2]
        elif attn_type == 'diagonal_bilinear':
            attn_states = attention.diagonal_bilinear_attention(
                inputs, inputs, lengths, scaled, with_sentinel)[2]
        elif attn_type == 'mlp':
            attn_states = attention.mlp_attention(repr_dim, activation, inputs,
                                                  inputs, lengths,
                                                  with_sentinel)[2]
        else:
            raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
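
For self-attention the query and key/value sequences are the same tensor, so the `attn_type` only changes how scores are parameterized. The sketch below shows one common parameterization of the two bilinear variants (assumed here for illustration, not taken from the project's attention module): the full bilinear score uses a dim x dim matrix, while the diagonal variant restricts it to a diagonal, cutting parameters from dim^2 to dim. The diagonal form is also the default `attn_type` in Example #1.

import numpy as np

def bilinear_scores(x, W):
    """Full bilinear self-attention scores: s_ij = x_i^T W x_j."""
    return np.einsum('bid,de,bje->bij', x, W, x)

def diagonal_bilinear_scores(x, d):
    """Diagonal restriction W = diag(d): s_ij = sum_k d_k * x_ik * x_jk."""
    return np.einsum('bid,d,bjd->bij', x, d, x)

x = np.random.rand(2, 4, 8)                            # [batch, length, dim]
full = bilinear_scores(x, np.random.rand(8, 8))        # dim * dim parameters
diag = diagonal_bilinear_scores(x, np.random.rand(8))  # only dim parameters
print(full.shape, diag.shape)                          # (2, 4, 4) (2, 4, 4)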
Example #3
import tensorflow as tf

# `attention`, `misc`, `compute_question_state`, `segment_softmax` and
# `compute_spans` are project-local helpers assumed to be imported at module level.
def san_answer_layer(size,
                     encoded_question,
                     question_length,
                     encoded_support,
                     support_length,
                     support2question,
                     answer2support,
                     is_eval,
                     topk=1,
                     max_span_size=10000,
                     num_steps=5,
                     dropout=0.4,
                     **kwargs):
    question_state = compute_question_state(encoded_question, question_length)
    question_state = tf.layers.dense(question_state,
                                     encoded_support.get_shape()[-1].value,
                                     tf.tanh)
    question_state = tf.gather(question_state, support2question)

    cell = tf.contrib.rnn.GRUBlockCell(size)

    all_start_scores = []
    all_end_scores = []

    support_mask = misc.mask_for_lengths(support_length)

    # Multi-step reasoning: at each step the question state attends over the
    # support and is updated by the GRU cell; parameters are shared via reuse.
    for i in range(num_steps):
        with tf.variable_scope('SAN', reuse=i > 0):
            question_state = tf.expand_dims(question_state, 1)
            support_attn = attention.bilinear_attention(
                question_state, encoded_support, support_length, False,
                False)[2]
            question_state = tf.squeeze(question_state, 1)
            support_attn = tf.squeeze(support_attn, 1)
            question_state = cell(support_attn, question_state)[0]

            hidden_start = tf.layers.dense(question_state,
                                           size,
                                           name="hidden_start")

            # Start scores: inner product of the projected state with every
            # support position, masked to the true support length.
            start_scores = tf.einsum('ik,ijk->ij', hidden_start,
                                     encoded_support)
            start_scores = start_scores + support_mask

            # Probability-weighted start representation; the softmax is taken
            # jointly over all supports belonging to the same question.
            start_probs = segment_softmax(start_scores, support2question)
            start_states = tf.einsum('ij,ijk->ik', start_probs,
                                     encoded_support)
            start_states = tf.unsorted_segment_sum(
                start_states, support2question,
                tf.shape(question_length)[0])
            start_states = tf.gather(start_states, support2question)

            hidden_end = tf.layers.dense(tf.concat(
                [question_state, start_states], 1),
                                         size,
                                         name="hidden_end")

            end_scores = tf.einsum('ik,ijk->ij', hidden_end, encoded_support)
            end_scores = end_scores + support_mask
            all_start_scores.append(start_scores)
            all_end_scores.append(end_scores)

    all_start_scores = tf.stack(all_start_scores)
    all_end_scores = tf.stack(all_end_scores)
    # Stochastic prediction dropout over reasoning steps: whole steps are
    # randomly dropped during training; at evaluation all steps are kept and
    # simply averaged below.
    dropout_mask = tf.nn.dropout(tf.ones([num_steps, 1, 1]), 1.0 - dropout)
    all_start_scores = tf.cond(is_eval,
                               lambda: all_start_scores,
                               lambda: all_start_scores * dropout_mask)
    all_end_scores = tf.cond(is_eval, lambda: all_end_scores,
                             lambda: all_end_scores * dropout_mask)

    start_scores = tf.reduce_mean(all_start_scores, axis=0)
    end_scores = tf.reduce_mean(all_end_scores, axis=0)

    return compute_spans(start_scores,
                         end_scores,
                         answer2support,
                         is_eval,
                         support2question,
                         topk=topk,
                         max_span_size=max_span_size)
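
The answer layer above follows the Stochastic Answer Network recipe: the question state is refined over `num_steps` attention/GRU iterations, each step emits its own start and end scores, and the final prediction averages the per-step scores, with whole steps randomly dropped during training (stochastic prediction dropout). A minimal NumPy sketch of that final combination step follows; the function name and shapes are hypothetical, chosen only to mirror the masking and averaging done above.

import numpy as np

def combine_step_scores(step_scores, dropout, is_eval, rng=np.random):
    """Combine per-step span scores, SAN-style.

    step_scores: [num_steps, batch, support_len] logits, one slice per
    reasoning step. During training, whole steps are zeroed at random
    (stochastic prediction dropout); at evaluation all steps are averaged.
    """
    if not is_eval:
        keep = 1.0 - dropout
        # Inverted dropout over the step axis, mirroring tf.nn.dropout applied
        # to a [num_steps, 1, 1] mask of ones.
        mask = (rng.rand(step_scores.shape[0], 1, 1) < keep) / keep
        step_scores = step_scores * mask
    return step_scores.mean(axis=0)

start_scores = combine_step_scores(np.random.rand(5, 2, 7), dropout=0.4,
                                   is_eval=False)
print(start_scores.shape)  # (2, 7)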