Example No. 1
def self_attention(inputs,
                   lengths,
                   attn_type='bilinear',
                   scaled=True,
                   activation=None,
                   with_sentinel=False,
                   **kwargs):
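    # Each attention helper returns (scores, probs, attended states);
    # index [2] keeps only the attended states.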
    if attn_type == 'bilinear':
        attn_states = attention.bilinear_attention(inputs, inputs, lengths,
                                                   scaled, with_sentinel,
                                                   **kwargs)[2]
    elif attn_type == 'dot':
        attn_states = attention.dot_attention(inputs, inputs, lengths, scaled,
                                              with_sentinel, **kwargs)[2]
    elif attn_type == 'diagonal_bilinear':
        attn_states = attention.diagonal_bilinear_attention(
            inputs, inputs, lengths, scaled, with_sentinel, **kwargs)[2]
    elif attn_type == 'mlp':
        attn_states = attention.mlp_attention(
            kwargs['repr_dim'], activation, inputs, inputs, lengths,
            with_sentinel, **kwargs)[2]
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
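
A minimal usage sketch, assuming TensorFlow 1.x and that the attention helper module used above is importable; the shapes and attn_type choices below are illustrative only:

import tensorflow as tf

# inputs: [batch, time, dim] sequence states; lengths: [batch] true lengths.
inputs = tf.placeholder(tf.float32, [None, None, 128])
lengths = tf.placeholder(tf.int32, [None])

# Every position attends over all positions of its own sequence.
attended = self_attention(inputs, lengths, attn_type='dot', scaled=True)

# The 'mlp' variant additionally needs repr_dim and an activation, e.g.:
# attended = self_attention(inputs, lengths, attn_type='mlp',
#                           activation=tf.nn.relu, repr_dim=128)
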
Example No. 2
def attention_matching_layer(seq1,
                             seq1_length,
                             seq2,
                             seq2_length,
                             seq2_to_seq1=None,
                             attn_type='diagonal_bilinear',
                             key_value_attn=False,
                             scaled=True,
                             with_sentinel=False,
                             repr_dim=None,
                             **kwargs):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        _, _, attn_states = attention.bilinear_attention(
            seq1,
            seq2,
            seq2_length,
            scaled,
            with_sentinel,
            seq2_to_seq1=seq2_to_seq1)
    elif attn_type == 'dot':
        _, _, attn_states = attention.dot_attention(seq1,
                                                    seq2,
                                                    seq2_length,
                                                    scaled,
                                                    with_sentinel,
                                                    seq2_to_seq1=seq2_to_seq1)
    elif attn_type == 'diagonal_bilinear':
        _, _, attn_states = attention.diagonal_bilinear_attention(
            seq1,
            seq2,
            seq2_length,
            scaled,
            with_sentinel,
            seq2_to_seq1=seq2_to_seq1)
    elif attn_type == 'mlp':
        _, _, attn_states = attention.mlp_attention(seq1.get_shape()[-1].value,
                                                    tf.nn.relu,
                                                    seq1,
                                                    seq2,
                                                    seq2_length,
                                                    with_sentinel,
                                                    seq2_to_seq1=seq2_to_seq1)
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    return attn_states
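
A hedged usage sketch under the same assumptions (TensorFlow 1.x, illustrative shapes); both sequences use the same representation size here, which the dot and diagonal_bilinear variants require:

import tensorflow as tf

# seq1: [batch, len1, dim], seq2: [batch, len2, dim]; lengths hold true lengths.
seq1 = tf.placeholder(tf.float32, [None, None, 128])
seq1_length = tf.placeholder(tf.int32, [None])
seq2 = tf.placeholder(tf.float32, [None, None, 128])
seq2_length = tf.placeholder(tf.int32, [None])

# For every seq1 position, a weighted summary of seq2.
matched = attention_matching_layer(seq1, seq1_length, seq2, seq2_length,
                                   attn_type='diagonal_bilinear')
# matched: [batch, len1, 128]
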
Example No. 3
def bidaf_layer(seq1,
                seq1_length,
                seq2,
                seq2_length,
                seq2_to_seq1=None,
                **kwargs):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    attn_scores, attn_probs, seq2_weighted = attention.diagonal_bilinear_attention(
        seq1, seq2, seq2_length, False, seq2_to_seq1=seq2_to_seq1)

    # mask_for_lengths (a helper defined elsewhere in this codebase) adds a large
    # negative value at seq1 positions past seq1_length, so padded positions get
    # (near) zero weight in the softmax over seq1 below.
    attn_scores += tf.expand_dims(
        mask_for_lengths(seq1_length,
                         tf.shape(attn_scores)[1]), 2)

    # BiDAF-style seq1 summary: take each seq1 position's best score over seq2,
    # softmax those maxima over seq1, build one weighted seq1 vector, and tile
    # it across all seq1 positions.
    max_seq1 = tf.reduce_max(attn_scores, 2)
    seq1_attention = tf.nn.softmax(max_seq1, 1)
    seq1_weighted = tf.einsum('ij,ijk->ik', seq1_attention, seq1)
    seq1_weighted = tf.expand_dims(seq1_weighted, 1)
    seq1_weighted = tf.tile(seq1_weighted, [1, tf.shape(seq1)[1], 1])

    return tf.concat(
        [seq2_weighted, seq1 * seq2_weighted, seq1 * seq1_weighted], 2)
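
Each of the three concatenated tensors has seq1's shape, so the layer returns [batch, len1, 3 * dim] when seq1 and seq2 share their last dimension. A minimal usage sketch under the same assumptions as above:

import tensorflow as tf

seq1 = tf.placeholder(tf.float32, [None, None, 128])
seq1_length = tf.placeholder(tf.int32, [None])
seq2 = tf.placeholder(tf.float32, [None, None, 128])
seq2_length = tf.placeholder(tf.int32, [None])

# BiDAF-style matching of seq1 against seq2.
bidaf_out = bidaf_layer(seq1, seq1_length, seq2, seq2_length)
# bidaf_out: [batch, len1, 3 * 128]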