Example #1
def _get_top_k(scores1, scores2, k, max_span_size, support2question):
    max_support_length = tf.shape(scores1)[1]
    doc_idx, pointer1, topk_scores1 = segment_top_k(scores1, support2question,
                                                    k)

    # [num_questions * topk]
    doc_idx_flat = tf.reshape(doc_idx, [-1])
    pointer_flat1 = tf.reshape(pointer1, [-1])

    # [num_questions * topk, support_length]
    scores_gathered2 = tf.gather(scores2, doc_idx_flat)
    if max_span_size < 0:
        # a negative max_span_size means the second pointer lies *before* the first
        pointer_flat1, max_span_size = pointer_flat1 + max_span_size + 1, -max_span_size
    left_mask = misc.mask_for_lengths(tf.cast(pointer_flat1, tf.int32),
                                      max_support_length,
                                      mask_right=False)
    right_mask = misc.mask_for_lengths(
        tf.cast(pointer_flat1 + max_span_size, tf.int32), max_support_length)
    scores_gathered2 = scores_gathered2 + left_mask + right_mask

    pointer2 = tf.argmax(scores_gathered2, axis=1, output_type=tf.int32)

    topk_score2 = tf.gather_nd(scores2, tf.stack([doc_idx_flat, pointer2], 1))

    return doc_idx, pointer1, tf.reshape(
        pointer2, [-1, k]), topk_scores1 + tf.reshape(topk_score2, [-1, k])
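These examples rely on misc.mask_for_lengths from the surrounding library. A minimal standalone sketch of its assumed semantics, consistent with the "mask out-of-bounds slots by adding -1000" comment in Example #8 (the exact signature in the library may differ):

import tensorflow as tf

def mask_for_lengths(lengths, max_length=None, mask_right=True, value=-1000.0):
    # Returns a [batch, max_length] float tensor that is `value` at masked
    # positions and 0 elsewhere. mask_right=True masks positions >= length,
    # mask_right=False masks positions < length.
    if max_length is None:
        max_length = tf.reduce_max(lengths)
    in_range = tf.sequence_mask(lengths, max_length, dtype=tf.float32)
    return (1.0 - in_range) * value if mask_right else in_range * value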
Example #2
    def eval():
        # [num_questions * beam_size, support_length]
        left_mask = misc.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                          max_support_length, mask_right=False)
        right_mask = misc.mask_for_lengths(tf.cast(start_pointer + max_span_size, tf.int32),
                                           max_support_length)
        masked_end_scores = end_scores + left_mask + right_mask
        predicted_ends = tf.argmax(masked_end_scores, axis=1, output_type=tf.int32)

        return (start_scores, masked_end_scores,
                tf.gather(doc_idx_for_support, doc_idx_flat), start_pointer, predicted_ends)
Example #3
def bidaf_layer(seq1,
                seq1_length,
                seq2,
                seq2_length,
                seq1_to_seq2=None,
                seq2_to_seq1=None):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    attn_scores, attn_probs, seq2_weighted = attention.diagonal_bilinear_attention(
        seq1, seq2, seq2_length, False, seq2_to_seq1=seq2_to_seq1)

    attn_scores += tf.expand_dims(
        mask_for_lengths(seq1_length,
                         tf.shape(attn_scores)[1]), 2)

    max_seq1 = tf.reduce_max(attn_scores, 2)
    if seq1_to_seq2 is None:
        seq1_attention = tf.nn.softmax(max_seq1, 1)
    else:
        segm_max_seq1 = tf.unsorted_segment_max(
            max_seq1, seq1_to_seq2,
            tf.reduce_max(seq1_to_seq2) + 1)
        seq1_attention = tf.nn.softmax(segm_max_seq1, 1)
        seq1_attention = tf.gather(seq1_attention, seq1_to_seq2)
        seq1_attention.set_shape(max_seq1.get_shape())
    seq1_weighted = tf.einsum('ij,ijk->ik', seq1_attention, seq1)
    seq1_weighted = tf.expand_dims(seq1_weighted, 1)
    seq1_weighted = tf.tile(seq1_weighted, [1, tf.shape(seq1)[1], 1])

    return tf.concat(
        [seq2_weighted, seq1 * seq2_weighted, seq1 * seq1_weighted], 2)
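A hypothetical call, assuming attention.diagonal_bilinear_attention returns scores of shape [batch, len1, len2] plus a seq2 summary aligned to each seq1 position; all shapes below are invented for illustration:

# seq1 = support, seq2 = question; sizes are illustrative only
seq1 = tf.random_normal([2, 6, 8])
seq1_length = tf.constant([6, 5])
seq2 = tf.random_normal([2, 4, 8])
seq2_length = tf.constant([4, 3])
fused = bidaf_layer(seq1, seq1_length, seq2, seq2_length)
# fused: [2, 6, 24] -- three 8-dim feature blocks concatenated per seq1 position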
Example #4
def apply_attention(attn_scores,
                    states,
                    length,
                    is_self=False,
                    with_sentinel=True,
                    reuse=False,
                    seq2_to_seq1=None):
    attn_scores += tf.expand_dims(
        misc.mask_for_lengths(length,
                              tf.shape(attn_scores)[2]), 1)
    softmax = tf.nn.softmax if seq2_to_seq1 is None else lambda x: segment.segment_softmax(
        x, seq2_to_seq1)
    if is_self:
        # exclude attending to state itself
        attn_scores += tf.expand_dims(
            tf.diag(tf.fill([tf.shape(attn_scores)[1]], -1e6)), 0)
    if with_sentinel:
        with tf.variable_scope('sentinel', reuse=reuse):
            s = tf.get_variable('score', [1, 1, 1], tf.float32,
                                tf.zeros_initializer())
        s = tf.tile(s, [tf.shape(attn_scores)[0], tf.shape(attn_scores)[1], 1])
        attn_probs = softmax(tf.concat([s, attn_scores], 2))
        attn_probs = attn_probs[:, :, 1:]
    else:
        attn_probs = softmax(attn_scores)
    attn_states = tf.einsum('abd,adc->abc', attn_probs, states)
    if seq2_to_seq1 is not None:
        attn_states = tf.unsorted_segment_sum(attn_states, seq2_to_seq1,
                                              tf.reduce_max(seq2_to_seq1) + 1)
    return attn_scores, attn_probs, attn_states
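A sketch of the expected shapes, with made-up sizes; note that with with_sentinel=True the returned probabilities need not sum to one, since the sentinel's share of the softmax mass is discarded:

scores = tf.random_normal([2, 3, 5])   # [batch, num_queries, num_states]
states = tf.random_normal([2, 5, 7])   # [batch, num_states, dim]
length = tf.constant([5, 4])           # valid states per batch element
_, probs, attended = apply_attention(scores, states, length,
                                     is_self=False, with_sentinel=True)
# probs: [2, 3, 5], attended: [2, 3, 7]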
Example #5
def bilinear_answer_layer(size,
                          encoded_question,
                          question_length,
                          encoded_support,
                          support_length,
                          support2question,
                          answer2support,
                          is_eval,
                          topk=1,
                          max_span_size=10000):
    """Answer layer for multiple paragraph QA."""
    # note: the `size` argument is overridden by the support representation size
    size = encoded_support.get_shape()[-1].value
    # compute single-time attention over the question
    question_state = compute_question_state(encoded_question, question_length)

    # compute logits
    hidden = tf.gather(
        tf.layers.dense(question_state, 2 * size, name="hidden"),
        support2question)
    hidden_start, hidden_end = tf.split(hidden, 2, 1)

    support_mask = misc.mask_for_lengths(support_length)

    start_scores = tf.einsum('ik,ijk->ij', hidden_start, encoded_support)
    start_scores = start_scores + support_mask

    end_scores = tf.einsum('ik,ijk->ij', hidden_end, encoded_support)
    end_scores = end_scores + support_mask

    return compute_spans(start_scores, end_scores, answer2support, is_eval,
                         support2question, topk, max_span_size)
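The bilinear scoring above boils down to a per-token dot product between a question-derived vector and each support state; a tiny sketch of the einsum (shapes invented):

h = tf.random_normal([3, 8])       # question-derived vectors, one per support
s = tf.random_normal([3, 10, 8])   # encoded support, [num_supports, len, dim]
scores = tf.einsum('ik,ijk->ij', h, s)  # scores[i, j] = <h[i], s[i, j]>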
Example #6
def compute_question_state(encoded_question, question_length):
    attention_scores = tf.layers.dense(encoded_question, 1, name="question_attention")
    q_mask = misc.mask_for_lengths(question_length)
    attention_scores = attention_scores + tf.expand_dims(q_mask, 2)
    question_attention_weights = tf.nn.softmax(attention_scores, 1, name="question_attention_weights")
    question_state = tf.reduce_sum(question_attention_weights * encoded_question, 1)
    return question_state
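A small usage sketch; because the -1000 mask pushes padded positions toward zero weight, the pooled state is a weighted sum over valid tokens only:

encoded_question = tf.random_normal([2, 4, 8])  # [batch, question_len, dim]
question_length = tf.constant([4, 2])
question_state = compute_question_state(encoded_question, question_length)
# question_state: [2, 8], an attention-weighted pooling of each question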
Example #7
def mlp_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                     support2question, answer2support, is_eval, beam_size=1, max_span_size=10000):
    """Answer layer for multiple paragraph QA."""
    # computing single time attention over question
    question_state = compute_question_state(encoded_question, question_length)

    # compute logits
    static_input = tf.concat([tf.gather(tf.expand_dims(question_state, 1), support2question) * encoded_support,
                              encoded_support], 2)

    hidden = tf.gather(tf.layers.dense(question_state, 2 * size, name="hidden_1"), support2question)
    hidden = tf.layers.dense(
        static_input, 2 * size, use_bias=False, name="hidden_2") + tf.expand_dims(hidden, 1)

    hidden_start, hidden_end = tf.split(tf.nn.relu(hidden), 2, 2)

    support_mask = misc.mask_for_lengths(support_length)

    start_scores = tf.layers.dense(hidden_start, 1, use_bias=False, name="start_scores")
    start_scores = tf.squeeze(start_scores, [2])
    start_scores = start_scores + support_mask

    end_scores = tf.layers.dense(hidden_end, 1, use_bias=False, name="end_scores")
    end_scores = tf.squeeze(end_scores, [2])
    end_scores = end_scores + support_mask

    return compute_spans(start_scores, end_scores, answer2support, is_eval, support2question,
                         beam_size, max_span_size)
Example #8
def bidaf_answer_layer(encoded_support_start, encoded_support_end, support_length,
                       support2question, answer2support, is_eval, beam_size=1, max_span_size=10000):
    # BiLSTM(M) = M^2 = encoded_support_end
    start_scores = tf.squeeze(tf.layers.dense(encoded_support_start, 1, use_bias=False), 2)
    end_scores = tf.squeeze(tf.layers.dense(encoded_support_end, 1, use_bias=False), 2)
    # mask out-of-bounds slots by adding -1000
    support_mask = misc.mask_for_lengths(support_length)
    start_scores = start_scores + support_mask
    end_scores = end_scores + support_mask
    return compute_spans(start_scores, end_scores, answer2support, is_eval,
                         support2question, beam_size=beam_size, max_span_size=max_span_size)
Example #9
def conv_char_embedding_alt(char_vocab,
                            size,
                            unique_word_chars,
                            unique_word_lengths,
                            word_to_uniqs,
                            conv_width=5,
                            emb_initializer=tf.random_normal_initializer(
                                0.0, 0.1),
                            scope=None):
    # "fixed PADDING on character level"
    pad = tf.zeros(
        tf.stack(
            [tf.shape(unique_word_lengths)[0],
             math.floor(conv_width / 2)]), tf.int32)
    unique_word_chars = tf.concat([pad, unique_word_chars, pad], 1)

    if not isinstance(word_to_uniqs, list):
        word_to_uniqs = [word_to_uniqs]

    with tf.variable_scope(scope or "char_embeddings") as vs:
        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        max_word_length = tf.reduce_max(unique_word_lengths)
        embedded_chars = tf.nn.embedding_lookup(
            char_embedding_matrix, tf.cast(unique_word_chars, tf.int32))

        with tf.variable_scope("conv"):
            # create filter like this to get fan-in and fan-out right for initializers depending on those
            filter = tf.get_variable("filter", [conv_width * size, size])
            filter_reshaped = tf.reshape(filter, [conv_width, size, size])
            # [B, T, S + pad_right]
            conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1,
                                    "VALID")
            conv_mask = tf.expand_dims(
                misc.mask_for_lengths(unique_word_lengths,
                                      max_length=max_word_length), 2)
            conv_out = conv_out + conv_mask

        unique_embedded_words = tf.reduce_max(conv_out, [1])

        all_embedded = []
        for word_idx in word_to_uniqs:
            flat_word_idx = tf.reshape(word_idx, [-1])
            embedded_words = tf.gather(unique_embedded_words, flat_word_idx)
            embedded_words = tf.reshape(
                embedded_words,
                tf.stack([-1, tf.unstack(tf.shape(word_idx))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
Example #10
def conv_char_embedding(num_chars,
                        size,
                        word_chars,
                        word_lengths,
                        word_sequences=None,
                        conv_width=5,
                        emb_initializer=tf.random_normal_initializer(0.0, 0.1),
                        scope=None):
    """Build simple convolutional character based embeddings for words with a fixed filter and size.

    After the convolution max-pooling over characters is employed for each filter. If word sequences are given,
    these will be embedded with the newly created embeddings.
    """
    # "fixed PADDING on character level"
    pad = tf.zeros(tf.stack([tf.shape(word_lengths)[0], conv_width // 2]),
                   tf.int32)
    word_chars = tf.concat([pad, word_chars, pad], 1)

    with tf.variable_scope(scope or "char_embeddings"):
        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(num_chars, size),
                            initializer=emb_initializer, trainable=True)

        max_word_length = tf.reduce_max(word_lengths)
        embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix,
                                                tf.cast(word_chars, tf.int32))

        with tf.variable_scope("conv"):
            # create filter like this to get fan-in and fan-out right for initializers depending on those
            filter = tf.get_variable("filter", [conv_width * size, size])
            filter_reshaped = tf.reshape(filter, [conv_width, size, size])
            # [B, T, S + pad_right]
            conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1,
                                    "VALID")
            conv_mask = tf.expand_dims(
                misc.mask_for_lengths(word_lengths,
                                      max_length=max_word_length), 2)
            conv_out = conv_out + conv_mask

        embedded_words = tf.reduce_max(conv_out, 1)

    if word_sequences is None:
        return embedded_words

    if not isinstance(word_sequences, list):
        word_sequences = [word_sequences]
    all_embedded = []
    for word_idxs in word_sequences:
        all_embedded.append(tf.nn.embedding_lookup(embedded_words, word_idxs))

    return all_embedded
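A hypothetical call with a toy character alphabet; the ids and lengths below are invented for illustration:

word_chars = tf.constant([[1, 2, 3, 0, 0, 0],
                          [4, 5, 0, 0, 0, 0],
                          [6, 7, 8, 9, 10, 11]])  # [num_words, max_chars]
word_lengths = tf.constant([3, 2, 6])
embedded = conv_char_embedding(num_chars=30, size=16,
                               word_chars=word_chars,
                               word_lengths=word_lengths)
# embedded: [3, 16], one max-pooled convolutional embedding per word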
Example #11
    def train():
        gathered_end_scores = tf.gather(end_scores, answer2support)
        gathered_start_scores = tf.gather(start_scores, answer2support)

        if correct_start is not None:
            # assuming we know the correct start we only consider ends after that
            left_mask = misc.mask_for_lengths(tf.cast(correct_start, tf.int32), max_support_length, mask_right=False)
            gathered_end_scores = gathered_end_scores + left_mask

        predicted_start_pointer = tf.argmax(gathered_start_scores, axis=1, output_type=tf.int32)
        predicted_end_pointer = tf.argmax(gathered_end_scores, axis=1, output_type=tf.int32)

        return (start_scores, end_scores,
                tf.gather(doc_idx_for_support, answer2support), predicted_start_pointer, predicted_end_pointer)
Example #12
def bidaf_layer(seq1, seq1_length, seq2, seq2_length):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    attn_scores, attn_probs, seq2_weighted = attention.diagonal_bilinear_attention(
        seq1, seq2, seq2_length, False)

    attn_scores += tf.expand_dims(
        mask_for_lengths(seq1_length,
                         tf.shape(attn_scores)[1]), 2)

    max_seq1 = tf.reduce_max(attn_scores, 2)
    seq1_attention = tf.nn.softmax(max_seq1, 1)
    seq1_weighted = tf.einsum('ij,ijk->ik', seq1_attention, seq1)
    seq1_weighted = tf.expand_dims(seq1_weighted, 1)
    seq1_weighted = tf.tile(seq1_weighted, [1, tf.shape(seq1)[1], 1])

    return tf.concat(
        [seq2_weighted, seq1 * seq2_weighted, seq1 * seq1_weighted], 2)
Example #13
def conv_char_embedding_multi_filter(
        num_chars,
        filter_sizes,
        embedding_size,
        word_chars,
        word_lengths,
        word_sequences=None,
        emb_initializer=tf.random_normal_initializer(0.0, 0.1),
        projection_size=None,
        scope=None):
    """Build convolutional character based embeddings for words with multiple filters.

    Filter sizes is a list and each the position of each size in the list entry refers to its corresponding conv width.
    It can also be 0 (i.e., no filter of that conv width). E.g., sizes [4, 0, 7, 8] will create 4 conv filters of width
    1, no filter of width 2, 7 of width 3 and 8 of width 4. After the convolution max-pooling over characters is
    employed for each filter.

    embedding_size refers to the size of the character embeddings and projection size, if given, to the final size of
    the embedded characters after a final projection. If it is None, no projection will be applied and the resulting
    size is the sum of all filters.

    If word sequences are given, these will be embedded with the newly created embeddings.
    """
    with tf.variable_scope(scope or "char_embeddings"):
        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(num_chars, embedding_size),
                            initializer=emb_initializer, trainable=True)

        pad = tf.zeros(
            tf.stack([tf.shape(word_lengths)[0],
                      len(filter_sizes) // 2]), tf.int32)
        word_chars = tf.concat([pad, word_chars, pad], 1)

        max_word_length = tf.reduce_max(word_lengths)
        embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix,
                                                tf.cast(word_chars, tf.int32))
        conv_mask = tf.expand_dims(
            misc.mask_for_lengths(word_lengths, max_length=max_word_length), 2)

        embedded_words = []
        for i, size in enumerate(filter_sizes):
            if size == 0:
                continue
            conv_width = i + 1
            with tf.variable_scope("conv_%d" % conv_width):
                # create filter like this to get fan-in and fan-out right for initializers depending on those
                filter = tf.get_variable("filter",
                                         [conv_width * embedding_size, size])
                filter_reshaped = tf.reshape(
                    filter, [conv_width, embedding_size, size])
                cut = len(filter_sizes) // 2 - conv_width // 2
                embedded_chars_conv = embedded_chars[:, cut:-cut, :] if cut else embedded_chars
                conv_out = tf.nn.conv1d(embedded_chars_conv, filter_reshaped,
                                        1, "VALID")
                conv_out += conv_mask
                embedded_words.append(tf.reduce_max(conv_out, 1))

        embedded_words = tf.concat(embedded_words, 1)
        if projection_size is not None:
            embedded_words = tf.layers.dense(embedded_words, projection_size)

    if word_sequences is None:
        return embedded_words

    if not isinstance(word_sequences, list):
        word_sequences = [word_sequences]
    all_embedded = []
    for word_idxs in word_sequences:
        # don't overwrite embedded_words here, or later lookups would index
        # into the previous lookup's result instead of the word embeddings
        all_embedded.append(tf.nn.embedding_lookup(embedded_words, word_idxs))

    return all_embedded
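Following the docstring, a hypothetical filter_sizes=[4, 0, 7] creates 4 filters of width 1, none of width 2 and 7 of width 3, so the unprojected embedding size is 11 (inputs invented as in the earlier sketch):

word_chars = tf.constant([[1, 2, 3, 0, 0, 0],
                          [4, 5, 0, 0, 0, 0],
                          [6, 7, 8, 9, 10, 11]])
word_lengths = tf.constant([3, 2, 6])
embedded = conv_char_embedding_multi_filter(
    num_chars=30, filter_sizes=[4, 0, 7], embedding_size=16,
    word_chars=word_chars, word_lengths=word_lengths)
# embedded: [3, 4 + 7] = [3, 11]; pass projection_size to change this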
Example #14
    def create_output(self, shared_vocab_config, emb_question, question_length,
                      emb_support, support_length, unique_word_chars,
                      unique_word_char_length, question_words2unique,
                      support_words2unique, answer2question, keep_prob,
                      is_eval):
        # 1. char embeddings + word embeddings
        # 2a. conv char embeddings
        # 2b. pool char embeddings
        # 3. cat + highway
        # 4. BiLSTM
        # 5. cat
        # 6. biattention
        # 6a. create matrix of question support attentions
        # 6b. generate feature matrix
        # 7. combine
        # 8. BiLSTM
        # 9. double cross-entropy loss
        with tf.variable_scope(
                "bidaf", initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            max_question_length = tf.reduce_max(question_length)
            max_support_length = tf.reduce_max(support_length)

            beam_size = 1
            beam_size = tf.cond(is_eval,
                                lambda: tf.constant(beam_size, tf.int32),
                                lambda: tf.constant(1, tf.int32))

            input_size = shared_vocab_config.config["repr_dim_input"]
            size = shared_vocab_config.config["repr_dim"]
            with_char_embeddings = shared_vocab_config.config.get(
                "with_char_embeddings", False)
            W = tf.get_variable("biattention_weight", [size * 6])
            W_start_index = tf.get_variable("start_index_weight", [size * 10])
            W_end_index = tf.get_variable("end_index_weight", [size * 10])

            # 1. char embeddings + word embeddings
            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

            # 1. + 2a. + 2b. 2a. char embeddings + conv + max pooling
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding_alt(
                shared_vocab_config.char_vocab, size, unique_word_chars,
                unique_word_char_length,
                [question_words2unique, support_words2unique])
            # 3. cat
            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # highway layer to allow for interaction between concatenated embeddings
            # 3. highway
            # following bidaf notation here  (qq=question, xx=support)
            highway_question = highway_network(emb_question,
                                               2,
                                               scope='question_highway')
            highway_support = highway_network(emb_support,
                                              2,
                                              scope='support_highway')

            # emb_question = tf.slice(highway_question, [0, 0, 0], tf.stack([-1, max_question_length, -1]))
            # emb_support = tf.slice(all_embedded_hw, tf.stack([0, max_question_length, 0]), [-1, -1, -1])

            # emb_question.set_shape([None, None, size])
            # emb_support.set_shape([None, None, size])

            # 4. BiLSTM
            cell1 = tf.contrib.rnn.LSTMBlockFusedCell(size)
            encoded_question = fused_birnn(cell1,
                                           highway_question,
                                           question_length,
                                           dtype=tf.float32,
                                           time_major=False,
                                           scope='question_encoding')[0]
            encoded_question = tf.concat(encoded_question, 2)

            cell2 = tf.contrib.rnn.LSTMBlockFusedCell(size)
            encoded_support = fused_birnn(cell2,
                                          highway_support,
                                          support_length,
                                          dtype=tf.float32,
                                          time_major=False,
                                          scope='support_encoding')[0]
            encoded_support = tf.concat(encoded_support, 2)

            # 6. biattention alpha(U, H) = S
            # S = W^T*[H; U; H*U]
            # question = U = [batch, length1, 2*embedding]
            # support = H = [batch, length2, 2*embedding]
            # -> expand with features

            # we want to get from [length 1] and [length 2] to [length1, length2] and [length1, length2]
            # we do that with
            # (a) expand dim
            # [batch, L2, 2*embedding] -> [batch, 1, L2, 2*embedding]
            support = tf.expand_dims(encoded_support, 1)
            # [batch, L1, 2*embedding] -> [batch, L1, 1, 2*embedding]
            question = tf.expand_dims(encoded_question, 2)
            # (b) tile with the other dimension
            support = tf.tile(support, [1, max_question_length, 1, 1])
            question = tf.tile(question, [1, 1, max_support_length, 1])

            # 5. cat
            # question = U = [batch, length1, length2, 2*embeddings]
            # support = H = [batch, length1, length2, 2*embeddings]
            # S = W^T*[H; U; H*U]
            features = tf.concat([support, question, question * support], 3)

            # 6. biattention
            # 6a. create matrix of question support attentions
            # features = [batch, length1, length2, 6*embeddings]
            # w = [6*embeddings]
            # S = attention matrix = [batch, length1, length2]
            S = tf.einsum('ijkl,l->ijk', features, W)

            # S = [batch, length1, length2]
            # question to support attention
            # softmax -> [ batch, length1, length2] = att_question
            att_question = tf.nn.softmax(S, 2)  # softmax over support
            # weighted =  [batch, length1, length2] * [batch, length1, length2, 2*embedding] -> [batch, length2, 2*embedding]
            question_weighted = tf.einsum('ijk,ijkl->ikl', att_question,
                                          question)

            # support to question attention
            # 1. filter important context words with max
            # 2. softmax over question to get the question words which are most relevant for the most relevant context words
            # max(S) = [batch, length1, length2] -> [ batch, length1] = most important context
            max_support = tf.reduce_max(S, 2)
            # softmax over question -> [batch, length1]
            support_attention = tf.nn.softmax(max_support, 1)
            # support attention * support = weighted support
            # [batch, length1] * [batch, length1, length2, 2*embedding] = [batch, 2*embedding]
            support_weighted = tf.einsum('ij,ijkl->il', support_attention,
                                         support)
            # tile to have the same dimension
            # [batch, 2*embedding] -> [batch, length2, 2*embedding]
            support_weighted = tf.expand_dims(support_weighted, 1)
            support_weighted = tf.tile(support_weighted,
                                       [1, max_support_length, 1])

            # 6b. generate feature matrix
            # G(support, weighted question, weighted support)  = G(h, *u, *h) = [h, *u, mul(h, *u), mul(h, h*)] = [batch, length2, embedding*8]
            G = tf.concat([
                encoded_support, question_weighted, encoded_support *
                question_weighted, encoded_support * support_weighted
            ], 2)

            # 8. BiLSTM(G) = M
            # start_index = M
            cell3 = tf.contrib.rnn.LSTMBlockFusedCell(size)
            start_index = \
                fused_birnn(cell3, G, support_length, dtype=tf.float32, time_major=False, scope='start_index')[0]
            start_index = tf.concat(start_index, 2)
            start_index = tf.concat([start_index, G], 2)
            # BiLSTM(M) = M^2 = end_index
            cell4 = tf.contrib.rnn.LSTMBlockFusedCell(size)
            end_index = \
                fused_birnn(cell4, start_index, support_length, dtype=tf.float32, time_major=False, scope='end_index')[
                    0]
            end_index = tf.concat(end_index, 2)
            end_index = tf.concat([end_index, G], 2)
            # 9. double cross-entropy loss (actually applied after this function)
            # 9a. prepare logits
            # 9b. prepare argmax for output module

            # 9a. prepare logits
            # start_index = [batch, length2, 10*embedding]
            # W_start_index = [10*embedding]
            # start_index *w_start_index = start_scores
            # [batch, length2, 10*embedding] * [10*embedding] = [batch, length2]
            start_scores = tf.einsum('ijk,k->ij', start_index, W_start_index)
            # end_index = [batch, length2, 10*emb]
            # W_end_index = [10*emb]
            # end_index * w_end_index = end_scores
            # [batch, length2, 10*emb] * [10*emb] = [batch, length2]
            end_scores = tf.einsum('ijk,k->ij', end_index, W_end_index)

            # mask out-of-bounds slots by adding -1000
            support_mask = mask_for_lengths(support_length)
            start_scores = start_scores + support_mask
            end_scores = end_scores + support_mask

            # 9b. prepare argmax for output module
            predicted_start_pointer = tf.argmax(start_scores, 1)
            predicted_end_pointer = tf.argmax(end_scores, 1)

            span = tf.concat([
                tf.expand_dims(predicted_start_pointer, 1),
                tf.expand_dims(predicted_end_pointer, 1)
            ], 1)

            return start_scores, end_scores, span
Example #15
def fastqa_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                        correct_start, answer2question, is_eval, beam_size=1):
    beam_size = tf.cond(is_eval, lambda: tf.constant(beam_size, tf.int32), lambda: tf.constant(1, tf.int32))
    batch_size = tf.shape(question_length)[0]
    answer2question = tf.cond(is_eval, lambda: tf.range(0, batch_size, dtype=tf.int32), lambda: answer2question)
    input_size = encoded_support.get_shape()[-1].value
    support_states_flat = tf.reshape(encoded_support, [-1, input_size])

    # computing single time attention over question
    attention_scores = tf.contrib.layers.fully_connected(encoded_question, 1,
                                                         activation_fn=None,
                                                         weights_initializer=None,
                                                         biases_initializer=None,
                                                         scope="question_attention")
    q_mask = misc.mask_for_lengths(question_length)
    attention_scores = attention_scores + tf.expand_dims(q_mask, 2)
    question_attention_weights = tf.nn.softmax(attention_scores, 1, name="question_attention_weights")
    question_state = tf.reduce_sum(question_attention_weights * encoded_question, [1])

    # Prediction
    # start
    start_input = tf.concat([tf.expand_dims(question_state, 1) * encoded_support,
                             encoded_support], 2)

    q_start_inter = tf.contrib.layers.fully_connected(question_state, size,
                                                      activation_fn=None,
                                                      weights_initializer=None,
                                                      scope="q_start_inter")

    q_start_state = tf.contrib.layers.fully_connected(start_input, size,
                                                      activation_fn=None,
                                                      weights_initializer=None,
                                                      biases_initializer=None,
                                                      scope="q_start") + tf.expand_dims(q_start_inter, 1)

    start_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_start_state), 1,
                                                     activation_fn=None,
                                                     weights_initializer=None,
                                                     biases_initializer=None,
                                                     scope="start_scores")
    start_scores = tf.squeeze(start_scores, [2])

    support_mask = misc.mask_for_lengths(support_length)
    start_scores = start_scores + support_mask

    # probs are needed during beam search
    start_probs = tf.nn.softmax(start_scores)

    predicted_start_probs, predicted_start_pointer = tf.nn.top_k(start_probs, beam_size)

    # use correct start during training, because p(end|start) should be optimized
    predicted_start_pointer = tf.gather(predicted_start_pointer, answer2question)
    predicted_start_probs = tf.gather(predicted_start_probs, answer2question)

    start_pointer = tf.cond(is_eval, lambda: predicted_start_pointer, lambda: tf.expand_dims(correct_start, 1))

    # flatten again
    start_pointer = tf.reshape(start_pointer, [-1])
    answer2questionwithbeam = tf.reshape(tf.tile(tf.expand_dims(answer2question, 1), tf.stack([1, beam_size])), [-1])

    offsets = tf.cast(tf.range(0, batch_size) * tf.reduce_max(support_length), dtype=tf.int32)
    offsets = tf.gather(offsets, answer2questionwithbeam)
    u_s = tf.gather(support_states_flat, start_pointer + offsets)

    start_scores = tf.gather(start_scores, answer2questionwithbeam)
    start_input = tf.gather(start_input, answer2questionwithbeam)
    encoded_support = tf.gather(encoded_support, answer2questionwithbeam)
    question_state = tf.gather(question_state, answer2questionwithbeam)
    support_mask = tf.gather(support_mask, answer2questionwithbeam)

    # end
    end_input = tf.concat([tf.expand_dims(u_s, 1) * encoded_support, start_input], 2)

    q_end_inter = tf.contrib.layers.fully_connected(tf.concat([question_state, u_s], 1), size,
                                                    activation_fn=None,
                                                    weights_initializer=None,
                                                    scope="q_end_inter")

    q_end_state = tf.contrib.layers.fully_connected(end_input, size,
                                                    activation_fn=None,
                                                    weights_initializer=None,
                                                    biases_initializer=None,
                                                    scope="q_end") + tf.expand_dims(q_end_inter, 1)

    end_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_end_state), 1,
                                                   activation_fn=None,
                                                   weights_initializer=None,
                                                   biases_initializer=None,
                                                   scope="end_scores")
    end_scores = tf.squeeze(end_scores, [2])
    end_scores = end_scores + support_mask

    def mask_with_start(scores):
        return scores + misc.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                              tf.reduce_max(support_length),
                                              mask_right=False)

    end_scores = tf.cond(is_eval, lambda: mask_with_start(end_scores), lambda: end_scores)

    # probs are needed during beam search
    end_probs = tf.nn.softmax(end_scores)
    predicted_end_probs, predicted_end_pointer = tf.nn.top_k(end_probs, 1)
    predicted_end_probs = tf.reshape(predicted_end_probs, tf.stack([-1, beam_size]))
    predicted_end_pointer = tf.reshape(predicted_end_pointer, tf.stack([-1, beam_size]))

    predicted_idx = tf.cast(tf.argmax(predicted_start_probs * predicted_end_probs, 1), tf.int32)
    predicted_idx = tf.stack([tf.range(0, tf.shape(answer2question)[0], dtype=tf.int32), predicted_idx], 1)

    predicted_start_pointer = tf.gather_nd(predicted_start_pointer, predicted_idx)
    predicted_end_pointer = tf.gather_nd(predicted_end_pointer, predicted_idx)

    return start_scores, end_scores, predicted_start_pointer, predicted_end_pointer
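The answer layer above gathers start states from a flattened support tensor; the per-question row offsets make that flat-index trick explicit. A toy version of the bookkeeping (shapes invented):

support = tf.reshape(tf.range(2 * 4 * 3), [2, 4, 3])  # [batch, len, dim]
flat = tf.reshape(support, [-1, 3])                   # [batch * len, dim]
start_pointer = tf.constant([1, 3])                   # one start per question
offsets = tf.range(0, 2) * 4                          # row offset per question
picked = tf.gather(flat, start_pointer + offsets)     # == support[i, start[i]]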
Example #16
def san_answer_layer(size,
                     encoded_question,
                     question_length,
                     encoded_support,
                     support_length,
                     support2question,
                     answer2support,
                     is_eval,
                     topk=1,
                     max_span_size=10000,
                     num_steps=5,
                     dropout=0.4,
                     **kwargs):
    question_state = compute_question_state(encoded_question, question_length)
    question_state = tf.layers.dense(question_state,
                                     encoded_support.get_shape()[-1].value,
                                     tf.tanh)
    question_state = tf.gather(question_state, support2question)

    cell = tf.contrib.rnn.GRUBlockCell(size)

    all_start_scores = []
    all_end_scores = []

    support_mask = misc.mask_for_lengths(support_length)

    for i in range(num_steps):
        with tf.variable_scope('SAN', reuse=i > 0):
            question_state = tf.expand_dims(question_state, 1)
            support_attn = attention.bilinear_attention(
                question_state, encoded_support, support_length, False,
                False)[2]
            question_state = tf.squeeze(question_state, 1)
            support_attn = tf.squeeze(support_attn, 1)
            question_state = cell(support_attn, question_state)[0]

            hidden_start = tf.layers.dense(question_state,
                                           size,
                                           name="hidden_start")

            start_scores = tf.einsum('ik,ijk->ij', hidden_start,
                                     encoded_support)
            start_scores = start_scores + support_mask

            start_probs = segment_softmax(start_scores, support2question)
            start_states = tf.einsum('ij,ijk->ik', start_probs,
                                     encoded_support)
            start_states = tf.unsorted_segment_sum(
                start_states, support2question,
                tf.shape(question_length)[0])
            start_states = tf.gather(start_states, support2question)

            hidden_end = tf.layers.dense(tf.concat(
                [question_state, start_states], 1),
                                         size,
                                         name="hidden_end")

            end_scores = tf.einsum('ik,ijk->ij', hidden_end, encoded_support)
            end_scores = end_scores + support_mask
            all_start_scores.append(start_scores)
            all_end_scores.append(end_scores)

    all_start_scores = tf.stack(all_start_scores)
    all_end_scores = tf.stack(all_end_scores)
    # stochastic prediction dropout: randomly drop whole steps during training,
    # average over all steps at evaluation time
    dropout_mask = tf.nn.dropout(tf.ones([num_steps, 1, 1]), 1.0 - dropout)
    all_start_scores = tf.cond(is_eval,
                               lambda: all_start_scores,
                               lambda: all_start_scores * dropout_mask)
    all_end_scores = tf.cond(is_eval, lambda: all_end_scores,
                             lambda: all_end_scores * dropout_mask)

    start_scores = tf.reduce_mean(all_start_scores, axis=0)
    end_scores = tf.reduce_mean(all_end_scores, axis=0)

    return compute_spans(start_scores,
                         end_scores,
                         answer2support,
                         is_eval,
                         support2question,
                         topk=topk,
                         max_span_size=max_span_size)
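The averaging at the end implements SAN-style stochastic prediction dropout: whole reasoning steps are zeroed at random during training, and all steps are averaged at test time. A minimal sketch of the mechanism (shapes invented):

step_scores = tf.random_normal([5, 2, 10])  # [num_steps, batch, support_len]
step_mask = tf.nn.dropout(tf.ones([5, 1, 1]), 0.6)  # drop whole steps at random
train_scores = tf.reduce_mean(step_scores * step_mask, axis=0)
eval_scores = tf.reduce_mean(step_scores, axis=0)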
Example #17
def conditional_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                             correct_start, support2question, answer2support, is_eval, beam_size=1, max_span_size=10000,
                             bilinear=False):
    question_state = compute_question_state(encoded_question, question_length)
    question_state = tf.gather(question_state, support2question)

    # Prediction
    # start
    if bilinear:
        hidden_start = tf.layers.dense(question_state, size, name="hidden_start")
        start_scores = tf.einsum('ik,ijk->ij', hidden_start, encoded_support)
    else:
        static_input = tf.concat([tf.expand_dims(question_state, 1) * encoded_support, encoded_support], 2)
        hidden_start = tf.layers.dense(question_state, size, name="hidden_start_1")
        hidden_start = tf.layers.dense(
            static_input, size, use_bias=False, name="hidden_start_2") + tf.expand_dims(hidden_start, 1)
        start_scores = tf.layers.dense(tf.nn.relu(hidden_start), 1, use_bias=False, name="start_scores")
        start_scores = tf.squeeze(start_scores, [2])

    support_mask = misc.mask_for_lengths(support_length)
    start_scores = start_scores + support_mask

    max_support_length = tf.shape(start_scores)[1]
    _, _, num_doc_per_question = tf.unique_with_counts(support2question)
    offsets = tf.cumsum(num_doc_per_question, exclusive=True)
    doc_idx_for_support = tf.range(tf.shape(support2question)[0]) - tf.gather(offsets, support2question)

    doc_idx, start_pointer = tf.cond(
        is_eval,
        lambda: segment_top_k(start_scores, support2question, beam_size)[:2],
        lambda: (tf.expand_dims(answer2support, 1), tf.expand_dims(correct_start, 1)))

    doc_idx_flat = tf.reshape(doc_idx, [-1])
    start_pointer = tf.reshape(start_pointer, [-1])

    start_state = tf.gather_nd(encoded_support, tf.stack([doc_idx_flat, start_pointer], 1))
    start_state.set_shape([None, size])

    encoded_support_gathered = tf.gather(encoded_support, doc_idx_flat)
    question_state = tf.gather(question_state, doc_idx_flat)
    if bilinear:
        hidden_end = tf.layers.dense(tf.concat([question_state, start_state], 1), size, name="hidden_end")
        end_scores = tf.einsum('ik,ijk->ij', hidden_end, encoded_support_gathered)
    else:
        end_input = tf.concat([tf.expand_dims(start_state, 1) * encoded_support_gathered,
                               tf.gather(static_input, doc_idx_flat)], 2)

        hidden_end = tf.layers.dense(tf.concat([question_state, start_state], 1), size,
                                     name="hidden_end_1")
        hidden_end = tf.layers.dense(
            end_input, size, use_bias=False, name="hidden_end_2") + tf.expand_dims(hidden_end, 1)

        end_scores = tf.layers.dense(tf.nn.relu(hidden_end), 1, use_bias=False, name="end_scores")
        end_scores = tf.squeeze(end_scores, [2])

    end_scores = end_scores + tf.gather(support_mask, doc_idx_flat)

    def train():
        predicted_end_pointer = tf.argmax(end_scores, axis=1, output_type=tf.int32)
        return start_scores, end_scores, doc_idx, start_pointer, predicted_end_pointer

    def eval():
        # [num_questions * beam_size, support_length]
        left_mask = misc.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                          max_support_length, mask_right=False)
        right_mask = misc.mask_for_lengths(tf.cast(start_pointer + max_span_size, tf.int32),
                                           max_support_length)
        masked_end_scores = end_scores + left_mask + right_mask
        predicted_ends = tf.argmax(masked_end_scores, axis=1, output_type=tf.int32)

        return (start_scores, masked_end_scores,
                tf.gather(doc_idx_for_support, doc_idx_flat), start_pointer, predicted_ends)

    return tf.cond(is_eval, eval, train)
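The unique_with_counts/cumsum trick above numbers each support document within its own question; a small worked example:

support2question = tf.constant([0, 0, 0, 1, 1])
_, _, counts = tf.unique_with_counts(support2question)  # counts == [3, 2]
offsets = tf.cumsum(counts, exclusive=True)             # offsets == [0, 3]
doc_idx_for_support = tf.range(5) - tf.gather(offsets, support2question)
# doc_idx_for_support == [0, 1, 2, 0, 1]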
Example #18
    def create_output(self, shared_vocab_config, emb_question, question_length,
                      emb_support, support_length,
                      unique_word_chars, unique_word_char_length,
                      question_words2unique, support_words2unique,
                      word_in_question,
                      correct_start, answer2question, keep_prob, is_eval):
        """FastQA model.
        Args:
            shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
            emb_question: [Q, L_q, N]
            question_length: [Q]
            emb_support: [Q, L_s, N]
            support_length: [Q]
            unique_word_chars
            unique_word_char_length
            question_words2unique
            support_words2unique
            word_in_question: [Q, L_s]
            correct_start: [A], only during training, i.e., is_eval=False
            answer2question: [A], only during training, i.e., is_eval=False
            keep_prob: []
            is_eval: []

        Returns:
            start_scores [B, L_s], end_scores [B, L_s], span_prediction [B, 2]
        """
        with tf.variable_scope("fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            batch_size = tf.shape(question_length)[0]
            max_question_length = tf.reduce_max(question_length)
            support_mask = misc.mask_for_lengths(support_length)
            question_binary_mask = misc.mask_for_lengths(question_length, mask_right=False, value=1.0)

            input_size = shared_vocab_config.config["repr_dim_input"]
            size = shared_vocab_config.config["repr_dim"]
            with_char_embeddings = shared_vocab_config.config.get("with_char_embeddings", False)

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

            if with_char_embeddings:
                # compute combined embeddings
                [char_emb_question, char_emb_support] = conv_char_embedding_alt(
                    shared_vocab_config.char_vocab, size, unique_word_chars, unique_word_char_length,
                    [question_words2unique, support_words2unique])

                emb_question = tf.concat([emb_question, char_emb_question], 2)
                emb_support = tf.concat([emb_support, char_emb_support], 2)
                input_size += size

                # set shapes for inputs
                emb_question.set_shape([None, None, input_size])
                emb_support.set_shape([None, None, input_size])

            # compute encoder features
            question_features = tf.ones(tf.stack([batch_size, max_question_length, 2]))

            v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                     initializer=tf.constant_initializer(1.0))

            wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
            wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

            wiq_w = tf.reduce_sum(tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])

            # [B, L , 2]
            support_features = tf.concat([tf.expand_dims(word_in_question, 2), tf.expand_dims(wiq_w, 2)], 2)

            # highway layer to allow for interaction between concatenated embeddings
            if with_char_embeddings:
                all_embedded = tf.concat([emb_question, emb_support], 1)
                all_embedded = tf.contrib.layers.fully_connected(all_embedded, size,
                                                                 activation_fn=None,
                                                                 weights_initializer=None,
                                                                 biases_initializer=None,
                                                                 scope="embeddings_projection")

                all_embedded_hw = highway_network(all_embedded, 1)

                emb_question = tf.slice(all_embedded_hw, [0, 0, 0], tf.stack([-1, max_question_length, -1]))
                emb_support = tf.slice(all_embedded_hw, tf.stack([0, max_question_length, 0]), [-1, -1, -1])

                emb_question.set_shape([None, None, size])
                emb_support.set_shape([None, None, size])

            # variational dropout
            dropout_shape = tf.unstack(tf.shape(emb_question))
            dropout_shape[1] = 1

            [emb_question, emb_support] = tf.cond(is_eval,
                                                  lambda: [emb_question, emb_support],
                                                  lambda: fixed_dropout([emb_question, emb_support],
                                                                        keep_prob, dropout_shape))

            # extend embeddings with features
            emb_question_ext = tf.concat([emb_question, question_features], 2)
            emb_support_ext = tf.concat([emb_support, support_features], 2)

            # encode question and support
            rnn = tf.contrib.rnn.LSTMBlockFusedCell
            encoded_question = birnn_with_projection(size, rnn, emb_question_ext, question_length,
                                                     projection_scope="question_proj")

            encoded_support = birnn_with_projection(size, rnn, emb_support_ext, support_length,
                                                    share_rnn=True, projection_scope="support_proj")

            start_scores, end_scores, predicted_start_pointer, predicted_end_pointer = \
                fastqa_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                                    correct_start, answer2question, is_eval,
                                    beam_size=shared_vocab_config.config.get("beam_size", 1))

            span = tf.stack([predicted_start_pointer, predicted_end_pointer], 1)

            return start_scores, end_scores, span
Example #19
def attention_softmax(attn_scores, length=None):
    # guard the default: without a length there is nothing to mask
    if length is not None:
        attn_scores += misc.mask_for_lengths(length, tf.shape(attn_scores)[2])
    return tf.nn.softmax(attn_scores)
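Usage sketch; the mask drives padded scores to roughly -1000, so their softmax mass is effectively zero:

scores = tf.random_normal([2, 3, 5])   # [batch, num_queries, num_states]
probs = attention_softmax(scores, tf.constant([5, 4]))
# probs[1, :, 4:] is ~0 because only 4 states are valid in the second example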
Example #20
def conv_char_embeddings(vocab,
                         size,
                         word_ids,
                         conv_width=5,
                         emb_initializer=tf.random_normal_initializer(
                             0.0, 0.1),
                         scope=None):
    """
    Args:
        vocab: filled Vocab instance
        size: size of embeddings
        word_ids: tf.Tensor[None, None] or list of tensors
        conv_width: int
        emb_initializer: initializer
        scope: scope

    Returns:
        char embedded word ids
    """
    if not isinstance(word_ids, list):
        word_ids = [word_ids]

    # create character vocab + word lengths + char ids per word
    pad_right = math.ceil(conv_width / 2)  # fixed PAD on right side
    vocab_size = max(vocab.sym2id.values()) + 1
    max_l = max(len(w) for w in vocab.sym2id) + pad_right
    char_vocab = defaultdict(lambda: len(char_vocab))
    char_vocab["PAD"] = 0
    word_to_chars_arr = np.zeros((vocab_size, max_l), np.int16)
    word_lengths_arr = np.zeros([vocab_size], np.int8)
    for w, i in vocab.sym2id.items():
        for k, c in enumerate(w):
            j = char_vocab[c]
            word_to_chars_arr[i, k] = j
        word_lengths_arr[i] = len(w) + conv_width - 1

    with tf.variable_scope(scope or "char_embeddings") as vs:
        word_to_chars = tf.constant(word_to_chars_arr, name="word_to_chars")
        word_lengths = tf.constant(word_lengths_arr, name="word_lengths")

        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        all_embedded = []
        for i, ids in enumerate(word_ids):
            if i > 0:
                vs.reuse_variables()

            unique_words, word_idx = tf.unique(tf.reshape(ids, [-1]))
            chars = tf.nn.embedding_lookup(word_to_chars, unique_words)
            wl = tf.nn.embedding_lookup(word_lengths, unique_words)
            wl = tf.cast(wl, tf.int32)
            max_word_length = tf.reduce_max(wl)
            chars = tf.slice(chars, [0, 0], tf.stack([-1, max_word_length]))

            embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix,
                                                    tf.cast(chars, tf.int32))

            with tf.variable_scope("conv"):
                # create filter like this to get fan-in and fan-out right for initializers depending on those
                filter = tf.get_variable("filter", [conv_width * size, size])
                filter_reshaped = tf.reshape(filter, [conv_width, size, size])
                conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1,
                                        "SAME")
                conv_mask = tf.expand_dims(
                    misc.mask_for_lengths(wl - pad_right,
                                          max_length=max_word_length), 2)
                conv_out = conv_out + conv_mask

            unique_embedded_words = tf.reduce_max(conv_out, [1])

            embedded_words = tf.gather(unique_embedded_words, word_idx)
            embedded_words = tf.reshape(
                embedded_words,
                tf.stack([-1, tf.unstack(tf.shape(ids))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
Example #21
    def create_output(self, shared_vocab_config, emb_question, question_length,
                      emb_support, support_length, unique_word_chars,
                      unique_word_char_length, question_words2unique,
                      support_words2unique, word_in_question, correct_start,
                      answer2question, keep_prob, is_eval, answer_type_span):
        """cbow_baseline_model model.

        Args:
            shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
            emb_question: [Q, L_q, N]
            question_length: [Q]
            emb_support: [Q, L_s, N]
            support_length: [Q]
            unique_word_chars
            unique_word_char_length
            question_words2unique
            support_words2unique
            word_in_question: [Q, L_s]
            correct_start: [A], only during training, i.e., is_eval=False
            answer2question: [A], only during training, i.e., is_eval=False
            keep_prob: []
            is_eval: []
            answer_type_span: [Q, 2], span within question marking the expected answer type

        Returns:
            start_scores [B, L_s], end_scores [B, L_s], span_prediction [B, 2]
        """
        with tf.variable_scope(
                "cbow_xqa",
                initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            batch_size = tf.shape(question_length)[0]
            max_support_length = tf.reduce_max(support_length)
            max_question_length = tf.reduce_max(question_length)

            input_size = shared_vocab_config.config["repr_dim_input"]
            size = shared_vocab_config.config["repr_dim"]
            with_char_embeddings = shared_vocab_config.config.get(
                "with_char_embeddings", False)

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

            if with_char_embeddings:
                # compute combined embeddings
                [char_emb_question,
                 char_emb_support] = conv_char_embedding_alt(
                     shared_vocab_config.char_vocab, size, unique_word_chars,
                     unique_word_char_length,
                     [question_words2unique, support_words2unique])

                emb_question = tf.concat([emb_question, char_emb_question], 2)
                emb_support = tf.concat([emb_support, char_emb_support], 2)
                input_size += size

                # set shapes for inputs
                emb_question.set_shape([None, None, input_size])
                emb_support.set_shape([None, None, input_size])

            # variational dropout
            dropout_shape = tf.unstack(tf.shape(emb_question))
            dropout_shape[1] = 1

            [emb_question, emb_support] = tf.cond(
                is_eval, lambda: [emb_question, emb_support],
                lambda: fixed_dropout([emb_question, emb_support], keep_prob,
                                      dropout_shape))

            # question encoding
            answer_type_start = tf.squeeze(tf.slice(answer_type_span, [0, 0],
                                                    [-1, 1]),
                                           axis=1)
            answer_type_end = tf.squeeze(tf.slice(answer_type_span, [0, 1],
                                                  [-1, -1]),
                                         axis=1)

            answer_type_mask = misc.mask_for_lengths(answer_type_start, max_question_length, value=1.0) * \
                               misc.mask_for_lengths(answer_type_end + 1, max_question_length,
                                                     mask_right=False, value=1.0)
            answer_type = tf.reduce_sum(emb_question * tf.expand_dims(answer_type_mask, 2), 1) / \
                          tf.maximum(1.0, tf.reduce_sum(answer_type_mask, 1, keep_dims=True))

            batch_size_range = tf.range(0, batch_size)
            answer_type_start_state = tf.gather_nd(
                emb_question, tf.stack([batch_size_range, answer_type_start],
                                       1))
            answer_type_end_state = tf.gather_nd(
                emb_question, tf.stack([batch_size_range, answer_type_end], 1))

            question_rep = tf.concat(
                [answer_type, answer_type_start_state, answer_type_end_state],
                1)
            question_rep.set_shape([None, input_size * 3])

            # wiq features
            support_mask = misc.mask_for_lengths(support_length)
            question_binary_mask = misc.mask_for_lengths(question_length,
                                                         mask_right=False,
                                                         value=1.0)

            v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                     initializer=tf.constant_initializer(1.0))

            wiq_w = tf.matmul(emb_question * v_wiqw,
                              emb_support,
                              adjoint_b=True)
            wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

            wiq_w = tf.reduce_sum(
                tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2),
                [1])

            wiq_exp = tf.stack([word_in_question, wiq_w], 2)

            # support span encoding
            spans = [
                tf.stack([
                    tf.range(0, max_support_length),
                    tf.range(0, max_support_length)
                ], 1)
            ]

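            # pad the wiq features by the largest pooling width (20) so that
            # the average pools of width 5/10/20 below are defined at every
            # support position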
            wiq_exp = tf.pad(wiq_exp, [[0, 0], [20, 20], [0, 0]])
            wiq_pooled5 = tf.layers.average_pooling1d(
                tf.slice(wiq_exp, [0, 15, 0],
                         tf.stack([-1, max_support_length + 10, -1])), 5, [1],
                'valid')
            wiq_pooled10 = tf.layers.average_pooling1d(
                tf.slice(wiq_exp, [0, 10, 0],
                         tf.stack([-1, max_support_length + 20, -1])), 10, [1],
                'valid')
            wiq_pooled20 = tf.layers.average_pooling1d(wiq_exp, 20, [1],
                                                       'valid')

            wiqs_left5 = [
                tf.slice(wiq_pooled5, [0, 0, 0],
                         tf.stack([-1, max_support_length, -1]))
            ]
            wiqs_right5 = [tf.slice(wiq_pooled5, [0, 6, 0], [-1, -1, -1])]
            wiqs_left10 = [
                tf.slice(wiq_pooled10, [0, 0, 0],
                         tf.stack([-1, max_support_length, -1]))
            ]
            wiqs_right10 = [tf.slice(wiq_pooled10, [0, 11, 0], [-1, -1, -1])]
            wiqs_left20 = [
                tf.slice(wiq_pooled20, [0, 0, 0],
                         tf.stack([-1, max_support_length, -1]))
            ]
            wiqs_right20 = [tf.slice(wiq_pooled20, [0, 21, 0], [-1, -1, -1])]

            context_window = 5
            padded_support = tf.pad(
                emb_support,
                [[0, 0], [context_window, context_window], [0, 0]], "CONSTANT")
            # [B, L + 10 - 4, S]
            emb_support_windows = tf.layers.average_pooling1d(
                padded_support, 5, [1], "VALID", "channels_last")

            left_context_windows = tf.slice(
                emb_support_windows, [0, 0, 0],
                tf.stack([-1, max_support_length, -1]))
            right_context_windows = tf.slice(emb_support_windows,
                                             [0, context_window + 1, 0],
                                             [-1, -1, -1])
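            # width-1 spans: average, start and end representations all
            # coincide with the token embedding itself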
            span_rep = [
                tf.concat([
                    emb_support, emb_support, emb_support,
                    left_context_windows, right_context_windows
                ], 2)
            ]

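            # spans of width 2.._max_span_size: each span is represented by
            # its averaged embedding, its start/end token embeddings and the
            # pooled contexts just outside its boundaries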
            for window_size in range(2, _max_span_size + 1):
                start = tf.slice(
                    emb_support, [0, 0, 0],
                    tf.stack([-1, max_support_length - (window_size - 1), -1]))
                end = tf.slice(emb_support, [0, window_size - 1, 0],
                               [-1, -1, -1])
                averagespan = tf.layers.average_pooling1d(
                    emb_support, window_size, [1], "VALID", "channels_last")

                left_context_windows = tf.slice(
                    emb_support_windows, [0, 0, 0],
                    tf.stack([-1, max_support_length - (window_size - 1), -1]))
                right_context_windows = tf.slice(
                    emb_support_windows,
                    [0, window_size - 1 + context_window + 1, 0], [-1, -1, -1])

                span_rep.append(
                    tf.concat([
                        averagespan, start, end, left_context_windows,
                        right_context_windows
                    ], 2))

                wiqs_left5.append(
                    tf.slice(
                        wiq_pooled5, [0, 0, 0],
                        tf.stack(
                            [-1, max_support_length - (window_size - 1), -1])))
                wiqs_left10.append(
                    tf.slice(
                        wiq_pooled10, [0, 0, 0],
                        tf.stack(
                            [-1, max_support_length - (window_size - 1), -1])))
                wiqs_left20.append(
                    tf.slice(
                        wiq_pooled20, [0, 0, 0],
                        tf.stack(
                            [-1, max_support_length - (window_size - 1), -1])))

                wiqs_right5.append(
                    tf.slice(wiq_pooled5, [0, window_size + 5, 0],
                             [-1, -1, -1]))
                wiqs_right10.append(
                    tf.slice(wiq_pooled10, [0, window_size + 10, 0],
                             [-1, -1, -1]))
                wiqs_right20.append(
                    tf.slice(wiq_pooled20, [0, window_size + 20, 0],
                             [-1, -1, -1]))

                spans.append(
                    tf.stack([
                        tf.range(0, max_support_length - (window_size - 1)),
                        tf.range(window_size - 1, max_support_length)
                    ], 1))

            span_rep = tf.concat(span_rep, 1)
            span_rep.set_shape([None, None, input_size * 5])
            wiqs_left5 = tf.concat(wiqs_left5, 1)
            wiqs_left10 = tf.concat(wiqs_left10, 1)
            wiqs_left20 = tf.concat(wiqs_left20, 1)

            wiqs_right5 = tf.concat(wiqs_right5, 1)
            wiqs_right10 = tf.concat(wiqs_right10, 1)
            wiqs_right20 = tf.concat(wiqs_right20, 1)

            spans = tf.concat(spans, 0)

            # scoring
            with tf.variable_scope("question_rep"):
                question_rep = tf.layers.dense(question_rep,
                                               size,
                                               activation=tf.tanh)
            with tf.variable_scope("question_inter"):
                question_inter = tf.layers.dense(question_rep,
                                                 size,
                                                 activation=None)

            with tf.variable_scope("span_rep"):
                span_rep = tf.layers.dense(span_rep, size, activation=tf.tanh)

            span_question_rep = tf.concat([
                span_rep,
                tf.expand_dims(question_rep, 1) * span_rep, wiqs_left5,
                wiqs_left10, wiqs_left20, wiqs_right5, wiqs_right10,
                wiqs_right20
            ], 2)
            span_question_rep.set_shape([None, None, 2 * size + 6 * 2])

            with tf.variable_scope("hidden"):
                h = tf.tanh(
                    tf.layers.dense(span_question_rep, size, activation=None) +
                    tf.expand_dims(question_inter, 1))

            with tf.variable_scope("scoring"):
                span_scores = tf.squeeze(
                    tf.layers.dense(h, 1, activation=None), 2)

            best_span = tf.argmax(span_scores, 1)
            predicted_span = tf.gather(spans, best_span)

            return span_scores, tf.tile(tf.expand_dims(spans, 0),
                                        tf.stack([batch_size, 1,
                                                  1])), predicted_span
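Every example on this page leans on misc.mask_for_lengths. For readers without the jack sources at hand, here is a minimal sketch of the behaviour the call sites above imply; the exact signature and the default mask value are assumptions, not the library's code:

import tensorflow as tf

def mask_for_lengths(lengths, max_length=None, mask_right=True, value=-1e6):
    # Sketch: a [batch, max_length] float tensor that is `value` at masked
    # positions and 0.0 elsewhere. mask_right=True masks positions >= length
    # (padding, for additive pre-softmax masking); mask_right=False masks
    # positions < length (e.g., everything left of a start pointer).
    if max_length is None:
        max_length = tf.reduce_max(lengths)
    valid = tf.sequence_mask(lengths, max_length, dtype=tf.float32)
    return (1.0 - valid) * value if mask_right else valid * value

With mask_right=False and value=1.0 this doubles as a binary valid-position mask, which is how question_binary_mask and answer_type_mask above are built.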
Example No. 22
    def create_output(self, shared_resources, input_tensors):
        tensors = TensorPortTensors(input_tensors)
        with tf.variable_scope(
                "fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            batch_size = tf.shape(tensors.question_length)[0]
            max_question_length = tf.reduce_max(tensors.question_length)
            support_mask = misc.mask_for_lengths(tensors.support_length)

            input_size = shared_resources.config["repr_dim_input"]
            size = shared_resources.config["repr_dim"]
            with_char_embeddings = shared_resources.config.get(
                "with_char_embeddings", False)

            # set shapes for inputs
            tensors.emb_question.set_shape([None, None, input_size])
            tensors.emb_support.set_shape([None, None, input_size])

            emb_question = tensors.emb_question
            emb_support = tensors.emb_support
            if with_char_embeddings:
                # compute combined embeddings
                [char_emb_question, char_emb_support] = conv_char_embedding(
                    len(shared_resources.char_vocab), size, tensors.word_chars,
                    tensors.word_char_length,
                    [tensors.question_words, tensors.support_words])

                emb_question = tf.concat([emb_question, char_emb_question], 2)
                emb_support = tf.concat([emb_support, char_emb_support], 2)
                input_size += size

                # set shapes for inputs
                emb_question.set_shape([None, None, input_size])
                emb_support.set_shape([None, None, input_size])

            # compute encoder features
            question_features = tf.ones(
                tf.stack([batch_size, max_question_length, 2]))
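            # two constant 1.0 features per question token, mirroring the two
            # support features (word-in-question flags) computed below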

            v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                     initializer=tf.constant_initializer(1.0))

            wiq_w = tf.matmul(tf.gather(emb_question * v_wiqw,
                                        tensors.support2question),
                              emb_support,
                              adjoint_b=True)
            wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

            question_binary_mask = tf.gather(
                tf.sequence_mask(tensors.question_length, dtype=tf.float32),
                tensors.support2question)
            wiq_w = tf.reduce_sum(
                tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2),
                [1])

            # [B, L , 2]
            support_features = tf.stack([tensors.word_in_question, wiq_w], 2)

            # highway layer to allow for interaction between concatenated embeddings
            if with_char_embeddings:
                with tf.variable_scope("char_embeddings") as vs:
                    emb_question = tf.layers.dense(
                        emb_question, size, name="embeddings_projection")
                    emb_question = highway_network(emb_question, 1)
                    vs.reuse_variables()
                    emb_support = tf.layers.dense(emb_support,
                                                  size,
                                                  name="embeddings_projection")
                    emb_support = highway_network(emb_support, 1)

            keep_prob = 1.0 - shared_resources.config.get("dropout", 0.0)
            emb_question, emb_support = tf.cond(
                tensors.is_eval, lambda: (emb_question, emb_support), lambda:
                (tf.nn.dropout(emb_question,
                               keep_prob,
                               noise_shape=
                               [1, 1, emb_question.get_shape()[-1].value]),
                 tf.nn.dropout(emb_support,
                               keep_prob,
                               noise_shape=
                               [1, 1, emb_support.get_shape()[-1].value])))

            # extend embeddings with features
            emb_question_ext = tf.concat([emb_question, question_features], 2)
            emb_support_ext = tf.concat([emb_support, support_features], 2)

            # encode question and support
            encoder_type = shared_resources.config.get('encoder',
                                                       'lstm').lower()
            if encoder_type in ['lstm', 'sru', 'gru']:
                size = size + 2 if encoder_type == 'sru' else size  # to allow for use of residual in SRU
                encoded_question = encoder(emb_question_ext,
                                           tensors.question_length,
                                           size,
                                           module=encoder_type)
                encoded_support = encoder(emb_support_ext,
                                          tensors.support_length,
                                          size,
                                          module=encoder_type,
                                          reuse=True)
                projection_initializer = tf.constant_initializer(
                    np.concatenate([np.eye(size), np.eye(size)]))
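                # stacked identities: the projection is initialized to the sum
                # of the forward and backward encoder outputs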
                encoded_question = tf.layers.dense(
                    encoded_question,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_q')
                encoded_support = tf.layers.dense(
                    encoded_support,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_s')
            else:
                raise ValueError(
                    "Only rnn ('lstm', 'sru', 'gru') encoder allowed for FastQA!"
                )

            answer_layer = shared_resources.config.get('answer_layer',
                                                       'conditional').lower()

            if answer_layer == 'conditional':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             beam_size=shared_resources.config.get("beam_size", 1),
                                             max_span_size=shared_resources.config.get("max_span_size", 10000))
            elif answer_layer == 'conditional_bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             beam_size=shared_resources.config.get("beam_size", 1),
                                             max_span_size=shared_resources.config.get("max_span_size", 10000),
                                             bilinear=True)
            elif answer_layer == 'bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    bilinear_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                          tensors.support_length,
                                          tensors.support2question, tensors.answer2support, tensors.is_eval,
                                          beam_size=shared_resources.config.get("beam_size", 1),
                                          max_span_size=shared_resources.config.get("max_span_size", 10000))
            else:
                raise ValueError("Unknown answer_layer: %s" % answer_layer)

            span = tf.stack(
                [doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

            return TensorPort.to_mapping(self.output_ports,
                                         (start_scores, end_scores, span))
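For orientation, the configuration keys this create_output reads can be gathered in one place. The values below are purely illustrative defaults, not taken from any shipped jack config:

config = {
    "repr_dim_input": 300,          # input embedding size
    "repr_dim": 150,                # hidden size
    "with_char_embeddings": True,
    "dropout": 0.2,
    "encoder": "lstm",              # one of 'lstm', 'sru', 'gru'
    "answer_layer": "conditional",  # or 'conditional_bilinear', 'bilinear'
    "beam_size": 1,
    "max_span_size": 16,
}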
Example No. 23
def mask_with_start(scores):
    return scores + misc.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                          tf.reduce_max(support_length),
                                          mask_right=False)
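This helper closes over start_pointer and support_length from its enclosing scope; adding its output to the end scores forbids any end position left of the start pointer. A hypothetical call site, for illustration only:

masked_end_scores = mask_with_start(end_scores)
predicted_end = tf.argmax(masked_end_scores, axis=1, output_type=tf.int32)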