Example #1
def coattention_layer(seq1,
                      seq1_length,
                      seq2,
                      seq2_length,
                      attn_type='diagonal_bilinear',
                      scaled=True,
                      with_sentinel=False,
                      seq2_to_seq1=None,
                      num_layers=1,
                      encoder=None,
                      **kwargs):
    """Encodes seq1 conditioned on seq2, e.g., using word-by-word attention."""
    if attn_type == 'bilinear':
        attn_fun = attention.bilinear_attention
    elif attn_type == 'dot':
        attn_fun = attention.dot_attention
    elif attn_type == 'diagonal_bilinear':
        attn_fun = attention.diagonal_bilinear_attention
    else:
        raise ValueError("Unknown attention type: %s" % attn_type)

    _, _, attn_states1, attn_states2, co_attn_state = attention.coattention(
        seq1, seq1_length, seq2, seq2_length, scaled, with_sentinel, attn_fun,
        seq2_to_seq1=seq2_to_seq1)

    if num_layers < 2:
        # single layer: concatenate seq1's attention states with the co-attention state
        out = tf.concat([attn_states1, co_attn_state], 2)
    else:
        # collect per-layer outputs in lists so all layers can be concatenated at the end
        seq1 = []
        attn_states1 = [attn_states1]
        attn_states2 = [attn_states2]
        co_attn_state = [co_attn_state]
        for i in range(1, num_layers):
            with tf.variable_scope(str(i)):
                enc_1 = sequence_encoder.encoder(attn_states1[-1],
                                                 seq1_length,
                                                 name='encoder1',
                                                 **encoder)
                enc_2 = sequence_encoder.encoder(attn_states2[-1],
                                                 seq2_length,
                                                 name='encoder2',
                                                 **encoder)
                seq1.append(enc_1)
                _, _, new_attn_states1, new_attn_states2, new_co_attn_state = attention.coattention(
                    enc_1,
                    seq1_length,
                    enc_2,
                    seq2_length,
                    scaled,
                    with_sentinel,
                    attn_fun,
                    seq2_to_seq1=seq2_to_seq1)
                attn_states1.append(new_attn_states1)
                attn_states2.append(new_attn_states2)
                co_attn_state.append(new_co_attn_state)
        out = tf.concat(seq1 + attn_states1 + co_attn_state, 2)

    return out
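
For context: `attention` and `sequence_encoder` refer to modules of the surrounding library. A minimal invocation sketch, assuming TensorFlow 1.x; the placeholder shapes and the `encoder` config keys are illustrative assumptions, and an encoder config is only needed when num_layers > 1:

import tensorflow as tf

# Hypothetical embedded inputs of dimension 100.
seq1 = tf.placeholder(tf.float32, [None, None, 100])
seq1_length = tf.placeholder(tf.int32, [None])
seq2 = tf.placeholder(tf.float32, [None, None, 100])
seq2_length = tf.placeholder(tf.int32, [None])

out = coattention_layer(seq1, seq1_length, seq2, seq2_length,
                        attn_type='diagonal_bilinear',
                        num_layers=2,
                        encoder={'module': 'lstm', 'repr_dim': 100})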
Example #2
def answer_layer(encoded_question, question_length, encoded_support, support_length,
                 support2question, answer2support, is_eval, correct_start=None, topk=1, max_span_size=10000,
                 encoder=None, module='bilinear', repr_dim=100, **kwargs):
    if module == 'bilinear':
        return bilinear_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            support2question, answer2support, is_eval, topk, max_span_size)
    elif module == 'mlp':
        return mlp_answer_layer(repr_dim, encoded_question, question_length, encoded_support, support_length,
                                support2question, answer2support, is_eval, topk, max_span_size)
    elif module == 'conditional':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            correct_start, support2question, answer2support, is_eval, topk, max_span_size)
    elif module == 'conditional_bilinear':
        return conditional_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            correct_start, support2question, answer2support, is_eval, topk, max_span_size, bilinear=True)
    elif module == 'san':
        return san_answer_layer(
            repr_dim, encoded_question, question_length, encoded_support, support_length,
            support2question, answer2support, is_eval, topk, max_span_size, **kwargs)
    elif module == 'bidaf':
        # copy to avoid mutating the caller's config dict
        encoder = dict(encoder) if encoder else {}
        if 'repr_dim' not in encoder:
            encoder['repr_dim'] = repr_dim
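        # BiDAF-style end prediction: encode the support once more and concatenate,
        # giving a separate representation for scoring span ends.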
        encoded_support_end = sequence_encoder.encoder(
            encoded_support, support_length, name='encoded_support_end', **encoder)
        encoded_support_end = tf.concat([encoded_support, encoded_support_end], 2)
        return bidaf_answer_layer(encoded_support, encoded_support_end, support_length,
                                  support2question, answer2support, is_eval,
                                  topk=topk, max_span_size=max_span_size)
    else:
        raise ValueError("Unknown answer layer type: %s" % module)
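
A hedged usage sketch of the dispatch above (the surrounding tensors are assumed to come from an upstream encoder; the return structure depends on the selected *_answer_layer, so it is left unpacked here):

# Illustrative only; all tensor arguments are assumed to exist.
prediction = answer_layer(encoded_question, question_length,
                          encoded_support, support_length,
                          support2question, answer2support, is_eval,
                          module='conditional_bilinear', repr_dim=128,
                          topk=1, max_span_size=16)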
Example #3
    def encode_module(module):
        module_type = module['module']

        reuse = module['name'] in seen_layer
        seen_layer.add(module['name'])

        if module_type == 'repeat':
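            # 'repeat' unrolls its inner encoder list `num` times; with 'reuse'
            # set, every iteration shares variables under one name prefix.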
            reuse = module.get('reuse')
            for k in range(module['num']):
                prefix = module['name'] + '/' if reuse else '%s_%d/' % (
                    module['name'], k)
                for j, inner_module in enumerate(module['encoder']):
                    # copy this configuration
                    inner_module = copy.deepcopy(inner_module)
                    if 'name' not in inner_module:
                        inner_module['name'] = _unique_module_name(
                            inner_module, j)
                    inner_module['name'] = prefix + inner_module['name']
                    encode_module(inner_module)
            return

        try:
            key = module['input']
            out_key = module.get('output', key)
            if module_type in ['concat', 'add', 'mul', 'weighted_add', 'sub']:
                outputs_length[out_key] = outputs_length[key[0]]
                outputs_mapping[out_key] = outputs_mapping.get(key[0])
                if module_type == 'concat':
                    outputs[out_key] = tf.concat([outputs[k] for k in key], 2,
                                                 name=module['name'])
                    return
                if module_type == 'add':
                    outputs[out_key] = tf.add_n([outputs[k] for k in key],
                                                name=module['name'])
                    return
                if module_type == 'sub':
                    outputs[out_key] = tf.subtract(outputs[key[0]],
                                                   outputs[key[1]],
                                                   name=module['name'])
                    return
                if module_type == 'mul':
                    o = outputs[key[0]]
                    for k in key[1:-1]:
                        o *= outputs[k]
                    outputs[out_key] = tf.multiply(o, outputs[key[-1]],
                                                   name=module['name'])
                    return
                if module_type == 'weighted_add':
                    # sigmoid gate g mixes the two inputs:
                    # g * outputs[key[0]] + (1 - g) * outputs[key[1]]
                    bias = module.get('bias', 0.0)
                    g = tf.layers.dense(
                        tf.concat([outputs[k] for k in key], 2),
                        outputs[key[0]].get_shape()[-1].value,
                        tf.sigmoid,
                        bias_initializer=tf.constant_initializer(bias))
                    outputs[out_key] = tf.identity(
                        g * outputs[key[0]] + (1.0 - g) * outputs[key[1]],
                        name=module['name'])
                    return
            if 'repr_dim' not in module:
                module['repr_dim'] = default_repr_dim
            if 'dependent' in module:
                dep_key = module['dependent']
                outputs[out_key] = interaction_layer(
                    outputs[key],
                    outputs_length[key],
                    outputs[dep_key],
                    outputs_length[dep_key],
                    outputs_mapping.get(key),
                    outputs_mapping.get(dep_key),
                    reuse=reuse,
                    **module)
            else:
                if module.get('dropout') is True:
                    # set dropout to default dropout
                    module['dropout'] = dropout
                outputs[out_key] = encoder(outputs[key],
                                           outputs_length[key],
                                           reuse=reuse,
                                           is_eval=is_eval,
                                           **module)
            outputs_length[out_key] = outputs_length[key]
            outputs_mapping[out_key] = outputs_mapping.get(key)
        except Exception:
            logger.error('Creating module %s failed.', module['name'])
            raise
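
The configuration shape this walker expects can be read off its key accesses: each module dict carries 'module', 'name' and 'input', plus optional 'output', 'dependent', 'repr_dim' and 'dropout'; a 'repeat' module wraps an inner 'encoder' list and a count 'num'. A hypothetical config (the concrete module names are assumptions, not taken from this listing):

encoder_config = [
    {'module': 'lstm', 'name': 'enc1', 'input': 'question', 'output': 'q1'},
    {'module': 'repeat', 'name': 'stack', 'num': 2, 'reuse': False,
     'encoder': [
         # inner modules are deep-copied and name-prefixed per iteration
         {'module': 'self_attn', 'input': 'q1', 'output': 'q1'},
     ]},
    {'module': 'concat', 'name': 'cat', 'input': ['question', 'q1'],
     'output': 'q_final'},
]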
Example #4
    def create_output(self, shared_resources, input_tensors):
        tensors = TensorPortTensors(input_tensors)
        with tf.variable_scope(
                "fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
            # Some helpers
            batch_size = tf.shape(tensors.question_length)[0]
            max_question_length = tf.reduce_max(tensors.question_length)
            support_mask = misc.mask_for_lengths(tensors.support_length)

            input_size = shared_resources.embeddings.shape[-1]
            size = shared_resources.config["repr_dim"]
            with_char_embeddings = shared_resources.config.get(
                "with_char_embeddings", False)

            # set shapes for inputs
            tensors.emb_question.set_shape([None, None, input_size])
            tensors.emb_support.set_shape([None, None, input_size])

            emb_question = tensors.emb_question
            emb_support = tensors.emb_support
            if with_char_embeddings:
                # compute combined embeddings
                [char_emb_question, char_emb_support
                 ] = conv_char_embedding(len(shared_resources.char_vocab),
                                         size, tensors.word_chars,
                                         tensors.word_char_length, [
                                             tensors.question_batch_words,
                                             tensors.support_batch_words
                                         ])

                emb_question = tf.concat([emb_question, char_emb_question], 2)
                emb_support = tf.concat([emb_support, char_emb_support], 2)
                input_size += size

                # set shapes for inputs
                emb_question.set_shape([None, None, input_size])
                emb_support.set_shape([None, None, input_size])

            # compute encoder features
            question_features = tf.ones(
                tf.stack([batch_size, max_question_length, 2]))

            v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                     initializer=tf.constant_initializer(1.0))

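            # Weighted word-in-question feature: each support token is softly
            # aligned to the question via a learned elementwise weighting.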
            wiq_w = tf.matmul(tf.gather(emb_question * v_wiqw,
                                        tensors.support2question),
                              emb_support,
                              adjoint_b=True)
            wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

            question_binary_mask = tf.gather(
                tf.sequence_mask(tensors.question_length, dtype=tf.float32),
                tensors.support2question)
            wiq_w = tf.reduce_sum(
                tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2),
                [1])

            # [B, L, 2]
            support_features = tf.stack([tensors.word_in_question, wiq_w], 2)

            # highway layer to allow for interaction between concatenated embeddings
            if with_char_embeddings:
                with tf.variable_scope("char_embeddings") as vs:
                    emb_question = tf.layers.dense(
                        emb_question, size, name="embeddings_projection")
                    emb_question = highway_network(emb_question, 1)
                    vs.reuse_variables()
                    emb_support = tf.layers.dense(emb_support,
                                                  size,
                                                  name="embeddings_projection")
                    emb_support = highway_network(emb_support, 1)

            keep_prob = 1.0 - shared_resources.config.get("dropout", 0.0)
            # word dropout shared across positions (noise_shape broadcasts over time)
            emb_question, emb_support = tf.cond(
                tensors.is_eval,
                lambda: (emb_question, emb_support),
                lambda: (tf.nn.dropout(emb_question, keep_prob,
                                       noise_shape=[1, 1, emb_question.get_shape()[-1].value]),
                         tf.nn.dropout(emb_support, keep_prob,
                                       noise_shape=[1, 1, emb_support.get_shape()[-1].value])))

            # extend embeddings with features
            emb_question_ext = tf.concat([emb_question, question_features], 2)
            emb_support_ext = tf.concat([emb_support, support_features], 2)

            # encode question and support
            encoder_type = shared_resources.config.get('encoder',
                                                       'lstm').lower()
            if encoder_type in ['lstm', 'sru', 'gru']:
                size = size + 2 if encoder_type == 'sru' else size  # to allow for use of residual in SRU
                encoded_question = encoder(emb_question_ext,
                                           tensors.question_length,
                                           size,
                                           module=encoder_type)
                encoded_support = encoder(emb_support_ext,
                                          tensors.support_length,
                                          size,
                                          module=encoder_type,
                                          reuse=True)
                projection_initializer = tf.constant_initializer(
                    np.concatenate([np.eye(size), np.eye(size)]))
                encoded_question = tf.layers.dense(
                    encoded_question,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_q')
                encoded_support = tf.layers.dense(
                    encoded_support,
                    size,
                    tf.tanh,
                    use_bias=False,
                    kernel_initializer=projection_initializer,
                    name='projection_s')
            else:
                raise ValueError(
                    "Only RNN encoders ('lstm', 'sru', 'gru') are allowed for FastQA!")

            answer_layer = shared_resources.config.get('answer_layer',
                                                       'conditional').lower()

            topk = tf.get_variable('topk',
                                   initializer=shared_resources.config.get(
                                       'topk', 1),
                                   dtype=tf.int32,
                                   trainable=False)
            topk_p = tf.placeholder(tf.int32, [], 'beam_size_setter')
            topk_assign = topk.assign(topk_p)
            self._topk_assign = lambda k: self.tf_session.run(
                topk_assign, {topk_p: k})

            if answer_layer == 'conditional':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             topk=topk,
                                             max_span_size=shared_resources.config.get("max_span_size", 10000))
            elif answer_layer == 'conditional_bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    conditional_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                             tensors.support_length,
                                             tensors.correct_start, tensors.support2question, tensors.answer2support,
                                             tensors.is_eval,
                                             topk=topk,
                                             max_span_size=shared_resources.config.get("max_span_size", 10000),
                                             bilinear=True)
            elif answer_layer == 'bilinear':
                start_scores, end_scores, doc_idx, predicted_start_pointer, predicted_end_pointer = \
                    bilinear_answer_layer(size, encoded_question, tensors.question_length, encoded_support,
                                          tensors.support_length,
                                          tensors.support2question, tensors.answer2support, tensors.is_eval,
                                          topk=topk,
                                          max_span_size=shared_resources.config.get("max_span_size", 10000))
            else:
                raise ValueError("Unknown answer layer type: %s" % answer_layer)

            span = tf.stack(
                [doc_idx, predicted_start_pointer, predicted_end_pointer], 1)

            return TensorPort.to_mapping(self.output_ports,
                                         (start_scores, end_scores, span))
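
One detail worth noting: topk is stored as a non-trainable variable with a placeholder/assign pair, so the beam size can be changed at inference time without rebuilding the graph. A standalone sketch of that pattern (TensorFlow 1.x):

import tensorflow as tf

topk = tf.get_variable('topk', initializer=1, dtype=tf.int32, trainable=False)
topk_p = tf.placeholder(tf.int32, [], 'beam_size_setter')
topk_assign = topk.assign(topk_p)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(topk_assign, {topk_p: 5})  # switch beam size at runtime
    print(sess.run(topk))  # 5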