Example #1
def inference(documents, doc_mask, query, query_mask):
    embedding = tf.get_variable('embedding',
                                [FLAGS.vocab_size, FLAGS.embedding_size],
                                initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

    regularizer = tf.nn.l2_loss(embedding)

    doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(
        embedding, documents), FLAGS.dropout_keep_prob)
    doc_emb.set_shape([None, None, FLAGS.embedding_size])

    query_emb = tf.nn.dropout(tf.nn.embedding_lookup(
        embedding, query), FLAGS.dropout_keep_prob)
    query_emb.set_shape([None, None, FLAGS.embedding_size])

    with tf.variable_scope('document', initializer=orthogonal_initializer()):
        fwd_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
        back_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)

        doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, doc_emb,
            sequence_length=tf.to_int64(doc_len),
            dtype=tf.float32)
        h_doc = tf.concat(2, h)

    with tf.variable_scope('query', initializer=orthogonal_initializer()):
        fwd_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)
        back_cell = tf.nn.rnn_cell.GRUCell(FLAGS.hidden_size)

        query_len = tf.reduce_sum(query_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell, back_cell, query_emb,
            sequence_length=tf.to_int64(query_len),
            dtype=tf.float32)
        h_query = tf.concat(2, h)

    M = tf.batch_matmul(h_doc, h_query, adj_y=True)
    M_mask = tf.to_float(tf.batch_matmul(tf.expand_dims(
        doc_mask, -1), tf.expand_dims(query_mask, 1)))

    alpha = softmax(M, 1, M_mask)
    beta = softmax(M, 2, M_mask)

    query_importance = tf.expand_dims(tf.reduce_sum(
        beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

    s = tf.squeeze(tf.batch_matmul(alpha, query_importance), [2])

    unpacked_s = zip(tf.unpack(s, FLAGS.batch_size),
                     tf.unpack(documents, FLAGS.batch_size))
    y_hat = tf.pack([
        tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size)
        for (attentions, sentence_ids) in unpacked_s
    ])

    return y_hat, regularizer
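
Both examples call a helper softmax(M, axis, M_mask) that is not a TensorFlow built-in and is not shown above. Below is a minimal sketch of such a masked softmax; the exact signature and the epsilon handling are assumptions, not the original authors' code. Note also that Example #1 targets the pre-1.0 TensorFlow API (tf.concat(dim, values), tf.batch_matmul, tf.pack/tf.unpack), while Example #2 below uses the TensorFlow 1.x equivalents.

import tensorflow as tf

def softmax(target, axis, mask, epsilon=1e-12):
    """Softmax over `axis` that assigns zero weight to masked (padded) positions."""
    # Subtract the per-slice maximum for numerical stability before exponentiating.
    max_along_axis = tf.reduce_max(target, axis, keep_dims=True)
    exp = tf.exp(target - max_along_axis) * mask
    normalizer = tf.reduce_sum(exp, axis, keep_dims=True)
    return exp / (normalizer + epsilon)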
Example #2
def inference(documents, doc_mask, query, query_mask):
    '''
    :param documents: document batch ------- [batch_size, seq_length]
    :param doc_mask: document weights ------ [batch_size, seq_length]
    :param query: query batch -------------- [batch_size, query_length]
    :param query_mask: query weights ------- [batch_size, query_length]
    :return: (y_hat, regularizer) -- attention mass accumulated per vocabulary id for each example,
             plus the L2 regularizer on the embedding matrix
    '''

    # 1. Contextual Embedding: feed the one-hot token ids through an embedding layer.
    #    The document and query embedding layers share one weight matrix, so both the document
    #    and the query take part in learning the embeddings. A bidirectional GRU then encodes the
    #    document and the query separately; each encoding concatenates the forward and backward
    #    GRU hidden outputs, so the resulting word representations carry contextual information.

    embedding = tf.get_variable(
        'embedding', [FLAGS.vocab_size, FLAGS.embedding_size],
        initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))

    regularizer = tf.nn.l2_loss(embedding)

    doc_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, documents),
                            FLAGS.dropout_keep_prob)
    doc_emb.set_shape([None, None, FLAGS.embedding_size])

    query_emb = tf.nn.dropout(tf.nn.embedding_lookup(embedding, query),
                              FLAGS.dropout_keep_prob)
    query_emb.set_shape([None, None, FLAGS.embedding_size])

    with tf.variable_scope('document', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        # Actual length of each sequence in the batch, shape [batch_size]
        doc_len = tf.reduce_sum(doc_mask, reduction_indices=1)

        # tf.nn.bidirectional_dynamic_rnn runs a bidirectional RNN (GRU cells here); h below is `outputs`.
        # It returns (outputs, output_states):
        # outputs is a tuple (output_fw, output_bw) of the forward-cell and backward-cell output tensors.
        # With time_major=False each tensor has shape [batch_size, max_time, depth],
        # i.e. [batch_size, seq_len, hidden_size]; the two are concatenated with tf.concat(outputs, 2).
        # ------------------------------------------------------------------------------------------------------
        # output_states is a tuple (output_state_fw, output_state_bw) holding the final forward and
        # backward hidden states. For LSTM cells these are LSTMStateTuple (c, h) pairs (memory cell
        # and hidden state); for the GRUCell used here each state is a single tensor.
        # sequence_length: (optional) An int32/int64 vector, size [batch_size],
        # containing the actual lengths for each of the sequences in the batch.
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell,
            back_cell,
            doc_emb,
            sequence_length=tf.to_int64(doc_len),
            dtype=tf.float32)
        h_doc = tf.concat(h, 2)

    with tf.variable_scope('query', initializer=orthogonal_initializer()):
        fwd_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)
        back_cell = tf.contrib.rnn.GRUCell(FLAGS.hidden_size)

        query_len = tf.reduce_sum(query_mask, reduction_indices=1)
        h, _ = tf.nn.bidirectional_dynamic_rnn(
            fwd_cell,
            back_cell,
            query_emb,
            sequence_length=tf.to_int64(query_len),
            dtype=tf.float32)
        # h_query = tf.nn.dropout(tf.concat(2, h), FLAGS.dropout_keep_prob)
        h_query = tf.concat(h, 2)

    # 2. Pair-wise Matching Score
    #    The dot products of h_doc and h_query form the pair-wise matching matrix:
    #    M(i, j) is the dot product of the contextual embedding of the i-th document word with that
    #    of the j-th query word. Rows index the document and columns index the query, so M has
    #    shape |D| x |Q| (the matrix obtained after the dot product in Fig. 1 of the paper).

    # adjoint_b: if True, b is conjugated and transposed before the multiplication
    M = tf.matmul(h_doc, h_query, adjoint_b=True)
    # -1 is the last axis: expand the masks so that M_mask = [batch_size, D_size, 1] x [batch_size, 1, Q_size]
    M_mask = tf.to_float(
        tf.matmul(tf.expand_dims(doc_mask, -1), tf.expand_dims(query_mask, 1)))

    # M:[batch_size,D_size,Q_size],
    # alpha:[batch_size,D_size,Q_size],
    # beta:[batch_size,D_size,Q_size]
    alpha = softmax(M, 1, M_mask)
    beta = softmax(M, 2, M_mask)

    # tf.reduce_sum(beta, 1) ---> [batch_size, Q_size]
    # tf.expand_dims(doc_len, -1) ---> [batch_size, 1]
    # query_importance ---> [batch_size, Q_size, 1]
    query_importance = tf.expand_dims(
        tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(doc_len, -1)), -1)

    # s:  [batch_size,D_size,1] ---> [batch_size,D_size]
    s = tf.squeeze(tf.matmul(alpha, query_importance), [2])

    # tf.unstack(s, FLAGS.batch_size) ---> list of batch_size tensors of shape [D_size]
    # unpacked_s: pairs (attention weights, document token ids), one per example in the batch
    unpacked_s = zip(tf.unstack(s, FLAGS.batch_size),
                     tf.unstack(documents, FLAGS.batch_size))
    y_hat = tf.stack([
        tf.unsorted_segment_sum(attentions, sentence_ids, FLAGS.vocab_size)
        for (attentions, sentence_ids) in unpacked_s
    ])

    return y_hat, regularizer
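
orthogonal_initializer() is likewise defined elsewhere in the repository and not shown here. A typical implementation (a sketch along the lines of Saxe et al.'s orthogonal initialization, not necessarily the authors' exact code) draws a random Gaussian matrix and takes an orthogonal factor of its SVD:

import numpy as np
import tensorflow as tf

def orthogonal_initializer(scale=1.0):
    """Initializer producing (semi-)orthogonal weight matrices, used for the GRU variable scopes."""
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        flat_shape = (shape[0], int(np.prod(shape[1:])))
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # Pick whichever orthogonal factor has the flattened shape, then reshape back to `shape`.
        q = u if u.shape == flat_shape else v
        return tf.constant(scale * q.reshape(shape), dtype=dtype)
    return _initializer

TensorFlow 1.x also ships a built-in tf.orthogonal_initializer() that can be used in place of a hand-rolled helper.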