Example #1
import tensorflow as tf

import layer_utils  # project-local helpers (assumed importable)


def context_layer(in_question_repres,
                  in_passage_repres,
                  question_lengths,
                  passage_lengths,
                  question_mask,
                  passage_mask,
                  input_dim,
                  is_training,
                  options=None):
    with tf.variable_scope('context_layer'):
        for i in range(
                options.context_layer_num):  # support multiple context layers
            with tf.variable_scope('layer-{}'.format(i)):
                # contextual lstm for both passage and question
                in_question_repres = tf.multiply(
                    in_question_repres, tf.expand_dims(question_mask, axis=-1))
                in_passage_repres = tf.multiply(
                    in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
                (question_context_representation_fw,
                 question_context_representation_bw,
                 in_question_repres) = layer_utils.my_lstm_layer(
                     in_question_repres,
                     options.context_lstm_dim,
                     input_lengths=question_lengths,
                     scope_name="context_represent",
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)
                (passage_context_representation_fw,
                 passage_context_representation_bw,
                 in_passage_repres) = layer_utils.my_lstm_layer(
                     in_passage_repres,
                     options.context_lstm_dim,
                     input_lengths=passage_lengths,
                     scope_name="context_represent",
                     reuse=True,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)
    question_context_representation = tf.concat(
        axis=2,
        values=[
            question_context_representation_fw,
            question_context_representation_bw
        ])
    passage_context_representation = tf.concat(
        axis=2,
        values=[
            passage_context_representation_fw,
            passage_context_representation_bw
        ])
    return (question_context_representation, passage_context_representation)
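
A minimal usage sketch for context_layer; the option values, tensor shapes, and the SimpleNamespace stand-in for the real config object are all assumptions:

# Hypothetical wiring of context_layer; shapes and option values are assumed.
from types import SimpleNamespace

opts = SimpleNamespace(context_layer_num=1, context_lstm_dim=100,
                       dropout_rate=0.1, use_cudnn=False)
q_repres = tf.placeholder(tf.float32, [None, None, 300])  # [batch, q_len, dim]
p_repres = tf.placeholder(tf.float32, [None, None, 300])  # [batch, p_len, dim]
q_lengths = tf.placeholder(tf.int32, [None])
p_lengths = tf.placeholder(tf.int32, [None])
q_mask = tf.sequence_mask(q_lengths, dtype=tf.float32)    # [batch, q_len]
p_mask = tf.sequence_mask(p_lengths, dtype=tf.float32)    # [batch, p_len]

q_ctx, p_ctx = context_layer(q_repres, p_repres, q_lengths, p_lengths,
                             q_mask, p_mask, input_dim=300,
                             is_training=True, options=opts)
# q_ctx: [batch, q_len, 2*context_lstm_dim]; p_ctx: [batch, p_len, 2*context_lstm_dim]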
Example #2
    def create_model_graph(self, num_classes, word_vocab=None, char_vocab=None, is_training=True, global_step=None):
        options = self.options
        # ======word representation layer======
        in_question_repres = [] # word and char
        in_passage_repres = [] # word and char
        input_dim = 0
        if word_vocab is not None:
            word_vec_trainable = True
            cur_device = '/gpu:0'
            if options.fix_word_vec:
                word_vec_trainable = False
                cur_device = '/cpu:0'
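            # Keeping fixed (non-trainable) embeddings on the CPU avoids
            # spending GPU memory on a large constant matrix.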
            with tf.device(cur_device):
                self.word_embedding = tf.get_variable("word_embedding", trainable=word_vec_trainable,
                                                      initializer=tf.constant(word_vocab.word_vecs), dtype=tf.float32)

            in_question_word_repres = tf.nn.embedding_lookup(self.word_embedding, self.in_question_words) # [batch_size, question_len, word_dim]
            in_passage_word_repres = tf.nn.embedding_lookup(self.word_embedding, self.in_passage_words) # [batch_size, passage_len, word_dim]
            in_question_repres.append(in_question_word_repres)
            in_passage_repres.append(in_passage_word_repres)

            input_shape = tf.shape(self.in_question_words)
            batch_size = input_shape[0]
            question_len = input_shape[1]
            input_shape = tf.shape(self.in_passage_words)
            passage_len = input_shape[1]
            input_dim += word_vocab.word_dim
            
        if options.with_char and char_vocab is not None:
            input_shape = tf.shape(self.in_question_chars)
            batch_size = input_shape[0]
            question_len = input_shape[1]
            q_char_len = input_shape[2]
            input_shape = tf.shape(self.in_passage_chars)
            passage_len = input_shape[1]
            p_char_len = input_shape[2]
            char_dim = char_vocab.word_dim
            self.char_embedding = tf.get_variable("char_embedding", initializer=tf.constant(char_vocab.word_vecs), dtype=tf.float32)

            in_question_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.in_question_chars) # [batch_size, question_len, q_char_len, char_dim]
            in_question_char_repres = tf.reshape(in_question_char_repres, shape=[-1, q_char_len, char_dim])
            question_char_lengths = tf.reshape(self.question_char_lengths, [-1])
            question_char_mask = tf.sequence_mask(question_char_lengths, q_char_len, dtype=tf.float32)  # [batch_size*question_len, q_char_len]
            in_question_char_repres = tf.multiply(in_question_char_repres, tf.expand_dims(question_char_mask, axis=-1))


            in_passage_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.in_passage_chars) # [batch_size, passage_len, p_char_len, char_dim]
            in_passage_char_repres = tf.reshape(in_passage_char_repres, shape=[-1, p_char_len, char_dim])
            passage_char_lengths = tf.reshape(self.passage_char_lengths, [-1])
            passage_char_mask = tf.sequence_mask(passage_char_lengths, p_char_len, dtype=tf.float32)  # [batch_size*passage_len, p_char_len]
            in_passage_char_repres = tf.multiply(in_passage_char_repres, tf.expand_dims(passage_char_mask, axis=-1))

            (question_char_outputs_fw, question_char_outputs_bw, _) = layer_utils.my_lstm_layer(in_question_char_repres, options.char_lstm_dim,
                    input_lengths=question_char_lengths,scope_name="char_lstm", reuse=False,
                    is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn)
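            # The forward LSTM's last valid output sits at step length-1; the
            # backward LSTM's (time-reversed) final output sits at step 0.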
            question_char_outputs_fw = layer_utils.collect_final_step_of_lstm(question_char_outputs_fw, question_char_lengths - 1)
            question_char_outputs_bw = question_char_outputs_bw[:, 0, :]
            question_char_outputs = tf.concat(axis=1, values=[question_char_outputs_fw, question_char_outputs_bw])
            question_char_outputs = tf.reshape(question_char_outputs, [batch_size, question_len, 2*options.char_lstm_dim])

            (passage_char_outputs_fw, passage_char_outputs_bw, _) = layer_utils.my_lstm_layer(in_passage_char_repres, options.char_lstm_dim,
                    input_lengths=passage_char_lengths, scope_name="char_lstm", reuse=True,
                    is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn)
            passage_char_outputs_fw = layer_utils.collect_final_step_of_lstm(passage_char_outputs_fw, passage_char_lengths - 1)
            passage_char_outputs_bw = passage_char_outputs_bw[:, 0, :]
            passage_char_outputs = tf.concat(axis=1, values=[passage_char_outputs_fw, passage_char_outputs_bw])
            passage_char_outputs = tf.reshape(passage_char_outputs, [batch_size, passage_len, 2*options.char_lstm_dim])
                
            in_question_repres.append(question_char_outputs)
            in_passage_repres.append(passage_char_outputs)

            input_dim += 2*options.char_lstm_dim

        in_question_repres = tf.concat(axis=2, values=in_question_repres) # [batch_size, question_len, dim] # concat word and char
        in_passage_repres = tf.concat(axis=2, values=in_passage_repres) # [batch_size, passage_len, dim] # concat word and char

        if is_training:
            in_question_repres = tf.nn.dropout(in_question_repres, (1 - options.dropout_rate))
            in_passage_repres = tf.nn.dropout(in_passage_repres, (1 - options.dropout_rate))

        passage_mask = tf.sequence_mask(self.passage_lengths, passage_len, dtype=tf.float32) # [batch_size, passage_len]
        question_mask = tf.sequence_mask(self.question_lengths, question_len, dtype=tf.float32) # [batch_size, question_len]

        # ======Highway layer======
        if options.with_highway:
            with tf.variable_scope("input_highway"):
                in_question_repres = match_utils.multi_highway_layer(in_question_repres, input_dim, options.highway_layer_num)
                tf.get_variable_scope().reuse_variables()
                in_passage_repres = match_utils.multi_highway_layer(in_passage_repres, input_dim, options.highway_layer_num)

        # in_question_repres = tf.multiply(in_question_repres, tf.expand_dims(question_mask, axis=-1))
        # in_passage_repres = tf.multiply(in_passage_repres, tf.expand_dims(passage_mask, axis=-1))

        # ========Bilateral Matching=====
        (match_representation, match_dim) = match_utils.bilateral_match_func(in_question_repres, in_passage_repres,
                        self.question_lengths, self.passage_lengths, question_mask, passage_mask, input_dim, is_training, options=options)

        #========Prediction Layer=========
        # match_dim = 4 * self.options.aggregation_lstm_dim
        w_0 = tf.get_variable("w_0", [match_dim, match_dim // 2], dtype=tf.float32)
        b_0 = tf.get_variable("b_0", [match_dim // 2], dtype=tf.float32)
        w_1 = tf.get_variable("w_1", [match_dim // 2, num_classes], dtype=tf.float32)
        b_1 = tf.get_variable("b_1", [num_classes], dtype=tf.float32)

        # if is_training: match_representation = tf.nn.dropout(match_representation, (1 - options.dropout_rate))
        logits = tf.matmul(match_representation, w_0) + b_0
        logits = tf.tanh(logits)
        if is_training: logits = tf.nn.dropout(logits, (1 - options.dropout_rate))
        logits = tf.matmul(logits, w_1) + b_1

        self.prob = tf.nn.softmax(logits)
        
        gold_matrix = tf.one_hot(self.truth, num_classes, dtype=tf.float32)
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=gold_matrix))

        correct = tf.nn.in_top_k(logits, self.truth, 1)
        self.eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
        self.predictions = tf.argmax(self.prob, 1)

        if not is_training: return

        tvars = tf.trainable_variables()
        if self.options.lambda_l2 > 0.0:
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss = self.loss + self.options.lambda_l2 * l2_loss

        if self.options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=self.options.learning_rate)
        elif self.options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=self.options.learning_rate)

        grads = layer_utils.compute_gradients(self.loss, tvars)
        grads, _ = tf.clip_by_global_norm(grads, self.options.grad_clipper)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)
        # self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        if self.options.with_moving_average:
            # Track the moving averages of all trainable variables.
            MOVING_AVERAGE_DECAY = 0.9999  # The decay to use for the moving average.
            variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
            variables_averages_op = variable_averages.apply(tf.trainable_variables())
            train_ops = [self.train_op, variables_averages_op]
            self.train_op = tf.group(*train_ops)
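
A rough training-step sketch for the graph above, assuming options.with_char is off so only the word-level placeholders need feeding; the batch arrays (q_word_ids, labels, ...) are assumptions:

# Hypothetical training step; `model` has already run create_model_graph().
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.in_question_words: q_word_ids,  # [batch, q_len] int ids
        model.in_passage_words: p_word_ids,   # [batch, p_len] int ids
        model.question_lengths: q_lens,       # [batch]
        model.passage_lengths: p_lens,        # [batch]
        model.truth: labels,                  # [batch] class ids
    }
    _, loss_val = sess.run([model.train_op, model.loss], feed_dict=feed)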
Example #3
    def bilateral_match_func(self, in_question_repres, in_passage_repres,
                        question_lengths, passage_lengths, question_mask,
                        passage_mask, input_dim):
        question_aware_representations = []
        question_aware_dim = 0
        passage_aware_representations = []
        passage_aware_dim = 0

        # ====word level matching======
        (match_reps, match_dim) = self.match_passage_with_question(in_passage_repres,
                                    in_question_repres, passage_mask, question_mask,
                                    passage_lengths,
                                    question_lengths, input_dim, scope="word_match_forward",
                                    with_full_match=False, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=True)
        question_aware_representations.append(match_reps)
        question_aware_dim += match_dim

        (match_reps, match_dim) = self.match_passage_with_question(in_question_repres,
                                    in_passage_repres, question_mask, passage_mask,
                                    question_lengths,
                                    passage_lengths, input_dim, scope="word_match_backward",
                                    with_full_match=False, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=False)
        passage_aware_representations.append(match_reps)
        passage_aware_dim += match_dim

        with tf.variable_scope('context_MP_matching'):
            for i in range(self.config.context_layer_num): # support multiple context layers
                with tf.variable_scope('layer-{}'.format(i)):
                    # contextual lstm for both passage and question
                    in_question_repres = tf.multiply(in_question_repres, tf.expand_dims(question_mask, axis=-1))
                    in_passage_repres = tf.multiply(in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
                    (question_context_representation_fw, question_context_representation_bw,
                     in_question_repres) = layer_utils.my_lstm_layer(
                            in_question_repres, self.config.context_lstm_dim, input_lengths=question_lengths, scope_name="context_represent",
                            reuse=False, dropout_rate=self.dropout_rate, use_cudnn=self.config.use_cudnn)
                    (passage_context_representation_fw, passage_context_representation_bw,
                     in_passage_repres) = layer_utils.my_lstm_layer(
                            in_passage_repres, self.config.context_lstm_dim, input_lengths=passage_lengths, scope_name="context_represent",
                            reuse=True, dropout_rate=self.dropout_rate, use_cudnn=self.config.use_cudnn)

                    # Multi-perspective matching
                    with tf.variable_scope('left_MP_matching'):
                        (match_reps, match_dim) = self.match_passage_with_question(passage_context_representation_fw,
                                    question_context_representation_fw, passage_mask, question_mask, passage_lengths,
                                    question_lengths, self.config.context_lstm_dim, scope="forward_match",
                                    with_full_match=self.config.with_full_match, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=True)
                        question_aware_representations.append(match_reps)
                        question_aware_dim += match_dim
                        (match_reps, match_dim) = self.match_passage_with_question(passage_context_representation_bw,
                                    question_context_representation_bw, passage_mask, question_mask, passage_lengths,
                                    question_lengths, self.config.context_lstm_dim, scope="backward_match",
                                    with_full_match=self.config.with_full_match, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=False)
                        question_aware_representations.append(match_reps)
                        question_aware_dim += match_dim

                    with tf.variable_scope('right_MP_matching'):
                        (match_reps, match_dim) = self.match_passage_with_question(question_context_representation_fw,
                                    passage_context_representation_fw, question_mask, passage_mask, question_lengths,
                                    passage_lengths, self.config.context_lstm_dim, scope="forward_match",
                                    with_full_match=self.config.with_full_match, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=True)
                        passage_aware_representations.append(match_reps)
                        passage_aware_dim += match_dim
                        (match_reps, match_dim) = self.match_passage_with_question(question_context_representation_bw,
                                    passage_context_representation_bw, question_mask, passage_mask, question_lengths,
                                    passage_lengths, self.config.context_lstm_dim, scope="backward_match",
                                    with_full_match=self.config.with_full_match, with_maxpool_match=self.config.with_maxpool_match,
                                    with_attentive_match=self.config.with_attentive_match,
                                    with_max_attentive_match=self.config.with_max_attentive_match,
                                    dropout_rate=self.dropout_rate, forward=False)
                        passage_aware_representations.append(match_reps)
                        passage_aware_dim += match_dim

        question_aware_representations = tf.concat(axis=2, values=question_aware_representations) # [batch_size, passage_len, question_aware_dim]
        passage_aware_representations = tf.concat(axis=2, values=passage_aware_representations) # [batch_size, question_len, passage_aware_dim]

        question_aware_representations = tf.nn.dropout(question_aware_representations, (1 - self.dropout_rate))
        passage_aware_representations = tf.nn.dropout(passage_aware_representations, (1 - self.dropout_rate))

        # ======Highway layer======
        if self.config.with_match_highway:
            with tf.variable_scope("left_matching_highway"):
                question_aware_representations = self.multi_highway_layer(question_aware_representations, question_aware_dim,
                                                                    self.config.highway_layer_num)
            with tf.variable_scope("right_matching_highway"):
                passage_aware_representations = self.multi_highway_layer(passage_aware_representations, passage_aware_dim,
                                                               self.config.highway_layer_num)

        #========Aggregation Layer======
        aggregation_representation = []
        aggregation_dim = 0

        qa_aggregation_input = question_aware_representations
        pa_aggregation_input = passage_aware_representations
        with tf.variable_scope('aggregation_layer'):
            for i in range(self.config.aggregation_layer_num): # support multiple aggregation layers
                qa_aggregation_input = tf.multiply(qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1))
                (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer(
                            qa_aggregation_input, self.config.aggregation_lstm_dim,
                            input_lengths=passage_lengths, scope_name='left_layer-{}'.format(i),
                            reuse=False, dropout_rate=self.dropout_rate, use_cudnn=self.config.use_cudnn)
                fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, passage_lengths - 1)
                bw_rep = bw_rep[:, 0, :]
                aggregation_representation.append(fw_rep)
                aggregation_representation.append(bw_rep)
                aggregation_dim += 2 * self.config.aggregation_lstm_dim
                # [batch_size, passage_len, 2*aggregation_lstm_dim]
                qa_aggregation_input = cur_aggregation_representation

                pa_aggregation_input = tf.multiply(pa_aggregation_input, tf.expand_dims(question_mask, axis=-1))
                (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer(
                            pa_aggregation_input, self.config.aggregation_lstm_dim,
                            input_lengths=question_lengths, scope_name='right_layer-{}'.format(i),
                            reuse=False, dropout_rate=self.dropout_rate, use_cudnn=self.config.use_cudnn)
                fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, question_lengths - 1)
                bw_rep = bw_rep[:, 0, :]
                aggregation_representation.append(fw_rep)
                aggregation_representation.append(bw_rep)
                aggregation_dim += 2 * self.config.aggregation_lstm_dim
                # [batch_size, question_len, 2*aggregation_lstm_dim]
                pa_aggregation_input = cur_aggregation_representation

        # [batch_size, 4*aggregation_lstm_dim*aggregation_layer_num]
        aggregation_representation = tf.concat(axis=1, values=aggregation_representation)

        # ======Highway layer======
        if self.config.with_aggregation_highway:
            with tf.variable_scope("aggregation_highway"):
                agg_shape = tf.shape(aggregation_representation)
                batch_size = agg_shape[0]
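                # The highway helper expects 3-D input, so view the batch of
                # vectors as a single sequence and reshape back afterwards.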
                aggregation_representation = tf.reshape(aggregation_representation, [1, batch_size, aggregation_dim])
                aggregation_representation = self.multi_highway_layer(aggregation_representation, aggregation_dim, self.config.highway_layer_num)
                aggregation_representation = tf.reshape(aggregation_representation, [batch_size, aggregation_dim])
        return (aggregation_representation, aggregation_dim)
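
The dimension bookkeeping in the aggregation loop is worth spelling out: every layer contributes a forward and a backward final state for both the left and the right LSTM, so the returned aggregation_dim is 4 * aggregation_lstm_dim * aggregation_layer_num. A quick check with assumed config values:

# Sanity check on the returned dimension (config values are assumptions).
aggregation_lstm_dim = 100
aggregation_layer_num = 2
expected_dim = 4 * aggregation_lstm_dim * aggregation_layer_num  # 2 sides x 2 directions
assert expected_dim == 800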
Example #4
import tensorflow as tf

import layer_utils  # project-local helpers (assumed importable)

# dot_attention is assumed to be defined elsewhere in this module.


def Matching_Model(c_emb, q_emb, passage_lengths, question_lengths, c_mask,
                   q_mask, is_training, dropout_rate, options):

    with tf.variable_scope("Embedding_Encoder_Layer"):
        q_emb = tf.multiply(q_emb, tf.expand_dims(q_mask, axis=-1))
        c_emb = tf.multiply(c_emb, tf.expand_dims(c_mask, axis=-1))

        (q_fw, q_bw,
         q) = layer_utils.my_lstm_layer(q_emb,
                                        options.context_lstm_dim,
                                        input_lengths=question_lengths,
                                        scope_name="context_represent",
                                        reuse=False,
                                        is_training=is_training,
                                        dropout_rate=dropout_rate,
                                        use_cudnn=options.use_cudnn)

        (c_fw, c_bw,
         c) = layer_utils.my_lstm_layer(c_emb,
                                        options.context_lstm_dim,
                                        input_lengths=passage_lengths,
                                        scope_name="context_represent",
                                        reuse=True,
                                        is_training=is_training,
                                        dropout_rate=dropout_rate,
                                        use_cudnn=options.use_cudnn)

        q = tf.multiply(q, tf.expand_dims(q_mask, axis=-1))
        c = tf.multiply(c, tf.expand_dims(c_mask, axis=-1))

    with tf.variable_scope("Co-attention_Layer"):
        c2q, q2c = dot_attention(q, c, q_mask, c_mask)
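        # From the fusion below: c2q are question-aware passage vectors
        # [batch, p_len, dim]; q2c are passage-aware question vectors
        # [batch, q_len, dim].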

    with tf.variable_scope("Model_Encoder_Layer"):
        passage_inputs = tf.concat([c2q, c, c2q * c, c - c2q], axis=2)
        question_inputs = tf.concat([q2c, q, q2c * q, q - q2c], axis=2)
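        # Standard matching fusion: raw vectors, attended vectors, their
        # elementwise product, and their difference, projected by a dense
        # layer shared between passage and question (reuse=True below).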
        passage_inputs = tf.layers.dense(inputs=passage_inputs,
                                         units=2 * options.context_lstm_dim,
                                         activation=tf.nn.relu,
                                         use_bias=True,
                                         name='pro',
                                         reuse=False)
        question_inputs = tf.layers.dense(inputs=question_inputs,
                                          units=2 * options.context_lstm_dim,
                                          activation=tf.nn.relu,
                                          use_bias=True,
                                          name='pro',
                                          reuse=True)
        question_inputs = tf.multiply(question_inputs,
                                      tf.expand_dims(q_mask, axis=-1))
        passage_inputs = tf.multiply(passage_inputs,
                                     tf.expand_dims(c_mask, axis=-1))

        (fw_rep, bw_rep,
         cur_aggregation_representation) = layer_utils.my_lstm_layer(
             question_inputs,
             options.aggregation_lstm_dim,
             input_lengths=question_lengths,
             scope_name='aggregate_layer',
             reuse=False,
             is_training=is_training,
             dropout_rate=dropout_rate,
             use_cudnn=options.use_cudnn)

        question_inputs = cur_aggregation_representation

        (fw_rep, bw_rep,
         cur_aggregation_representation) = layer_utils.my_lstm_layer(
             passage_inputs,
             options.aggregation_lstm_dim,
             input_lengths=passage_lengths,
             scope_name='aggregate_layer',
             reuse=True,
             is_training=is_training,
             dropout_rate=dropout_rate,
             use_cudnn=options.use_cudnn)
        passage_inputs = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]
        # if is_training:
        #     question_inputs = tf.nn.dropout(question_inputs, (1 - options.dropout_rate))
        #     passage_inputs = tf.nn.dropout(passage_inputs, (1 - options.dropout_rate))
        question_inputs = tf.multiply(question_inputs,
                                      tf.expand_dims(q_mask, axis=-1))
        passage_inputs = tf.multiply(passage_inputs,
                                     tf.expand_dims(c_mask, axis=-1))

        passage_outputs_mean = tf.div(
            tf.reduce_sum(passage_inputs, 1),
            tf.expand_dims(tf.cast(passage_lengths, tf.float32), -1))
        question_outputs_mean = tf.div(
            tf.reduce_sum(question_inputs, 1),
            tf.expand_dims(tf.cast(question_lengths, tf.float32), -1))
        passage_outputs_max = tf.reduce_max(passage_inputs, axis=1)
        question_outputs_max = tf.reduce_max(question_inputs, axis=1)
        input_dim = int(passage_inputs.shape[2])

        question_outputs = tf.concat(
            [question_outputs_max, question_outputs_mean], axis=1)
        passage_outputs = tf.concat(
            [passage_outputs_max, passage_outputs_mean], axis=1)
        match_representation = tf.concat(
            axis=1, values=[question_outputs, passage_outputs])
    # ========Prediction Layer=========
    if is_training:
        match_representation = tf.nn.dropout(match_representation,
                                             (1 - dropout_rate))

    return match_representation
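
Matching_Model stops at the pooled match vector; a small prediction head like the ones in the other examples could sit on top. A sketch, assuming num_classes and a statically known last dimension:

# Hypothetical prediction head on top of Matching_Model's output.
num_classes = 2
match_dim = int(match_representation.shape[-1])  # 8*aggregation_lstm_dim: max+mean pooling on both sides
w_0 = tf.get_variable("w_0", [match_dim, match_dim // 2], dtype=tf.float32)
b_0 = tf.get_variable("b_0", [match_dim // 2], dtype=tf.float32)
w_1 = tf.get_variable("w_1", [match_dim // 2, num_classes], dtype=tf.float32)
b_1 = tf.get_variable("b_1", [num_classes], dtype=tf.float32)
hidden = tf.tanh(tf.matmul(match_representation, w_0) + b_0)
logits = tf.matmul(hidden, w_1) + b_1
prob = tf.nn.softmax(logits)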
Example #5
import tensorflow as tf

import layer_utils  # project-local helpers (assumed importable)

# match_passage_with_question and multi_highway_layer are assumed to be
# defined in this module, as in the surrounding code base.


def MCAN_match_func(in_question_repres,
                    in_passage_repres,
                    question_lengths,
                    passage_lengths,
                    question_mask,
                    passage_mask,
                    input_dim,
                    is_training,
                    scope="default",
                    options=None):
    question_reps = in_question_repres
    passage_reps = in_passage_repres

    total_match_dim = 0
    final_question_repres = question_reps
    final_passage_repres = passage_reps
    #####

    (match_reps, match_dim) = match_passage_with_question(in_passage_repres, in_question_repres, passage_mask,
                                                          question_mask, passage_lengths,
                                                          question_lengths, input_dim, scope="word_match_forward",
                                                          with_full_match=False,
                                                          with_maxpool_match=options.with_maxpool_match,
                                                          with_attentive_match=options.with_attentive_match,
                                                          with_max_attentive_match=options.with_max_attentive_match,
                                                          is_training=is_training, options=options,
                                                          dropout_rate=options.dropout_rate, forward=True)

    final_passage_repres = tf.concat([final_passage_repres, match_reps],
                                      axis=-1)
    # total_match_dim tracks the per-side matching dim; the symmetric
    # question-side matches add the same sizes, so count them only once.
    total_match_dim += match_dim
    (match_reps, match_dim) = match_passage_with_question(in_question_repres, in_passage_repres, question_mask,
                                                          passage_mask, question_lengths,
                                                          passage_lengths, input_dim, scope="word_match_backward",
                                                          with_full_match=False,
                                                          with_maxpool_match=options.with_maxpool_match,
                                                          with_attentive_match=options.with_attentive_match,
                                                          with_max_attentive_match=options.with_max_attentive_match,
                                                          is_training=is_training, options=options,
                                                          dropout_rate=options.dropout_rate, forward=False)
    final_question_repres = tf.concat([final_question_repres, match_reps],
                                       axis=-1)

    #####

    # self-attention

    # relevancy_matrix3 = cal_relevancy_matrix(question_reps, question_reps)
    # relevancy_matrix3 = mask_relevancy_matrix(relevancy_matrix3, question_mask, question_mask)
    # relevancy_matrix3 = tf.nn.softmax(relevancy_matrix3,axis=-1)
    # relevancy_matrix3 = mask_relevancy_matrix(relevancy_matrix3, question_mask, question_mask)
    # attended_question = tf.matmul(relevancy_matrix3,question_reps)
    # final_question_repres=tf.concat([final_question_repres,tf.layers.dense(attended_question, units=5)],axis=-1)
    #
    # relevancy_matrix4 = cal_relevancy_matrix(passage_reps, passage_reps)
    # relevancy_matrix4 = mask_relevancy_matrix(relevancy_matrix4, passage_mask, passage_mask)
    # relevancy_matrix4 = tf.nn.softmax(relevancy_matrix4, axis=-1)
    # relevancy_matrix4 = mask_relevancy_matrix(relevancy_matrix4, passage_mask, passage_mask)
    # attended_passage = tf.matmul(relevancy_matrix4, passage_reps)
    # final_passage_repres = tf.concat([final_passage_repres, tf.layers.dense(attended_passage, units=5)],
    #                                  axis=-1)

    # LSTM-matching
    in_question_repres_masked = tf.multiply(in_question_repres, tf.expand_dims(question_mask, axis=-1))
    in_passage_repres_masked = tf.multiply(in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
    (question_context_representation_fw, question_context_representation_bw,
     in_question_repres_masked) = layer_utils.my_lstm_layer(
        in_question_repres_masked, options.context_lstm_dim, input_lengths=question_lengths,
        scope_name="context_represent",
        reuse=False, is_training=is_training, dropout_rate=options.dropout_rate,
        use_cudnn=options.use_cudnn)
    (passage_context_representation_fw, passage_context_representation_bw,
     in_passage_repres_masked) = layer_utils.my_lstm_layer(
        in_passage_repres_masked, options.context_lstm_dim, input_lengths=passage_lengths,
        scope_name="context_represent",
        reuse=True, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn)

    # Multi-perspective matching
    with tf.variable_scope('left_MP_matching'):
        (match_reps, match_dim) = match_passage_with_question(passage_context_representation_fw,
                                                              question_context_representation_fw,
                                                              passage_mask, question_mask, passage_lengths,
                                                              question_lengths, options.context_lstm_dim,
                                                              scope="forward_match",
                                                              with_full_match=options.with_full_match,
                                                              with_maxpool_match=options.with_maxpool_match,
                                                              with_attentive_match=options.with_attentive_match,
                                                              with_max_attentive_match=options.with_max_attentive_match,
                                                              is_training=is_training, options=options,
                                                              dropout_rate=options.dropout_rate,
                                                              forward=True)
        final_passage_repres = tf.concat([final_passage_repres, match_reps],
                                         axis=-1)
        total_match_dim += match_dim
        (match_reps, match_dim) = match_passage_with_question(passage_context_representation_bw,
                                                              question_context_representation_bw,
                                                              passage_mask, question_mask, passage_lengths,
                                                              question_lengths, options.context_lstm_dim,
                                                              scope="backward_match",
                                                              with_full_match=options.with_full_match,
                                                              with_maxpool_match=options.with_maxpool_match,
                                                              with_attentive_match=options.with_attentive_match,
                                                              with_max_attentive_match=options.with_max_attentive_match,
                                                              is_training=is_training, options=options,
                                                              dropout_rate=options.dropout_rate,
                                                              forward=False)
        final_passage_repres = tf.concat([final_passage_repres, match_reps],
                                         axis=-1)
        total_match_dim += match_dim
    with tf.variable_scope('right_MP_matching'):
        (match_reps, match_dim) = match_passage_with_question(question_context_representation_fw,
                                                              passage_context_representation_fw,
                                                              question_mask, passage_mask, question_lengths,
                                                              passage_lengths, options.context_lstm_dim,
                                                              scope="forward_match",
                                                              with_full_match=options.with_full_match,
                                                              with_maxpool_match=options.with_maxpool_match,
                                                              with_attentive_match=options.with_attentive_match,
                                                              with_max_attentive_match=options.with_max_attentive_match,
                                                              is_training=is_training, options=options,
                                                              dropout_rate=options.dropout_rate,
                                                              forward=True)
        final_question_repres = tf.concat([final_question_repres, match_reps],
                                          axis=-1)
        (match_reps, match_dim) = match_passage_with_question(question_context_representation_bw,
                                                              passage_context_representation_bw,
                                                              question_mask, passage_mask, question_lengths,
                                                              passage_lengths, options.context_lstm_dim,
                                                              scope="backward_match",
                                                              with_full_match=options.with_full_match,
                                                              with_maxpool_match=options.with_maxpool_match,
                                                              with_attentive_match=options.with_attentive_match,
                                                              with_max_attentive_match=options.with_max_attentive_match,
                                                              is_training=is_training, options=options,
                                                              dropout_rate=options.dropout_rate,
                                                              forward=False)
        final_question_repres = tf.concat([final_question_repres, match_reps],
                                          axis=-1)


    if is_training:
        final_question_repres = tf.nn.dropout(final_question_repres, (1 - options.dropout_rate))
        final_passage_repres = tf.nn.dropout(final_passage_repres, (1 - options.dropout_rate))
    print(total_match_dim)
    # ======Highway layer======
    #if options.with_match_highway:
    #    with tf.variable_scope("left_matching_highway"):
    #        final_question_repres = multi_highway_layer(final_question_repres, total_match_dim,
    #                                                            options.highway_layer_num)
    #    with tf.variable_scope("right_matching_highway"):
    #        final_passage_repres = multi_highway_layer(final_passage_repres, total_match_dim,
    #                                                           options.highway_layer_num)



    # final encoder

    qa_aggregation_input = final_passage_repres
    pa_aggregation_input = final_question_repres
    aggregation_representation = []
    aggregation_dim = 0
    with tf.variable_scope('aggregation_layer'):
        for i in range(options.aggregation_layer_num):  # support multiple aggregation layers
            if passage_mask is not None:
                qa_aggregation_input = tf.multiply(qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1))
            (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer(
                qa_aggregation_input, options.aggregation_lstm_dim, input_lengths=passage_lengths,
                scope_name=scope + '_left_layer-{}'.format(i),
                reuse=False, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, passage_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            qa_aggregation_input = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]
            if question_mask is not None:
                pa_aggregation_input = tf.multiply(pa_aggregation_input, tf.expand_dims(question_mask, axis=-1))
            (fw_rep, bw_rep, cur_aggregation_representation) = layer_utils.my_lstm_layer(
                pa_aggregation_input, options.aggregation_lstm_dim,
                input_lengths=question_lengths, scope_name=scope + '_right_layer-{}'.format(i),
                reuse=False, is_training=is_training, dropout_rate=options.dropout_rate, use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(fw_rep, question_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            pa_aggregation_input = cur_aggregation_representation  # [batch_size, question_len, 2*aggregation_lstm_dim]

    aggregation_representation = tf.concat(axis=1, values=aggregation_representation)  # [batch_size, aggregation_dim]

    # ======Highway layer======
    if options.with_aggregation_highway:
        with tf.variable_scope(scope + "_aggregation_highway"):
            agg_shape = tf.shape(aggregation_representation)
            batch_size = agg_shape[0]
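            # The highway helper expects 3-D input, so view the batch of
            # vectors as a single sequence and reshape back afterwards.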
            aggregation_representation = tf.reshape(aggregation_representation, [1, batch_size, aggregation_dim])
            aggregation_representation = multi_highway_layer(aggregation_representation, aggregation_dim,
                                                             options.highway_layer_num)
            aggregation_representation = tf.reshape(aggregation_representation, [batch_size, aggregation_dim])

    return (aggregation_representation, aggregation_dim)
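
Reusing the placeholders and the SimpleNamespace pattern from the first sketch, MCAN_match_func could be wired up like this (all option values are assumptions):

# Hypothetical call; `opts` mirrors the attributes read inside the function.
opts = SimpleNamespace(
    with_full_match=True, with_maxpool_match=True,
    with_attentive_match=True, with_max_attentive_match=True,
    context_lstm_dim=100, aggregation_lstm_dim=100, aggregation_layer_num=1,
    dropout_rate=0.1, with_aggregation_highway=False, highway_layer_num=1,
    use_cudnn=False)

agg_rep, agg_dim = MCAN_match_func(
    q_repres, p_repres, q_lengths, p_lengths, q_mask, p_mask,
    input_dim=300, is_training=True, scope="mcan", options=opts)
# agg_rep: [batch, 4*aggregation_lstm_dim*aggregation_layer_num]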
Example #6

    def create_siameseLSTM_model_graph(self,
                                       num_classes,
                                       word_vocab=None,
                                       char_vocab=None,
                                       is_training=True,
                                       global_step=None):
        """
        """
        options = self.options
        # ======word representation layer======
        in_question_repres = []
        in_passage_repres = []
        input_dim = 0
        if word_vocab is not None:
            word_vec_trainable = True
            cur_device = '/gpu:0'
            if options.fix_word_vec:
                word_vec_trainable = False
                cur_device = '/cpu:0'
            with tf.device(cur_device):
                self.embedding = tf.placeholder(
                    tf.float32, shape=word_vocab.word_vecs.shape)
                self.word_embedding = tf.get_variable(
                    "word_embedding",
                    trainable=word_vec_trainable,
                    initializer=self.embedding,
                    dtype=tf.float32)  # tf.constant(word_vocab.word_vecs)
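                # Initializing from a placeholder keeps the (possibly huge)
                # embedding matrix out of the serialized graph definition.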

            in_question_word_repres = tf.nn.embedding_lookup(
                self.word_embedding,
                self.in_question_words)  # [batch_size, question_len, word_dim]
            in_passage_word_repres = tf.nn.embedding_lookup(
                self.word_embedding,
                self.in_passage_words)  # [batch_size, passage_len, word_dim]
            in_question_repres.append(in_question_word_repres)
            in_passage_repres.append(in_passage_word_repres)

            input_shape = tf.shape(self.in_question_words)
            batch_size = input_shape[0]
            question_len = input_shape[1]
            input_shape = tf.shape(self.in_passage_words)
            passage_len = input_shape[1]
            input_dim += word_vocab.word_dim

        in_question_repres = tf.concat(
            axis=2,
            values=in_question_repres)  # [batch_size, question_len, dim]
        in_passage_repres = tf.concat(
            axis=2, values=in_passage_repres)  # [batch_size, passage_len, dim]

        if is_training:
            in_question_repres = tf.nn.dropout(in_question_repres,
                                               (1 - options.dropout_rate))
            in_passage_repres = tf.nn.dropout(in_passage_repres,
                                              (1 - options.dropout_rate))

        passage_mask = tf.sequence_mask(
            self.passage_lengths, passage_len,
            dtype=tf.float32)  # [batch_size, passage_len]
        question_mask = tf.sequence_mask(
            self.question_lengths, question_len,
            dtype=tf.float32)  # [batch_size, question_len]

        # ======Highway layer======
        if options.with_highway:
            with tf.variable_scope("input_highway"):
                in_question_repres = match_utils.multi_highway_layer(
                    in_question_repres, input_dim, options.highway_layer_num)
                tf.get_variable_scope().reuse_variables()
                in_passage_repres = match_utils.multi_highway_layer(
                    in_passage_repres, input_dim, options.highway_layer_num)

        # ======BiLSTM context layer======
        for i in range(
                options.context_layer_num):  # support multiple context layers
            with tf.variable_scope('bilstm-layer-{}'.format(i)):
                # contextual lstm for both passage and question
                in_question_repres = tf.multiply(
                    in_question_repres, tf.expand_dims(question_mask, axis=-1))
                (question_context_representation_fw,
                 question_context_representation_bw,
                 in_question_repres) = layer_utils.my_lstm_layer(
                     in_question_repres,
                     options.context_lstm_dim,
                     input_lengths=self.question_lengths,
                     scope_name="context_represent",
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)

                # Encode the second sentence, using the same LSTM weights.
                tf.get_variable_scope().reuse_variables()
                in_passage_repres = tf.multiply(
                    in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
                (passage_context_representation_fw,
                 passage_context_representation_bw,
                 in_passage_repres) = layer_utils.my_lstm_layer(
                     in_passage_repres,
                     options.context_lstm_dim,
                     input_lengths=self.passage_lengths,
                     scope_name="context_represent",
                     reuse=True,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)

        if options.lstm_out_type == 'mean':
            question_context_representation_fw = layer_utils.collect_mean_step_of_lstm(
                question_context_representation_fw)
            question_context_representation_bw = layer_utils.collect_mean_step_of_lstm(
                question_context_representation_bw)
            passage_context_representation_fw = layer_utils.collect_mean_step_of_lstm(
                passage_context_representation_fw)
            passage_context_representation_bw = layer_utils.collect_mean_step_of_lstm(
                passage_context_representation_bw)
        elif options.lstm_out_type == 'end':
            question_context_representation_fw = layer_utils.collect_final_step_of_lstm(
                question_context_representation_fw, self.question_lengths - 1)
            question_context_representation_bw = question_context_representation_bw[:, 0, :]
            passage_context_representation_fw = layer_utils.collect_final_step_of_lstm(
                passage_context_representation_fw, self.passage_lengths - 1)
            passage_context_representation_bw = passage_context_representation_bw[:, 0, :]

        question_context_outputs = tf.concat(
            axis=1,
            values=[
                question_context_representation_fw,
                question_context_representation_bw
            ])
        passage_context_outputs = tf.concat(
            axis=1,
            values=[
                passage_context_representation_fw,
                passage_context_representation_bw
            ])

        (match_representation, match_dim) = match_utils.siameseLSTM_match_func(
            question_context_outputs, passage_context_outputs,
            options.context_lstm_dim)

        #========Prediction Layer=========
        w_0 = tf.get_variable("w_0", [match_dim, int(match_dim / 2)],
                              dtype=tf.float32)
        b_0 = tf.get_variable("b_0", [int(match_dim / 2)], dtype=tf.float32)
        w_1 = tf.get_variable("w_1", [int(match_dim / 2), num_classes],
                              dtype=tf.float32)
        b_1 = tf.get_variable("b_1", [num_classes], dtype=tf.float32)

        # if is_training: match_representation = tf.nn.dropout(match_representation, (1 - options.dropout_rate))
        logits = tf.matmul(match_representation, w_0) + b_0
        logits = tf.nn.relu(logits)
        if is_training:
            logits = tf.nn.dropout(logits, (1 - options.dropout_rate))
        logits = tf.matmul(logits, w_1) + b_1

        self.prob = tf.nn.softmax(logits)
        self.predictions = tf.argmax(self.prob, 1)

        gold_matrix = tf.one_hot(self.truth, num_classes, dtype=tf.float32)
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=gold_matrix))

        correct = tf.nn.in_top_k(logits, self.truth, 1)
        self.eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

        if not is_training: return

        tvars = tf.trainable_variables()
        if self.options.lambda_l1 > 0.0:
            l1_loss = tf.add_n([
                tf.contrib.layers.l1_regularizer(self.options.lambda_l1)(v)
                for v in tvars if v.get_shape().ndims > 1
            ])
            self.loss = self.loss + l1_loss
        if self.options.lambda_l2 > 0.0:
            # l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            l2_loss = tf.add_n([
                tf.contrib.layers.l2_regularizer(self.options.lambda_l2)(v)
                for v in tvars if v.get_shape().ndims > 1
            ])
            self.loss = self.loss + l2_loss

        if self.options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=self.options.learning_rate)
        elif self.options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.options.learning_rate)

        grads = layer_utils.compute_gradients(self.loss, tvars)
        grads, _ = tf.clip_by_global_norm(grads, self.options.grad_clipper)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                  global_step=global_step)
        # self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        if self.options.with_moving_average:
            # Track the moving averages of all trainable variables.
            MOVING_AVERAGE_DECAY = 0.9999  # The decay to use for the moving average.
            variable_averages = tf.train.ExponentialMovingAverage(
                MOVING_AVERAGE_DECAY, global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())
            train_ops = [self.train_op, variables_averages_op]
            self.train_op = tf.group(*train_ops)
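
Unlike example #2, this variant builds its penalties with tf.contrib.layers regularizers, which fold the coefficient into the returned tensor; for L2 this is equivalent to scaling a plain tf.nn.l2_loss sum. A small check (values assumed):

# tf.contrib.layers.l2_regularizer(lam)(v) computes lam * tf.nn.l2_loss(v).
lam = 1e-5
v = tf.get_variable("demo_w", [4, 4])
penalty_a = tf.contrib.layers.l2_regularizer(lam)(v)
penalty_b = lam * tf.nn.l2_loss(v)  # same value as penalty_a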
Example #7
import tensorflow as tf

import layer_utils  # project-local helpers (assumed importable)

# match_passage_with_question is assumed to be defined in this module.


def MCAN_match_func(in_question_repres,
                    in_passage_repres,
                    question_lengths,
                    passage_lengths,
                    question_mask,
                    passage_mask,
                    input_dim,
                    is_training,
                    options=None):
    question_aware_representations = []
    question_aware_dim = 0
    passage_aware_representations = []
    passage_aware_dim = 0

    # ====word level matching======
    # Because with_full_match is always False here, the value of `forward` has no effect.
    # match_passage_with_question(repres1, repres2, ...) matches each position of
    # repres1 against all of repres2, so the returned match_reps has shape
    # [batch_size, repres1_len, match_dim].
    # passage to question
    (match_reps, match_dim) = match_passage_with_question(
        in_passage_repres,
        in_question_repres,
        passage_mask,
        question_mask,
        passage_lengths,
        question_lengths,
        input_dim,
        scope="word_match_forward",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=False)
    question_aware_representations.append(match_reps)
    question_aware_dim += match_dim

    # add passage to passage
    (match_reps, match_dim) = match_passage_with_question(
        in_passage_repres,
        in_passage_repres,
        passage_mask,
        passage_mask,
        passage_lengths,
        passage_lengths,
        input_dim,
        scope="word_match_passage",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=False)
    question_aware_representations.append(match_reps)
    question_aware_dim += match_dim

    # question to passage
    (match_reps, match_dim) = match_passage_with_question(
        in_question_repres,
        in_passage_repres,
        question_mask,
        passage_mask,
        question_lengths,
        passage_lengths,
        input_dim,
        scope="word_match_backward",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=False)
    passage_aware_representations.append(match_reps)
    passage_aware_dim += match_dim

    # add question to question
    (match_reps, match_dim) = match_passage_with_question(
        in_question_repres,
        in_question_repres,
        question_mask,
        question_mask,
        question_lengths,
        question_lengths,
        input_dim,
        scope="word_match_question",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=False)
    passage_aware_representations.append(match_reps)
    passage_aware_dim += match_dim

    with tf.variable_scope('context_MP_matching'):
        for i in range(
                options.context_layer_num):  # support multiple context layers
            with tf.variable_scope('layer-{}'.format(i)):
                # contextual lstm for both passage and question
                in_question_repres = tf.multiply(
                    in_question_repres, tf.expand_dims(question_mask, axis=-1))
                in_passage_repres = tf.multiply(
                    in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
                (question_context_representation_fw,
                 question_context_representation_bw,
                 in_question_repres) = layer_utils.my_lstm_layer(
                     in_question_repres,
                     options.context_lstm_dim,
                     input_lengths=question_lengths,
                     scope_name="context_represent",
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)
                (passage_context_representation_fw,
                 passage_context_representation_bw,
                 in_passage_repres) = layer_utils.my_lstm_layer(
                     in_passage_repres,
                     options.context_lstm_dim,
                     input_lengths=passage_lengths,
                     scope_name="context_represent",
                     reuse=True,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)
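                # Note: my_lstm_layer returns (fw_outputs, bw_outputs, concat);
                # fw/bw are [batch, time, context_lstm_dim] and the third value,
                # re-assigned to in_*_repres above, is their concatenation.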

                # Multi-perspective matching
                with tf.variable_scope('left_MP_matching'):
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_fw,
                        question_context_representation_fw,
                        passage_mask,
                        question_mask,
                        passage_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="ques_forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)
                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_bw,
                        question_context_representation_bw,
                        passage_mask,
                        question_mask,
                        passage_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="ques_backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim
                    # add passage to passage
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_fw,
                        passage_context_representation_fw,
                        passage_mask,
                        passage_mask,
                        passage_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="pass_self_forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)

                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_bw,
                        passage_context_representation_bw,
                        passage_mask,
                        passage_mask,
                        passage_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="pass_self_backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim
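                # The right_MP_matching block below mirrors left_MP_matching
                # with the passage and question roles swapped.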

                with tf.variable_scope('right_MP_matching'):
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_fw,
                        passage_context_representation_fw,
                        question_mask,
                        passage_mask,
                        question_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="pass_forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_bw,
                        passage_context_representation_bw,
                        question_mask,
                        passage_mask,
                        question_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="pass_backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim
                    # add question to question
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_fw,
                        question_context_representation_fw,
                        question_mask,
                        question_mask,
                        question_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="ques_self_forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_bw,
                        question_context_representation_bw,
                        question_mask,
                        question_mask,
                        question_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="ques_self_backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim

    question_aware_representatins = tf.concat(
        axis=2, values=question_aware_representatins
    )  # [batch_size, passage_len, question_aware_dim]
    passage_aware_representatins = tf.concat(
        axis=2, values=passage_aware_representatins
    )  # [batch_size, question_len, passage_aware_dim]

    if is_training:
        question_aware_representatins = tf.nn.dropout(
            question_aware_representatins, (1 - options.dropout_rate))
        passage_aware_representatins = tf.nn.dropout(
            passage_aware_representatins, (1 - options.dropout_rate))

    # ======Highway layer======
    if options.with_match_highway:
        with tf.variable_scope("left_matching_highway"):
            question_aware_representatins = multi_highway_layer(
                question_aware_representatins, question_aware_dim,
                options.highway_layer_num)
        with tf.variable_scope("right_matching_highway"):
            passage_aware_representatins = multi_highway_layer(
                passage_aware_representatins, passage_aware_dim,
                options.highway_layer_num)

    # ========Aggregation Layer======
    aggregation_representation = []
    aggregation_dim = 0

    qa_aggregation_input = question_aware_representatins
    pa_aggregation_input = passage_aware_representatins
    with tf.variable_scope('aggregation_layer'):
        for i in range(options.aggregation_layer_num):  # support multiple aggregation layers
            qa_aggregation_input = tf.multiply(
                qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 qa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=passage_lengths,
                 scope_name='left_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, passage_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            qa_aggregation_input = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]

            pa_aggregation_input = tf.multiply(
                pa_aggregation_input, tf.expand_dims(question_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 pa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=question_lengths,
                 scope_name='right_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, question_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            pa_aggregation_input = cur_aggregation_representation  # [batch_size, question_len, 2*aggregation_lstm_dim]

    aggregation_representation = tf.concat(
        axis=1,
        values=aggregation_representation)  # [batch_size, aggregation_dim]

    # ======Highway layer======
    if options.with_aggregation_highway:
        with tf.variable_scope("aggregation_highway"):
            agg_shape = tf.shape(aggregation_representation)
            batch_size = agg_shape[0]
            aggregation_representation = tf.reshape(
                aggregation_representation, [1, batch_size, aggregation_dim])
            aggregation_representation = multi_highway_layer(
                aggregation_representation, aggregation_dim,
                options.highway_layer_num)
            aggregation_representation = tf.reshape(
                aggregation_representation, [batch_size, aggregation_dim])

    return (aggregation_representation, aggregation_dim)
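
# A minimal standalone sketch (assumed TF 1.x) of the masking convention used
# throughout the matching layers above: tf.sequence_mask turns integer lengths
# into a float mask, and multiplying by its expanded form zeroes padded steps.
import tensorflow as tf

lengths = tf.constant([3, 1])                                 # two sequences
mask = tf.sequence_mask(lengths, maxlen=4, dtype=tf.float32)  # [2, 4]
reps = tf.ones([2, 4, 5])                                     # [batch, time, dim]
masked = tf.multiply(reps, tf.expand_dims(mask, axis=-1))     # padded steps -> 0

with tf.Session() as sess:
    print(sess.run(mask))  # [[1. 1. 1. 0.], [1. 0. 0. 0.]]
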
Exemplo n.º 8
0
    def create_model_graph(self,
                           num_classes,
                           word_vocab=None,
                           char_vocab=None,
                           lemma_vocab=None,
                           is_training=True,
                           global_step=None):
        options = self.options
        # ======word representation layer======
        with tf.variable_scope("Input_Embedding_Layer"):
            if word_vocab is not None:
                word_vec_trainable = True
                cur_device = '/gpu:0'
                if options.fix_word_vec:
                    word_vec_trainable = False
                    cur_device = '/cpu:0'
                with tf.device(cur_device):
                    self.word_embedding = tf.get_variable(
                        "word_embedding",
                        trainable=word_vec_trainable,
                        initializer=tf.constant(word_vocab.word_vecs),
                        dtype=tf.float32)

                    # self.kg_embedding = tf.get_variable("kg", trainable=True, regularizer=regularizer,
                    #                                     initializer=tf.constant(lemma_vocab.word_vecs), dtype=tf.float32)
                    self.kg_embedding = tf.get_variable(
                        "kg",
                        shape=(lemma_vocab.word_vecs.shape[0], options.kg_dim),
                        initializer=initializer,  # NOTE: assumes an `initializer` defined outside this snippet
                        trainable=True,
                        dtype=tf.float32)

            c_emb = tf.nn.embedding_lookup(self.word_embedding,
                                           self.in_passage_words)
            q_emb = tf.nn.embedding_lookup(self.word_embedding,
                                           self.in_question_words)
            c_kg_emb = tf.nn.embedding_lookup(self.kg_embedding,
                                              self.in_passage_words_lemma)
            q_kg_emb = tf.nn.embedding_lookup(self.kg_embedding,
                                              self.in_question_words_lemma)

            if is_training:
                c_emb = tf.nn.dropout(c_emb, 1 - self.dropout)
                q_emb = tf.nn.dropout(q_emb, 1 - self.dropout)
                c_kg_emb = tf.nn.dropout(c_kg_emb, 1 - self.dropout)
                q_kg_emb = tf.nn.dropout(q_kg_emb, 1 - self.dropout)

            input_shape = tf.shape(self.in_question_words)
            batch_size = input_shape[0]
            question_len = input_shape[1]
            input_shape = tf.shape(self.in_passage_words)
            passage_len = input_shape[1]

            if options.with_char and char_vocab is not None:
                input_shape = tf.shape(self.in_question_chars)
                batch_size = input_shape[0]
                q_char_len = input_shape[2]
                input_shape = tf.shape(self.in_passage_chars)
                p_char_len = input_shape[2]
                char_dim = char_vocab.word_dim
                self.char_embedding = tf.get_variable(
                    "char_embedding",
                    initializer=tf.constant(char_vocab.word_vecs),
                    dtype=tf.float32)

                in_question_char_repres = tf.nn.embedding_lookup(
                    self.char_embedding, self.in_question_chars
                )  # [batch_size, question_len, q_char_len, char_dim]
                in_question_char_repres = tf.reshape(
                    in_question_char_repres, shape=[-1, q_char_len, char_dim])
                question_char_lengths = tf.reshape(self.question_char_lengths,
                                                   [-1])
                question_char_mask = tf.sequence_mask(
                    question_char_lengths, q_char_len,
                    dtype=tf.float32)  # [batch_size*question_len, q_char_len]
                in_question_char_repres = tf.multiply(
                    in_question_char_repres,
                    tf.expand_dims(question_char_mask, axis=-1))

                in_passage_char_repres = tf.nn.embedding_lookup(
                    self.char_embedding, self.in_passage_chars
                )  # [batch_size, passage_len, p_char_len, char_dim]
                in_passage_char_repres = tf.reshape(
                    in_passage_char_repres, shape=[-1, p_char_len, char_dim])
                passage_char_lengths = tf.reshape(self.passage_char_lengths,
                                                  [-1])
                passage_char_mask = tf.sequence_mask(
                    passage_char_lengths, p_char_len,
                    dtype=tf.float32)  # [batch_size*passage_len, p_char_len]
                in_passage_char_repres = tf.multiply(
                    in_passage_char_repres,
                    tf.expand_dims(passage_char_mask, axis=-1))
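                # The char CNN below shares one 1-D convolution ('char_conv',
                # reuse=True on the passage side) and max-pools over characters.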

                question_char_outputs = conv(in_question_char_repres,
                                             self.options.char_lstm_dim,
                                             bias=True,
                                             activation=tf.nn.tanh,
                                             kernel_size=5,
                                             name="char_conv",
                                             reuse=False)
                question_char_outputs = tf.reduce_max(question_char_outputs,
                                                      axis=1)
                question_char_outputs = tf.reshape(
                    question_char_outputs,
                    [batch_size, question_len, options.char_lstm_dim])

                passage_char_outputs = conv(in_passage_char_repres,
                                            self.options.char_lstm_dim,
                                            bias=True,
                                            activation=tf.nn.tanh,
                                            kernel_size=5,
                                            name="char_conv",
                                            reuse=True)

                passage_char_outputs = tf.reduce_max(passage_char_outputs,
                                                     axis=1)
                passage_char_outputs = tf.reshape(
                    passage_char_outputs,
                    [batch_size, passage_len, options.char_lstm_dim])

                c_emb = tf.concat([c_emb, passage_char_outputs], axis=2)
                q_emb = tf.concat([q_emb, question_char_outputs], axis=2)

            c_mask = tf.sequence_mask(
                self.passage_lengths, passage_len,
                dtype=tf.float32)  # [batch_size, passage_len]
            q_mask = tf.sequence_mask(
                self.question_lengths, question_len,
                dtype=tf.float32)  # [batch_size, question_len]

        with tf.variable_scope("Embedding_Encoder_Layer"):
            q_emb = tf.multiply(q_emb, tf.expand_dims(q_mask, axis=-1))
            c_emb = tf.multiply(c_emb, tf.expand_dims(c_mask, axis=-1))

            q_kg_emb = tf.multiply(q_kg_emb, tf.expand_dims(q_mask, axis=-1))
            c_kg_emb = tf.multiply(c_kg_emb, tf.expand_dims(c_mask, axis=-1))

            (q_fw, q_bw, q) = layer_utils.my_lstm_layer(
                q_emb,
                options.context_lstm_dim,
                input_lengths=self.question_lengths,
                scope_name="context_represent",
                reuse=False,
                is_training=is_training,
                dropout_rate=self.dropout,
                use_cudnn=options.use_cudnn)

            (c_fw, c_bw,
             c) = layer_utils.my_lstm_layer(c_emb,
                                            options.context_lstm_dim,
                                            input_lengths=self.passage_lengths,
                                            scope_name="context_represent",
                                            reuse=True,
                                            is_training=is_training,
                                            dropout_rate=self.dropout,
                                            use_cudnn=options.use_cudnn)
            q = tf.multiply(q, tf.expand_dims(q_mask, axis=-1))
            c = tf.multiply(c, tf.expand_dims(c_mask, axis=-1))
            if is_training:
                q = tf.nn.dropout(q, 1 - self.dropout)
                c = tf.nn.dropout(c, 1 - self.dropout)
        with tf.variable_scope('co-att', reuse=tf.AUTO_REUSE):

            s = tf.einsum("abd,acd->abc", c, q)
            # cRq, loss = Complex(c_kg_emb, q_kg_emb, c_mask, q_mask, options.kg_dim, options.relation_dim, loss_type='factorization')
            # cRq, loss, r = Analogy(c_kg_emb, q_kg_emb, c_mask, q_mask, options.scalar_dim,
            #                     options.kg_dim, options.relation_dim, loss_type='factorization')
            # cRq, loss = DisMult(c_kg_emb, q_kg_emb, c_mask, q_mask, options.kg_dim, options.relation_dim, loss_type='factorization')
            cRq, r = Rescal(c_kg_emb, q_kg_emb, c_mask, q_mask, options.kg_dim,
                            options.relation_dim)

            # if is_training:
            v = tf.get_variable("v", [1, 1, 1, options.relation_dim],
                                dtype=tf.float32)
            score = tf.reduce_sum(cRq * v, axis=-1)
            s = s + options.lamda1 * score
            s = mask_relevancy_matrix(s, q_mask, c_mask)
            s_q = tf.nn.softmax(s, dim=1)
            self.v = v
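            # s is [batch, passage_len, question_len]. Below, softmax over
            # dim=1 (passage positions) yields question-side summaries (q2c*)
            # of the passage, while softmax over dim=2 (question positions)
            # yields passage-side summaries (c2q*) of the question, applied to
            # word, KG-embedding, and relation features alike.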

            q2c = tf.einsum("abd,abc->acd", c, s_q)
            q2c_kg = tf.einsum("abd,abc->acd", c_kg_emb, s_q)
            q2c_kg_r = tf.einsum("abcr,abc->acr", cRq, s_q)
            s_c = tf.nn.softmax(s, dim=2)
            c2q = tf.einsum("abd,acb->acd", q, s_c)
            c2q_kg = tf.einsum("abd,acb->acd", q_kg_emb, s_c)
            c2q_kg_r = tf.einsum("abcr,abc->abr", cRq, s_c)

        with tf.variable_scope("Model_Encoder_Layer"):
            passage_inputs = tf.concat(
                [c2q, c, c2q * c, c - c2q, c_kg_emb, c2q_kg, c2q_kg_r], axis=2)
            question_inputs = tf.concat(
                [q2c, q, q2c * q, q - q2c, q_kg_emb, q2c_kg, q2c_kg_r], axis=2)
            passage_inputs = tf.layers.dense(inputs=passage_inputs,
                                             units=2 * options.context_lstm_dim,
                                             activation=tf.nn.relu,
                                             use_bias=True,
                                             name='pro',
                                             reuse=False)
            question_inputs = tf.layers.dense(inputs=question_inputs,
                                              units=2 * options.context_lstm_dim,
                                              activation=tf.nn.relu,
                                              use_bias=True,
                                              name='pro',
                                              reuse=True)
            question_inputs = tf.multiply(question_inputs,
                                          tf.expand_dims(q_mask, axis=-1))
            passage_inputs = tf.multiply(passage_inputs,
                                         tf.expand_dims(c_mask, axis=-1))

            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 question_inputs,
                 options.aggregation_lstm_dim,
                 input_lengths=self.question_lengths,
                 scope_name='aggregate_layer',
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=self.dropout,
                 use_cudnn=options.use_cudnn)

            question_inputs = cur_aggregation_representation
            # question_outputs_vec = tf.concat([fw_rep, bw_rep], axis=1)
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 passage_inputs,
                 options.aggregation_lstm_dim,
                 input_lengths=self.passage_lengths,
                 scope_name='aggregate_layer',
                 reuse=True,
                 is_training=is_training,
                 dropout_rate=self.dropout,
                 use_cudnn=options.use_cudnn)

            passage_inputs = cur_aggregation_representation

            question_inputs = tf.multiply(question_inputs,
                                          tf.expand_dims(q_mask, axis=-1))
            passage_inputs = tf.multiply(passage_inputs,
                                         tf.expand_dims(c_mask, axis=-1))

            if is_training:
                question_inputs = tf.nn.dropout(question_inputs,
                                                1 - self.dropout)
                passage_inputs = tf.nn.dropout(passage_inputs,
                                               1 - self.dropout)
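            # Masked mean pooling below: padded steps were zeroed above, so the
            # sum divided by the true length is a mean over valid positions.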

            passage_outputs_mean = tf.div(
                tf.reduce_sum(passage_inputs, 1),
                tf.expand_dims(tf.cast(self.passage_lengths, tf.float32), -1))
            question_outputs_mean = tf.div(
                tf.reduce_sum(question_inputs, 1),
                tf.expand_dims(tf.cast(self.question_lengths, tf.float32), -1))
            passage_outputs_max = tf.reduce_max(passage_inputs, axis=1)
            question_outputs_max = tf.reduce_max(question_inputs, axis=1)

            passage_outputs_att = soft_attention_with_kg(passage_inputs,
                                                         c_kg_emb,
                                                         c2q_kg_r,
                                                         c_mask,
                                                         options.att_dim,
                                                         scope="soft_att",
                                                         reuse=False)
            question_outputs_att = soft_attention_with_kg(question_inputs,
                                                          q_kg_emb,
                                                          q2c_kg_r,
                                                          q_mask,
                                                          options.att_dim,
                                                          scope="soft_att",
                                                          reuse=True)

            question_outputs = tf.concat([
                question_outputs_max, question_outputs_mean,
                question_outputs_att
            ], axis=1)
            passage_outputs = tf.concat([
                passage_outputs_max, passage_outputs_mean, passage_outputs_att
            ], axis=1)

            match_representation = tf.concat(
                axis=1, values=[question_outputs, passage_outputs])
        # ========Prediction Layer=========
        match_dim = int(match_representation.shape[1])
        w_0 = tf.get_variable("w_0", [match_dim, match_dim // 2],
                              dtype=tf.float32)
        b_0 = tf.get_variable("b_0", [match_dim // 2], dtype=tf.float32)
        w_1 = tf.get_variable("w_1", [match_dim // 2, num_classes],
                              dtype=tf.float32)
        b_1 = tf.get_variable("b_1", [num_classes], dtype=tf.float32)

        if is_training:
            match_representation = tf.nn.dropout(match_representation,
                                                 (1 - self.dropout))
        logits = tf.matmul(match_representation, w_0) + b_0
        logits = tf.nn.relu(logits)
        if is_training: logits = tf.nn.dropout(logits, (1 - self.dropout))
        logits = tf.matmul(logits, w_1) + b_1

        self.prob = tf.nn.softmax(logits)
        self.predictions = tf.argmax(self.prob, 1)
        gold_matrix = tf.one_hot(self.truth, num_classes, dtype=tf.float32)
        correct = tf.nn.in_top_k(logits, self.truth, 1)
        self.eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))
        if not is_training: return

        if options.loss_type == 'logistic':
            matrix = self.matrix * 2 - 1
            matrix = mask_relevancy_4dmatrix(matrix, q_mask, c_mask)
            score = -1 * tf.log(tf.nn.sigmoid(matrix * cRq))
        else:
            score = self.matrix - cRq
            score = 0.5 * score * score

        score = mask_relevancy_4dmatrix(score, q_mask, c_mask)
        KGE_loss = tf.reduce_sum(score, axis=-1)

        self.loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                    labels=gold_matrix))
        self.loss = self.loss + options.lamda2 * tf.reduce_sum(
            tf.layers.flatten(KGE_loss))

        tvars = tf.trainable_variables()
        if self.options.lambda_l2 > 0.0:
            l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tvars
                if 'embedding' not in v.name
            ])
            self.loss = self.loss + self.options.lambda_l2 * l2_loss

        if self.options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=self.options.learning_rate)
        elif self.options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.options.learning_rate)
        elif self.options.optimize_type == 'adagard':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate=self.options.learning_rate)

        grads = layer_utils.compute_gradients(self.loss, tvars)
        grads, _ = tf.clip_by_global_norm(grads, self.options.grad_clipper)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                  global_step=global_step)
        # self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        if self.options.with_moving_average:
            # Track the moving averages of all trainable variables.
            MOVING_AVERAGE_DECAY = 0.9999  # The decay to use for the moving average.
            variable_averages = tf.train.ExponentialMovingAverage(
                MOVING_AVERAGE_DECAY, global_step)
            variables_averages_op = variable_averages.apply(
                tf.trainable_variables())
            train_ops = [self.train_op, variables_averages_op]
            self.train_op = tf.group(*train_ops)
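
# A minimal sketch (assumed TF 1.x; dims hypothetical) of the RESCAL-style
# bilinear scoring that the Rescal helper above appears to compute: one matrix
# W_r per relation, with cRq[b, i, j, r] = c_kg[b, i, :] . W_r . q_kg[b, j, :],
# matching the [batch, passage, question, relation] usage of cRq in the model.
import tensorflow as tf

kg_dim, relation_dim = 8, 3
c_kg = tf.random_normal([2, 5, kg_dim])        # [batch, passage_len, kg_dim]
q_kg = tf.random_normal([2, 4, kg_dim])        # [batch, question_len, kg_dim]
W = tf.get_variable("W_rel", [relation_dim, kg_dim, kg_dim])
cW = tf.einsum("bik,rkl->birl", c_kg, W)       # [batch, passage, rel, kg]
cRq = tf.einsum("birl,bjl->bijr", cW, q_kg)    # [batch, passage, question, rel]
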
Exemplo n.º 9
0
def MCAN_match_func(in_question_repres,
                    in_passage_repres,
                    question_lengths,
                    passage_lengths,
                    question_mask,
                    passage_mask,
                    input_dim,
                    is_training,
                    scope="default",
                    options=None):
    question_reps = in_question_repres
    passage_reps = in_passage_repres

    relevancy_matrix = cal_relevancy_matrix(question_reps, passage_reps)
    relevancy_matrix = mask_relevancy_matrix(relevancy_matrix, question_mask,
                                             passage_mask)

    in_passage_repres = tf.concat([
        in_passage_repres,
        tf.reduce_max(relevancy_matrix, axis=2, keep_dims=True),
        tf.reduce_mean(relevancy_matrix, axis=2, keep_dims=True)
    ], axis=-1)

    qa_aggregation_input = in_passage_repres
    pa_aggregation_input = in_question_repres
    aggregation_representation = []
    aggregation_dim = 0
    with tf.variable_scope('aggregation_layer'):
        for i in range(options.aggregation_layer_num):  # support multiple aggregation layers
            if passage_mask is not None:
                qa_aggregation_input = tf.multiply(
                    qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 qa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=passage_lengths,
                 scope_name=scope + '_left_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, passage_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            qa_aggregation_input = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]
            if question_mask is not None:
                pa_aggregation_input = tf.multiply(
                    pa_aggregation_input, tf.expand_dims(question_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 pa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=question_lengths,
                 scope_name=scope + '_right_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, question_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            pa_aggregation_input = cur_aggregation_representation  # [batch_size, question_len, 2*aggregation_lstm_dim]

    aggregation_representation = tf.concat(
        axis=1,
        values=aggregation_representation)  # [batch_size, aggregation_dim]

    # ======Highway layer======
    if options.with_aggregation_highway:
        with tf.variable_scope(scope + "_aggregation_highway"):
            agg_shape = tf.shape(aggregation_representation)
            batch_size = agg_shape[0]
            aggregation_representation = tf.reshape(
                aggregation_representation, [1, batch_size, aggregation_dim])
            aggregation_representation = multi_highway_layer(
                aggregation_representation, aggregation_dim,
                options.highway_layer_num)
            aggregation_representation = tf.reshape(
                aggregation_representation, [batch_size, aggregation_dim])

    return (aggregation_representation, aggregation_dim)
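
# A plausible sketch (assumed TF 1.x) of the layer_utils.collect_final_step_of_lstm
# helper used above: gather the forward output at time step lengths-1 for each
# batch element; the name and exact signature are inferred from the call sites.
import tensorflow as tf

def collect_final_step_of_lstm(lstm_out, positions):
    # lstm_out: [batch, time, dim]; positions: [batch] int32 (lengths - 1)
    batch = tf.shape(lstm_out)[0]
    indices = tf.stack([tf.range(batch), positions], axis=1)  # [batch, 2]
    return tf.gather_nd(lstm_out, indices)                    # [batch, dim]
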
Exemplo n.º 10
0
def FusionNet_match_Amit(feature_dim,
                         feature_each_dim,
                         passage,
                         question,
                         passage_length,
                         question_length,
                         passage_mask,
                         question_mask,
                         onehot_binary=None,
                         options=None,
                         scope_name='FusionNet_Amit_match_layer',
                         is_training=True,
                         dropout_rate=0.2,
                         reuse=False):
    batch_size = tf.shape(passage)[0]
    passage_len = tf.shape(passage)[1]
    question_len = tf.shape(question)[1]
    word_dim, char_dim, POS_dim, NER_dim, cove_dim, lm_dim = feature_each_dim

    with tf.variable_scope(scope_name, reuse=reuse):
        # Fully Aware MultiLevel Fusion (FAMF) Word  Layer
        with tf.variable_scope('famf_word_layer'):
            famf_word_level_dim = word_dim  # assuming famf_word_level_dim=dim-of-glove=300
            p_wordlevel_input = tf.slice(
                passage, [0, 0, 0],
                [batch_size, passage_len, word_dim
                 ])  # only use word embedding for word layer
            q_wordlevel_input = tf.slice(question, [0, 0, 0],
                                         [batch_size, question_len, word_dim])
            alphas = layer_utils.calcuate_attention(
                p_wordlevel_input,
                q_wordlevel_input,
                famf_word_level_dim,
                famf_word_level_dim,
                scope_name="famf_word_layer_attention",
                att_type=options.att_type,
                mask1=passage_mask,
                mask2=question_mask,
                att_dim=250,
                is_training=is_training,
                dropout_rate=dropout_rate)
            # (in_value_1, in_value_2, feature_dim1, feature_dim2, scope_name='att',
            #            att_type='symmetric', att_dim=20, remove_diagnoal=False, mask1=None, mask2=None, is_training=False, dropout_rate=0.2,
            #            cosine_attention_scale=200)
            weighted_by_question_words = tf.matmul(
                alphas,
                layer_utils.dropout_layer(q_wordlevel_input,
                                          dropout_rate,
                                          is_training=is_training))

        # Reading layer
        with tf.variable_scope('reading'):
            q_rep_reading_input = question  # [glove, cove, NER, POS]
            p_rep_reading_input = tf.concat(
                axis=2,
                values=[passage, onehot_binary, weighted_by_question_words
                        ])  # use all embeddings for reading and understanding.
            # [glove, cove, NER, POS, binary,famf_word_attention]

            with tf.variable_scope('reading_layer_1'):
                reading_layer_lstm_dim = 125

                q_rep_reading_1_output = layer_utils.my_lstm_layer(
                    q_rep_reading_input,
                    reading_layer_lstm_dim,
                    scope_name='bilstm_reading_1_q',
                    reuse=False,
                    is_training=is_training,
                    dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

                p_rep_reading_1_output = layer_utils.my_lstm_layer(
                    p_rep_reading_input,
                    reading_layer_lstm_dim,
                    scope_name='bilstm_reading_1_p',
                    reuse=False,
                    is_training=is_training,
                    dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

            with tf.variable_scope('reading_layer_2'):
                q_rep_reading_2_output = layer_utils.my_lstm_layer(
                    q_rep_reading_1_output,
                    reading_layer_lstm_dim,
                    scope_name='bilstm_reading_1_q',
                    reuse=False,
                    is_training=is_training,
                    dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

                p_rep_reading_2_output = layer_utils.my_lstm_layer(
                    p_rep_reading_1_output,
                    reading_layer_lstm_dim,
                    scope_name='bilstm_reading_1_p',
                    reuse=False,
                    is_training=is_training,
                    dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

        # Understanding Layer
        with tf.variable_scope('question_understanding_layer'):
            q_rep_understanding_input = tf.concat(
                axis=2,
                values=(q_rep_reading_1_output, q_rep_reading_2_output))
            U_q = layer_utils.my_lstm_layer(
                q_rep_understanding_input,
                reading_layer_lstm_dim,
                scope_name='bilstm_understanding_q',
                reuse=False,
                is_training=is_training,
                dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

            U_q_dim = reading_layer_lstm_dim * 2

        # FAMF : higher level
        with tf.variable_scope('famf_higher_layer'):
            famf_higher_layer_w_dim1 = 500
            famf_higher_layer_w_dim2 = 250
            famf_q_input = []
            famf_p_input = []

            # famf_p_input.append(in_passage_word_repres)
            famf_p_input.append(p_wordlevel_input)
            famf_higher_layer_w_dim1 += word_dim
            famf_p_input.append(p_rep_reading_1_output)
            famf_p_input.append(p_rep_reading_2_output)

            # famf_q_input.append(in_question_word_repres)
            famf_q_input.append(q_wordlevel_input)
            famf_q_input.append(q_rep_reading_1_output)
            famf_q_input.append(q_rep_reading_2_output)

            cove_dim_begin = word_dim + char_dim + POS_dim + NER_dim
            if cove_dim != 0:
                p_cove_repres = tf.slice(passage, [0, 0, cove_dim_begin],
                                         [batch_size, passage_len, cove_dim])
                q_cove_repres = tf.slice(question, [0, 0, cove_dim_begin],
                                         [batch_size, question_len, cove_dim])
                famf_p_input.append(p_cove_repres)
                famf_q_input.append(q_cove_repres)
                famf_higher_layer_w_dim1 += cove_dim

            if lm_dim != 0:
                lm_dim_begin = cove_dim_begin + cove_dim
                p_lm_repres = tf.slice(passage, [0, 0, lm_dim_begin],
                                       [batch_size, passage_len, lm_dim])
                q_lm_repres = tf.slice(question, [0, 0, lm_dim_begin],
                                       [batch_size, question_len, lm_dim])
                famf_p_input.append(p_lm_repres)
                famf_q_input.append(q_lm_repres)
                famf_higher_layer_w_dim1 += lm_dim

            famf_p_input = tf.concat(axis=2, values=famf_p_input)  # (B, P, D )
            famf_q_input = tf.concat(axis=2, values=famf_q_input)  # (B, Q, D )

            alphas = layer_utils.calcuate_attention(
                famf_p_input,
                famf_q_input,
                famf_higher_layer_w_dim1,
                famf_higher_layer_w_dim1,
                scope_name="famf_high_lowlevel",
                att_type=options.att_type,
                mask1=passage_mask,
                mask2=question_mask,
                att_dim=famf_higher_layer_w_dim2,
                is_training=is_training,
                dropout_rate=dropout_rate)
            h_Cl = tf.matmul(
                alphas,
                layer_utils.dropout_layer(q_rep_reading_1_output,
                                          dropout_rate,
                                          is_training=is_training))

            alphas = layer_utils.calcuate_attention(
                famf_p_input,
                famf_q_input,
                famf_higher_layer_w_dim1,
                famf_higher_layer_w_dim1,
                scope_name="famf_high_highlevel",
                att_type=options.att_type,
                mask1=passage_mask,
                mask2=question_mask,
                att_dim=famf_higher_layer_w_dim2,
                is_training=is_training,
                dropout_rate=dropout_rate)
            h_Ch = tf.matmul(
                alphas,
                layer_utils.dropout_layer(q_rep_reading_2_output,
                                          dropout_rate,
                                          is_training=is_training))

            alphas = layer_utils.calcuate_attention(
                famf_p_input,
                famf_q_input,
                famf_higher_layer_w_dim1,
                famf_higher_layer_w_dim1,
                scope_name="famf_high_understandinglevel",
                att_type=options.att_type,
                mask1=passage_mask,
                mask2=question_mask,
                att_dim=famf_higher_layer_w_dim2,
                is_training=is_training,
                dropout_rate=dropout_rate)
            u_C = tf.matmul(
                alphas,
                layer_utils.dropout_layer(U_q,
                                          dropout_rate,
                                          is_training=is_training))

            with tf.variable_scope('famf_higher_layer_passage_lstm'):
                p_rep_highlayer_input = []
                p_rep_highlayer_input.append(p_rep_reading_1_output)
                p_rep_highlayer_input.append(p_rep_reading_2_output)
                p_rep_highlayer_input.append(h_Cl)
                p_rep_highlayer_input.append(h_Ch)
                p_rep_highlayer_input.append(u_C)
                p_rep_highlayer_input = tf.concat(
                    axis=2,
                    values=p_rep_highlayer_input)  # (B, P, D ) D=(250*5)

                famf_higher_layer_passage_lstm_dim = 125

                V_c = layer_utils.my_lstm_layer(
                    p_rep_highlayer_input,
                    famf_higher_layer_passage_lstm_dim,
                    scope_name='bilstm_higher_layer_p',
                    reuse=False,
                    is_training=is_training,
                    dropout_rate=options.dropout_rate)[2]  # [B, Q, 250 ]

        # FAMF: Self-boosted
        with tf.variable_scope('famf_selfboosted_layer'):
            famf_self_boosted_input = []
            famf_self_boosted_w_dim1 = 250 * 6

            # famf_self_boosted_input.append(in_passage_word_repres)
            famf_self_boosted_input.append(p_wordlevel_input)
            famf_self_boosted_w_dim1 += word_dim
            famf_self_boosted_input.append(p_rep_reading_1_output)
            famf_self_boosted_input.append(p_rep_reading_2_output)
            famf_self_boosted_input.append(h_Cl)
            famf_self_boosted_input.append(h_Ch)
            famf_self_boosted_input.append(u_C)
            famf_self_boosted_input.append(V_c)

            if cove_dim != 0:
                famf_self_boosted_input.append(
                    tf.slice(passage, [0, 0, cove_dim_begin],
                             [batch_size, passage_len, cove_dim]))
                famf_self_boosted_w_dim1 += cove_dim  # 300 + (250 * 6) + 600(if cove) + 300 (if lm)

            # if lm_dim != 0: not used in old codebase
            famf_self_boosted_w_dim2 = 50  # 250 does not fit in memory

            famf_self_boosted_input = tf.concat(
                axis=2, values=famf_self_boosted_input
            )  # (B, P, D ) D=(600 ,300 , 250*6 ) = 2400

            useProjectionLayer = True
            if useProjectionLayer:
                projection_dim = 50
                famf_self_boosted_input_dropout = famf_self_boosted_input
                famf_self_boosted_projection = layer_utils.projection_layer(
                    famf_self_boosted_input_dropout,
                    famf_self_boosted_w_dim1,
                    projection_dim,
                    scope="self-match-projection")
                famf_self_boosted_w_dim1 = projection_dim
                vv_C_input = famf_self_boosted_projection
            else:
                vv_C_input = famf_self_boosted_input

            alphas = layer_utils.calcuate_attention(
                vv_C_input,
                vv_C_input,
                famf_self_boosted_w_dim1,
                famf_self_boosted_w_dim1,
                scope_name="famf_selfboosted_layer_attention",
                att_type=options.att_type,
                mask1=passage_mask,
                mask2=passage_mask,
                att_dim=famf_self_boosted_w_dim2,
                is_training=is_training,
                dropout_rate=dropout_rate)
            vv_C = tf.matmul(
                alphas,
                layer_utils.dropout_layer(V_c,
                                          dropout_rate,
                                          is_training=is_training))

            p_rep_selfboosted_layer_input = tf.concat(
                axis=2, values=(famf_self_boosted_input, vv_C))
            return (p_rep_selfboosted_layer_input, 0)
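
# The recurring fully-aware fusion step above, in isolation (assumed TF 1.x):
# softmax attention weights over question positions, then a weighted sum of
# question-side history vectors for every passage position.
import tensorflow as tf

alphas = tf.nn.softmax(tf.random_normal([2, 5, 4]))  # [batch, P, Q]; rows sum to 1
history = tf.random_normal([2, 4, 7])                # [batch, Q, dim]
fused = tf.matmul(alphas, history)                   # [batch, P, dim]
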
Exemplo n.º 11
0
def BiMPM_match(feature_dim,
                passage,
                question,
                passage_length,
                question_length,
                passage_mask,
                question_mask,
                onehot_binary=None,
                options=None,
                scope_name='BiMPM_match_layer',
                is_training=True,
                dropout_rate=0.2,
                reuse=False):
    match_results = []
    match_dim = 0
    with tf.variable_scope(scope_name, reuse=reuse):
        # word-level matching
        (word_match_reps, word_match_dim, word_PoQ_reps,
         word_QoP_reps) = onelayer_BiMPM_match(feature_dim,
                                               passage,
                                               question,
                                               passage_mask,
                                               question_mask,
                                               options=options,
                                               scope_name='word_level_BiMPM',
                                               is_training=is_training,
                                               dropout_rate=dropout_rate,
                                               reuse=False)
        match_results.append(word_match_reps)
        match_dim += word_match_dim

        # contextual level matching
        passage_reps = [passage, word_PoQ_reps]
        passage_dim = 2 * feature_dim
        # if onehot_binary is not None:
        #     passage_reps.append(onehot_binary)
        #     passage_dim += 11

        question_reps = [question]
        if options.with_QoP: question_reps.append(word_QoP_reps)

        passage_context = passage
        if onehot_binary is not None:
            passage_context = tf.concat(
                axis=2, values=[passage_context, onehot_binary])
        question_context = question
        for i in range(options.context_layer_num):
            cur_passage_reps = tf.concat(axis=2, values=passage_reps)
            cur_question_reps = tf.concat(axis=2, values=question_reps)

            # lstm over passage and question individually
            passage_context = layer_utils.my_lstm_layer(
                passage_context,
                options.context_lstm_dim,
                scope_name="passage_context_lstm_{}".format(i),
                reuse=False,
                is_training=is_training,
                dropout_rate=dropout_rate)[2]
            passage_context = tf.multiply(
                passage_context, tf.expand_dims(passage_mask, axis=-1))
            question_context = layer_utils.my_lstm_layer(
                question_context,
                options.context_lstm_dim,
                scope_name="question_context_lstm_{}".format(i),
                reuse=False,
                is_training=is_training,
                dropout_rate=dropout_rate)[2]
            question_context = tf.multiply(
                question_context, tf.expand_dims(question_mask, axis=-1))

            # matching
            (cur_match_reps, cur_match_dim, cur_PoQ_reps,
             cur_QoP_reps) = onelayer_BiMPM_match(
                 2 * options.context_lstm_dim,
                 passage_context,
                 question_context,
                 passage_mask,
                 question_mask,
                 accum_dim=passage_dim,
                 passage_accum=cur_passage_reps,
                 question_accum=cur_question_reps,
                 options=options,
                 scope_name='context_BiMPM_{}'.format(i),
                 is_training=is_training,
                 dropout_rate=dropout_rate,
                 reuse=False)

            match_results.append(cur_match_reps)
            match_dim += cur_match_dim

            if options.accumulate_match_input:
                passage_reps.append(passage_context)
                passage_reps.append(cur_PoQ_reps)
                # passage_reps.append(cur_match_reps)
                passage_dim += 4 * options.context_lstm_dim
                question_reps.append(question_context)
                if options.with_QoP: question_reps.append(cur_QoP_reps)
            else:
                # passage_reps = [passage_context, cur_PoQ_reps, cur_match_reps]
                passage_reps = [passage_context, cur_PoQ_reps]
                passage_dim = 4 * options.context_lstm_dim
                question_reps = [question_context]
                if options.with_QoP: question_reps.append(cur_QoP_reps)

        match_results = tf.concat(axis=2, values=match_results)
        if options.with_self_match:
            cur_passage_reps = tf.concat(axis=2, values=passage_reps)
            cur_passage_reps_projection = layer_utils.projection_layer(
                cur_passage_reps,
                passage_dim,
                options.self_compress_dim,
                scope="self-match-projection")
            self_atten_scores = layer_utils.calcuate_attention(
                cur_passage_reps_projection,
                cur_passage_reps_projection,
                options.self_compress_dim,
                options.self_compress_dim,
                scope_name="self_boost_att",
                att_type=options.att_type,
                att_dim=options.att_dim,
                remove_diagnoal=True,
                mask1=passage_mask,
                mask2=passage_mask,
                is_training=is_training,
                dropout_rate=dropout_rate)
            self_match_reps = tf.matmul(
                self_atten_scores,
                layer_utils.dropout_layer(match_results,
                                          dropout_rate,
                                          is_training=is_training))
            match_results = tf.concat(axis=2,
                                      values=[match_results, self_match_reps])
            match_dim = 2 * match_dim
    return (match_results, match_dim)
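
# A minimal sketch (dims hypothetical, assumed TF 1.x) of the
# accumulate_match_input behaviour above: each new layer sees the concatenation
# of all earlier passage representations, DenseNet-style.
import tensorflow as tf

reps = [tf.random_normal([2, 5, 8])]                  # initial passage reps
for i in range(2):
    cur = tf.concat(reps, axis=2)                     # everything seen so far
    new_rep = tf.layers.dense(cur, 8, name='acc_layer_%d' % i)
    reps.append(new_rep)                              # accumulate for the next layer
stacked = tf.concat(reps, axis=2)                     # [2, 5, 24]
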
Exemplo n.º 12
0
def FusionNet_match(feature_dim,
                    passage,
                    question,
                    passage_length,
                    question_length,
                    passage_mask,
                    question_mask,
                    onehot_binary=None,
                    options=None,
                    scope_name='FusionNet_match_layer',
                    is_training=True,
                    dropout_rate=0.2,
                    reuse=False):
    # passage_mask = None
    # question_mask = None

    with tf.variable_scope(scope_name, reuse=reuse):
        #======= Fully Aware MultiLevel Fusion (FAMF) Word Layer
        word_atten_scores = layer_utils.calcuate_attention(
            passage,
            question,
            feature_dim,
            feature_dim,
            scope_name="FAMF_word",
            att_type=options.att_type,
            att_dim=options.att_dim,
            remove_diagnoal=False,
            mask1=passage_mask,
            mask2=question_mask,
            is_training=is_training,
            dropout_rate=dropout_rate)
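        # For each passage word, take the attention-weighted average of the
        # question word vectors (dropout is applied to the question side
        # during training).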
        weighted_by_question_words = tf.matmul(
            word_atten_scores,
            layer_utils.dropout_layer(question,
                                      dropout_rate,
                                      is_training=is_training))

        # ====== Reading layer
        passage_tmp = [passage, weighted_by_question_words]
        passage_tmp_dim = 2 * feature_dim
        if onehot_binary is not None:
            passage_tmp.append(onehot_binary)
            # the one-hot binary features are assumed to be 11-dimensional
            passage_tmp_dim += 11
        passage_tmp = tf.concat(axis=2, values=passage_tmp)
        passage_context1 = layer_utils.my_lstm_layer(
            passage_tmp,
            options.context_lstm_dim,
            scope_name="passage_context1_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]
        passage_context2 = layer_utils.my_lstm_layer(
            passage_context1,
            options.context_lstm_dim,
            scope_name="passage_context2_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]

        question_context1 = layer_utils.my_lstm_layer(
            question,
            options.context_lstm_dim,
            scope_name="question_context1_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]
        question_context2 = layer_utils.my_lstm_layer(
            question_context1,
            options.context_lstm_dim,
            scope_name="question_context2_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]

        # ==== Understanding layer
        question_understand_input = tf.concat(axis=2,
                                              values=(question_context1,
                                                      question_context2))
        question_understand_output = layer_utils.my_lstm_layer(
            question_understand_input,
            options.context_lstm_dim,
            scope_name="question_under_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]

        # ==== FAMF: higher-level fusion
        famf_passage_input = tf.concat(axis=2,
                                       values=(passage, passage_context1,
                                               passage_context2))
        famf_question_input = tf.concat(axis=2,
                                        values=(question, question_context1,
                                                question_context2))

        passage_in_dim = feature_dim + 4 * options.context_lstm_dim

        lower_level_atten_scores = layer_utils.calcuate_attention(
            famf_passage_input,
            famf_question_input,
            passage_in_dim,
            passage_in_dim,
            scope_name="lower_level_att",
            att_type=options.att_type,
            att_dim=options.att_dim,
            remove_diagnoal=False,
            mask1=passage_mask,
            mask2=question_mask,
            is_training=is_training,
            dropout_rate=dropout_rate)
        high_level_atten_scores = layer_utils.calcuate_attention(
            famf_passage_input,
            famf_question_input,
            passage_in_dim,
            passage_in_dim,
            scope_name="high_level_att",
            att_type=options.att_type,
            att_dim=options.att_dim,
            remove_diagnoal=False,
            mask1=passage_mask,
            mask2=question_mask,
            is_training=is_training,
            dropout_rate=dropout_rate)
        understand_atten_scores = layer_utils.calcuate_attention(
            famf_passage_input,
            famf_question_input,
            passage_in_dim,
            passage_in_dim,
            scope_name="understand_att",
            att_type=options.att_type,
            att_dim=options.att_dim,
            remove_diagnoal=False,
            mask1=passage_mask,
            mask2=question_mask,
            is_training=is_training,
            dropout_rate=dropout_rate)
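        # Fuse the question into the passage at three levels (FusionNet
        # notation): h_Cl from first-layer contexts, h_Ch from second-layer
        # contexts, and u_C from the question understanding vectors.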
        h_Cl = tf.matmul(
            lower_level_atten_scores,
            layer_utils.dropout_layer(question_context1,
                                      dropout_rate,
                                      is_training=is_training))
        h_Ch = tf.matmul(
            high_level_atten_scores,
            layer_utils.dropout_layer(question_context2,
                                      dropout_rate,
                                      is_training=is_training))
        u_C = tf.matmul(
            understand_atten_scores,
            layer_utils.dropout_layer(question_understand_output,
                                      dropout_rate,
                                      is_training=is_training))

        # ==== FAMF higher-layer passage LSTM
        V_c_input = tf.concat(
            axis=2,
            values=[passage_context1, passage_context2, h_Cl, h_Ch, u_C])
        V_c = layer_utils.my_lstm_layer(
            V_c_input,
            options.context_lstm_dim,
            scope_name="famf_higher_layer_passage_lstm",
            reuse=False,
            is_training=is_training,
            dropout_rate=dropout_rate)[2]

        VV_c_input = tf.concat(axis=2, values=[passage, V_c_input, V_c])
        input_dim = 12 * options.context_lstm_dim + feature_dim
        # ==== FAMF: self-boosted fusion
        if options.with_self_match:
            VV_c_input_projection = layer_utils.projection_layer(
                VV_c_input,
                input_dim,
                options.self_compress_dim,
                scope="self-boost-projection")
            self_atten_scores = layer_utils.calcuate_attention(
                VV_c_input_projection,
                VV_c_input_projection,
                options.self_compress_dim,
                options.self_compress_dim,
                scope_name="self_boost_att",
                att_type=options.att_type,
                att_dim=options.att_dim,
                remove_diagnoal=options.remove_diagonal,
                mask1=passage_mask,
                mask2=passage_mask,
                is_training=is_training,
                dropout_rate=dropout_rate)
            VV_c = tf.matmul(
                self_atten_scores,
                layer_utils.dropout_layer(V_c,
                                          dropout_rate,
                                          is_training=is_training))
            VV_c_input = tf.concat(axis=2, values=[VV_c_input, VV_c])
            input_dim += 2 * options.context_lstm_dim
    return (VV_c_input, input_dim)
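
A hedged usage sketch for FusionNet_match; the option values below are illustrative assumptions (argparse.Namespace merely stands in for the real options object). The returned input_dim works out to feature_dim + 12 * context_lstm_dim, plus another 2 * context_lstm_dim when with_self_match is enabled:

import argparse

# Sketch only: these field values are illustrative, not original defaults.
opts = argparse.Namespace(att_type='symmetric', att_dim=100,
                          context_lstm_dim=100, with_self_match=True,
                          self_compress_dim=100, remove_diagonal=True)
match_reps, match_dim = FusionNet_match(
    300, passage, question, passage_length, question_length,
    passage_mask, question_mask, onehot_binary=None, options=opts,
    is_training=True, dropout_rate=0.2)
# match_dim == 300 + 12 * 100 + 2 * 100 == 1700 with these settings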
Example no. 13
def multi_perspective_match(feature_dim,
                            repres1,
                            repres2,
                            is_training=True,
                            dropout_rate=0.2,
                            options=None,
                            scope_name='mp-match',
                            reuse=False):
    '''Match two aligned sequences with a configurable set of matchers.

        :param repres1: [batch_size, len, feature_dim]
        :param repres2: [batch_size, len, feature_dim]
        :return: (matching_result, match_dim), where matching_result is
            [batch_size, len, match_dim]
    '''
    repres1 = layer_utils.dropout_layer(repres1,
                                        dropout_rate,
                                        is_training=is_training)
    repres2 = layer_utils.dropout_layer(repres2,
                                        dropout_rate,
                                        is_training=is_training)
    input_shape = tf.shape(repres1)
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    matching_result = []
    cosine_norm = True
    with tf.variable_scope(scope_name, reuse=reuse):
        match_dim = 0
        if options.with_cosine:
            cosine_value = layer_utils.cosine_distance(repres1,
                                                       repres2,
                                                       cosine_norm=cosine_norm)
            cosine_value = tf.reshape(cosine_value,
                                      [batch_size, seq_length, 1])
            matching_result.append(cosine_value)
            match_dim += 1

        concat_rep = tf.concat(axis=2, values=[repres1, repres2])
        if options.with_nn_match:
            nn_match_W = tf.get_variable(
                "nn_match_W", [2 * feature_dim, options.nn_match_dim],
                dtype=tf.float32)
            nn_match_b = tf.get_variable("nn_match_b", [options.nn_match_dim],
                                         dtype=tf.float32)
            cur_rep = tf.reshape(concat_rep,
                                 [batch_size * seq_length, 2 * feature_dim])
            cur_match_result = tf.tanh(
                tf.matmul(cur_rep, nn_match_W) + nn_match_b)
            cur_match_result = tf.reshape(
                cur_match_result,
                [batch_size, seq_length, options.nn_match_dim])
            matching_result.append(cur_match_result)
            match_dim += options.nn_match_dim

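        # Multi-perspective cosine matching: learn cosine_MP_dim perspective
        # vectors that re-weight the feature dimensions before each cosine,
        # optionally after a shared tanh projection (mp_cosine_proj_dim > 0).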
        if options.with_mp_cosine:
            if options.mp_cosine_proj_dim > 0:
                mp_cosine_projection = tf.get_variable(
                    "mp_cosine_projection",
                    [feature_dim, options.mp_cosine_proj_dim],
                    dtype=tf.float32)
                mp_cosine_params = tf.get_variable(
                    "mp_cosine",
                    shape=[
                        1, options.cosine_MP_dim, options.mp_cosine_proj_dim
                    ],
                    dtype=tf.float32)
                repres1_flat = tf.reshape(
                    repres1, [batch_size * seq_length, feature_dim])
                repres2_flat = tf.reshape(
                    repres2, [batch_size * seq_length, feature_dim])
                repres1_flat = tf.tanh(
                    tf.matmul(repres1_flat, mp_cosine_projection))
                repres2_flat = tf.tanh(
                    tf.matmul(repres2_flat, mp_cosine_projection))
                repres1_flat = tf.expand_dims(repres1_flat, axis=1)
                repres2_flat = tf.expand_dims(repres2_flat, axis=1)
                mp_cosine_matching = layer_utils.cosine_distance(
                    tf.multiply(repres1_flat, mp_cosine_params),
                    repres2_flat,
                    cosine_norm=cosine_norm)
                mp_cosine_matching = tf.reshape(
                    mp_cosine_matching,
                    [batch_size, seq_length, options.cosine_MP_dim])
            else:
                mp_cosine_params = tf.get_variable(
                    "mp_cosine",
                    shape=[1, 1, options.cosine_MP_dim, feature_dim],
                    dtype=tf.float32)
                repres1_flat = tf.expand_dims(repres1, axis=2)
                repres2_flat = tf.expand_dims(repres2, axis=2)
                mp_cosine_matching = layer_utils.cosine_distance(
                    tf.multiply(repres1_flat, mp_cosine_params),
                    repres2_flat,
                    cosine_norm=cosine_norm)
            matching_result.append(mp_cosine_matching)
            match_dim += options.cosine_MP_dim

        if options.with_match_lstm:
            (_, _, match_lstm_result) = layer_utils.my_lstm_layer(
                concat_rep,
                options.match_lstm_dim,
                scope_name="match_lstm",
                reuse=False,
                is_training=is_training,
                dropout_rate=dropout_rate)
            matching_result.append(match_lstm_result)
            match_dim += 2 * options.match_lstm_dim
    matching_result = tf.concat(axis=2, values=matching_result)
    return (matching_result, match_dim)
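
A small sketch of multi_perspective_match on two aligned sequences; shapes and option values below are assumptions for illustration:

import argparse
import tensorflow as tf

# Only the fields the function reads are set; values are illustrative.
opts = argparse.Namespace(with_cosine=True, with_nn_match=False,
                          with_mp_cosine=True, mp_cosine_proj_dim=0,
                          cosine_MP_dim=5, with_match_lstm=False)
rep1 = tf.random_normal([32, 40, 100])  # [batch_size, len, feature_dim]
rep2 = tf.random_normal([32, 40, 100])  # aligned with rep1 position-wise
result, dim = multi_perspective_match(100, rep1, rep2, options=opts)
# dim == 1 (cosine) + 5 (mp-cosine perspectives) == 6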
Example no. 14
def MPCM_match(feature_dim,
               passage,
               question,
               passage_length,
               question_length,
               passage_mask,
               question_mask,
               options=None,
               scope_name='MPCM_match_layer',
               is_training=True,
               dropout_rate=0.2,
               reuse=False):
    match_results = []
    match_dim = 0
    with tf.variable_scope(scope_name, reuse=reuse):
        if options.with_word_match:
            (word_match_reps, word_match_dim) = multi_granularity_match(
                feature_dim,
                passage,
                question,
                passage_length,
                question_length,
                passage_mask=passage_mask,
                question_mask=question_mask,
                is_training=is_training,
                dropout_rate=dropout_rate,
                options=options,
                with_full_matching=False,
                with_attentive_matching=True,
                with_max_attentive_matching=True,
                scope_name='word_match',
                reuse=False)
            match_results.append(word_match_reps)
            match_dim += word_match_dim

        if options.with_sequential_match:
            cur_passage_context = None
            cur_question_context = None
            for i in xrange(options.context_layer_num):
                if cur_passage_context is None:
                    cur_passage_context = passage
                    cur_question_context = question
                else:
                    cur_passage_context = tf.concat(
                        axis=2, values=[passage, cur_passage_context])
                    cur_question_context = tf.concat(
                        axis=2, values=[question, cur_question_context])
                (cur_passage_context_fw, cur_passage_context_bw,
                 cur_passage_context) = layer_utils.my_lstm_layer(
                     cur_passage_context,
                     options.context_lstm_dim,
                     scope_name="passage_context_lstm_{}".format(i),
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=dropout_rate)
                cur_passage_context_fw = tf.multiply(
                    cur_passage_context_fw,
                    tf.expand_dims(passage_mask, axis=-1))
                cur_passage_context_bw = tf.multiply(
                    cur_passage_context_bw,
                    tf.expand_dims(passage_mask, axis=-1))
                cur_passage_context = tf.multiply(
                    cur_passage_context, tf.expand_dims(passage_mask, axis=-1))

                (cur_question_context_fw, cur_question_context_bw,
                 cur_question_context) = layer_utils.my_lstm_layer(
                     cur_question_context,
                     options.context_lstm_dim,
                     scope_name="question_context_lstm_{}".format(i),
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=dropout_rate)
                cur_question_context_fw = tf.multiply(
                    cur_question_context_fw,
                    tf.expand_dims(question_mask, axis=-1))
                cur_question_context_bw = tf.multiply(
                    cur_question_context_bw,
                    tf.expand_dims(question_mask, axis=-1))
                cur_question_context = tf.multiply(
                    cur_question_context, tf.expand_dims(question_mask,
                                                         axis=-1))

                if options.with_attentive_match:
                    # forward matching
                    (cur_match_rep, cur_match_dim) = multi_granularity_match(
                        options.context_lstm_dim,
                        cur_passage_context_fw,
                        cur_question_context_fw,
                        passage_length,
                        question_length,
                        passage_mask=passage_mask,
                        question_mask=question_mask,
                        is_training=is_training,
                        dropout_rate=dropout_rate,
                        options=options,
                        with_full_matching=False,
                        with_attentive_matching=True,
                        with_max_attentive_matching=True,
                        scope_name='seq_forward_match_{}'.format(i))
                    match_dim += cur_match_dim
                    match_results.append(cur_match_rep)

                    # backward matching
                    (cur_match_rep, cur_match_dim) = multi_granularity_match(
                        options.context_lstm_dim,
                        cur_passage_context_bw,
                        cur_question_context_bw,
                        passage_length,
                        question_length,
                        passage_mask=passage_mask,
                        question_mask=question_mask,
                        is_training=is_training,
                        dropout_rate=dropout_rate,
                        options=options,
                        with_full_matching=False,
                        with_attentive_matching=True,
                        with_max_attentive_matching=True,
                        scope_name='seq_backward_match_{}'.format(i))
                    match_dim += cur_match_dim
                    match_results.append(cur_match_rep)

                if options.with_full_match:
                    # full matching
                    (cur_match_rep, cur_match_dim) = multi_granularity_match(
                        2 * options.context_lstm_dim,
                        cur_passage_context,
                        cur_question_context,
                        passage_length,
                        question_length,
                        passage_mask=passage_mask,
                        question_mask=question_mask,
                        is_training=is_training,
                        dropout_rate=dropout_rate,
                        options=options,
                        with_full_matching=True,
                        with_attentive_matching=False,
                        with_max_attentive_matching=False,
                        scope_name='seq_full_match_{}'.format(i))
                    match_dim += cur_match_dim
                    match_results.append(cur_match_rep)

                if options.with_word_phrase_match:
                    question_context_proj = layer_utils.projection_layer(
                        cur_question_context,
                        2 * options.context_lstm_dim,
                        feature_dim,
                        activation_func=tf.tanh,
                        scope="question_context_proj_{}".format(i))
                    (cur_match_rep, cur_match_dim) = multi_granularity_match(
                        feature_dim,
                        passage,
                        question_context_proj,
                        passage_length,
                        question_length,
                        passage_mask=passage_mask,
                        question_mask=question_mask,
                        is_training=is_training,
                        dropout_rate=dropout_rate,
                        options=options,
                        with_full_matching=False,
                        with_attentive_matching=True,
                        with_max_attentive_matching=True,
                        scope_name='word_phrase_match_{}'.format(i))

                    match_dim += cur_match_dim
                    match_results.append(cur_match_rep)

                if options.with_phrase_word_match:
                    passage_context_proj = layer_utils.projection_layer(
                        cur_passage_context,
                        2 * options.context_lstm_dim,
                        feature_dim,
                        activation_func=tf.tanh,
                        scope="passage_context_proj_{}".format(i))
                    (cur_match_rep, cur_match_dim) = multi_granularity_match(
                        feature_dim,
                        passage_context_proj,
                        question,
                        passage_length,
                        question_length,
                        passage_mask=passage_mask,
                        question_mask=question_mask,
                        is_training=is_training,
                        dropout_rate=dropout_rate,
                        options=options,
                        with_full_matching=False,
                        with_attentive_matching=True,
                        with_max_attentive_matching=True,
                        scope_name='phrase_word_match_{}'.format(i))
                    match_dim += cur_match_dim
                    match_results.append(cur_match_rep)

    match_results = tf.concat(axis=2, values=match_results)
    return (match_results, match_dim)
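
A hedged sketch of the option flags MPCM_match consults; all values are illustrative, and multi_granularity_match reads further flags from the same options object that are not shown here:

import argparse

# Flags read directly by MPCM_match (illustrative values, not defaults).
opts = argparse.Namespace(
    with_word_match=True,        # word-level attentive matching
    with_sequential_match=True,  # per-layer BiLSTM context matching
    context_layer_num=1, context_lstm_dim=100,
    with_attentive_match=True, with_full_match=False,
    with_word_phrase_match=False, with_phrase_word_match=False)
reps, dim = MPCM_match(300, passage, question, passage_length,
                       question_length, passage_mask, question_mask,
                       options=opts)
# `dim` grows with each enabled flag; `reps` is [batch, passage_len, dim]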