Example #1
    def add_embeddings(self):
        print('add embeddings')
        if self.embeddings is not None:
            # Pretrained embeddings were supplied: initialize W from them.
            print("load embedding")
            W = tf.Variable(np.array(self.embeddings),
                            name="W",
                            dtype="float32",
                            trainable=self.trainable)

        else:
            # No pretrained embeddings: initialize W uniformly at random.
            print("random embedding")
            W = tf.Variable(tf.random_uniform(
                [self.vocab_size, self.embedding_size], -1.0, 1.0),
                            name="W",
                            trainable=self.trainable)
        self.embedding_W = W

        # self.overlap_W = tf.Variable(a,name="W",trainable = True)
        self.para.append(self.embedding_W)

        self.q_embedding = tf.nn.embedding_lookup(self.embedding_W,
                                                  self.question)

        self.a_embedding = tf.nn.embedding_lookup(self.embedding_W,
                                                  self.answer)
        self.a_neg_embedding = tf.nn.embedding_lookup(self.embedding_W,
                                                      self.answer_negative)
        # real (unpadded) sequence lengths and padding masks
        self.q_len, self.q_mask = blocks.length(self.question)
        self.a_len, self.a_mask = blocks.length(self.answer)
        self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)
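
The branch above either loads a pretrained matrix into W or initializes it uniformly at random, then shares that single variable across the question, answer, and negative-answer lookups. A minimal, self-contained sketch of the same lookup pattern (TF 1.x assumed; the toy vocabulary size, embedding size, and random "pretrained" matrix are illustrative assumptions, not values from the project):

import numpy as np
import tensorflow as tf

vocab_size, embedding_size = 5, 3
pretrained = np.random.rand(vocab_size, embedding_size)   # stand-in for self.embeddings

W = tf.Variable(pretrained, name="W", dtype="float32", trainable=True)
ids = tf.placeholder(tf.int32, [None, None], name="token_ids")
vectors = tf.nn.embedding_lookup(W, ids)                   # [batch, seq_len, embedding_size]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(vectors, feed_dict={ids: [[0, 2, 4]]})
    print(out.shape)                                       # (1, 3, 3)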
Example #2
    def create_placeholder(self):

        print('Create placeholders')
        # The sentence length varies from batch to batch, so both dimensions are None.
        self.question = tf.placeholder(tf.int32, [None, None],
                                       name='input_question')

        self.answer = tf.placeholder(tf.int32, [None, None],
                                     name='input_answer')
        self.answer_negative = tf.placeholder(tf.int32, [None, None],
                                              name='input_right')

        self.batch_size = tf.shape(self.question)[0]
        self.q_len, self.q_mask = blocks.length(self.question)
        self.a_len, self.a_mask = blocks.length(self.answer)
        self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)
        self.dropout_keep_prob_holder = tf.placeholder(
            tf.float32, name='dropout_keep_prob')
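
The [None, None] shape leaves both the batch size and the sequence length dynamic, so differently padded batches can be fed through the same placeholder. A standalone sketch of that behavior (TF 1.x assumed; the token ids are made-up, zero-padded values):

import tensorflow as tf

question = tf.placeholder(tf.int32, [None, None], name='input_question')
dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
batch_size = tf.shape(question)[0]

with tf.Session() as sess:
    # A batch of 2 questions, each padded to length 4.
    print(sess.run(batch_size, feed_dict={question: [[4, 12, 7, 0], [9, 3, 0, 0]]}))  # 2
    # The same placeholder accepts a different sequence length on the next batch.
    print(sess.run(batch_size, feed_dict={question: [[1, 2, 3]]}))                    # 1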
Example #3
    def create_placeholder(self):
        print('Create placeholders')
        # The sentence length varies from batch to batch, so both dimensions are None.
        self.question = tf.placeholder(tf.int32, [None, None],
                                       name='input_question')
        self.max_input_left = tf.shape(self.question)[1]

        self.batch_size = tf.shape(self.question)[0]
        self.answer = tf.placeholder(tf.int32, [None, None],
                                     name='input_answer')
        self.max_input_right = tf.shape(self.answer)[1]
        self.answer_negative = tf.placeholder(tf.int32, [None, None],
                                              name='input_right')
        self.pos_position = tf.placeholder(tf.int32, [None, None],
                                           name='pos_position')
        self.neg_position = tf.placeholder(tf.int32, [None, None],
                                           name='neg_position')
        self.q_len, self.q_mask = blocks.length(self.question)
        self.a_len, self.a_mask = blocks.length(self.answer)
        self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)
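
blocks.length is project-specific and not shown in these examples. A plausible implementation, assuming id 0 is the padding token (this is an assumption, not the project's actual helper), counts the non-zero ids per row and returns both the lengths and a float padding mask:

import tensorflow as tf

def length(sequence):
    # sequence: int32 tensor of token ids, [batch, time]; id 0 assumed to be padding.
    used = tf.sign(tf.abs(sequence))           # 1 for real tokens, 0 for padding
    seq_len = tf.reduce_sum(used, axis=1)      # [batch]
    return tf.cast(seq_len, tf.int32), tf.cast(used, tf.float32)

lengths, mask = length(tf.constant([[5, 3, 0, 0], [7, 0, 0, 0]]))
with tf.Session() as sess:
    print(sess.run(lengths))                   # [2 1]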
Example #4
    def construct_hex_vec_selfatt(self, inputs, params, phs):
        keep_rate, stop_grad, _ = phs

        premise_x, hypothesis_x = inputs

        with tf.variable_scope("hex_superficial_selfatt", reuse=tf.AUTO_REUSE):

            emb_premise = tf.nn.embedding_lookup(self.embeddings, premise_x)
            emb_premise_drop = tf.nn.dropout(emb_premise, keep_rate)

            emb_hypothesis = tf.nn.embedding_lookup(self.embeddings,
                                                    hypothesis_x)
            emb_hypothesis_drop = tf.nn.dropout(emb_hypothesis, keep_rate)

            prem_seq_lengths, prem_mask = blocks.length(premise_x)
            hyp_seq_lengths, hyp_mask = blocks.length(hypothesis_x)

            prem_self_att = blocks.simple_self_attention_block(
                emb_premise_drop,
                params['dim_emb'],
                prem_seq_lengths,
                prem_mask,
                scope='superficial_prem_self_att')
            hypo_self_att = blocks.simple_self_attention_block(
                emb_hypothesis_drop,
                params['dim_emb'],
                hyp_seq_lengths,
                hyp_mask,
                scope='superficial_hypo_self_att')

            premise_rep = tf.reduce_sum(prem_self_att, 1)
            hypothesis_rep = tf.reduce_sum(hypo_self_att, 1)

            ## Combinations
            h_diff = premise_rep - hypothesis_rep
            h_mul = premise_rep * hypothesis_rep

            ### MLP
            mlp_input = tf.concat([premise_rep, hypothesis_rep, h_diff, h_mul],
                                  1)
        return premise_rep, hypothesis_rep, mlp_input
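
The matching features concatenated for the MLP are the two sentence vectors plus their element-wise difference and product. A standalone sketch of just that combination step (TF 1.x assumed; the representation size of 300 is an illustrative assumption):

import tensorflow as tf

dim = 300                                                  # assumed representation size
premise_rep = tf.placeholder(tf.float32, [None, dim])
hypothesis_rep = tf.placeholder(tf.float32, [None, dim])

h_diff = premise_rep - hypothesis_rep                      # element-wise difference
h_mul = premise_rep * hypothesis_rep                       # element-wise product
mlp_input = tf.concat([premise_rep, hypothesis_rep, h_diff, h_mul], axis=1)
# mlp_input: [batch, 4 * dim]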
Example #5
    def forward_model(self, inputs, weights, params, phs):

        keep_rate, stop_grad, zero_protect_ph = phs

        premise_x, hypothesis_x = inputs

        ## Function for embedding lookup and dropout at embedding layer
        def emb_drop(x):
            if params['emb_on_cpu']:
                with tf.device('/cpu:0'):
                    emb = tf.nn.embedding_lookup(weights['E'], x)
            else:
                emb = tf.nn.embedding_lookup(weights['E'], x)
            emb_drop = tf.nn.dropout(emb, keep_rate)

            return emb_drop

        # Get lengths of unpadded sentences
        prem_seq_lengths, prem_mask = blocks.length(premise_x)
        hyp_seq_lengths, hyp_mask = blocks.length(hypothesis_x)

        ### BiLSTM layer ###
        premise_in = emb_drop(premise_x)
        hypothesis_in = emb_drop(hypothesis_x)

        results_premise_outs, results_c1 = blocks.biLSTMs(
            premise_in,
            dim=self.dim,
            seq_len=prem_seq_lengths,
            name='shared',
            cell_type=self.cell_type,
            cells=None,
            num_layers=self.num_layers,
            skip_connect=self.skip_connection,
            stop_grad=stop_grad,
            res_connect=self.res_connection,
            dropout_rate=0)
        results_hypothesis_outs, results_c2 = blocks.biLSTMs(
            hypothesis_in,
            dim=self.dim,
            seq_len=hyp_seq_lengths,
            name='shared',
            cell_type=self.cell_type,
            cells=None,
            num_layers=self.num_layers,
            skip_connect=self.skip_connection,
            stop_grad=stop_grad,
            res_connect=self.res_connection,
            dropout_rate=0)
        premise_outs = results_premise_outs[-1]
        hypothesis_outs = results_hypothesis_outs[-1]
        c1 = results_c1[-1]
        c2 = results_c2[-1]

        premise_bi = tf.concat(premise_outs, axis=2)
        hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

        ### Mean pooling
        premise_sum = tf.reduce_sum(premise_bi, 1)
        premise_ave = tf.div(
            premise_sum,
            tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1))

        hypothesis_sum = tf.reduce_sum(hypothesis_bi, 1)
        hypothesis_ave = tf.div(
            hypothesis_sum,
            tf.expand_dims(tf.cast(hyp_seq_lengths, tf.float32), -1))

        ### Mou et al. concat layer ###
        diff = tf.subtract(premise_ave, hypothesis_ave)
        mul = tf.multiply(premise_ave, hypothesis_ave)
        h = tf.concat([premise_ave, hypothesis_ave, diff, mul], 1)

        # MLP layer
        h_mlp = tf.nn.relu(tf.matmul(h, weights['W_mlp']) + weights['b_mlp'])
        # Dropout applied to classifier
        h_drop = tf.nn.dropout(h_mlp, keep_rate)

        # Get prediction
        logits = tf.matmul(h_drop, weights['W_cl']) + weights['b_cl']

        prem_vec, hyp_vec = premise_ave, hypothesis_ave

        return (prem_vec, hyp_vec, results_premise_outs,
                results_hypothesis_outs, logits, prem_seq_lengths, prem_mask,
                hyp_seq_lengths, hyp_mask, h_drop, premise_in, hypothesis_in,
                h_mlp)
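
The mean pooling above divides the summed BiLSTM outputs by the true sequence lengths; this gives a proper mean over real tokens only if outputs past sequence_length are zero vectors, as they are with tf.nn.dynamic_rnn (whether blocks.biLSTMs guarantees this is an assumption here). An isolated sketch of that step (TF 1.x assumed; shapes are illustrative):

import tensorflow as tf

outputs = tf.placeholder(tf.float32, [None, None, 2 * 300])   # [batch, time, 2*dim] BiLSTM outputs
seq_lengths = tf.placeholder(tf.int32, [None])                 # true (unpadded) lengths per example

summed = tf.reduce_sum(outputs, axis=1)                        # [batch, 2*dim]; padded steps contribute 0
mean = tf.div(summed,
              tf.expand_dims(tf.cast(seq_lengths, tf.float32), -1))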