def add_embeddings(self):
    print('add embeddings')
    if self.embeddings is not None:
        print("load embedding")
        # Initialize from pre-trained embeddings.
        W = tf.Variable(np.array(self.embeddings), name="W",
                        dtype="float32", trainable=self.trainable)
    else:
        print("random embedding")
        # No pre-trained embeddings: initialize uniformly in [-1.0, 1.0].
        W = tf.Variable(
            tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0),
            name="W", trainable=self.trainable)
    self.embedding_W = W
    # self.overlap_W = tf.Variable(a, name="W", trainable=True)
    self.para.append(self.embedding_W)

    # Look up embeddings for the question, the positive answer and the
    # negative answer.
    self.q_embedding = tf.nn.embedding_lookup(self.embedding_W, self.question)
    self.a_embedding = tf.nn.embedding_lookup(self.embedding_W, self.answer)
    self.a_neg_embedding = tf.nn.embedding_lookup(self.embedding_W, self.answer_negative)

    # Real (unpadded) sequence lengths and the corresponding masks.
    self.q_len, self.q_mask = blocks.length(self.question)
    self.a_len, self.a_mask = blocks.length(self.answer)
    self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)
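# For reference, a minimal sketch of what `blocks.length` plausibly computes,
# assuming padding uses token id 0 (the actual implementation in `blocks` may
# differ): the unpadded length of each sequence and a float mask over real
# tokens. `_length_sketch` is a hypothetical name, not part of the original code.
def _length_sketch(sequence):
    # 1 where a real token sits, 0 at padding positions: [batch, time].
    used = tf.sign(tf.abs(sequence))
    # Unpadded length per example: [batch].
    seq_len = tf.cast(tf.reduce_sum(used, 1), tf.int32)
    # Float mask over real tokens, usable for masked attention or pooling.
    mask = tf.cast(used, tf.float32)
    return seq_len, mask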
def create_placeholder(self):
    print('Create placeholders')
    # Sentence length varies from batch to batch, so both dimensions are None.
    self.question = tf.placeholder(tf.int32, [None, None], name='input_question')
    self.answer = tf.placeholder(tf.int32, [None, None], name='input_answer')
    self.answer_negative = tf.placeholder(tf.int32, [None, None], name='input_right')
    self.batch_size = tf.shape(self.question)[0]

    # Real (unpadded) sequence lengths and the corresponding masks.
    self.q_len, self.q_mask = blocks.length(self.question)
    self.a_len, self.a_mask = blocks.length(self.answer)
    self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)

    self.dropout_keep_prob_holder = tf.placeholder(tf.float32, name='dropout_keep_prob')
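# A hypothetical usage sketch for the placeholders above; the model handle,
# batch arrays, and helper name are assumptions, not part of the original code.
# Batches are padded int32 id matrices; use keep_prob=1.0 at evaluation time.
def _feed_dict_sketch(model, q_batch, a_batch, neg_batch, keep_prob=0.8):
    return {
        model.question: q_batch,                    # [batch, max_q_len]
        model.answer: a_batch,                      # [batch, max_a_len]
        model.answer_negative: neg_batch,           # [batch, max_a_len]
        model.dropout_keep_prob_holder: keep_prob,  # dropout keep probability
    }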
# A second variant of create_placeholder (presumably from a different model
# class) that additionally exposes the per-batch max lengths and position
# placeholders.
def create_placeholder(self):
    print('Create placeholders')
    # Sentence length varies from batch to batch, so both dimensions are None.
    self.question = tf.placeholder(tf.int32, [None, None], name='input_question')
    self.max_input_left = tf.shape(self.question)[1]
    self.batch_size = tf.shape(self.question)[0]
    self.answer = tf.placeholder(tf.int32, [None, None], name='input_answer')
    self.max_input_right = tf.shape(self.answer)[1]
    self.answer_negative = tf.placeholder(tf.int32, [None, None], name='input_right')

    # Position indices for the positive and negative answer tokens.
    self.pos_position = tf.placeholder(tf.int32, [None, None], name='pos_position')
    self.neg_position = tf.placeholder(tf.int32, [None, None], name='neg_position')

    # Real (unpadded) sequence lengths and the corresponding masks.
    self.q_len, self.q_mask = blocks.length(self.question)
    self.a_len, self.a_mask = blocks.length(self.answer)
    self.a_neg_len, self.a_neg_mask = blocks.length(self.answer_negative)
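# Because both placeholder dimensions are None, each batch only needs to be
# padded to its own longest sentence. A hypothetical padding helper, assuming
# pad id 0 to match the length/mask convention sketched above:
def _pad_batch_sketch(sequences, pad_id=0):
    # sequences: list of lists of int token ids with varying lengths.
    max_len = max(len(s) for s in sequences)
    return [s + [pad_id] * (max_len - len(s)) for s in sequences]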
def construct_hex_vec_selfatt(self, inputs, params, phs):
    keep_rate, stop_grad, _ = phs
    premise_x, hypothesis_x = inputs

    with tf.variable_scope("hex_superficial_selfatt", reuse=tf.AUTO_REUSE):
        # Embedding lookup with dropout on both sentences.
        emb_premise = tf.nn.embedding_lookup(self.embeddings, premise_x)
        emb_premise_drop = tf.nn.dropout(emb_premise, keep_rate)
        emb_hypothesis = tf.nn.embedding_lookup(self.embeddings, hypothesis_x)
        emb_hypothesis_drop = tf.nn.dropout(emb_hypothesis, keep_rate)

        # Lengths of the unpadded sentences and the corresponding masks.
        prem_seq_lengths, prem_mask = blocks.length(premise_x)
        hyp_seq_lengths, hyp_mask = blocks.length(hypothesis_x)

        # Self-attention over each sentence, then sum over time to get a
        # fixed-size sentence representation.
        prem_self_att = blocks.simple_self_attention_block(
            emb_premise_drop, params['dim_emb'], prem_seq_lengths, prem_mask,
            scope='superficial_prem_self_att')
        hypo_self_att = blocks.simple_self_attention_block(
            emb_hypothesis_drop, params['dim_emb'], hyp_seq_lengths, hyp_mask,
            scope='superficial_hypo_self_att')
        premise_rep = tf.reduce_sum(prem_self_att, 1)
        hypothesis_rep = tf.reduce_sum(hypo_self_att, 1)

        ## Combinations: difference and element-wise product.
        h_diff = premise_rep - hypothesis_rep
        h_mul = premise_rep * hypothesis_rep

        ### MLP input: [premise; hypothesis; difference; product].
        mlp_input = tf.concat([premise_rep, hypothesis_rep, h_diff, h_mul], 1)

    return premise_rep, hypothesis_rep, mlp_input
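# A minimal sketch of a masked self-attention block with the same call
# signature as `blocks.simple_self_attention_block`; the real implementation
# may differ. It scores each timestep, softmaxes over the unpadded positions,
# and reweights the inputs so that the `tf.reduce_sum(..., 1)` above yields an
# attention-weighted sentence vector. `seq_len` is unused here because the
# mask already encodes the unpadded lengths.
def _simple_self_attention_sketch(inputs, dim, seq_len, mask, scope):
    # inputs: [batch, time, dim]; mask: [batch, time] with 1.0 at real tokens.
    with tf.variable_scope(scope):
        hidden = tf.layers.dense(inputs, dim, activation=tf.nn.tanh)
        scores = tf.layers.dense(hidden, 1, use_bias=False)  # [batch, time, 1]
        # Push padded positions to a large negative value before the softmax.
        scores += (1.0 - tf.expand_dims(mask, -1)) * -1e9
        alphas = tf.nn.softmax(scores, axis=1)               # attention weights
        return inputs * alphas                               # [batch, time, dim]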
def forward_model(self, inputs, weights, params, phs):
    keep_rate, stop_grad, zero_protect_ph = phs
    premise_x, hypothesis_x = inputs

    ## Function for embedding lookup and dropout at the embedding layer
    def emb_drop(x):
        if params['emb_on_cpu']:
            # Keep the (large) embedding table on the CPU.
            with tf.device('/cpu:0'):
                emb = tf.nn.embedding_lookup(weights['E'], x)
        else:
            emb = tf.nn.embedding_lookup(weights['E'], x)
        emb_drop = tf.nn.dropout(emb, keep_rate)
        return emb_drop

    # Get lengths of unpadded sentences
    prem_seq_lengths, prem_mask = blocks.length(premise_x)
    hyp_seq_lengths, hyp_mask = blocks.length(hypothesis_x)

    ### BiLSTM layer ###
    premise_in = emb_drop(premise_x)
    hypothesis_in = emb_drop(hypothesis_x)
    results_premise_outs, results_c1 = blocks.biLSTMs(
        premise_in, dim=self.dim, seq_len=prem_seq_lengths, name='shared',
        cell_type=self.cell_type, cells=None, num_layers=self.num_layers,
        skip_connect=self.skip_connection, stop_grad=stop_grad,
        res_connect=self.res_connection, dropout_rate=0)
    results_hypothesis_outs, results_c2 = blocks.biLSTMs(
        hypothesis_in, dim=self.dim, seq_len=hyp_seq_lengths, name='shared',
        cell_type=self.cell_type, cells=None, num_layers=self.num_layers,
        skip_connect=self.skip_connection, stop_grad=stop_grad,
        res_connect=self.res_connection, dropout_rate=0)

    # Take the outputs of the last BiLSTM layer and concatenate the forward
    # and backward directions.
    premise_outs = results_premise_outs[-1]
    hypothesis_outs = results_hypothesis_outs[-1]
    c1 = results_c1[-1]
    c2 = results_c2[-1]
    premise_bi = tf.concat(premise_outs, axis=2)
    hypothesis_bi = tf.concat(hypothesis_outs, axis=2)

    ### Mean pooling: sum over time, divide by the true sequence length
    premise_sum = tf.reduce_sum(premise_bi, 1)
    premise_ave = tf.div(
        premise_sum, tf.expand_dims(tf.cast(prem_seq_lengths, tf.float32), -1))
    hypothesis_sum = tf.reduce_sum(hypothesis_bi, 1)
    hypothesis_ave = tf.div(
        hypothesis_sum, tf.expand_dims(tf.cast(hyp_seq_lengths, tf.float32), -1))

    ### Mou et al. concat layer ###
    diff = tf.subtract(premise_ave, hypothesis_ave)
    mul = tf.multiply(premise_ave, hypothesis_ave)
    h = tf.concat([premise_ave, hypothesis_ave, diff, mul], 1)

    # MLP layer
    h_mlp = tf.nn.relu(tf.matmul(h, weights['W_mlp']) + weights['b_mlp'])
    # Dropout applied to classifier
    h_drop = tf.nn.dropout(h_mlp, keep_rate)
    # Get prediction
    logits = tf.matmul(h_drop, weights['W_cl']) + weights['b_cl']

    prem_vec, hyp_vec = premise_ave, hypothesis_ave
    return (prem_vec, hyp_vec, results_premise_outs, results_hypothesis_outs,
            logits, prem_seq_lengths, prem_mask, hyp_seq_lengths, hyp_mask,
            h_drop, premise_in, hypothesis_in, h_mlp)
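# A hypothetical training objective on top of the `logits` returned by
# forward_model; the labels tensor and helper name are assumptions, not part
# of the original code.
def _loss_sketch(logits, labels):
    # Standard softmax cross-entropy over the classifier logits;
    # `labels` is an int32 [batch] tensor of gold class ids.
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(xent)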