def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = LSTM(ori_quests, self.rnn_size, self.batch_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = LSTM(cand_quests, self.rnn_size, self.batch_size)
        neg_a = LSTM(neg_quests, self.rnn_size, self.batch_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
        test_q_out = LSTM(test_q, self.rnn_size, self.batch_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_a_out = LSTM(test_a, self.rnn_size, self.batch_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)
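
# The helpers LSTM, max_pooling, feature2cos_sim and cal_loss_and_acc are imported from
# elsewhere in the repo and are not shown in this excerpt. Below is a minimal sketch of
# what the pooling, cosine-similarity and margin-loss helpers are assumed to compute;
# the function bodies and the default margin value are illustrative, not the repo's
# actual implementations.

def max_pooling(lstm_out):
    # lstm_out: [batch, steps, hidden] -> [batch, hidden], max over the time axis
    return tf.reduce_max(lstm_out, axis=1)

def feature2cos_sim(feat_q, feat_a):
    # cosine similarity between question and answer feature vectors, shape [batch]
    norm_q = tf.sqrt(tf.reduce_sum(tf.square(feat_q), axis=1))
    norm_a = tf.sqrt(tf.reduce_sum(tf.square(feat_a), axis=1))
    dot = tf.reduce_sum(tf.multiply(feat_q, feat_a), axis=1)
    return tf.div(dot, tf.multiply(norm_q, norm_a) + 1e-8)

def cal_loss_and_acc(ori_cand, ori_neg, m=0.1):
    # pairwise hinge (max-margin) loss: cos(q, a+) should exceed cos(q, a-) by at least m
    zero = tf.zeros_like(ori_cand)
    losses = tf.maximum(zero, m - ori_cand + ori_neg)
    loss = tf.reduce_mean(losses)
    # "accuracy" here is the fraction of triplets already separated by the margin
    acc = tf.reduce_mean(tf.cast(tf.equal(zero, losses), tf.float32))
    return loss, acc
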
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             loss_ratio, l2_reg_lambda=0.0, adjust_weight=False, label_weight=[],
             is_training=True, m=0.1):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps], name='test_q')
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps], name='test_a')
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # run lstm without attention
    with tf.variable_scope("LSTM_scope") as scope:
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))

        scope.reuse_variables()

        cand_a = biLSTM(cand_quests, self.rnn_size)
        neg_a = biLSTM(neg_quests, self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_a_out = biLSTM(test_a, self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # build LSTM network
    # with tf.variable_scope("LSTM_scope") as scope:
    #     ori_q = biLSTM(ori_quests, self.rnn_size)
    #     #ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    #
    #     scope.reuse_variables()
    #
    #     cand_a = biLSTM(cand_quests, self.rnn_size)
    #     neg_a = biLSTM(neg_quests, self.rnn_size)
    #     #cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
    #     #neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
    #
    #     test_q_out = biLSTM(test_q, self.rnn_size)
    #     #test_q_out = tf.nn.tanh(max_pooling(test_q_out))
    #     test_a_out = biLSTM(test_a, self.rnn_size)
    #     #test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # with tf.name_scope("att_weight"):
    #     # attention params
    #     att_W = {
    #         'Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, attention_matrix_size], stddev=0.1)),
    #         'Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, attention_matrix_size], stddev=0.1)),
    #         'Wms': tf.Variable(tf.truncated_normal([attention_matrix_size, 1], stddev=0.1))
    #     }
    #     ori_q_feat, cand_q_feat = get_feature(ori_q, cand_a, att_W)
    #     ori_nq_feat, neg_q_feat = get_feature(ori_q, neg_a, att_W)
    #     test_q_out, test_a_out = get_feature(test_q_out, test_a_out, att_W)

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER),
                            initializer=tf.random_normal_initializer())
        b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER),
                            initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        logits = tf.matmul(ori_q_feat, w) + b
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))
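
# biLSTM is a repo helper that is not shown in this excerpt. The multitask head's
# feature_size (and the 2 * rnn_size shapes used throughout these models) follow from
# concatenating the forward and backward RNN outputs. A minimal sketch of such a
# helper, assuming a plain tf.nn.bidirectional_dynamic_rnn wrapper; illustrative only:

def biLSTM(inputs, rnn_size):
    # inputs: [batch, steps, embedding_size] -> [batch, steps, 2 * rnn_size]
    fw_cell = tf.nn.rnn_cell.LSTMCell(rnn_size)
    bw_cell = tf.nn.rnn_cell.LSTMCell(rnn_size)
    (out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(
        fw_cell, bw_cell, inputs, dtype=tf.float32)
    # concatenate forward and backward hidden states per timestep
    return tf.concat([out_fw, out_bw], axis=2)
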
def __init__(self, batch_size, quest_len, answer_len, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, loss_ratio, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True, m=0.1):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.quest_len = quest_len
    self.answer_len = answer_len
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="ori_quest")
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="cand_quest")
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="neg_quest")
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="test_input_q")
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="test_input_a")
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_quest = tf.nn.embedding_lookup(W, self.test_input_q)
        test_answer = tf.nn.embedding_lookup(W, self.test_input_a)
    #ori_quests = tf.nn.dropout(ori_quests, self.keep_prob)
    #cand_quests = tf.nn.dropout(cand_quests, self.keep_prob)
    #neg_quests = tf.nn.dropout(neg_quests, self.keep_prob)

    # build LSTM network
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = biLSTM(cand_quests, self.rnn_size)
        neg_a = biLSTM(neg_quests, self.rnn_size)
        test_q = biLSTM(test_quest, self.rnn_size)
        test_a = biLSTM(test_answer, self.rnn_size)

    #----------------------------- cal attention -------------------------------
    with tf.variable_scope("attention", reuse=None) as scope:
        U = tf.get_variable("U", [2 * self.rnn_size, 2 * rnn_size],
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        G = tf.matmul(tf.matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                      cand_a, adjoint_b=True)
        delta_q = tf.nn.softmax(tf.reduce_max(G, 2))
        delta_a = tf.nn.softmax(tf.reduce_max(G, 1))
        neg_G = tf.matmul(tf.matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                          neg_a, adjoint_b=True)
        delta_neg_q = tf.nn.softmax(tf.reduce_max(neg_G, 2))
        delta_neg_a = tf.nn.softmax(tf.reduce_max(neg_G, 1))
    with tf.variable_scope("attention", reuse=True) as scope:
        test_G = tf.matmul(tf.matmul(test_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                           test_a, adjoint_b=True)
        delta_test_q = tf.nn.softmax(tf.reduce_max(test_G, 2))
        delta_test_a = tf.nn.softmax(tf.reduce_max(test_G, 1))

    #-------------------------- recalculate lstm output -------------------------
    #ori_q_feat = tf.squeeze(tf.matmul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #cand_q_feat = tf.squeeze(tf.matmul(cand_a,
    #    tf.reshape(delta_a, [-1, self.answer_len, 1]), adjoint_a=True))
    #neg_ori_q_feat = tf.squeeze(tf.matmul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #neg_q_feat = tf.squeeze(tf.matmul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1]), adjoint_a=True))
    #test_q_out = tf.squeeze(tf.matmul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #test_a_out = tf.squeeze(tf.matmul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1]), adjoint_a=True))

    ori_q_feat = max_pooling(tf.multiply(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1])))
    cand_q_feat = max_pooling(tf.multiply(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1])))
    neg_ori_q_feat = max_pooling(tf.multiply(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1])))
    neg_q_feat = max_pooling(tf.multiply(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1])))
    test_q_out = max_pooling(tf.multiply(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1])))
    test_a_out = max_pooling(tf.multiply(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1])))
    #-------------------------- recalculate lstm output end ---------------------

    # dropout
    #self.out_ori = tf.nn.dropout(self.out_ori, self.keep_prob)
    #self.out_cand = tf.nn.dropout(self.out_cand, self.keep_prob)
    #self.out_neg = tf.nn.dropout(self.out_neg, self.keep_prob)

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        fc1 = tf.layers.dense(ori_q_feat, feature_size * 2, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, feature_size, activation=tf.nn.relu, name='fc2')
        logits = tf.layers.dense(fc2, CAT_NUMBER, activation=tf.nn.sigmoid)
        # feature_size = int(ori_q_feat.get_shape()[1])
        # w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER), initializer=tf.random_normal_initializer())
        # b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER), initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        # logits = tf.matmul(ori_q_feat, w) + b
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))
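
# Shape sketch of the attention block above, using dummy sizes (batch=2, quest_len=3,
# answer_len=4, rnn_size=5). It only restates the model's own computation with
# concrete shapes to document the tensor dimensions; the names are local to this
# snippet and not part of the model.

import tensorflow as tf

batch, quest_len, answer_len, rnn_size = 2, 3, 4, 5
ori_q = tf.zeros([batch, quest_len, 2 * rnn_size])    # biLSTM output for the question
cand_a = tf.zeros([batch, answer_len, 2 * rnn_size])  # biLSTM output for the answer
U = tf.zeros([2 * rnn_size, 2 * rnn_size])

# G[b, i, j] = ori_q[b, i] . U . cand_a[b, j]  -> [batch, quest_len, answer_len]
G = tf.matmul(tf.matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch, 1, 1])),
              cand_a, adjoint_b=True)
delta_q = tf.nn.softmax(tf.reduce_max(G, 2))  # [batch, quest_len]:  per-word weight for the question
delta_a = tf.nn.softmax(tf.reduce_max(G, 1))  # [batch, answer_len]: per-word weight for the answer
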
def __init__(self, batch_size, quest_len, answer_len, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.quest_len = quest_len
    self.answer_len = answer_len
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="ori_quest")
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="cand_quest")
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="neg_quest")
    self.test_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="test_quest")
    self.test_input_answer = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="test_cand_quest")

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_quest = tf.nn.embedding_lookup(W, self.test_input_quests)
        test_answer = tf.nn.embedding_lookup(W, self.test_input_answer)
    #ori_quests = tf.nn.dropout(ori_quests, self.keep_prob)
    #cand_quests = tf.nn.dropout(cand_quests, self.keep_prob)
    #neg_quests = tf.nn.dropout(neg_quests, self.keep_prob)

    # build LSTM network
    # NOTE: this file uses the pre-TF-1.0 op names (tf.batch_matmul, tf.mul);
    # on TF >= 1.0 these correspond to tf.matmul and tf.multiply.
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = BILSTM(ori_quests, self.rnn_size)
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = BILSTM(cand_quests, self.rnn_size)
        neg_a = BILSTM(neg_quests, self.rnn_size)
        test_q = BILSTM(test_quest, self.rnn_size)
        test_a = BILSTM(test_answer, self.rnn_size)

    #----------------------------- cal attention -------------------------------
    with tf.variable_scope("attention", reuse=None) as scope:
        U = tf.get_variable("U", [2 * self.rnn_size, 2 * rnn_size],
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        G = tf.nn.tanh(tf.batch_matmul(
            tf.batch_matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
            cand_a, adj_y=True))
        delta_q = tf.nn.softmax(tf.reduce_max(G, 2))
        delta_a = tf.nn.softmax(tf.reduce_max(G, 1))
        neg_G = tf.nn.tanh(tf.batch_matmul(
            tf.batch_matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
            neg_a, adj_y=True))
        delta_neg_q = tf.nn.softmax(tf.reduce_max(neg_G, 2))
        delta_neg_a = tf.nn.softmax(tf.reduce_max(neg_G, 1))
    with tf.variable_scope("attention", reuse=True) as scope:
        test_G = tf.nn.tanh(tf.batch_matmul(
            tf.batch_matmul(test_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
            test_a, adj_y=True))
        delta_test_q = tf.nn.softmax(tf.reduce_max(test_G, 2))
        delta_test_a = tf.nn.softmax(tf.reduce_max(test_G, 1))

    #-------------------------- recalculate lstm output -------------------------
    #ori_q_feat = tf.squeeze(tf.batch_matmul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1]), adj_x=True))
    #cand_q_feat = tf.squeeze(tf.batch_matmul(cand_a,
    #    tf.reshape(delta_a, [-1, self.answer_len, 1]), adj_x=True))
    #neg_ori_q_feat = tf.squeeze(tf.batch_matmul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1]), adj_x=True))
    #neg_q_feat = tf.squeeze(tf.batch_matmul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1]), adj_x=True))
    #test_q_feat = tf.squeeze(tf.batch_matmul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1]), adj_x=True))
    #test_a_feat = tf.squeeze(tf.batch_matmul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1]), adj_x=True))

    ori_q_feat = max_pooling(tf.mul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1])))
    cand_q_feat = max_pooling(tf.mul(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1])))
    neg_ori_q_feat = max_pooling(tf.mul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1])))
    neg_q_feat = max_pooling(tf.mul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1])))
    test_q_feat = max_pooling(tf.mul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1])))
    test_a_feat = max_pooling(tf.mul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1])))
    #-------------------------- recalculate lstm output end ---------------------

    # dropout
    #self.out_ori = tf.nn.dropout(self.out_ori, self.keep_prob)
    #self.out_cand = tf.nn.dropout(self.out_cand, self.keep_prob)
    #self.out_neg = tf.nn.dropout(self.out_neg, self.keep_prob)

    # cal cosine similarity
    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(neg_ori_q_feat, neg_q_feat)
    self.test_q_a = feature2cos_sim(test_q_feat, test_a_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             loss_ratio, l2_reg_lambda=0.0, adjust_weight=False, label_weight=[],
             is_training=True, m=0.1):
    """
    LSTM-BASED DEEP LEARNING MODELS FOR NON-FACTOID ANSWER SELECTION
    """
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network
    U = tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.embedding_size], stddev=0.1), name="U")
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_att_weight = tf.sigmoid(tf.matmul(
            cand_quests,
            tf.reshape(tf.expand_dims(tf.matmul(ori_q_feat, U), 1), [-1, self.embedding_size, 1])))
        neg_att_weight = tf.sigmoid(tf.matmul(
            neg_quests,
            tf.reshape(tf.expand_dims(tf.matmul(ori_q_feat, U), 1), [-1, self.embedding_size, 1])))
        cand_a = biLSTM(
            tf.multiply(cand_quests, tf.tile(cand_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        neg_a = biLSTM(
            tf.multiply(neg_quests, tf.tile(neg_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_att_weight = tf.sigmoid(tf.matmul(
            test_a,
            tf.reshape(tf.expand_dims(tf.matmul(test_q_out, U), 1), [-1, self.embedding_size, 1])))
        test_a_out = biLSTM(
            tf.multiply(test_a, tf.tile(test_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        fc1 = tf.layers.dense(ori_q_feat, feature_size * 2, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, feature_size, activation=tf.nn.relu, name='fc2')
        logits = tf.layers.dense(fc2, CAT_NUMBER, activation=tf.nn.sigmoid)
        # feature_size = int(ori_q_feat.get_shape()[1])
        # w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER), initializer=tf.random_normal_initializer())
        # b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER), initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        # logits = tf.matmul(ori_q_feat, w) + b
        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))

def assign_new_lr(self, session, lr_value):
    session.run(self._lr_update, feed_dict={self.new_lr: lr_value})
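
# Typical use of assign_new_lr from a training script: the learning rate lives in the
# graph (self.lr) and is overwritten through the new_lr placeholder, so it can be
# decayed between epochs. A minimal sketch, assuming `model` is an instance of this
# class; the base rate, decay factor and schedule below are hypothetical training
# settings, not values taken from the repo.

base_lr, lr_decay = 0.1, 0.5
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        # halve the learning rate every 3 epochs (illustrative schedule)
        model.assign_new_lr(sess, base_lr * (lr_decay ** (epoch // 3)))
        # ... run training batches with an optimizer built on model.lr ...
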
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             l2_reg_lambda=0.0, adjust_weight=False, label_weight=[], is_training=True):
    """
    LSTM-BASED DEEP LEARNING MODELS FOR NON-FACTOID ANSWER SELECTION
    """
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network
    U = tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.embedding_size], stddev=0.1), name="U")
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_att_weight = tf.sigmoid(tf.batch_matmul(
            cand_quests,
            tf.reshape(tf.expand_dims(tf.batch_matmul(ori_q_feat, U), 1), [-1, self.embedding_size, 1])))
        neg_att_weight = tf.sigmoid(tf.batch_matmul(
            neg_quests,
            tf.reshape(tf.expand_dims(tf.batch_matmul(ori_q_feat, U), 1), [-1, self.embedding_size, 1])))
        cand_a = biLSTM(
            tf.mul(cand_quests, tf.tile(cand_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        neg_a = biLSTM(
            tf.mul(neg_quests, tf.tile(neg_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_att_weight = tf.sigmoid(tf.batch_matmul(
            test_a,
            tf.reshape(tf.expand_dims(tf.batch_matmul(test_q_out, U), 1), [-1, self.embedding_size, 1])))
        test_a_out = biLSTM(
            tf.mul(test_a, tf.tile(test_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)
def __init__(self, batch_size, steps, embeddings, embedding_size, rnn_size,
             num_rnn_layers, max_grad_norm, filter_sizes, num_filters, self_att=True,
             model_choice=0, mode="qa", rnn_windows=5, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True):
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.steps = steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training
    self.filter_sizes = list(map(int, filter_sizes.split(',')))
    self.num_filters = num_filters
    self.mode_choice = model_choice
    self.rnn_windows = rnn_windows
    self.att = self_att

    self.keep_pron = tf.placeholder(tf.float32, name='keep_prob')
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name='new-learning-rate')
    self.lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input = tf.placeholder(tf.int32, shape=[None, self.steps], name='ori_inputs_quests')
    self.cand_input = tf.placeholder(tf.int32, shape=[None, self.steps], name='cand_inputs_quests')
    self.neg_input = tf.placeholder(tf.int32, shape=[None, self.steps], name='neg_inputs_quests')
    self.test_q = tf.placeholder(tf.int32, shape=[None, self.steps], name='test_q')
    self.test_a = tf.placeholder(tf.int32, shape=[None, self.steps], name='test_a')

    self.ori_q_len = tf.count_nonzero(self.ori_input, 1)
    self.cand_a_len = tf.count_nonzero(self.cand_input, 1)
    self.neg_a_len = tf.count_nonzero(self.neg_input, 1)
    self.test_q_len = tf.count_nonzero(self.test_q, 1)
    self.test_a_len = tf.count_nonzero(self.test_a, 1)

    # embedding layer
    with tf.device('/cpu:0'), tf.name_scope('embedding_layer'):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name='W')  # fixed: was "trainable=Trur"
        ori_que = tf.nn.embedding_lookup(W, self.ori_input)
        cand_que = tf.nn.embedding_lookup(W, self.cand_input)
        neg_que = tf.nn.embedding_lookup(W, self.neg_input)
        test_que = tf.nn.embedding_lookup(W, self.test_q)
        test_ans = tf.nn.embedding_lookup(W, self.test_a)

    # biLSTM
    with tf.variable_scope("LSTM_scope1", reuse=None):
        ori_q = bilstm(ori_que, self.rnn_size)
    with tf.variable_scope("LSTM_scope1", reuse=True):
        test_q = bilstm(test_que, self.rnn_size)
        test_a = bilstm(test_ans, self.rnn_size)
        if mode == 'qq':
            cand_a = bilstm(cand_que, self.rnn_size)
            neg_a = bilstm(neg_que, self.rnn_size)
    if mode == 'qa':
        with tf.variable_scope("LSTM_scope2", reuse=None):
            cand_a = bilstm(cand_que, self.rnn_size)
        with tf.variable_scope("LSTM_scope2", reuse=True):
            neg_a = bilstm(neg_que, self.rnn_size)

    ori_q = mask(ori_q, self.ori_q_len, self.steps)
    cand_a = mask(cand_a, self.cand_a_len, self.steps)
    neg_a = mask(neg_a, self.neg_a_len, self.steps)
    test_q = mask(test_q, self.test_q_len, self.steps)
    test_a = mask(test_a, self.test_a_len, self.steps)

    # convolution kernels, one per filter size
    self.kernel = []  # fixed: the list was never initialised before append
    for i, filter_size in enumerate(self.filter_sizes):  # fixed: iterate the parsed int list, not the raw string
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            if self.mode_choice == 3:
                filter_shape = [filter_size, self.embedding_size, 1, self.num_filters]
            else:
                filter_shape = [filter_size, self.rnn_size * 2, 1, self.num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name='Kernel_W')
            b = tf.Variable(tf.constant(0.1, shape=[self.num_filters]), name='Kernel_b')
            self.kernel.append((W, b))

    in_dim = ori_que.get_shape()[2]
    with tf.variable_scope('door'):
        if self.mode_choice == 5 or self.mode_choice == 6:
            door_w = {
                'd_w': tf.get_variable('d_w', [100, 300]),
                'd_b': tf.get_variable('d_b', [1, 300]),
            }
        else:
            door_w = {
                'd_w': tf.get_variable('d_w', [in_dim, in_dim]),
                'd_b': tf.get_variable('d_b', [in_dim, in_dim]),
            }

    with tf.variable_scope('change'):
        if self.mode_choice == 5 or self.mode_choice == 6:
            change_w = {
                'c_w': tf.get_variable('c_w', [300, 300]),
                'c_b': tf.get_variable('c_b', [1, 300]),
            }
        else:
            change_w = {
                'c_w': tf.get_variable('c_w', [in_dim, in_dim]),
                'c_b': tf.get_variable('c_b', [1, in_dim]),
            }

    with tf.variable_scope('self_att'):
        self_att = {
            'att_w': tf.get_variable('att_w', [300, 300]),
            'att_b': tf.get_variable('att_b', [1, 300]),
            'att_u': tf.get_variable('att_u', [300, 1])
        }

    if self.mode_choice == 0:  ## biLSTM + mask + highway + cnn_qa
        ori_q_highway, cand_a_highway = get_rnn2cnn_out(ori_q, cand_a, door_w, change_w, self.steps)
        _, neg_a_highway = get_rnn2cnn_out(ori_q, neg_a, door_w, change_w, self.steps)
        test_q_highway, test_a_highway = get_rnn2cnn_out(test_q, test_a, door_w, change_w, self.steps)
        print(ori_q_highway.shape)
        ori_q_fea, cand_a_fea, neg_a_fea = cnn_qa(ori_q=ori_q_highway, can_a=cand_a_highway,
                                                  neg_a=neg_a_highway, seq_len=self.steps,
                                                  hidden_size=2 * self.rnn_size,
                                                  filter_sizes=self.filter_sizes,
                                                  num_filters=self.num_filters)
        # NOTE: the keyword for the positive answer differs between the two cnn_qa calls
        # (can_a above, cand_a below); only one of them can match cnn_qa's signature.
        test_q_fea, test_a_fea, _ = cnn_qa(ori_q=test_q_highway, cand_a=test_a_highway,
                                           neg_a=neg_a_highway, seq_len=self.steps,
                                           hidden_size=2 * self.rnn_size,
                                           filter_sizes=self.filter_sizes,
                                           num_filters=self.num_filters)

    if self.mode_choice == 1:  ## biLSTM + mask + max_pooling
        ori_q_fea, cand_a_fea = get_feature_mask(ori_q, cand_a, self.ori_q_len, self.cand_a_len, self.steps)
        # fixed: original called get_rnn2cnn_out here, but the arguments (sequence lengths)
        # match get_feature_mask, as in the two surrounding calls
        _, neg_a_fea = get_feature_mask(ori_q, neg_a, self.ori_q_len, self.neg_a_len, self.steps)
        test_q_fea, test_a_fea = get_feature_mask(test_q, test_a, self.test_q_len, self.test_a_len, self.steps)

    if self.mode_choice == 2:  ## biLSTM + mask + highway + max_pooling
        ori_q_highway, cand_a_highway = get_rnn2cnn_out(ori_q, cand_a, door_w, change_w, self.steps)
        _, neg_a_highway = get_rnn2cnn_out(ori_q, neg_a, door_w, change_w, self.steps)
        test_q_highway, test_a_highway = get_rnn2cnn_out(test_q, test_a, door_w, change_w, self.steps)
        print(ori_q_highway.shape)
        ori_q_fea, cand_a_fea = get_feature(ori_q_highway, cand_a_highway)
        ori_nq_fea, neg_a_fea = get_feature(ori_q_highway, neg_a_highway)
        test_q_fea, test_a_fea = get_feature(test_q_highway, test_a_highway)  # fixed: dropped stray third target

    if self.mode_choice == 3:  ## embedding + CNN
        ori_q_fea, cand_a_fea, neg_a_fea = cnn_qa(ori_q=ori_que, can_a=cand_que, neg_a=neg_que,
                                                  seq_len=self.steps, hidden_size=self.embedding_size,
                                                  filter_sizes=self.filter_sizes,
                                                  num_filters=self.num_filters)
        test_q_fea, test_a_fea, _ = cnn_qa(ori_q=test_q, cand_a=test_a, neg_a=neg_que,
                                           seq_len=self.steps, hidden_size=self.embedding_size,
                                           filter_sizes=self.filter_sizes,
                                           num_filters=self.num_filters)

    if self.mode_choice == 4:  ## biLSTM + mask + highway + highway + maxpooling
        ori_q_highway, cand_a_highway = get_rnn2cnn_out(ori_q, cand_a, door_w, change_w, self.steps)
        _, neg_a_highway = get_rnn2cnn_out(ori_q, neg_a, door_w, change_w, self.steps)
        test_q_highway, test_a_highway = get_rnn2cnn_out(test_q, test_a, door_w, change_w, self.steps)
        ori_q_2highway, cand_a_2highway = get_rnn2cnn_out(ori_q_highway, cand_a_highway, door_w, change_w, self.steps)
        _, neg_a_2highway = get_rnn2cnn_out(ori_q_highway, neg_a_highway, door_w, change_w, self.steps)
        test_q_2highway, test_a_2highway = get_rnn2cnn_out(test_q_highway, test_a_highway, door_w, change_w, self.steps)
        ori_q_fea, cand_a_fea = get_feature(ori_q_2highway, cand_a_2highway)
        ori_nq_fea, neg_a_fea = get_feature(ori_q_2highway, neg_a_2highway)
        test_q_fea, test_a_fea = get_feature(test_q_2highway, test_a_2highway)  # fixed: dropped stray third target

    if self.mode_choice == 5:  ## biLSTM + mask + concat + highway + attention
        ori_q_concat = tf.concat([ori_q, ori_que], 2)
        cand_a_concat = tf.concat([cand_a, cand_que], 2)
        neg_a_concat = tf.concat([neg_a, neg_que], 2)
        test_q_concat = tf.concat([test_q, test_que], 2)
        test_a_concat = tf.concat([test_a, test_ans], 2)
        print(ori_q_concat.shape)
        ori_q_highway, cand_a_highway = get_rnn2cnn_out_hxh(ori_que, ori_q_concat, cand_que, cand_a_concat,
                                                            door_w, change_w, self.steps)
        print(ori_q_highway)
        _, neg_a_highway = get_rnn2cnn_out_hxh(ori_que, ori_q_concat, neg_que, neg_a_concat,
                                               door_w, change_w, self.steps)
        test_q_highway, test_a_highway = get_rnn2cnn_out(test_que, test_q_concat, test_ans, test_a_concat,
                                                         door_w, change_w, self.steps)
        if self.att:
            ori_q_fea = get_feature_att(ori_q_highway, self_att, self.ori_q_len, self.steps)
            cand_a_fea = get_feature_att(cand_a_concat, self_att, self.cand_a_len, self.steps)
            neg_a_fea = get_feature_att(neg_a_concat, self_att, self.neg_a_len, self.steps)
            test_q_fea = get_feature_att(test_q_highway, self_att, self.test_q_len, self.steps)
            test_a_fea = get_feature_att(test_a_concat, self_att, self.neg_a_len, self.steps)
            self.ori_q_fea = tf.reshape(ori_q_fea, [-1, 300], name='ori_q_feature')
            print('ori_q_shape is :', ori_q_fea.shape)
        else:
            ori_q_fea, cand_a_fea = get_feature(ori_q_highway, cand_a_highway)
            _, neg_a_fea = get_feature(ori_q_highway, neg_a_highway)
            test_q_fea, test_a_fea = get_feature(test_q_highway, test_a_highway)
            self.ori_q_fea = tf.reshape(ori_q_fea, [-1, 300], name='ori_q_feature')

    if self.mode_choice == 6:  ## model 5 - attention + window + maxpooling
        ori_q_concat = tf.concat([ori_q, ori_que], 2)
        cand_a_concat = tf.concat([cand_a, cand_que], 2)
        neg_a_concat = tf.concat([neg_a, neg_que], 2)
        test_q_concat = tf.concat([test_q, test_que], 2)
        test_a_concat = tf.concat([test_a, test_ans], 2)
        print(ori_q_concat.shape)
        ori_q_highway, cand_a_highway = get_rnn2cnn_out_hxh(ori_que, ori_q_concat, cand_que, cand_a_concat,
                                                            door_w, change_w, self.steps)
        print(ori_q_highway)
        _, neg_a_highway = get_rnn2cnn_out_hxh(ori_que, ori_q_concat, neg_que, neg_a_concat,
                                               door_w, change_w, self.steps)
        test_q_highway, test_a_highway = get_rnn2cnn_out(test_que, test_q_concat, test_ans, test_a_concat,
                                                         door_w, change_w, self.steps)

        ori_q_list, cand_a_list, neg_a_list, test_q_list, test_a_list = [], [], [], [], []
        for i in range(self.steps - self.rnn_windows + 1):
            ori_q_slice = tf.slice(ori_q_highway, [0, i, 0], [-1, self.rnn_windows, 300])
            cand_a_slice = tf.slice(cand_a_highway, [0, i, 0], [-1, self.rnn_windows, 300])
            neg_a_slice = tf.slice(neg_a_highway, [0, i, 0], [-1, self.rnn_windows, 300])
            test_q_slice = tf.slice(test_q_highway, [0, i, 0], [-1, self.rnn_windows, 300])
            test_a_slice = tf.slice(test_a_highway, [0, i, 0], [-1, self.rnn_windows, 300])
            # fixed: the original `if i > 0: tf.get_variable_scope().reuse` was a no-op and the
            # scopes were opened with reuse=None/True unconditionally; reuse only after step 0
            with tf.variable_scope("LSTM_scope3", reuse=(i > 0)):
                ori_q_slice_fea = bilstm(ori_q_slice, self.rnn_size)
            with tf.variable_scope("LSTM_scope4", reuse=(i > 0)):  # fixed: was tf.variabel_scope
                cand_a_slice_fea = bilstm(cand_a_slice, self.rnn_size)  # fixed: doubled parentheses
                neg_a_slice_fea = bilstm(neg_a_slice, self.rnn_size)
                test_q_slice_fea = bilstm(test_q_slice, self.rnn_size)
                test_a_slice_fea = bilstm(test_a_slice, self.rnn_size)
            print(ori_q_slice_fea.shape)
            ori_q_f, cand_a_f = get_feature(ori_q_slice_fea, cand_a_slice_fea)
            _, neg_a_f = get_feature(ori_q_slice_fea, neg_a_slice_fea)
            test_q_f, test_a_f = get_feature(test_q_slice_fea, test_a_slice_fea)
            print('model 6 ori_q_fea.shape:', ori_q_f.shape)
            ori_q_list.append(ori_q_f)
            cand_a_list.append(cand_a_f)
            neg_a_list.append(neg_a_f)
            test_q_list.append(test_q_f)
            test_a_list.append(test_a_f)

        ori_q_feat = tf.transpose(ori_q_list, perm=[1, 0, 2])
        cand_a_feat = tf.transpose(cand_a_list, perm=[1, 0, 2])
        neg_a_feat = tf.transpose(neg_a_list, perm=[1, 0, 2])
        test_q_feat = tf.transpose(test_q_list, perm=[1, 0, 2])
        test_a_feat = tf.transpose(test_a_list, perm=[1, 0, 2])
        ori_q_fea, cand_a_fea = get_feature(ori_q_feat, cand_a_feat)
        _, neg_a_fea = get_feature(ori_q_feat, neg_a_feat)
        test_q_fea, test_a_fea = get_feature(test_q_feat, test_a_feat)

    self.ori_q_fea = ori_q_fea
    self.ori_cand = feature2cos(ori_q_fea, cand_a_fea)  # fixed: was cand_a_feat, which only exists in mode 6
    self.ori_neg = feature2cos(ori_q_fea, neg_a_fea)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
    self.test_q_a = feature2cos(test_q_fea, test_a_fea)
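
# mask, get_feature and feature2cos are repo helpers that are not shown in this file.
# Below is a minimal sketch of what they are assumed to do, based on how they are
# called above (the lengths come from tf.count_nonzero over the padded id matrices);
# the bodies are illustrative, not the repo's actual implementations.

def mask(lstm_out, seq_len, steps):
    # zero out the outputs at padding positions
    m = tf.sequence_mask(seq_len, steps, dtype=tf.float32)  # [batch, steps]
    return lstm_out * tf.expand_dims(m, -1)                 # [batch, steps, hidden]

def get_feature(q_out, a_out):
    # reduce both sequences to fixed-size vectors by max pooling over time
    return tf.reduce_max(q_out, axis=1), tf.reduce_max(a_out, axis=1)

def feature2cos(feat_q, feat_a):
    # cosine similarity, same idea as feature2cos_sim in the other model files
    q_norm = tf.nn.l2_normalize(feat_q, 1)
    a_norm = tf.nn.l2_normalize(feat_a, 1)
    return tf.reduce_sum(q_norm * a_norm, axis=1)
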