Пример #1
0
    def __init__(self,
                 batch_size,
                 num_unroll_steps,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True):
        """Build the TensorFlow graph of an LSTM question/answer matcher.

        The graph embeds an original question, a candidate and a negative
        input with one shared trainable embedding matrix, encodes each with
        ``LSTM`` under the shared ``"LSTM_scope"`` variable scope, applies
        ``max_pooling`` followed by ``tanh`` and scores pairs with
        ``feature2cos_sim``. A second pair of placeholders feeds a test-time
        question/answer score.

        Args:
            batch_size: Stored on the instance and passed to every ``LSTM``
                call.
            num_unroll_steps: Second dimension of every token-id placeholder.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Stored on the instance; not used by this method.
            rnn_size: Passed to every ``LSTM`` call.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.

        Attributes set for callers include ``loss``, ``acc``, ``ori_cand``,
        ``ori_neg`` and ``test_q_a``.
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob.
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by running ``_lr_update`` with ``new_lr`` fed.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        token_shape = [None, self.num_unroll_steps]
        self.ori_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.cand_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.neg_input_quests = tf.placeholder(tf.int32, shape=token_shape)

        self.test_input_q = tf.placeholder(tf.int32, shape=token_shape)
        self.test_input_a = tf.placeholder(tf.int32, shape=token_shape)

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)

            test_q = tf.nn.embedding_lookup(W, self.test_input_q)
            test_a = tf.nn.embedding_lookup(W, self.test_input_a)

        # build LSTM network: the first scope is entered without reuse, every
        # later encoder call runs with reuse=True on the same scope name.
        with tf.variable_scope("LSTM_scope", reuse=None):
            ori_q = LSTM(ori_quests, self.rnn_size, self.batch_size)
            ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
        with tf.variable_scope("LSTM_scope", reuse=True):
            cand_a = LSTM(cand_quests, self.rnn_size, self.batch_size)
            neg_a = LSTM(neg_quests, self.rnn_size, self.batch_size)
            cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
            neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

            test_q_out = LSTM(test_q, self.rnn_size, self.batch_size)
            test_q_out = tf.nn.tanh(max_pooling(test_q_out))
            test_a_out = LSTM(test_a, self.rnn_size, self.batch_size)
            test_a_out = tf.nn.tanh(max_pooling(test_a_out))

        # Similarity of the question to the candidate and to the negative.
        self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
        self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
        self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)

        self.test_q_a = feature2cos_sim(test_q_out, test_a_out)
    def __init__(self,
                 batch_size,
                 num_unroll_steps,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 attention_matrix_size,
                 loss_ratio,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True,
                 m=0.1):
        """Build a biLSTM question/answer matcher with a category side task.

        Questions and answers are embedded with a shared trainable matrix,
        encoded by ``biLSTM`` under the ``"LSTM_scope"`` variable scope and
        reduced with ``max_pooling`` + ``tanh``. The question feature also
        feeds a linear classifier over ``CAT_NUMBER`` categories; the final
        loss mixes the matching loss and the classification loss.

        Args:
            batch_size: Stored on the instance; not used by this method.
            num_unroll_steps: Second dimension of every token-id placeholder.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Stored on the instance; not used by this method.
            rnn_size: Passed to every ``biLSTM`` call.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            attention_matrix_size: Accepted for interface compatibility; the
                attention variant that used it is disabled, so it is unused.
            loss_ratio: Weight of the classification loss; the matching loss
                is weighted by ``1 - loss_ratio``.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.
            m: Third argument of ``cal_loss_and_acc`` (presumably the ranking
                margin -- confirm against that helper).
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob.
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by running ``_lr_update`` with ``new_lr`` fed.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        token_shape = [None, self.num_unroll_steps]
        self.ori_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.cand_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.neg_input_quests = tf.placeholder(tf.int32, shape=token_shape)

        self.test_input_q = tf.placeholder(tf.int32,
                                           shape=token_shape,
                                           name='test_q')
        self.test_input_a = tf.placeholder(tf.int32,
                                           shape=token_shape,
                                           name='test_a')
        # Category labels; argmax is taken over axis 1 below, so rows are
        # presumably one-hot -- confirm against the data feeder.
        self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER],
                                      name='cat_ids')

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)

            test_q = tf.nn.embedding_lookup(W, self.test_input_q)
            test_a = tf.nn.embedding_lookup(W, self.test_input_a)

        # run lstm without attention
        with tf.variable_scope("LSTM_scope") as scope:
            ori_q = biLSTM(ori_quests, self.rnn_size)
            ori_q_feat = tf.nn.tanh(max_pooling(ori_q))

            # Every encoder call after this point reuses the scope's
            # variables instead of creating new ones.
            scope.reuse_variables()

            cand_a = biLSTM(cand_quests, self.rnn_size)
            neg_a = biLSTM(neg_quests, self.rnn_size)
            cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
            neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

            test_q_out = biLSTM(test_q, self.rnn_size)
            test_q_out = tf.nn.tanh(max_pooling(test_q_out))
            test_a_out = biLSTM(test_a, self.rnn_size)
            test_a_out = tf.nn.tanh(max_pooling(test_a_out))

        # NOTE: an attention-based feature extractor (built from
        # ``get_feature`` and weights sized by ``attention_matrix_size``) used
        # to be sketched here in commented-out form; it is disabled and the
        # max-pooled features above are used instead.

        # multitasking
        with tf.name_scope("multitasking"):
            feature_size = int(ori_q_feat.get_shape()[1])

            w = tf.get_variable(name='weights',
                                shape=(feature_size, CAT_NUMBER),
                                initializer=tf.random_normal_initializer())
            b = tf.get_variable(name='bias',
                                shape=(1, CAT_NUMBER),
                                initializer=tf.zeros_initializer())

            logits = tf.matmul(ori_q_feat, w) + b

            # The cross-entropy op is documented to take labels with the same
            # float dtype as the logits, so cast the int32 placeholder.
            entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.cast(self.cat_ids, tf.float32),
                name='loss')
            loss_multitask = tf.reduce_mean(entropy)

        # acc
        self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
        self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
        loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score,
                                                 self.ori_neg_score, m)

        # Convex mix of the matching loss and the classification loss.
        self.loss = (loss_origin * (1 - loss_ratio)
                     + loss_multitask * loss_ratio)

        self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

        # multitasking_acc
        with tf.name_scope("multi_acc"):
            self.preds = tf.nn.softmax(logits)
            self.correct_preds = tf.equal(tf.argmax(self.preds, 1),
                                          tf.argmax(self.cat_ids, 1))
            # Count (not mean) of correct category predictions in the batch.
            self.multi_acc = tf.reduce_sum(
                tf.cast(self.correct_preds, tf.float32))
Пример #3
0
    def __init__(self,
                 batch_size,
                 quest_len,
                 answer_len,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 loss_ratio,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True,
                 m=0.1):
        """Build an attentive-pooling biLSTM matcher with a category side task.

        Questions and answers are embedded with a shared trainable matrix and
        encoded by ``biLSTM``. For each question/answer pair a bilinear score
        matrix ``Q U A^T`` is built; its row-wise and column-wise maxima are
        soft-maxed into per-step weights that rescale the encoder outputs
        before ``max_pooling``. The question feature also feeds a small dense
        classifier over ``CAT_NUMBER`` categories; the final loss mixes the
        matching loss and the classification loss.

        Args:
            batch_size: Stored on the instance and used to tile ``U``, so the
                graph only accepts batches of exactly this size.
            quest_len: Second dimension of the question placeholders.
            answer_len: Second dimension of the answer placeholders.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Stored on the instance; not used by this method.
            rnn_size: Passed to ``biLSTM``; ``U`` is ``2*rnn_size`` square.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            loss_ratio: Weight of the classification loss; the matching loss
                is weighted by ``1 - loss_ratio``.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.
            m: Third argument of ``cal_loss_and_acc`` (presumably the ranking
                margin -- confirm against that helper).
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.quest_len = quest_len
        self.answer_len = answer_len
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob (the dropout
        # calls that used it are disabled).
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by running ``_lr_update`` with ``new_lr`` fed.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        self.ori_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.quest_len], name="ori_quest")
        self.cand_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="cand_quest")
        self.neg_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="neg_quest")
        self.test_input_q = tf.placeholder(
            tf.int32, shape=[None, self.quest_len], name="test_input_q")
        self.test_input_a = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="test_input_a")
        # Category labels; argmax is taken over axis 1 below, so rows are
        # presumably one-hot -- confirm against the data feeder.
        self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER],
                                      name='cat_ids')

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
            test_quest = tf.nn.embedding_lookup(W, self.test_input_q)
            test_answer = tf.nn.embedding_lookup(W, self.test_input_a)

        # build LSTM network: the first scope is entered without reuse, every
        # later encoder call runs with reuse=True on the same scope name.
        with tf.variable_scope("LSTM_scope", reuse=None):
            ori_q = biLSTM(ori_quests, self.rnn_size)
        with tf.variable_scope("LSTM_scope", reuse=True):
            cand_a = biLSTM(cand_quests, self.rnn_size)
            neg_a = biLSTM(neg_quests, self.rnn_size)
            test_q = biLSTM(test_quest, self.rnn_size)
            test_a = biLSTM(test_answer, self.rnn_size)

        # ----------------------------- cal attention ------------------------
        with tf.variable_scope("attention", reuse=None):
            U = tf.get_variable(
                "U", [2 * self.rnn_size, 2 * rnn_size],
                initializer=tf.truncated_normal_initializer(stddev=0.1))

            def attention_weights(question_seq, answer_seq):
                """Return (question, answer) per-step softmax weights."""
                # NOTE(review): tiling by the Python ``batch_size`` pins the
                # graph to one batch size although placeholders allow None.
                tiled_u = tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])
                # assumes encoder outputs are (batch, steps, 2 * rnn_size)
                # -- TODO confirm against biLSTM.
                scores = tf.matmul(tf.matmul(question_seq, tiled_u),
                                   answer_seq,
                                   adjoint_b=True)
                question_w = tf.nn.softmax(tf.reduce_max(scores, 2))
                answer_w = tf.nn.softmax(tf.reduce_max(scores, 1))
                return question_w, answer_w

            delta_q, delta_a = attention_weights(ori_q, cand_a)
            delta_neg_q, delta_neg_a = attention_weights(ori_q, neg_a)
        with tf.variable_scope("attention", reuse=True):
            delta_test_q, delta_test_a = attention_weights(test_q, test_a)

        # -------------------------- recalculate lstm output -----------------
        def weighted_pool(sequence, weights, length):
            """Scale each step by its attention weight, then max-pool."""
            return max_pooling(
                tf.multiply(sequence, tf.reshape(weights, [-1, length, 1])))

        ori_q_feat = weighted_pool(ori_q, delta_q, self.quest_len)
        cand_q_feat = weighted_pool(cand_a, delta_a, self.answer_len)
        neg_ori_q_feat = weighted_pool(ori_q, delta_neg_q, self.quest_len)
        neg_q_feat = weighted_pool(neg_a, delta_neg_a, self.answer_len)
        test_q_out = weighted_pool(test_q, delta_test_q, self.quest_len)
        test_a_out = weighted_pool(test_a, delta_test_a, self.answer_len)

        # multitasking
        with tf.name_scope("multitasking"):
            feature_size = int(ori_q_feat.get_shape()[1])

            fc1 = tf.layers.dense(ori_q_feat,
                                  feature_size * 2,
                                  activation=tf.nn.relu,
                                  name='fc1')
            fc2 = tf.layers.dense(fc1,
                                  feature_size,
                                  activation=tf.nn.relu,
                                  name='fc2')
            # The output layer must stay linear: softmax cross-entropy expects
            # unscaled logits, and a sigmoid here would confine them to (0, 1)
            # and cap how confident the softmax can ever become.
            logits = tf.layers.dense(fc2, CAT_NUMBER)

            # The cross-entropy op is documented to take labels with the same
            # float dtype as the logits, so cast the int32 placeholder.
            entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.cast(self.cat_ids, tf.float32),
                name='loss')
            loss_multitask = tf.reduce_mean(entropy)

        # acc
        self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
        # Score the negative pair with the question feature that was attended
        # against the negative answer (it was computed but previously unused).
        self.ori_neg_score = feature2cos_sim(neg_ori_q_feat, neg_q_feat)
        loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score,
                                                 self.ori_neg_score, m)

        # Convex mix of the matching loss and the classification loss.
        self.loss = (loss_origin * (1 - loss_ratio)
                     + loss_multitask * loss_ratio)

        self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

        # multitasking_acc
        with tf.name_scope("multi_acc"):
            self.preds = tf.nn.softmax(logits)
            self.correct_preds = tf.equal(tf.argmax(self.preds, 1),
                                          tf.argmax(self.cat_ids, 1))
            # Count (not mean) of correct category predictions in the batch.
            self.multi_acc = tf.reduce_sum(
                tf.cast(self.correct_preds, tf.float32))
Пример #4
0
    def __init__(self,
                 batch_size,
                 quest_len,
                 answer_len,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True):
        """Build the graph of an attentive-pooling biLSTM matcher.

        Questions and answers are embedded with a shared trainable matrix and
        encoded by ``BILSTM``. For each question/answer pair the score matrix
        ``tanh(Q U A^T)`` is built; its row-wise and column-wise maxima are
        soft-maxed into per-step weights that rescale the encoder outputs
        before ``max_pooling``. Pairs are scored with ``feature2cos_sim``.

        Args:
            batch_size: Stored on the instance and used to tile ``U``, so the
                graph only accepts batches of exactly this size.
            quest_len: Second dimension of the question placeholders.
            answer_len: Second dimension of the answer placeholders.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Stored on the instance; not used by this method.
            rnn_size: Passed to ``BILSTM``; ``U`` is ``2*rnn_size`` square.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.quest_len = quest_len
        self.answer_len = answer_len
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob (the dropout
        # calls that used it are disabled).
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by running ``_lr_update`` with ``new_lr`` fed.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        self.ori_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.quest_len], name="ori_quest")
        self.cand_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="cand_quest")
        self.neg_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="neg_quest")
        self.test_input_quests = tf.placeholder(
            tf.int32, shape=[None, self.quest_len], name="test_quest")
        self.test_input_answer = tf.placeholder(
            tf.int32, shape=[None, self.answer_len], name="test_cand_quest")

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
            test_quest = tf.nn.embedding_lookup(W, self.test_input_quests)
            test_answer = tf.nn.embedding_lookup(W, self.test_input_answer)

        # build LSTM network: the first scope is entered without reuse, every
        # later encoder call runs with reuse=True on the same scope name.
        with tf.variable_scope("LSTM_scope", reuse=None):
            ori_q = BILSTM(ori_quests, self.rnn_size)
        with tf.variable_scope("LSTM_scope", reuse=True):
            cand_a = BILSTM(cand_quests, self.rnn_size)
            neg_a = BILSTM(neg_quests, self.rnn_size)
            test_q = BILSTM(test_quest, self.rnn_size)
            test_a = BILSTM(test_answer, self.rnn_size)

        # ----------------------------- cal attention ------------------------
        with tf.variable_scope("attention", reuse=None):
            U = tf.get_variable(
                "U", [2 * self.rnn_size, 2 * rnn_size],
                initializer=tf.truncated_normal_initializer(stddev=0.1))

            def attention_weights(question_seq, answer_seq):
                """Return (question, answer) per-step softmax weights."""
                # NOTE(review): tiling by the Python ``batch_size`` pins the
                # graph to one batch size although placeholders allow None.
                tiled_u = tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])
                # tf.batch_matmul(adj_y=True) was removed in TensorFlow 1.0;
                # tf.matmul(adjoint_b=True) is its documented replacement.
                # assumes encoder outputs are (batch, steps, 2 * rnn_size)
                # -- TODO confirm against BILSTM.
                scores = tf.nn.tanh(
                    tf.matmul(tf.matmul(question_seq, tiled_u),
                              answer_seq,
                              adjoint_b=True))
                question_w = tf.nn.softmax(tf.reduce_max(scores, 2))
                answer_w = tf.nn.softmax(tf.reduce_max(scores, 1))
                return question_w, answer_w

            delta_q, delta_a = attention_weights(ori_q, cand_a)
            delta_neg_q, delta_neg_a = attention_weights(ori_q, neg_a)
        with tf.variable_scope("attention", reuse=True):
            delta_test_q, delta_test_a = attention_weights(test_q, test_a)

        # -------------------------- recalculate lstm output -----------------
        def weighted_pool(sequence, weights, length):
            """Scale each step by its attention weight, then max-pool."""
            # tf.mul was removed in TensorFlow 1.0 in favour of tf.multiply.
            return max_pooling(
                tf.multiply(sequence, tf.reshape(weights, [-1, length, 1])))

        ori_q_feat = weighted_pool(ori_q, delta_q, self.quest_len)
        cand_q_feat = weighted_pool(cand_a, delta_a, self.answer_len)
        neg_ori_q_feat = weighted_pool(ori_q, delta_neg_q, self.quest_len)
        neg_q_feat = weighted_pool(neg_a, delta_neg_a, self.answer_len)
        test_q_feat = weighted_pool(test_q, delta_test_q, self.quest_len)
        test_a_feat = weighted_pool(test_a, delta_test_a, self.answer_len)

        # cal cosine similarity; each pair uses the question feature that was
        # attended against that pair's own answer.
        self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
        self.ori_neg = feature2cos_sim(neg_ori_q_feat, neg_q_feat)
        self.test_q_a = feature2cos_sim(test_q_feat, test_a_feat)
        self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
Пример #5
0
    def __init__(self,
                 batch_size,
                 num_unroll_steps,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 attention_matrix_size,
                 loss_ratio,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True,
                 m=0.1):
        """Build a question-attended biLSTM matcher with a category side task.

        The original docstring cites "LSTM-BASED DEEP LEARNING MODELS FOR
        NON-FACTOID ANSWER SELECTION".

        The question is encoded by ``biLSTM`` and pooled into a feature
        vector. That vector, projected by ``U`` into embedding space, gates
        every answer token embedding through a sigmoid weight before the
        answer itself is encoded. The question feature also feeds a small
        dense classifier over ``CAT_NUMBER`` categories; the final loss mixes
        the matching loss and the classification loss.

        Args:
            batch_size: Stored on the instance; not used by this method.
            num_unroll_steps: Second dimension of every token-id placeholder.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Column count of ``U`` and width used when
                reshaping and tiling the attention weights.
            rnn_size: Passed to ``biLSTM``; ``U`` has ``2*rnn_size`` rows.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            attention_matrix_size: Accepted but not used by this method.
            loss_ratio: Weight of the classification loss; the matching loss
                is weighted by ``1 - loss_ratio``.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.
            m: Third argument of ``cal_loss_and_acc`` (presumably the ranking
                margin -- confirm against that helper).
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob.
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by ``assign_new_lr``.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        token_shape = [None, self.num_unroll_steps]
        self.ori_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.cand_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.neg_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.test_input_q = tf.placeholder(tf.int32, shape=token_shape)
        self.test_input_a = tf.placeholder(tf.int32, shape=token_shape)
        # Category labels; argmax is taken over axis 1 below, so rows are
        # presumably one-hot -- confirm against the data feeder.
        self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER],
                                      name='cat_ids')

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
            test_q = tf.nn.embedding_lookup(W, self.test_input_q)
            test_a = tf.nn.embedding_lookup(W, self.test_input_a)

        # build LSTM network
        # U projects a pooled question feature into embedding space.
        U = tf.Variable(tf.truncated_normal(
            [2 * self.rnn_size, self.embedding_size], stddev=0.1),
                        name="U")

        def attend(answer_emb, question_feat):
            """Gate each answer token embedding by its question relevance."""
            projected = tf.reshape(
                tf.expand_dims(tf.matmul(question_feat, U), 1),
                [-1, self.embedding_size, 1])
            # One sigmoid weight per answer token, repeated across the
            # embedding axis so it can scale the whole token vector.
            weight = tf.sigmoid(tf.matmul(answer_emb, projected))
            return tf.multiply(
                answer_emb, tf.tile(weight, [1, 1, self.embedding_size]))

        with tf.variable_scope("LSTM_scope", reuse=None):
            ori_q = biLSTM(ori_quests, self.rnn_size)
            ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
        with tf.variable_scope("LSTM_scope", reuse=True):
            cand_a = biLSTM(attend(cand_quests, ori_q_feat), self.rnn_size)
            neg_a = biLSTM(attend(neg_quests, ori_q_feat), self.rnn_size)
            cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
            neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
            test_q_out = biLSTM(test_q, self.rnn_size)
            test_q_out = tf.nn.tanh(max_pooling(test_q_out))
            test_a_out = biLSTM(attend(test_a, test_q_out), self.rnn_size)
            test_a_out = tf.nn.tanh(max_pooling(test_a_out))

        # multitasking
        with tf.name_scope("multitasking"):
            feature_size = int(ori_q_feat.get_shape()[1])

            fc1 = tf.layers.dense(ori_q_feat,
                                  feature_size * 2,
                                  activation=tf.nn.relu,
                                  name='fc1')
            fc2 = tf.layers.dense(fc1,
                                  feature_size,
                                  activation=tf.nn.relu,
                                  name='fc2')
            # The output layer must stay linear: softmax cross-entropy expects
            # unscaled logits, and a sigmoid here would confine them to (0, 1)
            # and cap how confident the softmax can ever become.
            logits = tf.layers.dense(fc2, CAT_NUMBER)

            # The cross-entropy op is documented to take labels with the same
            # float dtype as the logits, so cast the int32 placeholder.
            entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits,
                labels=tf.cast(self.cat_ids, tf.float32),
                name='loss')
            loss_multitask = tf.reduce_mean(entropy)

        # acc
        self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
        self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
        loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score,
                                                 self.ori_neg_score, m)

        # Convex mix of the matching loss and the classification loss.
        self.loss = (loss_origin * (1 - loss_ratio)
                     + loss_multitask * loss_ratio)

        self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

        # multitasking_acc
        with tf.name_scope("multi_acc"):
            self.preds = tf.nn.softmax(logits)
            self.correct_preds = tf.equal(tf.argmax(self.preds, 1),
                                          tf.argmax(self.cat_ids, 1))
            # Count (not mean) of correct category predictions in the batch.
            self.multi_acc = tf.reduce_sum(
                tf.cast(self.correct_preds, tf.float32))

    def assign_new_lr(self, session, lr_value):
        """Overwrite the learning-rate variable with ``lr_value``.

        Defined at method level: nested inside ``__init__`` it was an unused
        local function that no caller could reach.

        Args:
            session: Object whose ``run`` executes the assign op.
            lr_value: Value fed into the ``new_lr`` placeholder.
        """
        session.run(self._lr_update, feed_dict={self.new_lr: lr_value})
Пример #6
0
    def __init__(self,
                 batch_size,
                 num_unroll_steps,
                 embeddings,
                 embedding_size,
                 rnn_size,
                 num_rnn_layers,
                 max_grad_norm,
                 attention_matrix_size,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True):
        """Build the graph of a question-attended biLSTM matcher.

        The original docstring cites "LSTM-BASED DEEP LEARNING MODELS FOR
        NON-FACTOID ANSWER SELECTION".

        The question is encoded by ``biLSTM`` and pooled into a feature
        vector. That vector, projected by ``U`` into embedding space, gates
        every answer token embedding through a sigmoid weight before the
        answer itself is encoded. Pairs are scored with ``feature2cos_sim``.

        Args:
            batch_size: Stored on the instance; not used by this method.
            num_unroll_steps: Second dimension of every token-id placeholder.
            embeddings: Initial value of the embedding matrix (converted with
                ``tf.to_float``).
            embedding_size: Column count of ``U`` and width used when
                reshaping and tiling the attention weights.
            rnn_size: Passed to ``biLSTM``; ``U`` has ``2*rnn_size`` rows.
            num_rnn_layers: Stored on the instance; not used by this method.
            max_grad_norm: Stored on the instance; not used by this method.
            attention_matrix_size: Accepted but not used by this method.
            l2_reg_lambda: Stored on the instance; not used by this method.
            adjust_weight: Stored on the instance; not used by this method.
            label_weight: Stored on the instance; ``None`` (the default) is
                replaced by a fresh empty list so instances never share one
                default list object.
            is_training: Stored on the instance; not used by this method.
        """
        # define input variable
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.adjust_weight = adjust_weight
        # A literal ``[]`` default would be shared by every instance.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.max_grad_norm = max_grad_norm
        self.l2_reg_lambda = l2_reg_lambda
        self.is_training = is_training

        # NOTE: no op built in this method consumes keep_prob.
        self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

        # Learning rate lives in a non-trainable variable that is overwritten
        # by running ``_lr_update`` with ``new_lr`` fed.
        self.lr = tf.Variable(0.0, trainable=False)
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

        token_shape = [None, self.num_unroll_steps]
        self.ori_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.cand_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.neg_input_quests = tf.placeholder(tf.int32, shape=token_shape)
        self.test_input_q = tf.placeholder(tf.int32, shape=token_shape)
        self.test_input_a = tf.placeholder(tf.int32, shape=token_shape)

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            W = tf.Variable(tf.to_float(self.embeddings),
                            trainable=True,
                            name="W")
            ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
            cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
            neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
            test_q = tf.nn.embedding_lookup(W, self.test_input_q)
            test_a = tf.nn.embedding_lookup(W, self.test_input_a)

        # build LSTM network
        # U projects a pooled question feature into embedding space.
        U = tf.Variable(tf.truncated_normal(
            [2 * self.rnn_size, self.embedding_size], stddev=0.1),
                        name="U")

        def attend(answer_emb, question_feat):
            """Gate each answer token embedding by its question relevance."""
            # tf.batch_matmul and tf.mul were removed in TensorFlow 1.0;
            # tf.matmul and tf.multiply are their documented replacements.
            projected = tf.reshape(
                tf.expand_dims(tf.matmul(question_feat, U), 1),
                [-1, self.embedding_size, 1])
            # One sigmoid weight per answer token, repeated across the
            # embedding axis so it can scale the whole token vector.
            weight = tf.sigmoid(tf.matmul(answer_emb, projected))
            return tf.multiply(
                answer_emb, tf.tile(weight, [1, 1, self.embedding_size]))

        with tf.variable_scope("LSTM_scope", reuse=None):
            ori_q = biLSTM(ori_quests, self.rnn_size)
            ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
        with tf.variable_scope("LSTM_scope", reuse=True):
            cand_a = biLSTM(attend(cand_quests, ori_q_feat), self.rnn_size)
            neg_a = biLSTM(attend(neg_quests, ori_q_feat), self.rnn_size)
            cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
            neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
            test_q_out = biLSTM(test_q, self.rnn_size)
            test_q_out = tf.nn.tanh(max_pooling(test_q_out))
            test_a_out = biLSTM(attend(test_a, test_q_out), self.rnn_size)
            test_a_out = tf.nn.tanh(max_pooling(test_a_out))

        # Similarity of the question to the candidate and to the negative.
        self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
        self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
        self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)

        self.test_q_a = feature2cos_sim(test_q_out, test_a_out)