Example #1
import os

import numpy as np
import tensorflow as tf

# FLAGS and the Embedding, Encoder, Selector and Classifier classes are
# defined elsewhere in the project and are assumed to be importable here.


class agent():
	def __init__(self, is_training):
		# Placeholders

		self.word = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_word')
		self.pos1 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos1')
		self.pos2 = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_pos2')
		self.length = tf.placeholder(dtype=tf.int32, shape=[None], name='input_length')
		self.mask = tf.placeholder(dtype=tf.int32, shape=[None, FLAGS.max_length], name='input_mask')
		self.label = tf.placeholder(dtype=tf.int32, shape=[None], name='label')
		self.label_for_select = tf.placeholder(dtype=tf.int32, shape=[None], name='label_for_select')
		self.scope = tf.placeholder(dtype=tf.int32, shape=[FLAGS.batch_size + 1], name='scope')
		self.weights = tf.placeholder(dtype=tf.float32, shape=[None])
		self.data_word_vec = np.load(os.path.join(FLAGS.export_path, 'vec.npy'))
		self.reward_holder = tf.placeholder(dtype=tf.float32, shape=[None], name='reward')
		self.action_holder = tf.placeholder(dtype=tf.int32, shape=[None], name='action')

		# Network
		self.embedding = Embedding(is_training, self.data_word_vec, self.word, self.pos1, self.pos2)
		self.encoder = Encoder(is_training, FLAGS.drop_prob)
		self.selector = Selector(FLAGS.num_classes, is_training, FLAGS.drop_prob)
		self.classifier = Classifier(is_training, self.label, self.weights)
		#compute
		self.word_embedding = self.embedding.word_embedding()
		self.pos_embedding = self.embedding.pos_embedding()
		# Note: self.embedding is re-bound here from the Embedding object to the
		# concatenated embedding tensor it produces.
		self.embedding = self.embedding.concat_embedding(self.word_embedding, self.pos_embedding)
		self.x = self.encoder.cnn(self.embedding, FLAGS.hidden_size, self.mask, activation=tf.nn.relu)
		self.logit, self.repre = self.selector.no_bag(self.x)

		self.outputvalue = self.classifier.outputvalue(self.logit)
		self.output = self.classifier.output(self.logit)
		self.softmax_outputs = tf.nn.softmax(self.logit)

		# Probability that the policy assigns to the action actually taken
		self.action_onehot = tf.one_hot(indices=self.action_holder, depth=FLAGS.num_classes, dtype=tf.float32)
		self.step = tf.multiply(self.softmax_outputs, self.action_onehot)
		self.action_outputvalue = tf.reduce_sum(self.step, axis=1)
		self.temp = tf.log(self.action_outputvalue) * self.reward_holder
		# REINFORCE-style policy-gradient loss; the tensor inside reduce_mean holds
		# the per-sentence losses (188 sentences)
		self.loss = -tf.reduce_mean(tf.log(self.action_outputvalue) * self.reward_holder)
		self.loss_pre = self.classifier.softmax_cross_entropy(self.logit)

		# Pre-training: plain SGD on the supervised cross-entropy loss
		self.optimizer_pre = tf.train.GradientDescentOptimizer(0.5)
		self.grads_and_vars = self.optimizer_pre.compute_gradients(self.loss_pre)
		self.train_op = self.optimizer_pre.apply_gradients(self.grads_and_vars)
		# Reinforcement learning: Adam on the policy-gradient loss
		self.optimizer = tf.train.AdamOptimizer(0.0001)
		self.train_op_rl = self.optimizer.minimize(self.loss)
		self.tvars = tf.trainable_variables()
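
A minimal, hypothetical usage sketch for the agent class above, written against the TF 1.x session API. The zero-filled dummy batch and the actions/rewards arrays are illustrative stand-ins for what the repository's data loader and sampling step would actually produce, and exactly which placeholders each train op requires depends on the Encoder/Selector/Classifier implementations not shown here:

import numpy as np
import tensorflow as tf

model = agent(is_training=True)
n = FLAGS.batch_size  # illustrative number of sentences in one batch

# Dummy feed values; real values come from the project's data loader.
feed = {
    model.word: np.zeros([n, FLAGS.max_length], dtype=np.int32),
    model.pos1: np.zeros([n, FLAGS.max_length], dtype=np.int32),
    model.pos2: np.zeros([n, FLAGS.max_length], dtype=np.int32),
    model.mask: np.zeros([n, FLAGS.max_length], dtype=np.int32),
    model.length: np.full([n], FLAGS.max_length, dtype=np.int32),
    model.label: np.zeros([n], dtype=np.int32),
    model.label_for_select: np.zeros([n], dtype=np.int32),
    model.weights: np.ones([n], dtype=np.float32),
    model.scope: np.arange(FLAGS.batch_size + 1, dtype=np.int32),
}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Supervised pre-training step on the cross-entropy loss
    _, pre_loss = sess.run([model.train_op, model.loss_pre], feed_dict=feed)
    # Policy-gradient step: additionally feed sampled actions and their rewards
    feed[model.action_holder] = np.zeros([n], dtype=np.int32)
    feed[model.reward_holder] = np.zeros([n], dtype=np.float32)
    _, rl_loss = sess.run([model.train_op_rl, model.loss], feed_dict=feed)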
Example #2
# Relies on the same module-level imports (os, numpy, tensorflow) and
# project-level dependencies (FLAGS, Embedding, Encoder, Selector, Classifier)
# as Example #1.
class PCNN():
    def __init__(self, is_training):
        # Placeholders

        self.word = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_word')
        self.pos1 = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_pos1')
        self.pos2 = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_pos2')
        self.length = tf.placeholder(dtype=tf.int32,
                                     shape=[None],
                                     name='input_length')
        self.mask = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_mask')
        self.scope = tf.placeholder(dtype=tf.int32,
                                    shape=[FLAGS.batch_size + 1],
                                    name='scope')
        self.bag_label = tf.placeholder(dtype=tf.int32,
                                        shape=[None],
                                        name='bag_label')
        self.sentence_label = tf.placeholder(dtype=tf.int32,
                                             shape=[None],
                                             name='sentence_label')
        self.label_weights = tf.placeholder(dtype=tf.float32,
                                            shape=[FLAGS.batch_size])
        self.data_word_vec = np.load(os.path.join(FLAGS.export_path,
                                                  'vec.npy'))

        #Network
        self.embedding = Embedding(is_training, self.data_word_vec, self.word,
                                   self.pos1, self.pos2)
        self.encoder = Encoder(is_training, FLAGS.drop_prob)
        self.selector = Selector(FLAGS.num_classes, is_training,
                                 FLAGS.drop_prob)
        self.classifier = Classifier(is_training, self.bag_label,
                                     self.label_weights)
        #compute
        self.word_embedding = self.embedding.word_embedding()
        self.pos_embedding = self.embedding.pos_embedding()
        self.embedding = self.embedding.concat_embedding(
            self.word_embedding, self.pos_embedding)
        self.x = self.encoder.pcnn(self.embedding,
                                   FLAGS.hidden_size,
                                   self.mask,
                                   activation=tf.nn.relu)
        self.logit, self.repre = self.selector.attention(
            self.x, self.scope, self.sentence_label)

        # Used only to judge which is better, ds or selected; not part of the optimization
        self.label_onehot = tf.one_hot(indices=self.bag_label,
                                       depth=FLAGS.num_classes,
                                       dtype=tf.float32)
        self.bag_loss_temp = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.label_onehot, logits=self.logit)
        self.bag_loss = tf.reshape(self.bag_loss_temp, [1, -1])
        self.loss_mean = tf.reduce_mean(self.bag_loss)
        # Compute the reward
        self.softmax_output = tf.nn.softmax(self.logit)
        self.reward = tf.log(
            tf.reduce_sum(self.label_onehot * self.softmax_output, axis=1))
        #self.loss_mine = -tf.reduce_mean(self.reward, axis=0)  # same as the loss below, just without the label weights
        # Loss used for the gradient-descent update
        self.loss = self.classifier.softmax_cross_entropy(self.logit)
        #self.loss_one = self.classifier.softmax_cross_entropy(self.logit_one)
        self.output = self.classifier.output(self.logit)
        #self.output_one = self.classifier.output(self.logit_one)
        self.outputvalue = self.classifier.outputvalue(self.logit)
        self.test_output = tf.argmax(self.logit, 1)  # predicted relation
        self.test_outputvalue = tf.reduce_max(self.logit, axis=1)  # score (max logit) of the predicted relation
        # Optimizer
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        tf.summary.scalar('learning_rate', FLAGS.learning_rate)
        self.optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
        self.train_op = self.optimizer.apply_gradients(
            self.grads_and_vars, global_step=self.global_step)
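
A hypothetical usage sketch for the PCNN class, using the same numpy/tensorflow imports as the sketch after Example #1 and zero-filled dummy feeds in place of the repository's real data loader. The scope feed is assumed to hold the batch_size + 1 boundary indices that delimit which sentences belong to which bag, the usual convention for selective-attention models; the exact requirements depend on the Selector/Classifier implementations:

model = PCNN(is_training=True)
num_bags = FLAGS.batch_size
num_sent = num_bags  # illustrative: one sentence per bag

feed = {
    model.word: np.zeros([num_sent, FLAGS.max_length], dtype=np.int32),
    model.pos1: np.zeros([num_sent, FLAGS.max_length], dtype=np.int32),
    model.pos2: np.zeros([num_sent, FLAGS.max_length], dtype=np.int32),
    model.mask: np.zeros([num_sent, FLAGS.max_length], dtype=np.int32),
    model.length: np.full([num_sent], FLAGS.max_length, dtype=np.int32),
    model.scope: np.arange(num_bags + 1, dtype=np.int32),  # bag boundaries
    model.bag_label: np.zeros([num_bags], dtype=np.int32),
    model.sentence_label: np.zeros([num_sent], dtype=np.int32),
    model.label_weights: np.ones([num_bags], dtype=np.float32),
}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # One supervised training step; global_step is advanced by apply_gradients
    _, step, loss = sess.run(
        [model.train_op, model.global_step, model.loss], feed_dict=feed)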