示例#1
0
    def __init__(self, args, infer=False):
        """Build the RNN language-model graph (training or sampling).

        Args:
            args: hyper-parameter object. Reads ``model`` ('rnn', 'gru' or
                'lstm'), ``rnn_size``, ``num_layers``, ``batch_size``,
                ``seq_length``, ``vocab_size`` and ``grad_clip``.
            infer: when true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 on the caller's object and every decoder
                step is fed the embedding of the previous step's argmax.

        Raises:
            Exception: if ``args.model`` names an unsupported cell type.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        # ``state_is_tuple`` is an LSTM-only constructor argument; handing it
        # to BasicRNNCell or GRUCell raises TypeError.
        cell_kwargs = {}
        if args.model == 'rnn':
            cell_fn = core_rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = core_rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = core_rnn_cell.BasicLSTMCell
            cell_kwargs['state_is_tuple'] = True
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Every layer gets its own cell object; repeating a single instance
        # is rejected (or silently weight-shared) by newer TensorFlow 1.x.
        layers = [cell_fn(args.rnn_size, **cell_kwargs)
                  for _ in range(args.num_layers)]
        self.cell = cell = core_rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

        self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name="input_data")
        self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length], name="targets")
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
                # Look the batch up once and reuse it for logging and splitting.
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # %-formatting keeps these debug lines valid on Python 2 and 3.
                print("seq_length =  %s embedding_lookup =  %s" % (args.seq_length, embedded))
                # One tensor per time step, each with the step axis squeezed out.
                inputs = tf.split(embedded, args.seq_length, 1)
                print("inputs 1: %s" % (inputs,))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
                print("inputs 2: %s" % (inputs,))

        def loop(prev, _):
            # Sampling mode: project the previous output to vocabulary
            # logits and feed back the embedding of the most likely symbol.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits, name="prob_results")
        # The fourth positional parameter of sequence_loss_by_example is
        # ``average_across_timesteps``, not a vocabulary size; spell it out.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            average_across_timesteps=True)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False, name="LR_")
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    def __init__(self, vocabularySize, config_param):
        """Build a multi-layer basic-RNN language-model graph.

        Args:
            vocabularySize: number of distinct symbols; sizes the embedding
                table and the softmax projection.
            config_param: configuration object. Reads ``batch_size``,
                ``sequence_size``, ``embeddingSize``, ``hidden_size`` and
                ``num_layers``.
        """
        self.vocabularySize = vocabularySize
        self.config = config_param

        self._inputX = tf.placeholder(
            tf.int32, [self.config.batch_size, self.config.sequence_size],
            "InputsX")
        self._inputTargetsY = tf.placeholder(
            tf.int32, [self.config.batch_size, self.config.sequence_size],
            "InputTargetsY")

        # Embed the inputs; the embedding table and lookup are pinned to the CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [self.vocabularySize, self.config.embeddingSize])
            embeddingLookedUp = tf.nn.embedding_lookup(embedding, self._inputX)
            # One tensor per time step, each with the step axis squeezed out.
            inputs = tf.split(axis=1,
                              num_or_size_splits=self.config.sequence_size,
                              value=embeddingLookedUp)
            inputTensorsAsList = [tf.squeeze(input_, [1]) for input_ in inputs]

        # Every layer gets its own cell object; repeating a single instance
        # is rejected (or silently weight-shared) by newer TensorFlow 1.x.
        layerCells = [rnn_cell.BasicRNNCell(self.config.hidden_size)
                      for _ in range(self.config.num_layers)]
        self.multilayerRNN = rnn_cell.MultiRNNCell(layerCells)
        self._initial_state = self.multilayerRNN.zero_state(
            self.config.batch_size, tf.float32)

        # Unroll the RNN and project the hidden outputs to vocabulary logits.
        hidden_layer_output, last_state = rnn.static_rnn(
            self.multilayerRNN,
            inputTensorsAsList,
            initial_state=self._initial_state)
        hidden_layer_output = tf.reshape(
            tf.concat(axis=1, values=hidden_layer_output),
            [-1, self.config.hidden_size])
        self._logits = tf.nn.xw_plus_b(
            hidden_layer_output,
            tf.get_variable("softmax_w",
                            [self.config.hidden_size, self.vocabularySize]),
            tf.get_variable("softmax_b", [self.vocabularySize]))
        self._predictionSoftmax = tf.nn.softmax(self._logits)

        # Loss with unit weights. The fourth positional parameter of
        # sequence_loss_by_example is ``average_across_timesteps``, not a
        # vocabulary size, so it is spelled out here.
        loss = seq2seq.sequence_loss_by_example(
            [self._logits], [tf.reshape(self._inputTargetsY, [-1])],
            [tf.ones([self.config.batch_size * self.config.sequence_size])],
            average_across_timesteps=True)
        self._cost = tf.div(tf.reduce_sum(loss), self.config.batch_size)

        self._final_state = last_state
示例#3
0
  def testSequenceLossByExample(self):
    """Checks per-example sequence loss with and without timestep averaging."""
    output_classes = 5
    steps = range(3)
    with self.test_session() as sess:
      # Three time steps, batch of two: constant logits, integer targets
      # and unit weights.
      logits = []
      for step in steps:
        logits.append(
            constant_op.constant(step + 0.5, shape=[2, output_classes]))
      targets = []
      for step in steps:
        targets.append(constant_op.constant(step, dtypes.int32, shape=[2]))
      weights = []
      for _ in steps:
        weights.append(constant_op.constant(1.0, shape=[2]))

      # Averaged over time steps.
      averaged = seq2seq_lib.sequence_loss_by_example(
          logits, targets, weights, average_across_timesteps=True)
      expected_average = np.asarray([1.609438, 1.609438])
      self.assertAllClose(expected_average, sess.run(averaged))

      # Summed over time steps.
      summed = seq2seq_lib.sequence_loss_by_example(
          logits, targets, weights, average_across_timesteps=False)
      expected_sum = np.asarray([4.828314, 4.828314])
      self.assertAllClose(expected_sum, sess.run(summed))
示例#4
0
  def testSequenceLossByExample(self):
    """Verifies sequence_loss_by_example in averaged and summed modes."""
    with self.test_session() as sess:
      output_classes = 5
      timesteps = (0, 1, 2)

      # Constant logits, integer targets and unit weights, one per step.
      logits = [
          constant_op.constant(t + 0.5, shape=[2, output_classes])
          for t in timesteps
      ]
      targets = [
          constant_op.constant(t, dtypes.int32, shape=[2]) for t in timesteps
      ]
      weights = [constant_op.constant(1.0, shape=[2]) for _ in timesteps]

      # Build both loss variants, then evaluate each against its expectation.
      per_example_mean = seq2seq_lib.sequence_loss_by_example(
          logits, targets, weights, average_across_timesteps=True)
      mean_value = sess.run(per_example_mean)
      self.assertAllClose(np.asarray([1.609438, 1.609438]), mean_value)

      per_example_total = seq2seq_lib.sequence_loss_by_example(
          logits, targets, weights, average_across_timesteps=False)
      total_value = sess.run(per_example_total)
      self.assertAllClose(np.asarray([4.828314, 4.828314]), total_value)
示例#5
0
 def compute_cost(self):
     """Build the batch-averaged cost op and register it as a scalar summary.

     Flattens ``self.prediction`` and ``self.ys``, scores them with
     ``sequence_loss_by_example`` using ``self.ms_error`` as the loss
     function and unit weights, then stores the sum of the losses divided
     by ``self.batch_size`` in ``self.cost``.

     ``sequence_loss_by_example`` is expected to come from
     ``tensorflow.contrib.legacy_seq2seq.python.ops.seq2seq``.
     """
     flat_prediction = tf.reshape(self.prediction, [-1], name='reshape_pred')
     flat_target = tf.reshape(self.ys, [-1], name='reshape_target')
     unit_weights = tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)

     losses = sequence_loss_by_example(
         [flat_prediction],
         [flat_target],
         [unit_weights],
         average_across_timesteps=True,
         softmax_loss_function=self.ms_error,
         name='losses')

     total_loss = tf.reduce_sum(losses, name='losses_sum')
     self.cost = tf.div(total_loss, self.batch_size, name='average_cost')
     tf.summary.scalar('cost', self.cost)
示例#6
0
def train_neural_network():
    """Train the network from ``recurrent_neural_network`` and checkpoint it.

    Uses names defined outside this function: ``input_data``,
    ``output_targets``, ``x_inputs``, ``y_labels``, ``n_chunk``,
    ``label_size`` and ``batch_size``. Runs 1000 epochs with Adam, prints the
    loss at a few fixed steps per epoch and saves to ``'ticket.module'`` at
    those steps on every 7th epoch after epoch 34.

    NOTE: gradients are not clipped; ``minimize`` is applied directly.
    """
    logits, last_state, _, _, _ = recurrent_neural_network()
    targets = tf.reshape(output_targets, [-1])

    loss = seq2seq.sequence_loss_by_example(logits=[logits], targets=[targets],
                                            weights=[tf.ones_like(targets, dtype=tf.float32)])
    print(logits.get_shape())
    print(targets.get_shape())
    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    # Build the learning-rate update once and feed the value in; creating a
    # fresh tf.assign op every epoch would keep growing the graph.
    new_lr = tf.placeholder(tf.float32, shape=[])
    lr_update = tf.assign(learning_rate, new_lr)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(cost)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        lr = 0.1
        lr_decay = 0.002
        batches = n_chunk - label_size - batch_size
        # Steps (1-based) at which progress is reported. Floor division keeps
        # these integers on both Python 2 and Python 3.
        report_steps = {1, batches // 4, batches * 2 // 4, batches * 3 // 4,
                        batches - 1}
        for epoch in range(1000):
            # Divide the rate by 10 each epoch, but never below a floor that
            # itself shrinks by 10x every 55 epochs.
            mini_lr = lr_decay * (0.97 ** (epoch * 1.0 / 1001))
            lr = lr * 1.0 / 10
            if lr < mini_lr:
                lr = mini_lr
            if epoch > 0 and epoch % 55 == 0:
                lr_decay /= 10.0
            sess.run(lr_update, feed_dict={new_lr: lr})
            for batche in range(batches):
                train_loss, _, _ = sess.run([cost, last_state, train_op],
                                            feed_dict={input_data: x_inputs[batche],
                                                       output_targets: y_labels[batche]})
                at_report_step = (batche + 1) in report_steps
                if at_report_step:
                    # %-formatting prints the same text on Python 2 and 3.
                    print("%s %s %s" % (epoch, batche, train_loss))
                    print(lr)
                if epoch > 34 and epoch % 7 == 0 and at_report_step:
                    saver.save(sess, 'ticket.module', global_step=epoch)
示例#7
0
def train_neural_network(total_train, total_regions, total_asfmap, wordtoix):
    """Train the captioning network one phrase at a time and save it.

    Args:
        total_train: sequence of per-split arrays; column 4 of each row is
            used as an index into the matching ``regions`` entry.
        total_regions: sequence of per-split region collections whose items
            expose a ``'phrase'`` entry.
        total_asfmap: sequence of per-split image feature arrays, indexed by
            row and reshaped to ``(1, image_feat_size)``.
        wordtoix: vocabulary mapping handed to ``sen2ix``.

    Runs 500 epochs with Adam and gradients clipped to a global norm of 5,
    then writes the checkpoint to ``'RNN_model/test.module'``.
    """
    keep = 0.5
    image_feat_size = 2048
    input_image_feature = tf.placeholder(tf.float32, [1, image_feat_size])
    input_data = tf.placeholder(tf.int64, [1, None])
    keep_prob = tf.placeholder(tf.float32)
    output_targets = tf.placeholder(tf.int64, [1, None])
    logits, last_state, _, _, _, _, _, _, _ = neural_network(input_image_feature, input_data, keep_prob)
    targets = tf.reshape(output_targets, [-1])
    # The fourth positional parameter of sequence_loss_by_example is
    # ``average_across_timesteps``, not a vocabulary size; spell it out.
    loss = seq2seq.sequence_loss_by_example(
        [logits], [targets], [tf.ones_like(targets, dtype=tf.float32)],
        average_across_timesteps=True)
    cost = tf.reduce_mean(loss)
    learning_rate = tf.Variable(0.0, trainable=False)
    # Build the learning-rate update once and feed the value in; creating a
    # fresh tf.assign op every epoch would keep growing the graph.
    new_lr = tf.placeholder(tf.float32, shape=[])
    lr_update = tf.assign(learning_rate, new_lr)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.apply_gradients(zip(grads, tvars))
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        # Stays None until the first training step has run.
        train_loss = None
        for epoch in range(500):
            # Staircase decay: the rate drops by 10% every 100 epochs. Floor
            # division keeps the exponent an integer on Python 2 and 3.
            sess.run(lr_update,
                     feed_dict={new_lr: 0.001 * (0.9 ** (epoch // 100))})
            for k in range(len(total_train)):
                train_data = total_train[k]
                regions = total_regions[k]
                asfmap = total_asfmap[k]
                for i in range(train_data.shape[0]):
                    train, test = sen2ix(regions[train_data[i, 4].astype('int32')]['phrase'], wordtoix)
                    train_loss, _, _ = sess.run([cost, last_state, train_op],
                                                feed_dict={input_data: train,
                                                           input_image_feature: asfmap[i].reshape(1, image_feat_size),
                                                           output_targets: test, keep_prob: keep})

                if (epoch + 1) % 50 == 0:
                    # %-formatting prints the same text on Python 2 and 3.
                    print("%s %s" % (epoch, train_loss))
        saver.save(sess, 'RNN_model/test.module')
    print("train end!")
示例#8
0
  def __init__(self, is_training, config, debug=False):
    """Build a CudnnLSTM language-model graph and, when training, its train op.

    Args:
      is_training: when false, graph construction stops after the cost (and
        merged summaries); no optimizer ops are created.
      config: configuration object. Reads ``batch_size``, ``num_steps``,
        ``hidden_size``, ``vocab_size``, ``num_layers``, ``keep_prob``,
        ``init_scale`` and ``max_grad_norm``.
      debug: when true, records summaries through ``variable_summaries`` and
        ``tf.summary.scalar``.

    Also reads ``FLAGS.cost_function``, ``FLAGS.reg_term`` and
    ``FLAGS.optimizer``.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.size = size = config.hidden_size
    vocab_size = config.vocab_size
    self.num_layers = config.num_layers

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    embedding = tf.get_variable("embedding", [vocab_size, size], dtype=data_type(is_lstm_layer=False))
    inputs = tf.nn.embedding_lookup(embedding, self._input_data, name="inputs_to_rnn")
    if debug:
        variable_summaries(inputs, "inputs_to_rnn")

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # NOTE(review): ``dropout`` receives a keep probability here; CudnnLSTM
    # appears to document it as a drop probability -- confirm before relying
    # on it.
    rnn = CudnnLSTM(config.num_layers, size, size, input_mode='linear_input', direction='unidirectional',
                    dropout=config.keep_prob, seed=0, seed2=0)
    params_size_t = rnn.params_size()
    self._initial_input_h = tf.placeholder(data_type(is_lstm_layer=True), shape=[config.num_layers, batch_size, size])
    self._initial_input_c = tf.placeholder(data_type(is_lstm_layer=True), shape=[config.num_layers, batch_size, size])
    # The flat parameter buffer has a dynamic size, hence validate_shape=False.
    self.params = tf.Variable(tf.random_uniform([params_size_t], minval=-config.init_scale, maxval=config.init_scale, dtype=data_type(is_lstm_layer=True)), validate_shape=False)
    self.params_size_t = params_size_t

    # The inputs are transposed before the call and the outputs transposed
    # back afterwards.
    # NOTE(review): ``self.input_h`` / ``self.input_c`` are not assigned in
    # this method; presumably properties exposing the placeholders above --
    # confirm against the class.
    outputs, output_h, output_c = rnn(is_training=is_training, input_data=tf.transpose(tf.cast(inputs, dtype=data_type(is_lstm_layer=True)), [1, 0, 2]), input_h=self.input_h,
                                      input_c=self.input_c, params=self.params)

    self._output_h = output_h
    self._output_c = output_c

    output = tf.reshape(tf.concat(values=tf.transpose(outputs, [1, 0, 2]), axis=1), [-1, size])

    if debug:
        variable_summaries(output, 'multiRNN_output')

    softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type(is_lstm_layer=False))
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type(is_lstm_layer=False))
    # Cast the LSTM output to the softmax dtype only when the two differ.
    logits = tf.matmul(output if output.dtype == data_type(is_lstm_layer=False) else tf.cast(output, data_type(is_lstm_layer=False)), softmax_w) + softmax_b

    if debug:
        variable_summaries(logits, 'logits')

    loss = sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type(is_lstm_layer=False))])

    self._cost = cost = tf.reduce_sum(loss) / batch_size
    if FLAGS.cost_function == 'avg':
        cost_to_optimize = tf.reduce_mean(loss)
    else:
        cost_to_optimize = cost

    # Add an L2 penalty over every trainable variable.
    tvars = tf.trainable_variables()
    for v in tvars:
        cost_to_optimize += FLAGS.reg_term * tf.cast(tf.nn.l2_loss(v), dtype=data_type(False)) / (batch_size*config.num_steps)
    self._cost_to_optimize = cost_to_optimize

    if debug:
        tf.summary.scalar('cost no regularization', cost)
        tf.summary.scalar('cost_to_optimize', cost_to_optimize)

    if not is_training:
        self.merged = tf.summary.merge_all()
        return

    self._lr = tf.Variable(0.0, trainable=False, dtype=data_type(is_lstm_layer=False))

    print("**************************")
    print("Trainable Variables")
    print("**************************")
    for var in tvars:
        print('Variable name: %s. With dtype: %s and shape: %s' % (var.name, var.dtype, var.get_shape()))

    print("**************************")
    print("Gradients Variables")
    print("**************************")
    _grads = tf.gradients(cost_to_optimize, tvars)
    # Keep each gradient with its variable while grouping by dtype, so the
    # pairing never depends on dict iteration order. ``base_dtype`` strips
    # any ``_ref`` flavour so equal dtypes land in the same group.
    type2pairs = dict()
    for g, var in zip(_grads, tvars):
        print('Gradient name: %s. With dtype: %s' % (g.name, g.dtype))
        type2pairs.setdefault(g.dtype.base_dtype, []).append((g, var))

    # Global-norm clipping is done separately for each dtype group.
    clipped_pairs = []
    for dtype in type2pairs:
        dtype_grads = [g for g, _ in type2pairs[dtype]]
        dtype_vars = [var for _, var in type2pairs[dtype]]
        cgrads, _ = tf.clip_by_global_norm(dtype_grads, config.max_grad_norm)
        clipped_pairs.extend(zip(cgrads, dtype_vars))

    if debug:
        for (clipped_gradient, variable) in clipped_pairs:
            variable_summaries(clipped_gradient, "clipped_dcost/d"+variable.name)
            variable_summaries(variable, variable.name)

    if FLAGS.optimizer == 'MomentumOptimizer':
        optimizer = tf.train.MomentumOptimizer(learning_rate=self._lr, momentum=0.9)
    elif FLAGS.optimizer == 'AdamOptimizer':
        # NOTE(review): Adam is built with its default learning rate and
        # ignores ``self._lr`` -- confirm this is intended.
        optimizer = tf.train.AdamOptimizer()
    elif FLAGS.optimizer == 'RMSPropOptimizer':
        optimizer = tf.train.RMSPropOptimizer(learning_rate=self._lr)
    elif FLAGS.optimizer == 'AdagradOptimizer':
        optimizer = tf.train.AdagradOptimizer(learning_rate=self._lr)
    else:
        optimizer = tf.train.GradientDescentOptimizer(self._lr)

    self._train_op = optimizer.apply_gradients(clipped_pairs)

    self._new_lr = tf.placeholder(dtype=data_type(False), shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.merged = tf.summary.merge_all()
示例#9
0
    def __init__(self, args, infer=False):
        """Build the RNN / grid-RNN language-model graph.

        Args:
            args: hyper-parameter object. Reads ``model`` ('rnn', 'gru',
                'lstm', 'gridlstm' or 'gridgru'), ``rnn_size``,
                ``num_layers``, ``batch_size``, ``seq_length``,
                ``vocab_size`` and ``grad_clip``.
            infer: when true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 on the caller's object and every decoder
                step is fed the embedding of the previous step's argmax.

        Raises:
            Exception: if ``args.model`` names an unsupported cell type.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        additional_cell_args = {}
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        elif args.model == 'gridlstm':
            cell_fn = grid_rnn.Grid2LSTMCell
            additional_cell_args.update({
                'use_peepholes': True,
                'forget_bias': 1.0
            })
        elif args.model == 'gridgru':
            cell_fn = grid_rnn.Grid2GRUCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Every layer gets its own cell object; repeating a single instance
        # is rejected (or silently weight-shared) by newer TensorFlow 1.x.
        layers = [cell_fn(args.rnn_size, **additional_cell_args)
                  for _ in range(args.num_layers)]
        self.cell = cell = rnn_cell.MultiRNNCell(layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                # One tensor per time step, each with the step axis squeezed out.
                inputs = tf.split(axis=1,
                                  num_or_size_splits=args.seq_length,
                                  value=tf.nn.embedding_lookup(
                                      embedding, self.input_data))
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            # Sampling mode: project the previous output to vocabulary
            # logits and feed back the embedding of the most likely symbol.
            prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope='rnnlm')
        output = tf.reshape(tf.concat(axis=1, values=outputs),
                            [-1, args.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
        self.probs = tf.nn.softmax(self.logits)
        # The fourth positional parameter of sequence_loss_by_example is
        # ``average_across_timesteps``, not a vocabulary size; spell it out.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])],
            average_across_timesteps=True)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
示例#10
0
    def __init__(self, is_training, batch_size, num_steps):
        """Build a stacked-LSTM language-model graph.

        Args:
            is_training: when true, dropout is applied and the gradient
                descent ``train_op`` is created; otherwise construction stops
                after the cost and final state.
            batch_size: number of sequences per batch.
            num_steps: number of unrolled time steps.

        Relies on the module-level constants ``HIDDEN_SIZE``, ``KEEP_PRO``,
        ``NUM_LAYERS``, ``VOCAB_SIZE``, ``MAX_GRAD_NORM`` and
        ``LEARNING_RATE``.
        """
        self.batch_size = batch_size
        self.num_steps = num_steps

        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        def make_layer():
            # One LSTM layer, wrapped with output dropout while training.
            # TODO: why is the probability applied as output_keep_prob here?
            layer = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
            if is_training:
                layer = tf.nn.rnn_cell.DropoutWrapper(
                    layer, output_keep_prob=KEEP_PRO)
            return layer

        # Every layer gets its own cell object rather than one instance
        # repeated NUM_LAYERS times.
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [make_layer() for _ in range(NUM_LAYERS)])

        # The recurrent state must be floating point: an int32 zero state
        # cannot be combined with the float32 embeddings inside the cell.
        # TODO: why is the initial state sized by batch_size?
        self.initial_state = cell.zero_state(batch_size, tf.float32)

        # TODO: word embedding -- why is the dimensionality chosen this way?
        embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)
        if is_training:
            inputs = tf.nn.dropout(inputs, KEEP_PRO)

        # Collects the output of every time step for the loss computation.
        outputs = []
        # Tracks the recurrent state across steps, starting from the
        # model's initial state.
        state = self.initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                cell_output, state = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        # Flatten the per-step outputs to rows of HIDDEN_SIZE for the
        # projection below.
        # NOTE(review): this is the pre-1.0 ``tf.concat(concat_dim, values)``
        # argument order; TensorFlow >= 1.0 expects ``tf.concat(values, axis)``.
        output = tf.reshape(tf.concat(1, outputs), [-1, HIDDEN_SIZE])

        # Fully connected projection to the vocabulary; no explicit
        # initializer is given.
        weights = tf.get_variable("weights", [HIDDEN_SIZE, VOCAB_SIZE],
                                  tf.float32)
        biases = tf.get_variable("biases", [VOCAB_SIZE])
        logits = tf.matmul(output, weights) + biases

        # Cross-entropy loss per position, with every position weighted 1.
        loss = sequence_loss_by_example(
            [logits],
            [tf.reshape(self.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])

        # Loss summed over all positions and divided by the batch size.
        self.cost = tf.reduce_sum(loss) / batch_size
        self.final_state = state  # state after the last unrolled step

        # Nothing more to build when not training.
        if not is_training:
            return

        training_variables = tf.trainable_variables()
        # Clip by the global norm across all gradients.
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, training_variables), MAX_GRAD_NORM)

        # Plain gradient descent on the clipped gradients.
        optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
        self.train_op = optimizer.apply_gradients(
            zip(grads, training_variables))
示例#11
0
    def __init__(self, is_training, config, filename):
        """Build the keyword-attention LSTM graph fed from a TFRecord file.

        Args:
            is_training: when true, dropout is applied and the Adam train op
                is created; otherwise ``self._sample`` (argmax of the output
                distribution) is created and construction stops.
            config: configuration object. Reads ``batch_size``,
                ``num_steps``, ``hidden_size``, ``vocab_size``,
                ``num_keywords``, ``keep_prob``, ``num_layers``,
                ``word_embedding_size`` and ``max_grad_norm``.
            filename: path of the TFRecord file holding the ``input_data``,
                ``target``, ``mask`` and ``key_words`` features.

        Relies on the module-level ``word_vec`` to initialise the embedding.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.size = size = config.hidden_size
        vocab_size = config.vocab_size

        filename_queue = tf.train.string_input_producer([filename],
                                                        num_epochs=None)
        # Unlike the TFRecordWriter, the TFRecordReader is symbolic
        reader = tf.TFRecordReader()
        # One can read a single serialized example from a filename
        # serialized_example is a Tensor of type string.
        _, serialized_example = reader.read(filename_queue)
        # The serialized example is converted back to actual values.
        # One needs to describe the format of the objects to be returned
        features = tf.parse_single_example(
            serialized_example,
            features={
                # We know the length of both fields. If not the
                # tf.VarLenFeature could be used
                'input_data':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'target':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'mask':
                tf.FixedLenFeature([batch_size * num_steps], tf.float32),
                'key_words':
                tf.FixedLenFeature([batch_size * config.num_keywords],
                                   tf.int64)
            })

        self._input_data = tf.cast(features['input_data'], tf.int32)
        self._targets = tf.cast(features['target'], tf.int32)
        self._input_word = tf.cast(features['key_words'], tf.int32)
        self._init_output = tf.placeholder(tf.float32, [batch_size, size])
        self._mask = tf.cast(features['mask'], tf.float32)

        # Each flat record is reshaped to one row per batch element.
        self._input_data = tf.reshape(self._input_data, [batch_size, -1])
        self._targets = tf.reshape(self._targets, [batch_size, -1])
        self._input_word = tf.reshape(self._input_word, [batch_size, -1])
        self._mask = tf.reshape(self._mask, [batch_size, -1])

        def make_layer():
            # One LSTM layer, wrapped with output dropout while training.
            layer = tf.nn.rnn_cell.LSTMCell(size,
                                            forget_bias=0.0,
                                            state_is_tuple=False)
            if is_training and config.keep_prob < 1:
                layer = tf.nn.rnn_cell.DropoutWrapper(
                    layer, output_keep_prob=config.keep_prob)
            return layer

        # Every layer gets its own cell object; repeating a single instance
        # is rejected (or silently weight-shared) by newer TensorFlow 1.x.
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [make_layer() for _ in range(config.num_layers)],
            state_is_tuple=False)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                'word_embedding', [vocab_size, config.word_embedding_size],
                trainable=True,
                initializer=tf.constant_initializer(word_vec))
            # Tensor of shape (batch_size, num_steps, word_embedding_size).
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)
            keyword_inputs = tf.nn.embedding_lookup(embedding,
                                                    self._input_word)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # ``gate`` starts at 1 per keyword and is reduced each step by the
        # attention given to that keyword; ``atten_sum`` accumulates the
        # masked attention over time.
        gate = tf.ones([batch_size, config.num_keywords])

        atten_sum = tf.zeros([batch_size, config.num_keywords])

        with tf.variable_scope("coverage"):
            u_f = tf.get_variable("u_f", [
                config.num_keywords * config.word_embedding_size,
                config.num_keywords
            ])
            res1 = tf.sigmoid(
                tf.matmul(tf.reshape(keyword_inputs, [batch_size, -1]), u_f))
            # Per-keyword attention budget: the mask row sum (one value per
            # batch row) times a sigmoid score.
            phi_res = tf.reduce_sum(self._mask, 1, keep_dims=True) * res1

            self.output1 = phi_res

        outputs = []
        output_state = self._init_output
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                # Score every keyword against the previous cell output.
                vs = []
                for s2 in range(config.num_keywords):
                    with tf.variable_scope("RNN_attention"):
                        if time_step > 0 or s2 > 0:
                            tf.get_variable_scope().reuse_variables()
                        u = tf.get_variable("u", [size, 1])
                        w1 = tf.get_variable("w1", [size, size])
                        w2 = tf.get_variable(
                            "w2", [config.word_embedding_size, size])
                        b = tf.get_variable("b1", [size])

                        vi = tf.matmul(
                            tf.tanh(
                                tf.add(
                                    tf.add(
                                        tf.matmul(output_state, w1),
                                        tf.matmul(keyword_inputs[:, s2, :],
                                                  w2)), b)), u)
                        vs.append(vi * gate[:, s2:s2 + 1])

                self.attention_vs = tf.concat(vs, axis=1)
                prob_p = tf.nn.softmax(self.attention_vs)

                gate = gate - (prob_p / phi_res)

                atten_sum += prob_p * self._mask[:, time_step:time_step + 1]

                # Attention-weighted sum of the keyword embeddings.
                mt = tf.add_n([
                    prob_p[:, i:i + 1] * keyword_inputs[:, i, :]
                    for i in range(config.num_keywords)
                ])

                with tf.variable_scope("RNN_sentence"):
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    (cell_output, state) = cell(
                        tf.concat([inputs[:, time_step, :], mt], axis=1),
                        state)
                    outputs.append(cell_output)
                    output_state = cell_output

            self._end_output = cell_output

        self.output2 = atten_sum
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])

        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Prefer ``tf.nn.seq2seq`` when this TensorFlow build has it and fall
        # back to the imported function otherwise. Only the attribute lookup
        # is guarded, so real graph-construction errors are not swallowed.
        try:
            loss_fn = tf.nn.seq2seq.sequence_loss_by_example
        except AttributeError:
            loss_fn = sequence_loss_by_example
        loss = loss_fn([logits], [tf.reshape(self._targets, [-1])],
                       [tf.reshape(self._mask, [-1])],
                       average_across_timesteps=False)

        # cost1: masked sequence loss; cost2: squared gap between each
        # keyword's attention budget and the attention it actually received.
        self.cost1 = tf.reduce_sum(loss)
        self.cost2 = tf.reduce_sum((phi_res - atten_sum)**2)

        self._cost = cost = (self.cost1 + 0.1 * self.cost2) / batch_size
        self._final_state = state
        self._prob = tf.nn.softmax(logits)

        if not is_training:
            self._sample = tf.argmax(self._prob, 1)
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # Use the variable assigned above instead of depending on a separate
        # ``lr`` attribute existing on the class.
        optimizer = tf.train.AdamOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
示例#12
0
    def __init__(self,
                 embedding,
                 initial_state,
                 attention_states,
                 size,
                 num_layers,
                 max_length,
                 num_samples=512,
                 feed_previous=False,
                 update_embedding_for_previous=True,
                 dtype=dtypes.float32,
                 scope=None,
                 initial_state_attention=False,
                 **kwargs):
        """Build an embedding attention decoder with its loss and decoding ops.

        Args:
            embedding: embedding matrix; its first (static) dimension is used
                as the number of output symbols.
            initial_state: initial decoder state, forwarded to
                `attention_decoder`.
            attention_states: states to attend over, forwarded to
                `attention_decoder`.
            size: number of units of every GRU cell.
            num_layers: number of stacked GRU layers.
            max_length: maximum sequence length, not counting _GO and _EOS.
            num_samples: number of classes sampled by the sampled-softmax
                loss. The sampled loss (and an explicit output projection) is
                only used when 0 < num_samples < number of symbols; otherwise
                the cell is wrapped in an `OutputProjectionWrapper`.
            feed_previous: when true, a loop function built by
                `self._extract_argmax_and_embed` is handed to the decoder.
            update_embedding_for_previous: forwarded to the loop-function
                factory.
            dtype: dtype the projection weights/biases are converted to for
                the shape compatibility checks.
            scope: variable scope name; defaults to
                "embedding_attention_decoder".
            initial_state_attention: forwarded to `attention_decoder`.
            **kwargs: optional pre-built `lengths`, `inputs` and `weights`;
                placeholders are created for whichever is missing.
        """
        # account for _GO and _EOS
        self.max_length = max_length + 2

        # Create placeholders only for the feeds the caller did not supply,
        # so that no orphan placeholders end up in the graph.
        if 'lengths' in kwargs:
            self.lengths = kwargs['lengths']
        else:
            self.lengths = tf.placeholder(
                tf.int32, shape=[None], name="decoder_lengths")
        if 'inputs' in kwargs:
            self.inputs = kwargs['inputs']
        else:
            self.inputs = [
                tf.placeholder(tf.int32, shape=[None],
                               name="decoder_input{0}".format(i))
                for i in range(self.max_length)
            ]
        if 'weights' in kwargs:
            self.weights = kwargs['weights']
        else:
            self.weights = [
                tf.placeholder(tf.float32, shape=[None],
                               name="decoder_weight{0}".format(i))
                for i in range(self.max_length)
            ]

        # Targets are the inputs shifted left by one step; the last step gets
        # an all-zero target.
        self.targets = list(self.inputs[1:])
        self.targets.append(tf.zeros_like(self.inputs[-1]))

        num_symbols = embedding.get_shape()[0].value
        output_projection = None
        loss_function = None

        self.num_layers = num_layers
        if num_layers > 1:
            # One cell object per layer: repeating a single instance
            # ([cell] * n) makes the layers share weights, or raises,
            # depending on the TensorFlow version.
            self.cell = tf.contrib.rnn.MultiRNNCell(
                [GRUCell(size) for _ in range(num_layers)])
        else:
            self.cell = GRUCell(size)

        self.feed_previous = feed_previous

        if 0 < num_samples < num_symbols:
            w = tf.get_variable('proj_w', [self.cell.output_size, num_symbols])
            w_t = tf.transpose(w)
            b = tf.get_variable('proj_b', [num_symbols])
            output_projection = (w, b)

            def sampled_loss(labels, inputs):
                """Sampled-softmax loss computed in float32."""
                labels = tf.reshape(labels, [-1, 1])
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(inputs, tf.float32)
                return tf.nn.sampled_softmax_loss(
                    weights=local_w_t,
                    biases=local_b,
                    labels=labels,
                    inputs=local_inputs,
                    num_sampled=num_samples,
                    num_classes=num_symbols)

            loss_function = sampled_loss

        if output_projection is None:
            # No explicit projection: let the cell emit one logit per symbol.
            self.cell = OutputProjectionWrapper(self.cell, num_symbols)
            output_size = num_symbols
        else:
            output_size = self.cell.output_size
            # Validate the projection shapes against the cell output size.
            proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with(
                [self.cell.output_size, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        with variable_scope.variable_scope(scope or "embedding_attention_decoder"):
            loop_fn_factory = self._extract_argmax_and_embed
            if feed_previous:
                loop_function = loop_fn_factory(
                    embedding, output_projection, update_embedding_for_previous)
            else:
                loop_function = None

            emb_inp = [embedding_ops.embedding_lookup(embedding, i)
                       for i in self.inputs]
            self.outputs, self.state = attention_decoder(
                emb_inp,
                self.lengths,
                initial_state,
                attention_states,
                self.cell,
                output_size=output_size,
                loop_function=loop_function,
                initial_state_attention=initial_state_attention)

        # loss for each instance in batch
        self.instance_loss = sequence_loss_by_example(
            self.outputs, self.targets, self.weights,
            softmax_loss_function=loss_function)

        # aggregated average loss per instance for batch
        batch_count = math_ops.cast(array_ops.shape(self.targets[0])[0],
                                    self.instance_loss.dtype)
        self.loss = tf.reduce_sum(self.instance_loss) / batch_count

        if output_projection is not None:
            self.projected_output = [
                tf.matmul(o, output_projection[0]) + output_projection[1]
                for o in self.outputs
            ]
            step_logits = tf.stack(self.projected_output)
        else:
            step_logits = tf.stack(self.outputs)

        # Greedy decoding: best symbol and its probability at every step.
        self.decoded_outputs = tf.unstack(tf.argmax(step_logits, 2))
        self.decoded_output_prob = tf.reduce_max(tf.nn.softmax(step_logits), 2)

        self.decoded_batch = tf.transpose(tf.stack(self.decoded_outputs))
        # Number of decoded symbols per row whose id has a positive sign.
        self.decoded_lenghts = tf.reduce_sum(tf.sign(self.decoded_batch), 1)
        # Correctly spelled alias; the misspelled name is kept for callers.
        self.decoded_lengths = self.decoded_lenghts
        self.decoded_batch_probs = tf.transpose(tf.stack(self.decoded_output_prob))
示例#13
0
    def __init__(self, args, infer=False):
        """Build the RNN language-model graph, optionally with attention.

        Args:
            args: hyper-parameter object. The attributes read here are
                `rnncell`, `rnn_size`, `num_layers`, `batch_size`,
                `seq_length`, `vocab_size`, `embedding_size`, `attention`,
                `grad_clip` and `log_dir`.
            infer: when true, `args.batch_size` and `args.seq_length` are
                overwritten with 1 (the caller's object is mutated) and the
                decoder feeds its own argmax prediction back as next input.

        Raises:
            Exception: if `args.rnncell` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            args.batch_size = 1
            args.seq_length = 1

        if args.rnncell == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.rnncell == 'gru':
            cell_fn = GRUCell
        elif args.rnncell == 'lstm':
            cell_fn = core_rnn_cell_impl.BasicLSTMCell
        else:
            raise Exception("rnncell type not supported: {}".format(
                args.rnncell))

        # One cell object per layer: repeating a single instance
        # ([cell] * n) makes the layers share weights, or raises, depending
        # on the TensorFlow version.
        self.cell = MultiRNNCell(
            [cell_fn(args.rnn_size) for _ in range(args.num_layers)])
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)
        # Fixed attention memory dimensions fed through a placeholder.
        self.attn_length = 5
        self.attn_size = 32
        self.attention_states = tf.placeholder(
            tf.float32, [args.batch_size, self.attn_length, self.attn_size])
        with tf.variable_scope('rnnlm'):
            softmax_w = build_weight([args.rnn_size, args.vocab_size],
                                     name='soft_w')
            softmax_b = build_weight([args.vocab_size], name='soft_b')
            self.word_embedding = build_weight(
                [args.vocab_size, args.embedding_size], name='word_embedding')
            # Split the embedded batch into one 2-D tensor per time step.
            inputs_list = tf.split(
                tf.nn.embedding_lookup(self.word_embedding, self.input_data),
                args.seq_length, 1)
            inputs_list = [tf.squeeze(input_, [1]) for input_ in inputs_list]

        def loop(prev, _):
            """Embed the argmax symbol of the previous output (inference)."""
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.word_embedding, prev_symbol)

        loop_function = loop if infer else None
        if not args.attention:
            outputs, last_state = seq2seq.rnn_decoder(
                inputs_list,
                self.initial_state,
                self.cell,
                loop_function=loop_function,
                scope='rnnlm')
        else:
            outputs, last_state = attention_decoder(
                inputs_list,
                self.initial_state,
                self.attention_states,
                self.cell,
                loop_function=loop_function,
                scope='rnnlm')

        self.final_state = last_state
        output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        # NOTE(review): the 4th positional argument is args.vocab_size; which
        # parameter it binds to depends on the `seq2seq` module in use
        # (not visible here) -- confirm it is intended.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        # average loss for each word of each timestep
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        self.var_trainable_op = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, self.var_trainable_op), args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(
            zip(grads, self.var_trainable_op))
        self.initial_op = tf.global_variables_initializer()
        # Log file name: log_dir + timestamp with spaces and slashes removed.
        timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        self.logfile = args.log_dir + (timestamp + '.txt').replace(
            ' ', '').replace('/', '')
        self.var_op = tf.global_variables()
        self.saver = tf.train.Saver(self.var_op,
                                    max_to_keep=4,
                                    keep_checkpoint_every_n_hours=1)
示例#14
0
    def __init__(self, is_training, config, input_):
        """Build the stacked-LSTM language-model graph.

        Args:
            is_training: when true, dropout is applied (if
                `config.keep_prob < 1`) and the training ops are built.
            config: hyper-parameter object; reads `hidden_size`,
                `vocab_size`, `keep_prob`, `num_layers` and `max_grad_norm`.
            input_: input object; reads `batch_size`, `num_steps`,
                `input_data` and `targets`.
        """
        self._input = input_

        batch_size = input_.batch_size
        # The RNN is unrolled over the time dimension, so the step count must
        # come from num_steps, not from the batch size.
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        def lstm_cell():
            return tf.nn.rnn_cell.BasicLSTMCell(size,
                                                forget_bias=0.0,
                                                state_is_tuple=True)

        attn_cell = lstm_cell
        if is_training and config.keep_prob < 1:

            def attn_cell():
                # Output dropout on each layer; MultiRNNCell cannot be used
                # here (it takes a list of cells and no keep-prob argument).
                return tf.nn.rnn_cell.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)

        # A fresh cell is created for every layer.
        cell = tf.nn.rnn_cell.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)
        self._initial_state = cell.zero_state(batch_size, tf.float32)

        embedding = tf.get_variable("embedding", [vocab_size, size],
                                    dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Manually unrolled RNN; variables are reused after the first step.
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.concat(outputs, 1), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                    dtype=tf.float32)
        softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                    dtype=tf.float32)
        logits = tf.matmul(output, softmax_w) + softmax_b
        # Every position is weighted equally (weights are all ones).
        loss = sequence_loss_by_example(
            [logits], [tf.reshape(input_.targets, [-1])],
            [tf.ones([batch_size * num_steps], dtype=tf.float32)])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        # The learning rate is a non-trainable variable updated through
        # `_lr_update` by feeding `_new_lr`.
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())
        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)
示例#15
0
    def __init__(self, mode, is_training, filename):
        """Build the keyword-attention language-model graph with coverage.

        Examples are read from a TFRecord file. At every time step an
        attention distribution over the keyword embeddings is computed from
        the previous cell output, attenuated by a per-keyword gate; the
        attention-weighted keyword vector is concatenated with the word
        embedding and fed to a stacked LSTM. The cost is the masked sequence
        loss plus 0.1 times a squared coverage penalty, divided by the number
        of unmasked tokens.

        Hyper-parameters come from a `config` object resolved from the
        enclosing scope (it is not a parameter of this method).

        Args:
            mode: model variant; only "v3" is implemented.
            is_training: enables dropout and builds the training op.
            filename: path of the TFRecord file to read from.

        Raises:
            ValueError: if `mode` is not "v3". Other variants never build the
                recurrent loop, so the graph could not be completed anyway.
        """
        if mode != "v3":
            raise ValueError(
                "Unsupported mode %r; only 'v3' is implemented" % (mode,))

        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.size = size = config.hidden_size
        vocab_size = config.vocab_size

        filename_queue = tf.train.string_input_producer([filename],
                                                        num_epochs=None)

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        # Each record holds one whole, already flattened batch.
        features = tf.parse_single_example(
            serialized_example,
            features={
                'input_data':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'target':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'mask':
                tf.FixedLenFeature([batch_size * num_steps], tf.float32),
                'key_words':
                tf.FixedLenFeature([batch_size * config.num_keywords],
                                   tf.int64)
            })

        self._input_data = tf.cast(features['input_data'], tf.int32)
        self._targets = tf.cast(features['target'], tf.int32)
        self._mask = tf.cast(features['mask'], tf.float32)
        self._key_words = tf.cast(features['key_words'], tf.int32)
        self._init_output = tf.placeholder(tf.float32, [batch_size, size])

        # Restore the batch dimension of the flattened features.
        self._input_data = tf.reshape(self._input_data, [batch_size, -1])
        self._targets = tf.reshape(self._targets, [batch_size, -1])
        self._mask = tf.reshape(self._mask, [batch_size, -1])
        self._key_words = tf.reshape(self._key_words, [batch_size, -1])

        def single_cell_fn(unit_type,
                           num_units,
                           dropout,
                           mode,
                           forget_bias=1.0):
            """Create an instance of a single RNN cell."""
            # Here `mode` is the training flag; dropout is only kept when it
            # is exactly True.
            dropout = dropout if mode is True else 0.0
            if unit_type == "lstm":
                c = rnn_cell.LSTMCell(num_units,
                                      forget_bias=forget_bias,
                                      state_is_tuple=False)
            elif unit_type == "gru":
                c = rnn_cell.GRUCell(num_units)
            else:
                raise ValueError("Unknown unit type %s!" % unit_type)
            if dropout > 0.0:
                c = rnn_cell.DropoutWrapper(cell=c,
                                            input_keep_prob=(1.0 - dropout))
            return c

        # A separate cell object is built for every layer.
        cell_list = []
        for i in range(config.num_layers):
            single_cell = single_cell_fn(unit_type="lstm",
                                         num_units=size,
                                         dropout=1 - config.keep_prob,
                                         mode=is_training)
            cell_list.append(single_cell)

        cell = rnn_cell.MultiRNNCell(cell_list, state_is_tuple=False)
        self._initial_state = cell.zero_state(batch_size, tf.float32)

        embedding_keyword = tf.get_variable(
            'keyword_embedding',
            [config.movie + config.score, config.word_embedding_size],
            trainable=True,
            initializer=tf.random_uniform_initializer(-config.init_scale,
                                                      config.init_scale))
        embedding = tf.get_variable('word_embedding',
                                    [vocab_size, config.word_embedding_size],
                                    trainable=True,
                                    initializer=tf.random_uniform_initializer(
                                        -config.init_scale, config.init_scale))

        inputs = tf.nn.embedding_lookup(embedding, self._input_data)
        keyword_inputs = tf.nn.embedding_lookup(embedding_keyword,
                                                self._key_words)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        outputs = []
        # Per-keyword gate (starts at 1) and accumulated attention mass.
        gate = tf.ones([batch_size, config.num_keywords])
        atten_sum = tf.zeros([batch_size, config.num_keywords])

        with tf.variable_scope("coverage"):
            # u_f is a trainable parameter multiplied with the flattened
            # keyword (topic) embeddings; the sigmoid squashes the result
            # into (0, 1) so every control signal gets an initial share.
            # sen_len is the number of valid (unmasked) tokens per sample.
            # The importance of each control condition is a number in (0, 1)
            # (a softmax would arguably be more appropriate here); its
            # initial total budget is that importance times the number of
            # valid tokens of the sentence.
            u_f = tf.get_variable("u_f", [
                config.num_keywords * config.word_embedding_size,
                config.num_keywords
            ])
            res1 = tf.sigmoid(
                tf.matmul(tf.reshape(keyword_inputs, [batch_size, -1]),
                          u_f))  # todo
            sen_len = tf.reduce_sum(self._mask, -1, keepdims=True)
            phi_res = sen_len * res1
            self.output1 = phi_res

        output_state = self._init_output
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                # Gate-attenuated scores between the previous cell output
                # and every keyword, for the current time step.
                vs = []
                for kw_i in range(config.num_keywords):
                    with tf.variable_scope("RNN_attention"):
                        if time_step > 0 or kw_i > 0:
                            tf.get_variable_scope().reuse_variables()
                        u = tf.get_variable("u", [size, 1])
                        w1 = tf.get_variable("w1", [size, size])
                        w2 = tf.get_variable(
                            "w2", [config.word_embedding_size, size])
                        b = tf.get_variable("b1", [size])
                        # Linear transform of the previous cell output.
                        hidden_proj = tf.matmul(output_state, w1)
                        # Embedding of keyword kw_i.
                        keyword_vec = keyword_inputs[:, kw_i, :]
                        # Linear transform of the keyword embedding.
                        keyword_proj = tf.matmul(keyword_vec, w2)
                        # Sum of both projections, plus bias, then tanh.
                        summed = tf.add(hidden_proj, keyword_proj)
                        biased = tf.add(summed, b)
                        activated = tf.tanh(biased)
                        # Project to a single score per sample.
                        vi = tf.matmul(activated, u)
                        # Gate value of keyword kw_i. It starts at 1 (no
                        # attenuation) and shrinks as attention is spent.
                        keyword_gate = gate[:, kw_i:kw_i + 1]
                        vs.append(vi * keyword_gate)

                self.attention_vs = tf.concat(vs, axis=1)
                # Attention of the previous cell output over the keywords.
                prob_p = tf.nn.softmax(self.attention_vs)
                gate = gate - (prob_p / phi_res)
                # (batch_size, num_keywords) * (batch_size, 1): when a
                # sample's mask is 0 at this step, none of its attention is
                # accumulated.
                step_mask = self._mask[:, time_step:time_step + 1]
                atten_sum += prob_p * step_mask
                # Attention-weighted sum of all keyword embeddings.
                mt = tf.add_n([
                    prob_p[:, i:i + 1] * keyword_inputs[:, i, :]
                    for i in range(config.num_keywords)
                ])

                with tf.variable_scope("RNN_sentence"):
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    step_input = inputs[:, time_step, :]
                    # mt was derived from the previous step's cell output
                    # and the keyword information.
                    cell_input = tf.concat([step_input, mt], axis=1)
                    # Project so the cell input width equals the hidden size.
                    cell_input = tf.layers.dense(inputs=cell_input, units=size)
                    (cell_output, state) = cell(cell_input, state)
                    outputs.append(cell_output)
                    # Becomes the attention query of the next time step.
                    output_state = cell_output

            self._end_output = cell_output

        output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Masked per-position loss over the flattened batch.
        loss = sequence_loss_by_example([logits],
                                        [tf.reshape(self._targets, [-1])],
                                        [tf.reshape(self._mask, [-1])])
        self.cost1 = tf.reduce_sum(loss)
        # Coverage penalty: accumulated attention should match the budget.
        self.cost2 = tf.reduce_sum((phi_res - atten_sum)**2)
        mask_sum = tf.reduce_sum(self._mask)
        self._cost = cost = (self.cost1 + 0.1 * self.cost2) / mask_sum

        self._final_state = state
        self._prob = tf.nn.softmax(logits)

        if not is_training:
            # Greedy prediction; reuses the softmax built above.
            self._sample = tf.argmax(self._prob, 1)
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # NOTE(review): `self.lr` is not defined in this method; presumably a
        # property exposing `self._lr` -- confirm on the class.
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
示例#16
0
    def __init__(self, is_training, word_embedding, config, filename):
        """Build the keyword-conditioned (SC-LSTM) language-model graph.

        Examples are read from a TFRecord file. At every time step the
        keyword vector `sc_vec` is rescaled by a sigmoid gate computed from
        the current word embedding and the previous cell outputs, then passed
        to the cell together with the word embedding.

        Args:
            is_training: enables dropout and builds the training op.
            word_embedding: initial value of the word embedding variable
                (passed to `tf.constant_initializer`).
            config: hyper-parameter object; reads `batch_size`, `num_steps`,
                `hidden_size`, `vocab_size`, `key_words_voc_size`,
                `keep_prob`, `num_layers`, `word_embedding_size` and
                `max_grad_norm`.
            filename: path of the TFRecord file to read from.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.size = size = config.hidden_size
        vocab_size = config.vocab_size
        key_words_voc_size = config.key_words_voc_size

        # Weight of every layer's contribution to the keyword gate.
        alpha = tf.constant(0.5)

        filename_queue = tf.train.string_input_producer([filename],
                                                        num_epochs=None)
        # Unlike the TFRecordWriter, the TFRecordReader is symbolic
        reader = tf.TFRecordReader()
        # One can read a single serialized example from a filename
        # serialized_example is a Tensor of type string.
        _, serialized_example = reader.read(filename_queue)
        # The serialized example is converted back to actual values.

        features = tf.parse_single_example(
            serialized_example,
            features={
                # We know the length of both fields. If not the
                # tf.VarLenFeature could be used
                'input_data':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'target':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'mask':
                tf.FixedLenFeature([batch_size * num_steps], tf.float32),
                'key_words':
                tf.FixedLenFeature([batch_size * key_words_voc_size],
                                   tf.float32),
            })

        # Model inputs x (input_data) and targets y.
        self._input_data = tf.cast(features['input_data'], tf.int32)
        self._targets = tf.cast(features['target'], tf.int32)
        self._mask = tf.cast(features['mask'], tf.float32)
        self._key_words = tf.cast(features['key_words'], tf.float32)
        self._input_word = tf.reshape(self._key_words, [batch_size, -1])

        # Restore the batch dimension of the flattened features.
        self._input_data = tf.reshape(self._input_data, [batch_size, -1])
        self._targets = tf.reshape(self._targets, [batch_size, -1])
        self._mask = tf.reshape(self._mask, [batch_size, -1])

        LSTM_cell = SC_LSTM(key_words_voc_size,
                            size,
                            forget_bias=0.0,
                            state_is_tuple=False)
        if is_training and config.keep_prob < 1:
            LSTM_cell = SC_DropoutWrapper(LSTM_cell,
                                          output_keep_prob=config.keep_prob)
        # NOTE(review): the same cell object is repeated for every layer;
        # whether SC_MultiRNNCell gives each layer its own weights depends on
        # its implementation (not visible here) -- confirm.
        cell = SC_MultiRNNCell([LSTM_cell] * config.num_layers,
                               state_is_tuple=False)

        self._initial_state = cell.zero_state(batch_size, tf.float32)
        self._init_output = tf.zeros([batch_size, size * config.num_layers],
                                     tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                'word_embedding', [vocab_size, config.word_embedding_size],
                trainable=True,
                initializer=tf.constant_initializer(word_embedding))
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        sc_vec = self._input_word

        outputs = []
        output_state = self._init_output
        state = self._initial_state

        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                with tf.variable_scope("RNN_sentence"):
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()

                    # Gate contribution of the current word embedding.
                    sc_wr = tf.get_variable(
                        'sc_wr',
                        [config.word_embedding_size, key_words_voc_size])
                    res_wr = tf.matmul(inputs[:, time_step, :], sc_wr)

                    # Gate contribution of the previous outputs: one
                    # `size`-wide slice of output_state per layer.
                    res_hr = tf.zeros_like(res_wr, dtype=tf.float32)
                    for layer_id in range(config.num_layers):
                        sc_hr = tf.get_variable('sc_hr_%d' % layer_id,
                                                [size, key_words_voc_size])
                        res_hr += alpha * tf.matmul(
                            tf.slice(output_state, [0, size * layer_id],
                                     [-1, size]), sc_hr)
                    # The keyword vector is rescaled by the gate each step.
                    r_t = tf.sigmoid(res_wr + res_hr)
                    sc_vec = r_t * sc_vec

                    (cell_output, state,
                     cell_outputs) = cell(inputs[:, time_step, :], state,
                                          sc_vec)
                    # cell_outputs (the third return value) is what feeds the
                    # softmax and the next step's gate.
                    outputs.append(cell_outputs)
                    output_state = cell_outputs

            self._end_output = output_state

        output = tf.reshape(tf.concat(outputs, 1),
                            [-1, size * config.num_layers])

        softmax_w = tf.get_variable("softmax_w",
                                    [size * config.num_layers, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.reshape(self._mask, [-1])],
            average_across_timesteps=False)

        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            # Keep the output distribution reachable instead of discarding
            # it in an unused local.
            self._prob = tf.nn.softmax(logits)
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # NOTE(review): `self.lr` is not defined in this method; presumably a
        # property exposing `self._lr` -- confirm on the class.
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
示例#17
0
    def __init__(self,
                 args,
                 infer=False):  # infer is set to true during sampling.
        self.args = args
        if infer:
            # Worry about one character at a time during sampling; no batching or BPTT.
            args.batch_size = 1
            args.seq_length = 1

        # Set cell_fn to the type of network cell we're creating -- RNN, GRU or LSTM.
        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # Call tensorflow library tensorflow-master/tensorflow/python/ops/rnn_cell
        # to create a layer of rnn_size cells of the specified basic type (RNN/GRU/LSTM).
        if args.model == "gru":
            cell = cell_fn(args.rnn_size)
        else:
            cell = cell_fn(args.rnn_size, state_is_tuple=True)

        # Use the same rnn_cell library to create a stack of these cells
        # of num_layers layers. Pass in a python list of these cells.
        # (The [cell] * arg.num_layers syntax literally duplicates cell multiple times in
        # a list. The syntax is such that [5, 6] * 3 would return [5, 6, 5, 6, 5, 6].)
        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers,
                                                 state_is_tuple=True)

        # Create two TF placeholder nodes of 32-bit ints (NOT floats!),
        # each of shape batch_size x seq_length. This shape matches the batches
        # (listed in x_batches and y_batches) constructed in create_batches in utils.py.
        # input_data will receive input batches, and targets will be what it compares against
        # to calculate loss.
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length])

        # Using the zero_state function in the RNNCell master class in rnn_cell library,
        # create a tensor of zeros such that we can swap it in for the network state at any time
        # to zero out the network's state.
        # State dimensions are: cell_fn state size (2 for LSTM) x rnn_size x num_layers.
        # So an LSTM network with 100 cells per layer and 3 layers would have a state size of 600,
        # and initial_state would have a dimension of none x 600.
        self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

        # Scope our new variables to the scope identifier string "rnnlm".
        with tf.variable_scope('rnnlm'):
            # Create new variable softmax_w and softmax_b for output.
            # softmax_w is a weights matrix from the top layer of the model (of size rnn_size)
            # to the vocabulary output (of size vocab_size).
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            # softmax_b is a bias vector of the ouput characters (of size vocab_size).
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            # [TODO: Why specify CPU? Same as the TF translation tutorial, but don't know why.]
            with tf.device("/cpu:0"):
                # Create new variable named 'embedding' to connect the character input to the base layer
                # of the RNN. Its role is the conceptual inverse of softmax_w.
                # It contains the trainable weights from the one-hot input vector to the lowest layer of RNN.
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                # Create an embedding tensor with tf.nn.embedding_lookup(embedding, self.input_data).
                # This tensor has dimensions batch_size x seq_length x rnn_size.
                # tf.split splits that embedding lookup tensor into seq_length tensors (along dimension 1).
                # Thus inputs is a list of seq_length different tensors,
                # each of dimension batch_size x 1 x rnn_size.
                inputs = tf.split(tf.nn.embedding_lookup(
                    embedding, self.input_data),
                                  args.seq_length,
                                  axis=1)
                # Iterate through these resulting tensors and eliminate that degenerate second dimension of 1,
                # i.e. squeeze each from batch_size x 1 x rnn_size down to batch_size x rnn_size.
                # Thus we now have a list of seq_length tensors, each with dimension batch_size x rnn_size.
                inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        # THIS LOOP FUNCTION IS NEVER ACTUALLY USED.
        # IT IS EXPLICITLY NOT USED DURING TRAINING.
        # DURING INFERENCE, SEQ_LENGTH == 1, SO SEQ2SEQ.RNN_DECODER() ONLY USES THE LOOP ARGUMENT
        # ON SEQUENCE LENGTH ITEMS SUBSEQUENT TO THE FIRST.
        # This looping function is used as part of seq2seq.rnn_decoder only during sampling -- not training.
        # prev is a 2D Tensor of shape [batch_size x cell.output_size].
        # returns a 2D Tensor of shape [batch_size x cell.input_size].
        def loop(prev, _):
            # prev is initially the top cell state.
            # Convert the top cell state into character logits.
            prev = tf.matmul(prev, softmax_w) + softmax_b
            # Pull the character with the greatest logit (no sampling, just argmaxing).
            # WHY IS THIS ARGMAXING WHEN ACTUAL SAMPLING IS DONE PROBABILISTICALLY?
            # DOESN'T THIS CAUSE OUTPUTS NOT TO MATCH INPUTS DURING SEQUENCE GENERATION?
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            # Re-embed that symbol as the next step's input, and return that.
            return tf.nn.embedding_lookup(embedding, prev_symbol)

        # Set up a seq2seq decoder from the seq2seq.py library.
        # This constructs the outputs and states nodes of the network.
        # Outputs is a list (of len seq_length, same as inputs) of tensors of shape [batch_size x rnn_size].
        # These are the raw output values of the top layer of the network at each time step.
        # They have NOT been fed through the decoder projection; they are still in network space,
        # not character space.
        # State is a tensor of shape [batch_size x cell.state_size].
        # This is also the step where all of the trainable parameters for the LSTM (weights and biases) are defined.
        outputs, self.final_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=loop if infer else None,
            scope='rnnlm')
        # tf.concat concatenates the output tensors along the rnn_size dimension,
        # to make a single tensor of shape [batch_size x (seq_length * rnn_size)].
        # This gives the following 2D outputs matrix:
        #   [(rnn output: batch 0, seq 0) (rnn output: batch 0, seq 1) ... (rnn output: batch 0, seq seq_len-1)]
        #   [(rnn output: batch 1, seq 0) (rnn output: batch 1, seq 1) ... (rnn output: batch 1, seq seq_len-1)]
        #   ...
        #   [(rnn output: batch batch_size-1, seq 0) (rnn output: batch batch_size-1, seq 1) ... (rnn output: batch batch_size-1, seq seq_len-1)]
        # tf.reshape then reshapes it to a tensor of shape [(batch_size * seq_length) x rnn_size].
        # Output will now be the following matrix:
        #   [rnn output: batch 0, seq 0]
        #   [rnn output: batch 0, seq 1]
        #   ...
        #   [rnn output: batch 0, seq seq_len-1]
        #   [rnn output: batch 1, seq 0]
        #   [rnn output: batch 1, seq 1]
        #   ...
        #   [rnn output: batch 1, seq seq_len-1]
        #   ...
        #   ...
        #   [rnn output: batch batch_size-1, seq seq_len-1]
        # Note the following comment in rnn_cell.py:
        #   Note: in many cases it may be more efficient to not use this wrapper,
        #   but instead concatenate the whole sequence of your outputs in time,
        #   do the projection on this batch-concatenated sequence, then split it
        #   if needed or directly feed into a softmax.
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])
        # Obtain logits node by applying output weights and biases to the output tensor.
        # Logits is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # Recall that outputs is a 2D tensor of shape [(batch_size * seq_length) x rnn_size],
        # and softmax_w is a 2D tensor of shape [rnn_size x vocab_size].
        # The matrix product is therefore a new 2D tensor of [(batch_size * seq_length) x vocab_size].
        # In other words, that multiplication converts a loooong list of rnn_size vectors
        # to a loooong list of vocab_size vectors.
        # Then add softmax_b (a single vocab-sized vector) to every row of that list.
        # That gives you the logits!
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        # Convert logits to probabilities. Probs isn't used during training! That node is never calculated.
        # Like logits, probs is a tensor of shape [(batch_size * seq_length) x vocab_size].
        # During sampling, this means it is of shape [1 x vocab_size].
        self.probs = tf.nn.softmax(self.logits)
        # seq2seq.sequence_loss_by_example returns 1D float Tensor containing the log-perplexity
        # for each sequence. (Size is batch_size * seq_length.)
        # Targets are reshaped from a [batch_size x seq_length] tensor to a 1D tensor, of the following layout:
        #   target character (batch 0, seq 0)
        #   target character (batch 0, seq 1)
        #   ...
        #   target character (batch 0, seq seq_len-1)
        #   target character (batch 1, seq 0)
        #   ...
        # These targets are compared to the logits to generate loss.
        # Logits: instead of a list of character indices, it's a list of character index probability vectors.
        # seq2seq.sequence_loss_by_example will do the work of generating losses by comparing the one-hot vectors
        # implicitly represented by the target characters against the probability distributions in logits.
        # It returns a 1D float tensor (a vector) where item i is the log-perplexity of
        # the comparison of the ith logit distribution to the ith one-hot target vector.
        loss = seq2seq.sequence_loss_by_example(
            [self.logits],
            # logits: 1-item list of 2D Tensors of shape [batch_size x vocab_size]
            [tf.reshape(self.targets, [-1])],
            # targets: 1-item list of 1D batch-sized int32 Tensors of the same length as logits
            [tf.ones([args.batch_size * args.seq_length])],
            # weights: 1-item list of 1D batch-sized float-Tensors of the same length as logits
            args.vocab_size
        )  # num_decoder_symbols: integer, number of decoder symbols (output classes)
        # Cost is the arithmetic mean of the values of the loss tensor
        # (the sum divided by the total number of elements).
        # It is a single-element floating point tensor. This is what the optimizer seeks to minimize.
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        # Create a summary for our cost.
        tf.summary.scalar("cost", self.cost)
        # Create a node to track the learning rate as it decays through the epochs.
        self.lr = tf.Variable(args.learning_rate, trainable=False)
        self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
        self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables(
        )  # tvars is a python list of all trainable TF Variable objects.

        # tf.gradients returns a list of tensors of length len(tvars) where each tensor is sum(dy/dx).
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          args.grad_clip)
        optimizer = tf.train.AdamOptimizer(
            self.lr)  # Use ADAM optimizer with the current learning rate.
        # Zip creates a list of tuples, where each tuple is (variable tensor, gradient tensor).
        # Training op nudges the variables along the gradient, with the given learning rate, using the ADAM optimizer.
        # This is the op that a training session should be instructed to perform.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        self.summary_op = tf.summary.merge_all()
示例#18
0
    def __init__(self, args, infer=False):
        """Build the RNN language-model graph (single recurrent cell).

        Args:
            args: Hyper-parameter namespace. This constructor reads
                ``model``, ``rnn_size``, ``batch_size``, ``seq_length``,
                ``vocab_size`` and ``grad_clip``.
            infer: When true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 on the caller's object and the ``loop``
                function is handed to the decoder.

        Raises:
            Exception: If ``args.model`` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            # The inference graph is built for one sequence of length one.
            args.batch_size = args.seq_length = 1

        # Resolve the cell class lazily by name so that only the selected
        # class is looked up on rnn_cell.
        supported_cells = (
            ('rnn', 'BasicRNNCell'),
            ('gru', 'GRUCell'),
            ('lstm', 'LayerNormBasicLSTMCell'),
        )
        for model_name, class_name in supported_cells:
            if args.model == model_name:
                cell_fn = getattr(rnn_cell, class_name)
                break
        else:
            raise Exception("model type not supported: {}".format(args.model))

        # No multi-layer wrapper is applied here, so args.num_layers is not
        # consulted by this constructor.
        self.cell = cell = cell_fn(args.rnn_size)

        batch_shape = [args.batch_size, args.seq_length]
        self.input_data = tf.placeholder(tf.int32, batch_shape)
        self.targets = tf.placeholder(tf.int32, batch_shape)
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)

        # Non-trainable bookkeeping variables.
        self.batch_pointer = tf.Variable(
            0, name="batch_pointer", trainable=False, dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(
            self.batch_pointer, self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(
            0, name="epoch_pointer", trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def variable_summaries(var):
            """Record mean, max and min scalar summaries of ``var``."""
            with tf.name_scope('summaries'):
                for tag, reducer in (('mean', tf.reduce_mean),
                                     ('max', tf.reduce_max),
                                     ('min', tf.reduce_min)):
                    tf.summary.scalar(tag, reducer(var))

        with tf.variable_scope('rnnlm'):
            # Output projection from rnn_size to vocab_size.
            softmax_w = tf.get_variable(
                "softmax_w", [args.rnn_size, args.vocab_size])
            variable_summaries(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            variable_summaries(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable(
                    "embedding", [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One slice per time step along axis 1, then drop that axis.
                per_step = tf.split(axis=1,
                                    num_or_size_splits=args.seq_length,
                                    value=embedded)
                inputs = [tf.squeeze(step, [1]) for step in per_step]

        def loop(prev, _):
            """Embed the argmax symbol predicted from the previous output."""
            step_logits = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(step_logits, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        decoder_loop = loop if infer else None
        outputs, self.final_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            cell,
            loop_function=decoder_loop,
            scope='rnnlm')

        # Concatenate the per-step outputs and flatten to rows of rnn_size.
        stacked = tf.concat(axis=1, values=outputs)
        output = tf.reshape(stacked, [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        unit_weights = tf.ones([args.batch_size * args.seq_length])
        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [unit_weights], args.vocab_size)
        self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        tf.summary.scalar("cost", self.cost)

        # The learning rate starts at 0.0 in a non-trainable variable.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
            zip(grads, tvars))
示例#19
0
    def __init__(self, is_training, batch_size, num_steps, batch_counts):
        """Build an LSTM graph that scores the last TERM_SIZE steps of a sequence.

        The method relies on names defined outside this block: TERM_SIZE,
        HIDDEN_SIZE, KEEP_PROB, NUM_LAYERS, VOCAB_SIZE, LEARNING_RATE,
        LEARNING_RATE_DECAY, REGULARIZATION_RATE and MAX_GRAD_NORM.

        Args:
            is_training: When true, dropout is applied to the cell outputs and
                to the embedded inputs, and ``self.train_op`` is created. When
                false the method returns before building the optimizer.
            batch_size: First dimension of the input and target placeholders.
            num_steps: Number of time steps unrolled by the loop below.
            batch_counts: Divided by ``batch_size`` to obtain the
                ``decay_steps`` argument of the exponential learning-rate
                decay.
        """
        self.batch_size = batch_size
        self.num_steps = num_steps
        self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self.targets = tf.placeholder(tf.int32, [batch_size, TERM_SIZE])

        def _make_cell():
            """Create one LSTM layer, wrapped in dropout when training."""
            layer = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
            if is_training:
                layer = tf.nn.rnn_cell.DropoutWrapper(
                    layer, output_keep_prob=KEEP_PROB)
            return layer

        # Each layer needs its own cell object: repeating one instance
        # ([cell] * NUM_LAYERS) makes the layers share parameters (or raises)
        # on TensorFlow >= 1.1.
        rnn_cell_fw = tf.nn.rnn_cell.MultiRNNCell(
            [_make_cell() for _ in range(NUM_LAYERS)])

        self.initial_state_fw = rnn_cell_fw.zero_state(batch_size, tf.float32)

        embedding = tf.get_variable("embedding", [VOCAB_SIZE, HIDDEN_SIZE])
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        if is_training:
            inputs = tf.nn.dropout(inputs, KEEP_PROB)

        # Despite the scope name, only a forward pass is built here.
        with tf.variable_scope('bidirectional_rnn'):
            outputs_fw = []
            state_fw = self.initial_state_fw
            with tf.variable_scope("rnn_fw"):
                for time_step in range(num_steps):
                    if time_step > 0:
                        # Unrolled steps share the variables of step 0.
                        tf.get_variable_scope().reuse_variables()
                    cell_output_fw, state_fw = rnn_cell_fw(
                        inputs[:, time_step, :], state_fw)
                    # Only the final TERM_SIZE steps contribute to the loss.
                    if time_step >= num_steps - TERM_SIZE:
                        outputs_fw.append(cell_output_fw)
            output = tf.reshape(tf.concat(outputs_fw, 1), [-1, HIDDEN_SIZE])

        # Projection from hidden size to vocabulary logits.
        weight = tf.get_variable("weight", [HIDDEN_SIZE, VOCAB_SIZE])
        bias = tf.get_variable("bias", [VOCAB_SIZE])
        logits = tf.matmul(output, weight) + bias
        flat_targets = tf.reshape(self.targets, [-1])
        loss = seq2seq.sequence_loss_by_example(
            [logits], [flat_targets],
            weights=[tf.ones([batch_size * TERM_SIZE], dtype=tf.float32)])

        self.cost = tf.reduce_mean(loss)
        self.final_state_fw = state_fw
        self.predictions = tf.cast(tf.argmax(logits, 1), tf.int32)
        self.correct_prediction = tf.equal(self.predictions, flat_targets)
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32))

        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False)
        self.learning_rate = tf.train.exponential_decay(
            LEARNING_RATE,
            self.global_step,
            batch_counts / self.batch_size,
            LEARNING_RATE_DECAY,
            staircase=True)
        trainable_variables = tf.trainable_variables()
        # L2 regularisation is applied to the output projection only.
        regularization_cost = tf.nn.l2_loss(weight) + tf.nn.l2_loss(bias)
        self.cost = self.cost + REGULARIZATION_RATE * regularization_cost
        if not is_training:
            return

        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)

        # Apply the clipped gradients. Calling minimize() here would recompute
        # unclipped gradients and silently discard the clipping above.
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(grads, trainable_variables), global_step=self.global_step)
示例#20
0
               for logit in logits]  #[steps, batch, vocab]
# print(predictions)
'''
 Why use the logits for the cost: the logits give a "semi-one-hot" vector in which every entry carries some value. Compared with the one-hot y,
 every value except the entry where y is 1 counts as error. Before the softmax the values are roughly continuous; the softmax makes them no longer so.
'''
# Break the targets into one tensor per time step and drop the split axis,
# giving the layout noted by the author: y_as_list = [steps, batch].
y_as_list = [
    tf.squeeze(step_targets, squeeze_dims=[1])
    for step_targets in tf.split(y_, num_of_steps, axis=1)
]

# Every position in every step gets a loss weight of one.
loss_weights = [tf.ones([batch_size]) for _ in range(num_of_steps)]

# The loss is calculated step by step, so the step index comes first in each
# of the three lists.
losses = sequence_loss_by_example(logits, y_as_list, loss_weights)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learn_rate).minimize(total_loss)

# Create the session and initialise all global variables.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# predicted result
for i in range(epoch_size):
    _, cost_val, w_val, b_val, final_state_val = sess.run(
        [train_step, total_loss, W, b, final_state], {
            x_: input_x,
            y_: input_target
示例#21
0
    def __init__(self, is_training, filename):
        """Build the keyword-attention LSTM language-model graph.

        Input batches are parsed from a TFRecord file. At every time step an
        attention distribution over the keyword embeddings is computed from
        the previous cell output, a gate is decreased by that attention, and
        the attention-weighted keyword vector is concatenated with the word
        embedding before being fed to the stacked LSTM.

        The method reads the names ``config`` and ``word_vec``, which are
        defined outside this block.

        Args:
            is_training: Enables dropout and builds the optimizer. When false,
                ``self._sample`` is set to the argmax of the output softmax
                and the method returns before the optimizer is created.
            filename: Path passed to ``tf.train.string_input_producer``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.size = size = config.hidden_size
        vocab_size = config.vocab_size

        filename_queue = tf.train.string_input_producer([filename],
                                                        num_epochs=None)

        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(
            serialized_example,
            features={
                # The length of every field is known in advance; otherwise
                # tf.VarLenFeature could be used.
                'input_data':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'target':
                tf.FixedLenFeature([batch_size * num_steps], tf.int64),
                'mask':
                tf.FixedLenFeature([batch_size * num_steps], tf.float32),
                'key_words':
                tf.FixedLenFeature([batch_size * config.num_keywords],
                                   tf.int64)
            })

        self._input_data = tf.cast(features['input_data'], tf.int32)
        self._targets = tf.cast(features['target'], tf.int32)
        self._input_word = tf.cast(features['key_words'], tf.int32)
        self._mask = tf.cast(features['mask'], tf.float32)
        # Fed by the caller; used as the "previous output" for step 0.
        self._init_output = tf.placeholder(tf.float32, [batch_size, size])

        # Each record holds a whole flattened batch; restore the batch axis.
        self._input_data = tf.reshape(self._input_data, [batch_size, -1])
        self._targets = tf.reshape(self._targets, [batch_size, -1])
        self._input_word = tf.reshape(self._input_word, [batch_size, -1])
        self._mask = tf.reshape(self._mask, [batch_size, -1])

        def single_cell_fn(unit_type,
                           num_units,
                           dropout,
                           mode,
                           forget_bias=1.0):
            """Create an instance of a single RNN cell."""

            # Dropout is only active when mode compares equal to True.
            dropout = dropout if mode == True else 0.0

            if unit_type == "lstm":
                single_cell = rnn_cell.LSTMCell(num_units,
                                                forget_bias=forget_bias,
                                                state_is_tuple=False)
            else:
                raise ValueError("Unknown unit type %s!" % unit_type)

            if dropout > 0.0:
                single_cell = rnn_cell.DropoutWrapper(
                    cell=single_cell, input_keep_prob=(1.0 - dropout))

            return single_cell

        # One independent cell object per layer.
        cell_list = []
        for i in range(config.num_layers):
            single_cell = single_cell_fn(unit_type="lstm",
                                         num_units=size,
                                         dropout=1 - config.keep_prob,
                                         mode=is_training)
            cell_list.append(single_cell)

        cell = rnn_cell.MultiRNNCell(cell_list, state_is_tuple=False)
        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                'word_embedding', [vocab_size, config.word_embedding_size],
                trainable=True,
                initializer=tf.constant_initializer(word_vec))
            # Tensor of shape (batch_size, num_steps, word_embedding_size).
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)
            keyword_inputs = tf.nn.embedding_lookup(embedding,
                                                    self._input_word)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # The gate starts at one for every keyword and is decreased each step.
        gate = tf.ones([batch_size, config.num_keywords])

        # Running sum of the masked attention given to each keyword.
        atten_sum = tf.zeros([batch_size, config.num_keywords])

        with tf.variable_scope("coverage"):
            u_f = tf.get_variable("u_f", [
                config.num_keywords * config.word_embedding_size,
                config.num_keywords
            ])
            res1 = tf.sigmoid(
                tf.matmul(tf.reshape(keyword_inputs, [batch_size, -1]), u_f))
            # Per-sample sum of the mask over the time axis.
            temp1 = tf.reduce_sum(self._mask, 1, keepdims=True)
            phi_res = temp1 * res1

            self.output1 = phi_res

        outputs = []
        output_state = self._init_output
        state = self._initial_state
        with tf.variable_scope("RNN"):

            for time_step in range(num_steps):
                # vs collects, for the current time step, the gate-attenuated
                # score relating the previous cell output to each keyword.
                vs = []
                for s2 in range(config.num_keywords):
                    with tf.variable_scope("RNN_attention"):
                        # Attention weights are shared by all steps/keywords.
                        if time_step > 0 or s2 > 0:
                            tf.get_variable_scope().reuse_variables()
                        u = tf.get_variable("u", [size, 1])
                        w1 = tf.get_variable("w1", [size, size])
                        w2 = tf.get_variable(
                            "w2", [config.word_embedding_size, size])
                        b = tf.get_variable("b1", [size])

                        # Linearly transform the previous cell output.
                        temp2 = tf.matmul(output_state, w1)
                        # Take the embedding of keyword s2.
                        temp3 = keyword_inputs[:, s2, :]
                        # Linearly transform the keyword embedding.
                        temp4 = tf.matmul(temp3, w2)
                        # Add the transformed output and keyword.
                        temp5 = tf.add(temp2, temp4)
                        # Add the bias term.
                        temp6 = tf.add(temp5, b)
                        # Apply the non-linearity.
                        temp7 = tf.tanh(temp6)

                        # Project to a single score.
                        vi = tf.matmul(temp7, u)
                        # Gate value that belongs to keyword s2.
                        temp8 = gate[:, s2:s2 + 1]
                        temp9 = vi * temp8

                        vs.append(temp9)

                self.attention_vs = tf.concat(vs, axis=1)
                # prob_p is the attention distribution of the previous cell
                # output over the keywords.
                prob_p = tf.nn.softmax(self.attention_vs)

                # NOTE(review): phi_res is zero for a sample whose mask sums
                # to zero, which would make this a division by zero -- confirm
                # that the input data never contains such rows.
                gate = gate - (prob_p / phi_res)

                # If a sample's mask is 0 at this time step, all of its
                # keyword weights are zeroed for the step, e.g.
                # (32, 5) * (32, 1).
                temp10 = self._mask[:, time_step:time_step + 1]
                atten_sum += prob_p * temp10

                # Attention-weighted sum of all keyword embeddings.
                mt = tf.add_n([
                    prob_p[:, i:i + 1] * keyword_inputs[:, i, :]
                    for i in range(config.num_keywords)
                ])

                with tf.variable_scope("RNN_sentence"):
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    temp11 = inputs[:, time_step, :]
                    # mt was derived from the previous step's cell output
                    # together with the keyword information.
                    temp12 = tf.concat([temp11, mt], axis=1)
                    # The cell input dimension must equal the hidden size.
                    temp13 = tf.layers.dense(inputs=temp12, units=size)
                    (cell_output, state) = cell(temp13, state)
                    outputs.append(cell_output)
                    output_state = cell_output

            self._end_output = cell_output

        self.output2 = atten_sum
        output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])

        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Prefer tf.nn.seq2seq when this TensorFlow build provides it and fall
        # back to the imported function otherwise. Only the missing-attribute
        # case is caught so that genuine graph-construction errors surface.
        try:
            loss_fn = tf.nn.seq2seq.sequence_loss_by_example
        except AttributeError:
            loss_fn = sequence_loss_by_example
        loss = loss_fn([logits], [tf.reshape(self._targets, [-1])],
                       [tf.reshape(self._mask, [-1])],
                       average_across_timesteps=False)

        # cost1: masked sequence loss; cost2: squared gap between phi_res and
        # the attention each keyword actually received.
        self.cost1 = tf.reduce_sum(loss)
        self.cost2 = tf.reduce_sum((phi_res - atten_sum)**2)

        self._cost = cost = (self.cost1 + 0.1 * self.cost2) / batch_size
        self._final_state = state
        self._prob = tf.nn.softmax(logits)

        if not is_training:
            prob = tf.nn.softmax(logits)
            self._sample = tf.argmax(prob, 1)
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        # Use the variable assigned just above rather than an `lr` attribute
        # that is not defined anywhere in this method.
        optimizer = tf.train.AdamOptimizer(self._lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
示例#22
0
    def build_graph(self, test):
        """
        Builds the LSTM language-model graph in TensorFlow.

        Reads ``self.cell_size``, ``self.num_layers``, ``self.vocab_size``,
        ``self.batch_size`` and ``self.seq_len``. Sets ``cell``, ``inputs``,
        ``targets``, ``initial_state``, ``ws``, ``bs``, ``embeddings``,
        ``final_state``, ``probs``, ``loss``, ``global_step``, ``optimizer``
        and ``train_op`` on the instance.

        Args:
            test: When true, ``self.batch_size`` and ``self.seq_len`` are
                overwritten with 1 and the ``loop`` function is handed to the
                decoder; otherwise no loop function is used.
        """
        if test:
            self.batch_size = 1
            self.seq_len = 1

        ##
        # Cells
        ##

        # Build a separate cell object for each layer: repeating one instance
        # ([cell] * num_layers) makes the layers share parameters (or raises)
        # on TensorFlow >= 1.1.
        layer_cells = [rnn_cell.BasicLSTMCell(self.cell_size)
                       for _ in range(self.num_layers)]
        self.cell = rnn_cell.MultiRNNCell(layer_cells)

        ##
        # Data
        ##

        # inputs and targets are 2D tensors of shape [batch_size, seq_len]
        self.inputs = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_len])
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        ##
        # Variables
        ##
        with tf.variable_scope('lstm_vars'):
            self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
            self.bs = tf.get_variable('bs', [self.vocab_size])  # TODO: initializer?
            with tf.device('/cpu:0'):  # embeddings and their lookup are pinned to the CPU
                self.embeddings = tf.get_variable('embeddings', [self.vocab_size, self.cell_size])

                # get embeddings for all input words
                input_embeddings = tf.nn.embedding_lookup(self.embeddings, self.inputs)
                # The split cuts this tensor into a seq_len long list of 3D tensors of shape
                # [batch_size, 1, cell_size]. The squeeze removes the size-1 dimension at
                # axis 1 of each tensor.
                inputs_split = tf.split(input_embeddings, self.seq_len, 1)
                inputs_split = [tf.squeeze(input_, [1]) for input_ in inputs_split]

        def loop(prev, _):
            """Embed the argmax symbol predicted from the previous output."""
            prev = tf.matmul(prev, self.ws) + self.bs
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embeddings, prev_symbol)

        lstm_outputs_split, self.final_state = seq2seq.rnn_decoder(inputs_split,
                                                                   self.initial_state,
                                                                   self.cell,
                                                                   loop_function=loop if test else None,
                                                                   scope='lstm_vars')
        # Concatenate the per-step outputs and flatten to rows of cell_size.
        lstm_outputs = tf.reshape(tf.concat(lstm_outputs_split, 1), [-1, self.cell_size])

        logits = tf.matmul(lstm_outputs, self.ws) + self.bs
        self.probs = tf.nn.softmax(logits)

        ##
        # Train
        ##

        total_loss = seq2seq.sequence_loss_by_example([logits],
                                                      [tf.reshape(self.targets, [-1])],
                                                      [tf.ones([self.batch_size * self.seq_len])],
                                                      self.vocab_size)
        # Average of the per-position losses.
        self.loss = tf.reduce_sum(total_loss) / self.batch_size / self.seq_len

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE, name='optimizer')
        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step,
                                                name='train_op')
示例#23
0
    def __init__(self, args, embedding):
        """Build the 'STAND' RNN graph on top of an externally supplied embedding.

        Two decoders are built over the same inputs and variables: one without
        a loop function, which feeds ``self.logits``, ``self.probs`` and the
        loss, and one with the ``loop`` function, which feeds
        ``self.self_feed_probs``.

        The TensorFlow calls keep the argument order used by the original code
        (``tf.split(axis, num, value)``, ``tf.concat(axis, values)``).

        Args:
            args: Hyper-parameter namespace. This constructor reads
                ``model``, ``rnn_size``, ``num_layers``, ``batch_size``,
                ``seq_length``, ``vocab_size`` and ``grad_clip``.
            embedding: Embedding matrix used for every lookup; also stored as
                ``self.embedding``.

        Raises:
            Exception: If ``args.model`` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        cell = cell_fn(args.rnn_size)

        self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length],
                                         name='STAND_input')
        self.targets = tf.placeholder(tf.int32,
                                      [args.batch_size, args.seq_length],
                                      name='STAND_targets')
        self.initial_state = cell.zero_state(args.batch_size, tf.float32)
        self.embedding = embedding
        with tf.variable_scope('STAND'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            inputs = tf.split(
                1, args.seq_length,
                tf.nn.embedding_lookup(self.embedding, self.input_data))
            squeezed = [tf.squeeze(input_, [1]) for input_ in inputs]
            # Must be a real list: it is consumed by two rnn_decoder calls
            # below, and map() is a one-shot iterator on Python 3.
            inputs = [tf.nn.l2_normalize(step, 1) for step in squeezed]

        def loop(prev, i):
            """Return the L2-normalised embedding of the argmax symbol."""
            prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.l2_normalize(
                tf.nn.embedding_lookup(embedding, prev_symbol), 1)

        # Decoder without a loop function.
        o, _ = seq2seq.rnn_decoder(inputs,
                                   self.initial_state,
                                   cell,
                                   loop_function=None,
                                   scope='STAND')
        # Second decoder with the loop function, reusing the same variables.
        with tf.variable_scope('STAND', reuse=True) as scope:
            sf_o, _ = seq2seq.rnn_decoder(inputs,
                                          self.initial_state,
                                          cell,
                                          loop_function=loop,
                                          scope=scope)
        output = tf.reshape(tf.concat(1, o), [-1, args.rnn_size])
        self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        sf_output = tf.reshape(tf.concat(1, sf_o), [-1, args.rnn_size])
        self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
        self.self_feed_probs = tf.nn.softmax(self_feed_logits)

        loss = seq2seq.sequence_loss_by_example(
            [self.logits], [tf.reshape(self.targets, [-1])],
            [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
        self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          args.grad_clip)
        # Print the name of every trainable variable. The call form with a
        # single argument prints the same text on Python 2 and Python 3.
        for v in tvars:
            print(v.name)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))