Пример #1
0
 def setup_encoder(self):
     """Build the bidirectional multi-layer GRU encoder.

     Dropout (keep probability ``self.keep_prob``) is applied to
     ``self.encoder_inputs``; the result is run time-major through a stacked
     GRU in both directions, with sequence lengths taken from
     ``self.src_len`` and all initial states set to zero.

     Side effects:
         Sets ``self.encoder_fw_cell`` and ``self.encoder_bw_cell`` (the
         stacked cells) and ``self.encoder_output`` (forward and backward
         outputs concatenated along axis 2).
     """

     def stacked_gru():
         # Build one fresh GRU (with output dropout) per layer.  Repeating a
         # single cell object with ``[cell] * n`` makes every layer use the
         # same instance, which later TensorFlow 1.x releases either reject
         # or turn into weight sharing between layers.
         layers = [
             rnn_cell.DropoutWrapper(
                 rnn_cell.GRUCell(self.size),
                 output_keep_prob=self.keep_prob)
             for _ in range(self.num_layers)
         ]
         return rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

     with vs.variable_scope("Encoder"):
         # Input dropout to reduce over-fitting.
         inp = tf.nn.dropout(self.encoder_inputs, self.keep_prob)

         self.encoder_fw_cell = stacked_gru()  # forward direction
         self.encoder_bw_cell = stacked_gru()  # backward direction

         out, _ = rnn.bidirectional_dynamic_rnn(
             self.encoder_fw_cell,
             self.encoder_bw_cell,
             inp,
             self.src_len,
             dtype=tf.float32,
             time_major=True,
             # Both directions start from an all-zero state.
             initial_state_fw=self.encoder_fw_cell.zero_state(
                 self.batch_size, dtype=tf.float32),
             initial_state_bw=self.encoder_bw_cell.zero_state(
                 self.batch_size, dtype=tf.float32))

         # Join forward and backward outputs along the feature axis.
         self.encoder_output = tf.concat([out[0], out[1]], axis=2)
def recurrent_neural_network(x, keep_prob):
    """Bidirectional stacked-LSTM classifier.

    Relies on the module-level names ``rnn_size``, ``n_classes``,
    ``num_layers``, ``length`` and ``last_relevant``.

    Args:
        x: input batch handed to ``tf.nn.bidirectional_dynamic_rnn``
            (batch-major, since ``time_major`` is left at its default).
        keep_prob: output keep probability used for dropout on the LSTM cells.

    Returns:
        ``concat([first, last]) @ weights + biases`` where ``last`` is the
        forward output at the last relevant step and ``first`` is the
        backward output at step 0.
    """
    layer = {'weights':  tf.Variable(tf.random_normal([2*rnn_size ,n_classes])),
             'biases':  tf.Variable(tf.random_normal([n_classes])) }

    # Forward direction cell
    lstm_fw_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True, forget_bias=1.0)
    lstm_fw_cell = rnn_cell.DropoutWrapper(lstm_fw_cell, output_keep_prob=keep_prob)
    lstm_fw_cell = rnn_cell.MultiRNNCell([lstm_fw_cell] * num_layers)

    # Backward direction cell
    lstm_bw_cell = rnn_cell.BasicLSTMCell(rnn_size,state_is_tuple=True,  forget_bias=1.0)
    lstm_bw_cell = rnn_cell.DropoutWrapper(lstm_bw_cell, output_keep_prob=keep_prob)
    lstm_bw_cell = rnn_cell.MultiRNNCell([lstm_bw_cell] * num_layers)

    # Compute the sequence lengths once and reuse them below instead of
    # rebuilding the same sub-graph for every use.
    seq_len = length(x)

    outputs, _ = tf.nn.bidirectional_dynamic_rnn(lstm_fw_cell,
                                                 lstm_bw_cell,
                                                 x,
                                                 dtype=tf.float32,
                                                 sequence_length=seq_len)
    output_fw, output_bw = outputs

    # Forward summary: output at the last valid time step of each sequence.
    last = last_relevant(output_fw, seq_len)
    # Backward summary: the backward outputs are returned in input order, so
    # the step at which the backward pass has consumed the whole sequence is
    # time index 0.  (Previously this was read from output_fw, which made
    # `first` a duplicate of `last` and left output_bw unused.)
    first = output_bw[:, 0, :]

    return tf.matmul(tf.concat(1,[first,last]) , layer['weights']) + layer['biases']
Пример #3
0
    def __init__(self, hidden_size, keep_prob, num_layers):
        """
        Store the hyper-parameters and build the forward and backward
        stacked GRU cells.

        Inputs:
          hidden_size: int. Hidden size of the RNN
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
          num_layers: int. Number of GRU layers stacked in each direction
        """
        self.hidden_size = hidden_size
        self.keep_prob = keep_prob
        self.num_layers = num_layers

        def dropout_gru_layers():
            # Create all GRU cells first, then wrap each one with input dropout.
            grus = []
            for _ in range(self.num_layers):
                grus.append(rnn_cell.GRUCell(self.hidden_size))
            return [
                DropoutWrapper(gru, input_keep_prob=self.keep_prob)
                for gru in grus
            ]

        # Forward direction.
        self.rnn_cell_fw = dropout_gru_layers()
        self.multi_rnn_cell_fw = rnn_cell.MultiRNNCell(
            self.rnn_cell_fw, state_is_tuple=False)

        # Backward direction.
        self.rnn_cell_bw = dropout_gru_layers()
        self.multi_rnn_cell_bw = rnn_cell.MultiRNNCell(
            self.rnn_cell_bw, state_is_tuple=False)
Пример #4
0
def build_nmt_multicell_rnn(num_layers_encoder, num_layers_decoder, encoder_size, decoder_size,
                            source_proj_size, use_lstm=True, input_feeding=True,
                            dropout=0.0):
    """Build the stacked encoder and decoder cells of the NMT model.

    Args:
        num_layers_encoder: number of layers in the encoder stack.
        num_layers_decoder: number of layers in the decoder stack.
        encoder_size: ``num_units`` of every encoder layer.
        decoder_size: ``num_units`` of every decoder layer.
        source_proj_size: ``input_size`` passed to the encoder cells.
        use_lstm: use ``rnn_cell.LSTMCell`` when true, ``GRUCell`` otherwise.
        input_feeding: when true the first decoder layer is declared with
            ``input_size=decoder_size * 2`` instead of ``decoder_size``.
        dropout: dropout rate; every layer is wrapped with
            ``output_keep_prob = 1.0 - dropout`` (0.0 keeps everything).

    Returns:
        Tuple ``(encoder_rnncell, decoder_rnncell)`` of ``MultiRNNCell``s.
    """
    if use_lstm:
        print("I'm building the model with LSTM cells")
        cell_class = rnn_cell.LSTMCell
    else:
        print("I'm building the model with GRU cells")
        cell_class = GRUCell

    initializer = tf.random_uniform_initializer(minval=-0.1, maxval=0.1, seed=1234)
    keep_prob = 1.0 - dropout

    def make_layer(input_size, num_units):
        # One independent, dropout-wrapped cell per call.
        cell = cell_class(num_units=num_units, input_size=input_size, initializer=initializer)
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=keep_prob)

    # Each layer gets its own cell object.  Repeating one instance with
    # ``[cell] * n`` makes all layers reuse the same object, which newer
    # TensorFlow releases reject or treat as weight sharing between layers.
    encoder_rnncell = rnn_cell.MultiRNNCell(
        [make_layer(source_proj_size, encoder_size) for _ in range(num_layers_encoder)])

    first_decoder_input = decoder_size * 2 if input_feeding else decoder_size
    decoder_layers = [make_layer(first_decoder_input, decoder_size)]
    # Layers above the first one always take decoder_size inputs.
    decoder_layers.extend(
        make_layer(decoder_size, decoder_size) for _ in range(num_layers_decoder - 1))
    decoder_rnncell = rnn_cell.MultiRNNCell(decoder_layers)

    return encoder_rnncell, decoder_rnncell
Пример #5
0
    def encoder(self):
        """Project the inputs and encode them with a stacked bidirectional LSTM.

        ``self.encoder_inputs`` is flattened to
        ``[batch_size * max_time, input_dim]``, passed through a ReLU affine
        layer of width ``hidden_size`` and reshaped to
        ``[batch_size, max_time, hidden_size]`` before entering the RNN.
        No ``sequence_length`` is given to the RNN.

        Returns:
            Forward and backward RNN outputs concatenated along axis 2.
            The final states are computed but not returned.
        """
        with tf.variable_scope("encoder"):
            # Input projection parameters.
            w_in = self._weight_variable(
                [self.input_dim, self.hidden_size], name='encoder_w_in')
            b_in = self._bias_variable([self.hidden_size],
                                       name='encoder_b_in')

            flat_inputs = tf.reshape(
                self.encoder_inputs,
                [self.batch_size * self.max_time, self.input_dim])
            projected = tf.nn.relu(tf.add(tf.matmul(flat_inputs, w_in), b_in))
            rnn_inputs = tf.reshape(
                projected,
                [self.batch_size, self.max_time, self.hidden_size])

            def lstm_layer():
                # Basic LSTM with dropout on its outputs only.
                return rnn_cell.DropoutWrapper(
                    cell=rnn_cell.BasicLSTMCell(self.hidden_size,
                                                forget_bias=1.0,
                                                state_is_tuple=True),
                    input_keep_prob=1.0,
                    output_keep_prob=self.output_keep_prob)

            fw_layers, bw_layers = [], []
            for i in range(self.num_layers):
                with tf.variable_scope('encoder_lstm_{}'.format(i)):
                    fw_layers.append(lstm_layer())
                    bw_layers.append(lstm_layer())

            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=rnn_cell.MultiRNNCell(fw_layers),
                cell_bw=rnn_cell.MultiRNNCell(bw_layers),
                inputs=rnn_inputs,
                dtype=tf.float32,
                time_major=False)

            # `outputs` is the (forward, backward) pair.
            return tf.concat(outputs, 2)
Пример #6
0
    def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps, n_words, enc_timesteps, bias_init_vector=None):
        """Store the model dimensions and create the parameters and LSTM cells.

        Args:
            dim_image: first dimension of ``encode_img_W``.
            dim_embed: second dimension of the embedding table ``Wemb``.
            dim_hidden: LSTM size; also the second dimension of
                ``encode_img_W`` and the first of ``embed_word_W``.
            batch_size: stored as ``self.batch_size``.
            n_lstm_steps: stored as ``self.n_lstm_steps``.
            n_words: number of rows of ``Wemb`` / columns of ``embed_word_W``.
            enc_timesteps: stored as ``self.enc_timesteps``.
            bias_init_vector: optional array (must support ``astype``) used
                to initialise ``embed_word_b``; when ``None`` the bias comes
                from ``self.init_bias``.
        """
        # The builtin int replaces np.int, which was only an alias for it and
        # has been removed from recent NumPy releases.
        self.dim_image = int(dim_image)
        self.dim_embed = int(dim_embed)
        self.dim_hidden = int(dim_hidden)
        self.batch_size = int(batch_size)
        self.n_lstm_steps = int(n_lstm_steps)
        self.n_words = int(n_words)
        self.enc_timesteps = int(enc_timesteps)

        # The embedding table is pinned to the CPU.
        with tf.device("/cpu:0"):
            self.Wemb = tf.Variable(tf.random_uniform(
                [n_words, dim_embed], -0.1, 0.1), name='Wemb')

        self.bemb = self.init_bias(dim_embed, name='bemb')

        # Single-layer LSTM stack; input_keep_prob=1 means no dropout.
        self.lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
        self.lstm = rnn_cell.DropoutWrapper(self.lstm, input_keep_prob=1)
        self.lstm = rnn_cell.MultiRNNCell([self.lstm ])

        # Second, independently parameterised stack with the same layout.
        self.back_lstm = rnn_cell.LSTMCell(dim_hidden, state_is_tuple=True)
        self.back_lstm = rnn_cell.DropoutWrapper(self.back_lstm, input_keep_prob=1)
        self.back_lstm = rnn_cell.MultiRNNCell([self.back_lstm])

        self.encode_img_W = tf.Variable(tf.random_uniform(
            [dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
        self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

        self.embed_word_W = tf.Variable(tf.random_uniform(
            [dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

        if bias_init_vector is not None:
            self.embed_word_b = tf.Variable(
                bias_init_vector.astype(np.float32), name='embed_word_b')
        else:
            self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
Пример #7
0
    def testLSTMBasicToBlockCell(self):
        """Check that a 2-layer LSTMBlockCell stack matches a BasicLSTMCell stack.

        Both stacks are built under the same seeded initializer, run once on
        the same fed input and states, and their outputs and new states are
        compared with assertAllClose.
        """
        with self.session(use_gpu=True) as sess:
            # Graph-side input; its value is overridden through the feed dict.
            x = array_ops.zeros([1, 2])
            x_values = np.random.randn(1, 2)

            # Values fed for the two state tensors of each of the two layers.
            m0_val = 0.1 * np.ones([1, 2])
            m1_val = -0.1 * np.ones([1, 2])
            m2_val = -0.2 * np.ones([1, 2])
            m3_val = 0.2 * np.ones([1, 2])

            # Same seeded initializer object is used for both variable scopes.
            initializer = init_ops.random_uniform_initializer(-0.01,
                                                              0.01,
                                                              seed=19890212)
            # Reference implementation: two stacked BasicLSTMCells.
            with variable_scope.variable_scope("basic",
                                               initializer=initializer):
                m0 = array_ops.zeros([1, 2])
                m1 = array_ops.zeros([1, 2])
                m2 = array_ops.zeros([1, 2])
                m3 = array_ops.zeros([1, 2])
                g, ((out_m0, out_m1),
                    (out_m2, out_m3)) = rnn_cell.MultiRNNCell(
                        [
                            rnn_cell.BasicLSTMCell(2, state_is_tuple=True)
                            for _ in range(2)
                        ],
                        state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
                sess.run([variables.global_variables_initializer()])
                # Tensors are fed by name, replacing the zero placeholders.
                basic_res = sess.run(
                    [g, out_m0, out_m1, out_m2, out_m3], {
                        x.name: x_values,
                        m0.name: m0_val,
                        m1.name: m1_val,
                        m2.name: m2_val,
                        m3.name: m3_val
                    })

            # Implementation under test: two stacked LSTMBlockCells.
            with variable_scope.variable_scope("block",
                                               initializer=initializer):
                m0 = array_ops.zeros([1, 2])
                m1 = array_ops.zeros([1, 2])
                m2 = array_ops.zeros([1, 2])
                m3 = array_ops.zeros([1, 2])
                g, ((out_m0, out_m1),
                    (out_m2, out_m3)) = rnn_cell.MultiRNNCell(
                        [lstm_ops.LSTMBlockCell(2) for _ in range(2)],
                        state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
                sess.run([variables.global_variables_initializer()])
                block_res = sess.run(
                    [g, out_m0, out_m1, out_m2, out_m3], {
                        x.name: x_values,
                        m0.name: m0_val,
                        m1.name: m1_val,
                        m2.name: m2_val,
                        m3.name: m3_val
                    })

            # Output and all four new states must agree pairwise.
            self.assertEqual(len(basic_res), len(block_res))
            for basic, block in zip(basic_res, block_res):
                self.assertAllClose(basic, block)
Пример #8
0
    def getCell(self, is_training, dp, config):
        """Build the (optionally dropout-wrapped) stacked RNN cell.

        Args:
            is_training: truthy when building the training graph; dropout is
                only added in that case.
            dp: unused; kept for interface compatibility.
            config: object providing ``cell_type`` ("LSTM", "RNN" or "GRU"),
                ``num_layer`` (1 or 2), ``hidden_size``, ``hidden_size_2``
                (read only for two layers) and ``keep_prob``.

        Returns:
            ``rnn_cell.MultiRNNCell`` with one or two layers.

        Raises:
            ValueError: for an unknown ``cell_type`` or a ``num_layer`` other
                than 1 or 2.
        """
        mode = "training" if is_training else "testing"
        print("==> Construct ", config.cell_type, " graph for " + mode)

        # Validate before touching any cell class so bad configs fail fast
        # and with one consistent message per problem.
        if config.cell_type not in ("LSTM", "RNN", "GRU"):
            raise ValueError("cell type should be GRU,LSTM,RNN")
        if config.num_layer not in (1, 2):
            raise ValueError("config.num_layer should be 1 or 2")

        if config.cell_type == "RNN":
            cell_class = BasicRNNCell
            cell_kwargs = {}
        else:
            cell_class = LSTMCell if config.cell_type == "LSTM" else GRUCell
            # NOTE(review): the GRU branch passes the same keyword arguments
            # as the LSTM branch, exactly as before; confirm that the GRUCell
            # in scope really accepts forget_bias / state_is_tuple.
            cell_kwargs = {"forget_bias": 0.0, "state_is_tuple": True}

        # hidden_size_2 is only read when a second layer is requested.
        layer_sizes = [config.hidden_size]
        if config.num_layer == 2:
            layer_sizes.append(config.hidden_size_2)

        cells = [cell_class(size, **cell_kwargs) for size in layer_sizes]

        # Dropout on inputs and outputs of every layer, training only.
        if is_training and config.keep_prob < 1:
            cells = [
                DropoutWrapper(layer_cell,
                               input_keep_prob=config.keep_prob,
                               output_keep_prob=config.keep_prob)
                for layer_cell in cells
            ]

        return rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
Пример #9
0
    def __init__(self, seq_length, vocab_size, stack_dimension, batch_size):
        """Build the sequence autoencoder graph and initialise its session.

        Args:
            seq_length: number of per-step input and label placeholders.
            vocab_size: first dimension of every placeholder; also used as
                the LSTM size (``self.memory_dim``).
            stack_dimension: number of times the LSTM cell is repeated in the
                encoder and decoder stacks.
            batch_size: second dimension of every placeholder.
        """
        self.sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True))

        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.memory_dim = vocab_size

        def stacked_lstm():
            # One LSTM cell object repeated stack_dimension times.
            unit = rnn_cell.LSTMCell(self.memory_dim, state_is_tuple=False)
            return rnn_cell.MultiRNNCell([unit] * stack_dimension,
                                         state_is_tuple=True)

        # One input placeholder per time step.
        self.enc_inp = []
        for t in range(seq_length):
            self.enc_inp.append(
                tf.placeholder(tf.float32,
                               shape=(vocab_size, batch_size),
                               name="enc_inp%i" % t))

        # Decoder inputs: all encoder inputs but the last, followed by a
        # zero tensor named "GO".
        go_step = tf.zeros_like(self.enc_inp[0], dtype=np.float32, name="GO")
        self.dec_inp = self.enc_inp[:-1] + [go_step]

        self.enc_cell = stacked_lstm()
        _, encoder_state = rnn.rnn(self.enc_cell,
                                   self.enc_inp,
                                   dtype=tf.float32)

        self.dec_cell = stacked_lstm()

        # Output projection parameters.
        self.Ws = tf.Variable(
            tf.random_uniform([self.memory_dim, self.vocab_size], 0, 0.1))
        self.bs = tf.Variable(tf.random_uniform([self.vocab_size], -0.1, 0.1))

        self.dec_outputs, self.dec_state = rnn_decoder(
            self.dec_inp, encoder_state, self.dec_cell, self.Ws, self.bs,
            vocab_size, batch_size, self.memory_dim)

        # One label placeholder per time step, plus matching all-ones weights.
        self.labels = []
        for t in range(seq_length):
            self.labels.append(
                tf.placeholder(tf.float32, [vocab_size, batch_size],
                               name='LABEL%i' % t))
        self.weights = []
        for step_labels in self.labels:
            self.weights.append(tf.ones_like(step_labels, dtype=tf.float32))

        self.loss = loss(self.labels, self.dec_outputs)

        self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.loss)
        self.sess.run(tf.initialize_all_variables())
Пример #10
0
    def encoder(self):
        """Project the inputs and encode them with a stacked bidirectional GLSTM.

        ``self.encoder_inputs`` is flattened to
        ``[batch_size * max_time, input_dim]``, passed through a ReLU dense
        layer of width ``hidden_size`` and reshaped to
        ``[batch_size, max_time, hidden_size]`` before entering the RNN,
        which is run with ``self.sequence_length``.

        Side effects:
            Sets ``self.encoder_final_state`` to a list with one
            ``LSTMStateTuple`` per layer, whose ``c`` and ``h`` are the
            forward and backward final states concatenated along axis 1.

        Returns:
            Tuple ``(encoder_outputs, encoder_bw_final_state)``: the forward
            and backward outputs concatenated along axis 2, and the final
            state of the backward stack.
        """
        with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
            encoder_inputs_2d = tf.reshape(
                self.encoder_inputs,
                [self.batch_size * self.max_time, self.input_dim])
            encoder_cell_inputs = tf.layers.dense(inputs=encoder_inputs_2d,
                                                  units=self.hidden_size,
                                                  activation=tf.nn.relu)
            encoder_cell_inputs_3d = tf.reshape(
                encoder_cell_inputs,
                [self.batch_size, self.max_time, self.hidden_size])

            encoder_fw_cells = []
            encoder_bw_cells = []
            for i in range(self.num_layers):
                with tf.variable_scope('encoder_lstm_{}'.format(i)):
                    # Separate cell objects per direction: appending one
                    # instance to both lists made the forward and backward
                    # stacks run through the very same cell.
                    encoder_fw_cells.append(
                        tf.contrib.rnn.GLSTMCell(self.hidden_size))
                    encoder_bw_cells.append(
                        tf.contrib.rnn.GLSTMCell(self.hidden_size))
            encoder_muti_fw_cell = rnn_cell.MultiRNNCell(encoder_fw_cells)
            encoder_muti_bw_cell = rnn_cell.MultiRNNCell(encoder_bw_cells)

            (encoder_fw_outputs, encoder_bw_outputs), (encoder_fw_final_state, encoder_bw_final_state) = \
                tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_muti_fw_cell,
                                                cell_bw=encoder_muti_bw_cell,
                                                inputs=encoder_cell_inputs_3d,
                                                sequence_length=self.sequence_length,
                                                dtype=tf.float32, time_major=False)

            encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)

            # Per-layer final state with both directions joined.
            self.encoder_final_state = [
                LSTMStateTuple(
                    c=tf.concat((encoder_fw_final_state[i].c,
                                 encoder_bw_final_state[i].c), 1),
                    h=tf.concat((encoder_fw_final_state[i].h,
                                 encoder_bw_final_state[i].h), 1))
                for i in range(self.num_layers)
            ]
            # NOTE(review): the backward final state is returned (unchanged
            # from before) rather than self.encoder_final_state -- confirm
            # that this is what callers expect.
            return encoder_outputs, encoder_bw_final_state
Пример #11
0
def BRNN(x, weight, bias):
    """Two-layer bidirectional LSTM followed by a softmax read-out.

    Uses the module-level ``n_hidden`` as the LSTM size.

    Args:
        x: input tensor for ``tf.nn.bidirectional_dynamic_rnn``.
        weight: matrix multiplied with the selected RNN output.
        bias: bias added before the softmax.

    Returns:
        Softmax tensor named "pred".
    """

    def two_layer_lstm():
        layers = [
            rnn_cell.LSTMCell(n_hidden, state_is_tuple=True) for _ in range(2)
        ]
        return rnn_cell.MultiRNNCell(layers)

    cell_fw = two_layer_lstm()
    cell_bw = two_layer_lstm()

    directions, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw, cell_bw, x, dtype=tf.float32)

    # NOTE(review): index -1 selects the second element of the returned
    # pair, i.e. only the backward outputs are used -- confirm this is
    # intended.
    time_major = tf.transpose(directions[-1], [1, 0, 2])
    # Requires the (transposed) leading dimension to be statically known.
    final_step = int(time_major.get_shape()[0]) - 1
    last = tf.gather(time_major, final_step)

    logits = tf.matmul(last, weight) + bias
    return tf.nn.softmax(logits, name="pred")
Пример #12
0
	def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size, dim_image, dim_hidden, max_words_q, vocabulary_size, drop_out_rate):
		"""Store the hyper-parameters and create all model parameters.

		Creates the word-embedding tables, one two-layer peephole LSTM stack
		for questions and one for answers, and the question / answer / image /
		score / question-image projection variables.  Every variable is drawn
		uniformly from [-0.08, 0.08].
		"""
		self.rnn_size = rnn_size
		self.rnn_layer = rnn_layer
		self.batch_size = batch_size
		self.input_embedding_size = input_embedding_size
		self.dim_image = dim_image
		self.dim_hidden = dim_hidden
		self.max_words_q = max_words_q
		self.vocabulary_size = vocabulary_size
		self.drop_out_rate = drop_out_rate

		def uniform_var(shape, name):
			# Trainable variable initialised uniformly in [-0.08, 0.08].
			return tf.Variable(tf.random_uniform(shape, -0.08, 0.08), name=name)

		def peephole_stack():
			# Two peephole LSTM layers, each with output dropout, plus their stack.
			keep = 1 - self.drop_out_rate
			lower = rnn_cell.LSTMCell(rnn_size, input_embedding_size, use_peepholes=True, state_is_tuple=False)
			lower_drop = rnn_cell.DropoutWrapper(lower, output_keep_prob=keep)
			upper = rnn_cell.LSTMCell(rnn_size, rnn_size, use_peepholes=True, state_is_tuple=False)
			upper_drop = rnn_cell.DropoutWrapper(upper, output_keep_prob=keep)
			stacked = rnn_cell.MultiRNNCell([lower_drop, upper_drop], state_is_tuple=False)
			return lower, lower_drop, upper, upper_drop, stacked

		# Before-LSTM-embedding
		embedding_shape = [self.vocabulary_size, self.input_embedding_size]
		self.embed_BLSTM_Q_W = uniform_var(embedding_shape, 'embed_BLSTM_Q_W')
		self.embed_BLSTM_A_W = uniform_var(embedding_shape, 'embed_BLSTM_A_W')

		# encoder: RNN body (question stack, then answer stack)
		(self.lstm_1_q, self.lstm_dropout_1_q,
		 self.lstm_2_q, self.lstm_dropout_2_q,
		 self.stacked_lstm_q) = peephole_stack()
		(self.lstm_1_a, self.lstm_dropout_1_a,
		 self.lstm_2_a, self.lstm_dropout_2_a,
		 self.stacked_lstm_a) = peephole_stack()

		state_width = 2*rnn_size*rnn_layer

		# question-embedding W1
		self.embed_Q_W = uniform_var([state_width, self.dim_hidden], 'embed_Q_W')
		self.embed_Q_b = uniform_var([self.dim_hidden], 'embed_Q_b')

		# Answer-embedding W3
		self.embed_A_W = uniform_var([state_width, self.dim_hidden], 'embed_A_W')
		self.embed_A_b = uniform_var([self.dim_hidden], 'embed_A_b')

		# image-embedding W2
		self.embed_image_W = uniform_var([dim_image, self.dim_hidden], 'embed_image_W')
		self.embed_image_b = uniform_var([dim_hidden], 'embed_image_b')

		# score-embedding W4
		# NOTE(review): num_output is neither a parameter nor an attribute;
		# presumably a module-level constant -- verify.
		self.embed_scor_W = uniform_var([dim_hidden, num_output], 'embed_scor_W')
		self.embed_scor_b = uniform_var([num_output], 'embed_scor_b')

		# QI-embedding W3
		self.embed_QI_W = uniform_var([dim_hidden, dim_hidden], 'embed_QI_W')
		self.embed_QI_b = uniform_var([dim_hidden], 'embed_QI_b')
Пример #13
0
        def h_rnn(input):
            """Reduce a sequence level by level with segment-wise RNNs.

            Each level is cut into consecutive segments of at most
            ``self.seg_len`` items; ``_rnn`` is applied to every segment and
            its results form the next level.  A new level is built under its
            own "RNN_<level>" variable scope, with variables reused across
            the segments of that level.  Building stops after the first level
            whose input fits in one segment.

            Returns:
                The first element of the last level built.
            """
            levels = [input]
            num_layer = 0
            while True:
                print(num_layer)
                current = levels[num_layer]
                produced = []
                levels.append(produced)
                n_items = len(current)
                with tf.variable_scope("RNN_" + str(num_layer)) as scope:
                    base = rnn_cell.BasicLSTMCell(self.dim)
                    dropped = rnn_cell.DropoutWrapper(
                        base, output_keep_prob=self.keep_prob)
                    stacked_cell = rnn_cell.MultiRNNCell(
                        [dropped] * self.number_of_layers)
                    start = 0
                    while start < n_items:
                        stop = min(start + self.seg_len, n_items)
                        produced.append(_rnn(stacked_cell, current[start:stop]))
                        # Every segment after the first reuses the variables.
                        scope.reuse_variables()
                        start += self.seg_len
                num_layer += 1
                if n_items <= self.seg_len:
                    break

            return levels[-1][0]
Пример #14
0
def model():
    """Two-layer LSTM over the module-level ``covariates`` with a linear read-out.

    Relies on the module-level names ``covariates``, ``N``, ``datalen``,
    ``rnn_size``, ``weights`` and ``biases``.

    Returns:
        ``batch_matmul(outputs, weights) + biases`` with its first two axes
        swapped.
    """
    # Swap the first two axes, flatten to rows of width N and cut into
    # `datalen` pieces.  The reshape must consume the transposed tensor;
    # it used to read `covariates` again, silently discarding the transpose.
    x = tf.transpose(covariates, [1, 0, 2])
    x = tf.reshape(x, [-1, N])
    x = tf.split(0, datalen, x)

    lstm_cell = rnn_cell.BasicLSTMCell(rnn_size, state_is_tuple=True)
    multi_cell = rnn_cell.MultiRNNCell([lstm_cell] * 2, state_is_tuple=True)

    # NOTE(review): dynamic_rnn is handed the list produced by tf.split;
    # confirm that the TensorFlow version in use accepts a list here.
    outputs, _ = rnn.dynamic_rnn(multi_cell, x, dtype=tf.float32)

    output = tf.batch_matmul(outputs, weights) + biases
    output = tf.transpose(output, [1, 0, 2])

    return output
    def benchmarkTfRNNLSTMBlockCellTraining(self):
        """Benchmark gradients of a static RNN built from stacked LSTMBlockCells.

        For every configuration returned by ``self._GetTestConfig()`` a fresh
        graph is built on "/gpu:0" and the grouped gradient op is handed to
        ``self._BenchmarkOp``.
        """
        for config_name, config in self._GetTestConfig().items():
            num_layers = config["num_layers"]
            num_units = config["num_units"]
            batch_size = config["batch_size"]
            seq_length = config["seq_length"]

            with ops.Graph().as_default(), ops.device("/gpu:0"):
                # The same zero tensor is used for every time step.
                step_input = array_ops.zeros([batch_size, num_units],
                                             dtypes.float32)
                inputs = [step_input] * seq_length

                # One independent block cell per layer.
                layers = []
                for _ in range(num_layers):
                    layers.append(lstm_ops.LSTMBlockCell(num_units=num_units))
                multi_cell = rnn_cell.MultiRNNCell(layers)

                outputs, final_state = core_rnn.static_rnn(
                    multi_cell, inputs, dtype=dtypes.float32)

                trainable_variables = ops.get_collection(
                    ops.GraphKeys.TRAINABLE_VARIABLES)
                gradients = gradients_impl.gradients([outputs, final_state],
                                                     trainable_variables)
                training_op = control_flow_ops.group(*gradients)

                description = "tf_rnn_lstm_block_cell %s %s" % (
                    config_name, self._GetConfigDesc(config))
                self._BenchmarkOp(training_op, description)
    def benchmarkTfRNNLSTMTraining(self):
        """Benchmark gradients of a static RNN built from stacked LSTMCells.

        For every configuration returned by ``self._GetTestConfig()`` a fresh
        graph is built on "/gpu:0" and the grouped gradient op is handed to
        ``self._BenchmarkOp``.
        """
        test_configs = self._GetTestConfig()
        for config_name, config in test_configs.items():
            num_layers = config["num_layers"]
            num_units = config["num_units"]
            batch_size = config["batch_size"]
            seq_length = config["seq_length"]

            with ops.Graph().as_default(), ops.device("/gpu:0"):
                inputs = seq_length * [
                    array_ops.zeros([batch_size, num_units], dtypes.float32)
                ]
                initializer = init_ops.random_uniform_initializer(-0.01,
                                                                  0.01,
                                                                  seed=127)

                # Build a new LSTMCell for each layer.  The previous code
                # created a single cell instance and then invoked it like a
                # factory (`cell()`), which fails because calling a cell
                # requires inputs and state.
                multi_cell = rnn_cell.MultiRNNCell([
                    rnn_cell.LSTMCell(num_units=num_units,
                                      initializer=initializer,
                                      state_is_tuple=True)
                    for _ in range(num_layers)
                ])
                outputs, final_state = core_rnn.static_rnn(
                    multi_cell, inputs, dtype=dtypes.float32)
                trainable_variables = ops.get_collection(
                    ops.GraphKeys.TRAINABLE_VARIABLES)
                gradients = gradients_impl.gradients([outputs, final_state],
                                                     trainable_variables)
                training_op = control_flow_ops.group(*gradients)
                self._BenchmarkOp(
                    training_op, "tf_rnn_lstm %s %s" %
                    (config_name, self._GetConfigDesc(config)))
Пример #17
0
def RNN(x):
    """Two-layer basic RNN with a sigmoid read-out of the last step.

    Relies on the module-level names ``n_input``, ``n_steps``, ``n_hidden``
    and ``n_classes``.

    Args:
        x: input batch; per the original notes its shape is
            (batch_size, n_steps, n_input).

    Returns:
        ``sigmoid(outputs[-1] @ weights_out + biases_out)``.
    """
    # Turn the batch into a list of `n_steps` per-step tensors: swap the
    # first two axes, flatten to rows of width n_input, then split.
    time_first = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_first, [-1, n_input])
    steps = tf.split(0, n_steps, flat)

    # The same BasicRNNCell object is used for both layers of the stack.
    base_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    stacked = rnn_cell.MultiRNNCell([base_cell] * 2)
    outputs, states = rnn.rnn(stacked, steps, dtype=tf.float32)

    # Read-out parameters.
    weights_out = tf.get_variable(
        name="weights_out",
        shape=[n_hidden, n_classes],
        initializer=tf.truncated_normal_initializer())
    biases_out = tf.get_variable(
        name="biases_out",
        shape=[n_classes],
        initializer=tf.truncated_normal_initializer())

    # Only the output of the last time step is used.
    logits = tf.matmul(outputs[-1], weights_out) + biases_out
    return tf.sigmoid(logits)
Пример #18
0
    def _create_encoder(self, args):
        """Build the LSTM encoder and the latent mean / log-std heads.

        Reads ``encoder_size``, ``num_encoder_layers``, ``batch_size`` and
        ``z_dim`` from ``args``.

        Side effects:
            Sets ``self.full_lstm``, ``self.lstm_state`` (zero state),
            ``self.final_state``, ``self.z_mean`` and ``self.z_logstd``.
        """
        # Stacked LSTM; the same cell object fills every layer slot.
        base_cell = rnn_cell.LSTMCell(
            args.encoder_size,
            state_is_tuple=True,
            initializer=initializers.xavier_initializer())
        self.full_lstm = rnn_cell.MultiRNNCell(
            [base_cell] * args.num_encoder_layers, state_is_tuple=True)
        self.lstm_state = self.full_lstm.zero_state(args.batch_size,
                                                    tf.float32)

        # Forward pass over a single step: states and actions joined on axis 1.
        step_input = tf.concat(1, [self.states_encode, self.actions_encode])
        step_outputs, self.final_state = seq2seq.rnn_decoder(
            [step_input], self.lstm_state, self.full_lstm)
        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, args.encoder_size])

        # Fully connected layer producing 2 * z_dim values per row.
        latent_w = tf.get_variable(
            "latent_w", [args.encoder_size, 2 * args.z_dim],
            initializer=initializers.xavier_initializer())
        latent_b = tf.get_variable("latent_b", [2 * args.z_dim])
        latent_params = tf.nn.xw_plus_b(flat_output, latent_w, latent_b)

        # First half is the mean, second half the log standard deviation.
        self.z_mean, self.z_logstd = tf.split(1, 2, latent_params)
Пример #19
0
def RNN(x, weights, biases):
    """Run a stacked LSTM over ``x`` and project its last output.

    ``x`` is reshaped to rows of width ``RNN_IN_DIMENS`` and split along
    axis 0 into ``SEQUENCE_LENGTH`` pieces, one per time step.

    NOTE(review): there is no transpose before the reshape, so this appears
    to assume ``x`` is already laid out step-major -- confirm with callers.

    Returns:
        A tuple ``(outputs, states, logits)`` where ``outputs`` and
        ``states`` come from ``rnn.rnn`` and ``logits`` is
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    flat = tf.reshape(x, [-1, RNN_IN_DIMENS])
    step_inputs = tf.split(0, SEQUENCE_LENGTH, flat)

    # Both the cell and the stack use a concatenated (non-tuple) state.
    base_cell = rnn_cell.BasicLSTMCell(
        RNN_NEURONS, forget_bias=1.0, state_is_tuple=False)
    stacked = rnn_cell.MultiRNNCell(
        [base_cell] * RNN_LAYERS, state_is_tuple=False)

    outputs, states = rnn.rnn(stacked, step_inputs, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], weights['out']) + biases['out']
    return (outputs, states, logits)
Пример #20
0
    def model(self):
        """
        Builds the Tensorflow graph: a stacked GRU over ``self.x`` followed by
        two affine layers with a ReLU in between.

        :return: the output of the second affine layer, computed from the
                 RNN output at the last step
        """
        print('Building model\n')
        # self.x itself is never rebound; all reshaping happens on locals.
        # Swap the first two axes, flatten to rows of INPUT_SIZE values, then
        # cut into STEPS pieces along axis 0 (one tensor per time step).
        step_major = tf.transpose(self.x, [1, 0, 2])
        flat = tf.reshape(step_major, [-1, INPUT_SIZE])
        step_inputs = tf.split(0, STEPS, flat)

        # GRU cell wrapped in dropout with keep probability 1, repeated for
        # every hidden layer.
        cell = rnn_cell.DropoutWrapper(
            rnn_cell.GRUCell(self.num_hid_units), output_keep_prob=1)
        stacked = rnn_cell.MultiRNNCell([cell] * self.num_hid_layers)
        step_outputs, _ = rnn.rnn(stacked, step_inputs, dtype=tf.float32)

        # Affine -> ReLU -> affine on the output of the last step.
        hidden = tf.nn.relu(
            tf.matmul(step_outputs[-1], self.weights_H2L) + self.bias_H2L)
        return tf.matmul(hidden, self.weights_L2O) + self.bias_L2O
Пример #21
0
def RNN(x, is_training, weights, biases):
    """Run a two-layer LSTM over ``x`` and project its last output.

    Args:
        x: input tensor; its first two axes are swapped, it is flattened to
            rows of ``n_input`` values and split into ``n_time_step`` pieces.
        is_training: when truthy and the module-level ``keep_prob`` is below
            1, both LSTM layers get output dropout.
        weights: mapping whose ``'out'`` entry is the projection matrix.
        biases: mapping whose ``'out'`` entry is the projection bias.

    Returns:
        ``last_output @ weights['out'] + biases['out']`` (no softmax applied).
    """
    step_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(step_major, [-1, n_input])
    step_inputs = tf.split(0, n_time_step, flat)

    layers = [
        rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8),
        rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8),
    ]
    if is_training and keep_prob < 1:
        # Dropout is applied to each layer's output only.
        layers = [
            rnn_cell.DropoutWrapper(layer, output_keep_prob=keep_prob)
            for layer in layers
        ]

    step_outputs, _ = rnn.rnn(
        rnn_cell.MultiRNNCell(layers), step_inputs, dtype=tf.float32)
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
Пример #22
0
    def _create_lstm_policy(self, args):
        """Build the recurrent policy that maps states to an action mean.

        Reads ``args.policy_size``, ``args.num_policy_layers``,
        ``args.batch_size`` and ``args.action_dim``, plus ``self.states``.

        Sets ``self.full_lstm``, ``self.lstm_state``, ``self.final_state``,
        ``self.a_mean`` and ``self.a_logstd``.
        """
        # Recurrent stack: the same LSTM cell object is repeated per layer.
        base_cell = rnn_cell.LSTMCell(
            args.policy_size,
            state_is_tuple=True,
            initializer=initializers.xavier_initializer())
        layers = [base_cell] * args.num_policy_layers
        self.full_lstm = rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
        self.lstm_state = self.full_lstm.zero_state(args.batch_size, tf.float32)

        # Forward pass over a single decoder step fed with the raw states.
        step_outputs, self.final_state = seq2seq.rnn_decoder(
            [self.states], self.lstm_state, self.full_lstm)
        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, args.policy_size])

        # Affine projection from the LSTM output to the action mean.
        lstm_w = tf.get_variable(
            "lstm_w", [args.policy_size, args.action_dim],
            initializer=initializers.xavier_initializer())
        lstm_b = tf.get_variable("lstm_b", [args.action_dim])
        self.a_mean = tf.nn.xw_plus_b(flat_output, lstm_w, lstm_b)

        # The log-std is a separate variable initialised to zeros.
        initial_logstd = np.zeros(args.action_dim)
        self.a_logstd = tf.Variable(initial_logstd,
                                    name="a_logstd",
                                    dtype=tf.float32)
Пример #23
0
    def RNN(x, weights, biases, type, layer_norm):
        """Run a stacked RNN of the requested cell type and project its last output.

        Args:
            x: input tensor, expected as (batch_size, n_steps, n_input).
            weights: mapping whose ``'out'`` entry is the projection matrix.
            biases: mapping whose ``'out'`` entry is the projection bias.
            type: name of the cell to use; one of ``"LSTM"``, ``"GRU"``,
                ``"BasicRNN"``, ``"LNGRU"``, ``"LNLSTM"`` or
                ``"HyperLnLSTMCell"``. (The parameter name shadows the
                builtin but is kept for caller compatibility.)
            layer_norm: forwarded as ``is_layer_norm`` to ``HyperLnLSTMCell``;
                ignored by the other cell types.

        Returns:
            ``outputs[-1] @ weights['out'] + biases['out']``.

        Raises:
            ValueError: if ``type`` is not one of the supported names.
        """
        # Factories are lazy so that only the selected cell is constructed.
        cell_factories = {
            "LSTM": lambda: rnn_cell.BasicLSTMCell(n_hidden),
            "GRU": lambda: rnn_cell.GRUCell(n_hidden),
            "BasicRNN": lambda: rnn_cell.BasicRNNCell(n_hidden),
            "LNGRU": lambda: LNGRUCell(n_hidden),
            "LNLSTM": lambda: LNBasicLSTMCell(n_hidden),
            'HyperLnLSTMCell': lambda: HyperLnLSTMCell(
                n_hidden, is_layer_norm=layer_norm),
        }
        # Fail early with a clear message instead of passing None cells on.
        if type not in cell_factories:
            raise ValueError(
                "Unknown cell type %r; expected one of %s"
                % (type, sorted(cell_factories)))

        # Prepare data shape to match `rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Permuting batch_size and n_steps
        x = tf.transpose(x, [1, 0, 2])
        # Reshaping to (n_steps*batch_size, n_input)
        x = tf.reshape(x, [-1, n_input])
        # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        x = tf.split(0, n_steps, x)

        # Build the selected cell and repeat it for every layer.
        lstm_cell = cell_factories[type]()
        cell = rnn_cell.MultiRNNCell([lstm_cell] * FLAGS.layers)
        # Function-call form prints the same text on Python 2 and Python 3.
        print("Using %s model" % type)
        # Get rnn cell output
        outputs, states = rnn.rnn(cell, x, dtype=tf.float32)

        # Linear activation, using rnn inner loop last output
        return tf.matmul(outputs[-1], weights['out']) + biases['out']
Пример #24
0
def RNN(x):
    """Run a three-layer LSTM over ``x`` and apply two sigmoid layers.

    ``x`` is expected as (batch_size, n_steps, n_input); it is converted to
    a list of ``n_steps`` tensors before being fed to ``rnn.rnn``.

    Returns:
        A pair ``(prediction, drawing_layer)``: ``drawing_layer`` is the
        sigmoid of a 2-unit projection of the last RNN output, and
        ``prediction`` is the sigmoid of a 1-unit projection of
        ``drawing_layer``.
    """

    def _truncated_normal_variable(name, shape):
        # Every variable here uses its own truncated-normal initializer.
        return tf.get_variable(
            name=name,
            shape=shape,
            initializer=tf.truncated_normal_initializer())

    # Swap batch and step axes, flatten to (n_steps*batch_size, n_input),
    # then split into one tensor per step.
    step_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(step_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flat)

    # The same LSTM cell object is repeated for each of the three layers.
    base_cell = tf.nn.rnn_cell.LSTMCell(
        n_hidden, forget_bias=1.0, state_is_tuple=True)
    stacked = rnn_cell.MultiRNNCell([base_cell] * 3, state_is_tuple=True)
    step_outputs, _ = rnn.rnn(stacked, step_inputs, dtype=tf.float32)

    weights_2 = _truncated_normal_variable("weights_2", [n_hidden, 2])
    biases_2 = _truncated_normal_variable("biases_2", [2])
    weights_1 = _truncated_normal_variable("weights_1", [2, 1])
    biases_1 = _truncated_normal_variable("biases_1", [1])

    # 2-unit sigmoid layer on the last RNN output.
    drawing_layer = tf.sigmoid(
        tf.matmul(step_outputs[-1], weights_2) + biases_2)

    # 1-unit sigmoid layer on top of the 2-unit layer.
    prediction = tf.sigmoid(tf.matmul(drawing_layer, weights_1) + biases_1)
    return prediction, drawing_layer
Пример #25
0
 def testLSTMBlockCell(self):
     """Check a two-layer LSTMBlockCell stack against fixed reference values."""
     with self.session(use_gpu=True, graph=ops.Graph()) as sess:
         with variable_scope.variable_scope(
                 "root", initializer=init_ops.constant_initializer(0.5)):
             x = array_ops.zeros([1, 2])
             # Four state tensors: one pair per layer.
             m0, m1, m2, m3 = [array_ops.zeros([1, 2]) for _ in range(4)]
             stacked = rnn_cell.MultiRNNCell(
                 [lstm_ops.LSTMBlockCell(2) for _ in range(2)],
                 state_is_tuple=True)
             g, ((out_m0, out_m1), (out_m2, out_m3)) = stacked(
                 x, ((m0, m1), (m2, m3)))
             sess.run([variables.global_variables_initializer()])

             # Input is fed as ones, every state tensor as 0.1.
             feed = {x.name: np.array([[1., 1.]])}
             for state in (m0, m1, m2, m3):
                 feed[state.name] = 0.1 * np.ones([1, 2])
             res = sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

             self.assertEqual(len(res), 5)
             # res[0] is the cell output; res[1:] are the four new states.
             # These numbers are from testBasicLSTMCell and only test c/h.
             expected = [
                 [[0.24024698, 0.24024698]],
                 [[0.68967271, 0.68967271]],
                 [[0.44848421, 0.44848421]],
                 [[0.39897051, 0.39897051]],
                 [[0.24024698, 0.24024698]],
             ]
             for actual, wanted in zip(res, expected):
                 self.assertAllClose(actual, wanted)
def RNN(x, weight, bias):
    """Run a two-layer LSTM with ``tf.nn.dynamic_rnn`` and classify its last step.

    Returns:
        ``softmax(last @ weight + bias)`` where ``last`` is the slice at the
        final index of the RNN output after its first two axes are swapped.
    """
    base_cell = rnn_cell.LSTMCell(n_hidden, state_is_tuple=True)
    stacked = rnn_cell.MultiRNNCell([base_cell] * 2)
    rnn_output, _ = tf.nn.dynamic_rnn(stacked, x, dtype=tf.float32)

    # Swap the first two axes so the final step can be gathered by index;
    # this needs a statically known size for the leading axis.
    swapped = tf.transpose(rnn_output, [1, 0, 2])
    final_index = int(swapped.get_shape()[0]) - 1
    last = tf.gather(swapped, final_index)

    return tf.nn.softmax(tf.matmul(last, weight) + bias)
Пример #27
0
    def _build_graph(self, input_vars):
        """Build the stacked-LSTM language-model graph.

        ``input_vars`` unpacks into the input token ids and the next-token
        targets. Sets ``self.initial``, ``self.last_state``, ``self.prob``
        and ``self.cost``, and registers a histogram summary for every
        parameter matching ``.*/W``.
        """
        tokens, next_tokens = input_vars

        base_cell = rnn_cell.BasicLSTMCell(num_units=param.rnn_size)
        stacked = rnn_cell.MultiRNNCell([base_cell] * param.num_rnn_layer)

        # The batch size is taken from the input's first dimension at run time.
        zero_state = stacked.zero_state(tf.shape(tokens)[0], tf.float32)
        self.initial = zero_state

        embedding_table = tf.get_variable(
            'embedding', [param.vocab_size, param.rnn_size])
        # B x seqlen x rnnsize
        embedded = tf.nn.embedding_lookup(embedding_table, tokens)

        # seqlen pieces of B x 1 x rnnsize, each squeezed on axis 1
        step_inputs = [
            tf.squeeze(piece, [1])
            for piece in tf.split(1, param.seq_len, embedded)
        ]

        # seqlen is 1 in inference. don't need loop_function
        step_outputs, last_state = rnn.rnn(
            stacked, step_inputs, zero_state, scope='rnnlm')
        self.last_state = tf.identity(last_state, 'last_state')

        # seqlen x (B x rnnsize) -> (B x seqlen) x rnnsize
        flat_output = tf.reshape(tf.concat(1, step_outputs),
                                 [-1, param.rnn_size])
        logits = FullyConnected(
            'fc', flat_output, param.vocab_size, nl=tf.identity)
        # Logits are divided by the configured temperature before softmax.
        self.prob = tf.nn.softmax(logits / param.softmax_temprature)

        flat_targets = symbolic_functions.flatten(next_tokens)
        per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, flat_targets)
        self.cost = tf.reduce_mean(per_token_loss, name='cost')

        # monitor histogram of all W
        summary.add_param_summary([('.*/W', ['histogram'])])
    def model(self):
        """
        Builds the Tensorflow graph: a (possibly stacked) GRU over ``self.x``
        whose output at every step goes through one shared affine layer.

        :return: ``(y_ns, y_pred)`` -- the raw scores and their sigmoid, both
                 restricted to the first 783 columns
        """
        print('Building model\n')
        # self.x itself is never rebound; all reshaping happens on locals.
        # Swap the first two axes, flatten to rows of INPUT_SIZE values, then
        # cut into STEPS pieces along axis 0 (one tensor per time step).
        step_major = tf.transpose(self.x, [1, 0, 2])
        flat = tf.reshape(step_major, [-1, INPUT_SIZE])
        step_inputs = tf.split(0, STEPS, flat)

        # GRU cell wrapped in dropout with keep probability 1; only wrapped
        # in a MultiRNNCell when more than one layer is requested.
        cell = rnn_cell.DropoutWrapper(
            rnn_cell.GRUCell(self.num_hid_units), output_keep_prob=1)
        if self.num_hid_layers > 1:
            cell = rnn_cell.MultiRNNCell([cell] * self.num_hid_layers)
        step_outputs, _ = rnn.rnn(cell, step_inputs, dtype=tf.float32)

        # Back to [batch_size, steps, hidden_units], then flatten to
        # [batch_size x steps, hidden_units] for the matrix multiplication.
        batch_major = tf.transpose(step_outputs, [1, 0, 2])
        flat_hidden = tf.reshape(batch_major, [-1, self.num_hid_units])

        # One score per (example, step), reshaped to STEPS columns.
        scores = tf.matmul(flat_hidden, self.weights_H2O) + self.bias_H2O
        scores = tf.reshape(scores, [-1, STEPS])

        # Keep both the raw scores (the sigmoid is applied in the cost
        # function) and the sigmoid predictions.
        # NOTE(review): 783 looks like STEPS - 1 -- confirm.
        y_ns = scores[:, :783]
        y_pred = tf.sigmoid(scores)[:, :783]

        return y_ns, y_pred
Пример #29
0
def _get_rnn_cell(cell_type, num_units, num_layers):
    """Constructs and return an `RNNCell`.

    Args:
        cell_type: either a string identifying the `RNNCell` type, or a
            subclass of `RNNCell`.
        num_units: the number of units in the `RNNCell`.
        num_layers: the number of layers in the RNN.

    Returns:
        An initialized `RNNCell`; wrapped in a `MultiRNNCell` when
        `num_layers` is greater than 1.

    Raises:
        ValueError: `cell_type` is an invalid `RNNCell` name.
        TypeError: `cell_type` is not a string or a subclass of `RNNCell`.
    """
    if isinstance(cell_type, str):
        # Keep the requested name: the lookup below rebinds `cell_type`, and
        # the error message must report what the caller actually passed.
        requested_name = cell_type
        cell_type = _CELL_TYPES.get(requested_name)
        if cell_type is None:
            raise ValueError('The supported cell types are {}; got {}'.format(
                list(_CELL_TYPES.keys()), requested_name))
    if not issubclass(cell_type, rnn_cell.RNNCell):
        raise TypeError(
            'cell_type must be a subclass of RNNCell or one of {}.'.format(
                list(_CELL_TYPES.keys())))
    cell = cell_type(num_units=num_units)
    if num_layers > 1:
        # The same cell object is repeated for every layer.
        cell = rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    return cell
Пример #30
0
    def __init__(self, rnn_size, num_layers, batch_size, seq_length,
                 vocab_size, grad_clip, infer=False):
        """
        Build the training/sampling graph for a multi-layer GRU language model.
        @param rnn_size: The size of the RNN
        @param num_layers: The number of layers for the RNN to have
        @param batch_size: The batch size to train with
        @param seq_length: The length of the sequences to use in training
        @param vocab_size: The size of the vocab
        @param grad_clip: The point at which to clip the gradient in the gradient descent
        @param infer: When true, batch_size and seq_length are both forced to 1
                      and the decoder is given a loop function that maps a
                      previous output to the embedding of its argmax symbol.
        """
        #TODO: During training, (and when sampling), the input to the RNN should be
        #      the list of ingredients that goes with that recipe text.
        if infer:
            batch_size = seq_length = 1

        # GRU cell (BasicLSTMCell was the alternative); the same cell object
        # is repeated for every layer.
        stacked_cell = rnn_cell.MultiRNNCell(
            [rnn_cell.GRUCell(rnn_size)] * num_layers)
        self.cell = stacked_cell

        self.input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.targets = tf.placeholder(tf.int32, [batch_size, seq_length])
        self.initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        with tf.variable_scope("rnnlm"):
            softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
            softmax_b = tf.get_variable("softmax_b", [vocab_size])
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [vocab_size, rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One tensor per step, with the step axis squeezed away.
                inputs = [
                    tf.squeeze(piece, [1])
                    for piece in tf.split(1, seq_length, embedded)
                ]

        def feed_previous(prev, _):
            # Project the previous output to the vocabulary, take the argmax
            # symbol (no gradient through it) and return its embedding.
            projected = tf.matmul(prev, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(projected, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        outputs, last_state = seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            stacked_cell,
            loop_function=feed_previous if infer else None,
            scope="rnnlm")
        flat_outputs = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
        self.logits = tf.matmul(flat_outputs, softmax_w) + softmax_b

        self.probs = tf.nn.softmax(self.logits)

        # Every target position is weighted equally in the loss.
        flat_targets = tf.reshape(self.targets, [-1])
        uniform_weights = tf.ones([batch_size * seq_length])
        per_example_loss = seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [uniform_weights], vocab_size)
        self.cost = tf.reduce_sum(per_example_loss) / batch_size / seq_length
        self.final_state = last_state

        # The learning rate is a non-trainable variable starting at 0.0.
        self.lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, trainable), grad_clip)
        self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
            zip(clipped, trainable))