def __load_model(self, num_layers):
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("forward"):
        fw_single_cell = rnn_cell.GRUCell(self.memory_dim)
        # Stacks layers of RNNs to form a stacked decoder
        self.forward_cell = rnn_cell.MultiRNNCell([fw_single_cell] * num_layers)

    with tf.variable_scope("backward"):
        bw_single_cell = rnn_cell.GRUCell(self.memory_dim)
        # Stacks layers of RNNs to form a stacked decoder
        self.backward_cell = rnn_cell.MultiRNNCell([bw_single_cell] * num_layers)

    # embedding model
    if not self.attention:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_rnn_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
    else:
        with tf.variable_scope("forward"):
            self.dec_outputs_fwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("forward", reuse=True):
            self.dec_outputs_fwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_fwd, self.dec_inp, self.forward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
        with tf.variable_scope("backward"):
            self.dec_outputs_bwd, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length)
        with tf.variable_scope("backward", reuse=True):
            self.dec_outputs_bwd_tst, _ = seq2seq.embedding_attention_seq2seq(
                self.enc_inp_bwd, self.dec_inp, self.backward_cell,
                self.vocab_size, self.vocab_size, self.seq_length,
                feed_previous=True)
def char_rnn_model(X, y):
    byte_list = skflow.ops.one_hot_matrix(X, 256)
    byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
    cell = rnn_cell.GRUCell(HIDDEN_SIZE)
    # cell = rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
    _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding, y)
def RNN(X, num_words_in_X, hidden_size, input_vector_size, max_input_size):
    """Passes the input data through an RNN and outputs the final states.

    X: Input is a MAX_INPUT_LENGTH x BATCH_SIZE x WORD_VECTOR_LENGTH matrix
    num_words_in_X: Number of words in X, which is needed because X is zero padded
    hidden_size: The dimensionality of the hidden layer of the RNN
    input_vector_size: The dimensionality of each input vector, in this case WORD_VECTOR_LENGTH
    max_input_size: The maximum number of input vectors that can be passed in to the RNN
    """
    # Split X into a list of max_input_size tensors, each a BATCH_SIZE x input_vector_size vector
    X = tf.split(0, max_input_size, X)
    squeezed = []
    for i in range(len(X)):
        squeezed.append(tf.squeeze(X[i]))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size, input_size=input_vector_size)
    output, state = rnn.rnn(gru_cell, squeezed, sequence_length=num_words_in_X,
                            dtype=tf.float32)
    return output, state
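# A minimal usage sketch for the RNN() helper above. It assumes the same legacy
# TF 0.x API the snippet itself uses (rnn, rnn_cell); BATCH_SIZE,
# WORD_VECTOR_LENGTH and MAX_INPUT_LENGTH are illustrative placeholders, not
# values taken from the original code.
def rnn_usage_sketch():
    BATCH_SIZE = 32
    WORD_VECTOR_LENGTH = 300
    MAX_INPUT_LENGTH = 50
    # Input shaped MAX_INPUT_LENGTH x BATCH_SIZE x WORD_VECTOR_LENGTH, as the docstring expects.
    X = tf.placeholder(tf.float32, [MAX_INPUT_LENGTH, BATCH_SIZE, WORD_VECTOR_LENGTH])
    # One sequence length per example in the batch (X is zero padded past that length).
    num_words_in_X = tf.placeholder(tf.int32, [BATCH_SIZE])
    outputs, final_state = RNN(X, num_words_in_X, hidden_size=128,
                               input_vector_size=WORD_VECTOR_LENGTH,
                               max_input_size=MAX_INPUT_LENGTH)
    return outputs, final_state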
def testEmbeddingAttentionDecoder(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
            cell = rnn_cell.GRUCell(2)
            enc_outputs, enc_states = rnn.rnn(cell, inp, dtype=tf.float32)
            attn_states = tf.concat(1, [tf.reshape(e, [-1, 1, cell.output_size])
                                        for e in enc_outputs])
            dec_inp = [tf.constant(i, tf.int32, shape=[2]) for i in xrange(3)]
            dec, mem = seq2seq.embedding_attention_decoder(dec_inp, enc_states[-1],
                                                           attn_states, cell, 4,
                                                           output_size=3)
            sess.run([tf.initialize_all_variables()])
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 3))
            res = sess.run(mem)
            self.assertEqual(len(res), 4)
            self.assertEqual(res[0].shape, (2, 2))
def testRNNDecoder(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
            _, enc_states = rnn.rnn(rnn_cell.GRUCell(2), inp, dtype=tf.float32)
            dec_inp = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]
            cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
            dec, mem = seq2seq.rnn_decoder(dec_inp, enc_states[-1], cell)
            sess.run([tf.initialize_all_variables()])
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 4))
            res = sess.run(mem)
            self.assertEqual(len(res), 4)
            self.assertEqual(res[0].shape, (2, 2))
def rnn_model(X, y):
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
                                                   embedding_size=EMBEDDING_SIZE,
                                                   name='words')
    word_list = [tf.squeeze(w, [1])
                 for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)]
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    return skflow.models.logistic_regression(encoding[-1], y)
def RNN(X, num_words_in_X, hidden_size, max_input_size):
    # Reshape `X` as a vector. -1 means "set this dimension automatically".
    X_as_vector = tf.reshape(X, [-1])

    # Create another vector containing zeroes to pad `X` to
    # (MAX_INPUT_LENGTH * WORD_VECTOR_LENGTH) elements.
    zero_padding = tf.zeros([max_input_size * WORD_VECTOR_LENGTH] - tf.shape(X_as_vector),
                            dtype=X.dtype)

    # Concatenate `X_as_vector` with the padding.
    X_padded_as_vector = tf.concat(0, [X_as_vector, zero_padding])

    # Reshape the padded vector to the desired shape.
    X_padded = tf.reshape(X_padded_as_vector, [max_input_size, WORD_VECTOR_LENGTH])

    # Split X into a list of MAX_INPUT_LENGTH tensors where each tensor is a
    # 1 x WORD_VECTOR_LENGTH word vector.
    # TODO change input to be a list of MAX_INPUT_LENGTH tensors where each tensor
    # is a BATCH_SIZE x WORD_VECTOR_LENGTH vector
    X = tf.split(0, max_input_size, X_padded)

    print "Length X: {}".format(len(X))

    gru_cell = rnn_cell.GRUCell(num_units=hidden_size, input_size=WORD_VECTOR_LENGTH)

    output, state = rnn.rnn(gru_cell, X, sequence_length=(num_words_in_X),
                            dtype=tf.float32)

    print "State: {}".format(state)

    return output, state, X_padded
def final_state_of_rnn_over_embedded_sequence(idx, embedded_seq):
    with tf.variable_scope("rnn_%s" % idx):
        gru = rnn_cell.GRUCell(opts.hidden_dim)
        initial_state = gru.zero_state(opts.batch_size, tf.float32)
        outputs, _states = rnn.rnn(gru, embedded_seq, initial_state=initial_state)
        return outputs[-1]
def GRUSeq2Seq(enc_inp, dec_inp):
    cell = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(24)] * 2)
    return seq2seq.embedding_attention_seq2seq(enc_inp, dec_inp, cell, classes,
                                               classes, output_projection=(w, b))
def __init__(self, params, emb_mat):
    self.params = params
    V, d, L, e = params.vocab_size, params.hidden_size, params.rnn_num_layers, params.word_size

    prev_size = e
    hidden_sizes = [d for _ in range(params.emb_num_layers)]
    for layer_idx in range(params.emb_num_layers):
        with tf.variable_scope("emb_%d" % layer_idx):
            cur_hidden_size = hidden_sizes[layer_idx]
            emb_mat = tf.tanh(my.nn.linear([V, prev_size], cur_hidden_size, emb_mat))
            prev_size = cur_hidden_size
    self.emb_mat = emb_mat

    self.emb_hidden_sizes = [d for _ in range(params.emb_num_layers)]
    self.input_size = self.emb_hidden_sizes[-1] if self.emb_hidden_sizes else e

    if params.lstm == 'basic':
        self.first_cell = my.rnn_cell.BasicLSTMCell(d, input_size=self.input_size,
                                                    forget_bias=params.forget_bias)
        self.second_cell = my.rnn_cell.BasicLSTMCell(d, forget_bias=params.forget_bias)
    elif params.lstm == 'regular':
        self.first_cell = rnn_cell.LSTMCell(d, self.input_size, cell_clip=params.cell_clip)
        self.second_cell = rnn_cell.LSTMCell(d, d, cell_clip=params.cell_clip)
    elif params.lstm == 'gru':
        self.first_cell = rnn_cell.GRUCell(d, input_size=self.input_size)
        self.second_cell = rnn_cell.GRUCell(d)
    else:
        raise Exception()

    if params.train and params.keep_prob < 1.0:
        self.first_cell = tf.nn.rnn_cell.DropoutWrapper(
            self.first_cell, input_keep_prob=params.keep_prob,
            output_keep_prob=params.keep_prob)

    self.cell = rnn_cell.MultiRNNCell([self.first_cell] + [self.second_cell] * (L - 1))
    self.scope = tf.get_variable_scope()
    self.used = False
def prediction(self):
    # Recurrent network.
    output, _ = rnn.dynamic_rnn(
        rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    last = self._last_relevant(output, self.length)
    # Softmax layer.
    weight, bias = self._weight_and_bias(self._num_hidden,
                                         int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
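# The prediction() methods above and below call a _last_relevant() helper that is
# not shown in these snippets. The sketch below is one common way to implement it
# (gather the output at the last valid time step of each zero-padded sequence);
# it is an assumption about what the helper does, not the original authors' code.
@staticmethod
def _last_relevant(output, length):
    # output: batch_size x max_length x hidden_size; length: int32 vector of true lengths.
    batch_size = tf.shape(output)[0]
    max_length = int(output.get_shape()[1])
    output_size = int(output.get_shape()[2])
    # Flatten batch and time dimensions, then pick one row per example.
    index = tf.range(0, batch_size) * max_length + (length - 1)
    flat = tf.reshape(output, [-1, output_size])
    return tf.gather(flat, index)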
def testGRUCell(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 2])
            g, _ = rnn_cell.GRUCell(2)(x, m)
            sess.run([tf.variables.initialize_all_variables()])
            res = sess.run([g], {x.name: np.array([[1., 1.]]),
                                 m.name: np.array([[0.1, 0.1]])})
            # Smoke test
            self.assertAllClose(res[0], [[0.175991, 0.175991]])
def testTiedRNNSeq2Seq(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            inp = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
            dec_inp = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]
            cell = rnn_cell.OutputProjectionWrapper(rnn_cell.GRUCell(2), 4)
            dec, mem = seq2seq.tied_rnn_seq2seq(inp, dec_inp, cell)
            sess.run([tf.initialize_all_variables()])
            res = sess.run(dec)
            self.assertEqual(len(res), 3)
            self.assertEqual(res[0].shape, (2, 4))
            res = sess.run(mem)
            self.assertEqual(len(res), 4)
            self.assertEqual(res[0].shape, (2, 2))
def testEmbeddingWrapper(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 1], dtype=tf.int32)
            m = tf.zeros([1, 2])
            g, new_m = rnn_cell.EmbeddingWrapper(rnn_cell.GRUCell(2), 3)(x, m)
            sess.run([tf.variables.initialize_all_variables()])
            res = sess.run([g, new_m], {x.name: np.array([[1]]),
                                        m.name: np.array([[0.1, 0.1]])})
            self.assertEqual(res[1].shape, (1, 2))
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(res[0], [[0.17139, 0.17139]])
def prediction(self):
    # The output feeds the cost (cross-entropy) and error computations.
    # Recurrent network.
    output, _ = rnn.dynamic_rnn(
        rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    # After training, when a sequence is fed in for prediction, the dynamic_rnn
    # output has to be reduced to its last relevant step.
    last = self._last_relevant(output, self.length)
    # Softmax layer.
    weight, bias = self._weight_and_bias(self._num_hidden,
                                         int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
def testMultiRNNCell(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 4])
            _, ml = rnn_cell.MultiRNNCell([rnn_cell.GRUCell(2)] * 2)(x, m)
            sess.run([tf.variables.initialize_all_variables()])
            res = sess.run(ml, {x.name: np.array([[1., 1.]]),
                                m.name: np.array([[0.1, 0.1, 0.1, 0.1]])})
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(res, [[0.175991, 0.175991, 0.13248, 0.13248]])
def prediction(self):
    # Recurrent network.
    network = rnn_cell.GRUCell(self._num_hidden)
    network = rnn_cell.DropoutWrapper(network, output_keep_prob=self.dropout)
    network = rnn_cell.MultiRNNCell([network] * self._num_layers)
    output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Softmax layer.
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
    # Flatten to apply same weights to all time steps.
    output = tf.reshape(output, [-1, self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
def prediction(self):
    # Recurrent network.
    output, _ = rnn.dynamic_rnn(
        rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    # Softmax layer.
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
    # Flatten to apply same weights to all time steps.
    output = tf.reshape(output, [-1, self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
def testInputProjectionWrapper(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 3])
            cell = rnn_cell.InputProjectionWrapper(rnn_cell.GRUCell(3), 2)
            g, new_m = cell(x, m)
            sess.run([tf.variables.initialize_all_variables()])
            res = sess.run([g, new_m], {x.name: np.array([[1., 1.]]),
                                        m.name: np.array([[0.1, 0.1, 0.1]])})
            self.assertEqual(res[1].shape, (1, 3))
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(res[0], [[0.154605, 0.154605, 0.154605]])
def RNN(self, scope):
    # input shape: (batch_size, step_size, input_dim)
    # we need to permute step_size and batch_size (swap the step and batch dimensions)
    data = tf.transpose(self.input_data, [1, 0, 2])
    # Reshape to prepare input to hidden activation:
    # (step_size*batch_size, n_input) flattens the batch and step dimensions.
    # After the above transformation, data is now (step_size*batch_size, input_dim).
    data = tf.reshape(data, [-1, self.config.input_dim + 1])

    with tf.variable_scope(str(scope)):
        data = tf.nn.dropout(
            tf.matmul(data, self.weights['hidden']) + self.biases['hidden'],
            self.config.dropout)

        # Define an LSTM/GRU cell with TensorFlow
        if self.config.cell_type == 'GRU':
            lstm_cell = rnn_cell.GRUCell(self.config.hidden_dim)
        else:
            lstm_cell = rnn_cell.LSTMCell(self.config.hidden_dim,
                                          forget_bias=self.config.forget_bias)

        # Split data because the rnn cell needs a list of inputs for the RNN inner loop
        data = tf.split(0, self.config.step_size, data)  # step_size * (batch_size, hidden_dim)

        # Get lstm cell output
        outputs, states = rnn.rnn(lstm_cell, data, initial_state=self.init_state)

        # We are really just interested in the last step's output
        return [
            tf.matmul(outputs[-1], self.weights['out1']) + self.biases['out1'],
            tf.matmul(outputs[-1], self.weights['out2']) + self.biases['out2'],
            tf.matmul(outputs[-1], self.weights['out3']) + self.biases['out3'],
            tf.matmul(outputs[-1], self.weights['out4']) + self.biases['out4'],
            tf.matmul(outputs[-1], self.weights['out5']) + self.biases['out5'],
        ]
def __init__(self, config):
    self.config = config
    self.vocab_size = vocab_size = config.vocab_size
    self.y_size = y_size = config.y_size
    self.batch_size = batch_size = config.batch_size
    self.steps = config.steps
    self.layers = layers = config.layers
    self.dim_ictx = dim_ictx = config.dim_ictx
    self.dim_iemb = dim_iemb = config.dim_iemb
    self.dim_wemb = dim_wemb = config.dim_wemb
    self.dim_hidden = dim_hidden = config.dim_hidden
    self.lr = tf.Variable(config.lr, trainable=False)

    rnn_type = config.rnn_type
    if rnn_type == 'gru':
        rnn_ = rnn_cell.GRUCell(dim_hidden)
    elif rnn_type == 'lstm':
        rnn_ = rnn_cell.BasicLSTMCell(dim_hidden)

    if layers is not None:
        self.my_rnn = my_rnn = rnn_cell.MultiRNNCell([rnn_] * layers)
        self.init_state = my_rnn.zero_state(batch_size, tf.float32)
    else:
        self.my_rnn = my_rnn = rnn_
        self.init_state = tf.zeros([batch_size, my_rnn.state_size])

    self.W_iemb = tf.get_variable("W_iemb", [dim_ictx, dim_iemb])
    self.b_iemb = tf.get_variable("b_iemb", [dim_iemb])
    with tf.device("/cpu:0"):
        self.W_wemb = tf.get_variable("W_wemb", [vocab_size, dim_wemb])
    if config.is_birnn:  # concat seems to work better than add..
        self.W_pred = tf.get_variable("W_pred", [dim_hidden * 2, y_size])
    else:
        self.W_pred = tf.get_variable("W_pred", [dim_hidden, y_size])
    self.b_pred = tf.get_variable("b_pred", [y_size])
def rnn_model(X, y):
    """Recurrent neural network model to predict from sequence of words to a class."""
    # Convert indexes of words into embeddings.
    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
                                                   embedding_size=EMBEDDING_SIZE,
                                                   name='words')
    # Split into list of embedding per word, while removing doc length dim.
    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Network to length of
    # MAX_DOCUMENT_LENGTH and pass word_list as inputs for each unit.
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    # Given encoding of RNN, take encoding of last step (e.g. hidden size of the
    # neural network of last step) and pass it as features for logistic
    # regression over output classes.
    return skflow.models.logistic_regression(encoding[-1], y)
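# A hedged usage sketch for rnn_model() above, following the way the skflow
# text-classification examples wrapped such model functions in an estimator.
# The hyperparameter values and n_classes count are illustrative assumptions;
# X_train/y_train/X_test are assumed to be pre-tokenized word-id arrays.
classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
                                        steps=1000, optimizer='Adam',
                                        learning_rate=0.01,
                                        continue_training=True)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)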
def build(self):
    self.input_0 = tf.placeholder(
        tf.float32,
        [self.config.max_length_0_input, 1, self.config.embedding_size])
    self.input_0_length = tf.placeholder(tf.int32)
    self.input_1 = tf.placeholder(
        tf.float32,
        [self.config.max_length_0_input, 1, self.config.embedding_size])
    self.input_1_length = tf.placeholder(tf.int32)

    input_0 = array_ops.unpack(self.input_0)
    input_1 = array_ops.unpack(self.input_1)

    # bidirectional rnn
    cell = rnn_cell.GRUCell(self.config.embedding_size)
    initial_state_fw = array_ops.zeros(array_ops.pack([1, cell.state_size]),
                                       dtype=tf.float32)
    initial_state_fw.set_shape([1, cell.state_size])
    initial_state_bw = array_ops.zeros(array_ops.pack([1, cell.state_size]),
                                       dtype=tf.float32)
    initial_state_bw.set_shape([1, cell.state_size])

    states = bidirectional_rnn(
        cell,
        cell,
        input_0,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw,
        dtype=tf.float32,
        # sequence_length=3
    )
    self.test = array_ops.pack(states)
def _init_neural_network(self):
    """Initialize the NN (building a TensorFlow graph and initializing the session)."""
    # set TensorFlow random seed
    tf.set_random_seed(rnd.randint(-sys.maxint, sys.maxint))

    # create placeholders for input & output (always batch-size * 1, list of up to num. steps)
    self.enc_inputs = []
    self.enc_inputs_drop = []
    for i in xrange(self.max_da_len):
        enc_input = tf.placeholder(tf.int32, [None], name=('enc_inp-%d' % i))
        self.enc_inputs.append(enc_input)
        if self.dropout_keep_prob < 1:
            enc_input_drop = tf.nn.dropout(enc_input, self.dropout_keep_prob,
                                           name=('enc_inp-drop-%d' % i))
            self.enc_inputs_drop.append(enc_input_drop)

    self.dec_inputs = []
    for i in xrange(self.max_tree_len):
        self.dec_inputs.append(tf.placeholder(tf.int32, [None], name=('dec_inp-%d' % i)))

    # targets are just decoder inputs shifted by one (+pad with one empty spot)
    self.targets = [self.dec_inputs[i + 1] for i in xrange(len(self.dec_inputs) - 1)]
    self.targets.append(tf.placeholder(tf.int32, [None], name=('target-pad')))

    # prepare cells
    self.initial_state = tf.placeholder(tf.float32, [None, self.emb_size])
    if self.cell_type.startswith('gru'):
        self.cell = rnn_cell.GRUCell(self.emb_size)
    else:
        self.cell = rnn_cell.BasicLSTMCell(self.emb_size)
    if self.cell_type.endswith('/2'):
        self.cell = rnn_cell.MultiRNNCell([self.cell] * 2)

    # build the actual LSTM Seq2Seq network (for training and decoding)
    with tf.variable_scope(self.scope_name) as scope:

        rnn_func = embedding_rnn_seq2seq
        if self.nn_type == 'emb_attention_seq2seq':
            rnn_func = embedding_attention_seq2seq
        elif self.nn_type == 'emb_attention2_seq2seq':
            rnn_func = partial(embedding_attention_seq2seq, num_heads=2)
        elif self.nn_type == 'emb_attention_seq2seq_context':
            rnn_func = embedding_attention_seq2seq_context
        elif self.nn_type == 'emb_attention2_seq2seq_context':
            rnn_func = partial(embedding_attention_seq2seq_context, num_heads=2)

        # for training: feed_previous == False, using dropout if available
        # outputs = batch_size * num_decoder_symbols ~ i.e. output logits at each step
        # states = cell states at each step
        self.outputs, self.states = rnn_func(
            self.enc_inputs_drop if self.enc_inputs_drop else self.enc_inputs,
            self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            scope=scope)

        scope.reuse_variables()

        # for decoding: feed_previous == True
        self.dec_outputs, self.dec_states = rnn_func(
            self.enc_inputs, self.dec_inputs, self.cell,
            self.da_dict_size, self.tree_dict_size,
            feed_previous=True, scope=scope)

    # TODO use output projection ???

    # target weights
    # TODO change to actual weights, zero after the end of tree ???
    self.cost_weights = [tf.ones_like(trg, tf.float32, name='cost_weights')
                         for trg in self.targets]

    # cost
    self.tf_cost = sequence_loss(self.outputs, self.targets,
                                 self.cost_weights, self.tree_dict_size)
    self.dec_cost = sequence_loss(self.dec_outputs, self.targets,
                                  self.cost_weights, self.tree_dict_size)
    if self.use_dec_cost:
        self.cost = 0.5 * (self.tf_cost + self.dec_cost)
    else:
        self.cost = self.tf_cost

    self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # optimizer (default to Adam)
    if self.optimizer_type == 'sgd':
        self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    elif self.optimizer_type == 'adagrad':
        self.optimizer = tf.train.AdagradOptimizer(self.learning_rate)
    else:
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
    self.train_func = self.optimizer.minimize(self.cost)

    # initialize session
    session_config = None
    if self.max_cores:
        session_config = tf.ConfigProto(inter_op_parallelism_threads=self.max_cores,
                                        intra_op_parallelism_threads=self.max_cores)
    self.session = tf.Session(config=session_config)

    # this helps us load/save the model
    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
             num_layers, max_gradient_norm, batch_size, learning_rate,
             learning_rate_decay_factor, use_lstm=False,
             num_samples=512, forward_only=False):
    """Create the model.

    Args:
      source_vocab_size: size of the source vocabulary.
      target_vocab_size: size of the target vocabulary.
      buckets: a list of pairs (I, O), where I specifies maximum input length
        that will be processed in that bucket, and O specifies maximum output
        length. Training instances that have inputs longer than I or outputs
        longer than O will be pushed to the next bucket and padded accordingly.
        We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
      use_lstm: if true, we use LSTM cells instead of GRU cells.
      num_samples: number of samples for sampled softmax.
      forward_only: if set, we do not construct the backward pass in the model.
    """
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.target_vocab_size:
        with tf.device("/cpu:0"):
            w = tf.get_variable("proj_w", [size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.target_vocab_size])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device("/cpu:0"):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.target_vocab_size)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = rnn_cell.GRUCell(size)
    if use_lstm:
        single_cell = rnn_cell.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return seq2seq.embedding_attention_seq2seq(
            encoder_inputs, decoder_inputs, cell, source_vocab_size,
            target_vocab_size, output_projection=output_projection,
            feed_previous=do_decode)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1] + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))

    # Our targets are decoder inputs shifted by one.
    targets = [self.decoder_inputs[i + 1]
               for i in xrange(len(self.decoder_inputs) - 1)]

    # Training outputs and losses.
    if forward_only:
        self.outputs, self.losses = seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, buckets,
            lambda x, y: seq2seq_f(x, y, True),
            softmax_loss_function=softmax_loss_function)
        # If we use output projection, we need to project outputs for decoding.
        if output_projection is not None:
            for b in xrange(len(buckets)):
                self.outputs[b] = [
                    tf.matmul(output, output_projection[0]) + output_projection[1]
                    for output in self.outputs[b]
                ]
    else:
        self.outputs, self.losses = seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, buckets,
            lambda x, y: seq2seq_f(x, y, False),
            softmax_loss_function=softmax_loss_function)

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        self.gradient_norms = []
        self.updates = []
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        for b in xrange(len(buckets)):
            gradients = tf.gradients(self.losses[b], params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             max_gradient_norm)
            self.gradient_norms.append(norm)
            self.updates.append(opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step))

    self.saver = tf.train.Saver(tf.all_variables())
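# A hedged construction sketch for the seq2seq model defined by the __init__
# above. The class name Seq2SeqModel and all hyperparameter values are
# illustrative assumptions; the bucket list follows the convention described
# in the docstring (sorted (input_length, output_length) pairs).
buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]
model = Seq2SeqModel(source_vocab_size=40000, target_vocab_size=40000,
                     buckets=buckets, size=256, num_layers=2,
                     max_gradient_norm=5.0, batch_size=64,
                     learning_rate=0.5, learning_rate_decay_factor=0.99,
                     use_lstm=False, forward_only=False)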
def __init__(self, max_len, input_size, size, num_layers, max_gradient_norm,
             batch_size, learning_rate, learning_rate_decay_factor):
    """Create the network. A simplified network that handles only sorting.

    Args:
      max_len: maximum length of the model.
      input_size: size of the inputs data.
      size: number of units in each layer of the model.
      num_layers: number of layers in the model.
      max_gradient_norm: gradients will be clipped to maximally this norm.
      batch_size: the size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: learning rate to start with.
      learning_rate_decay_factor: decay learning rate by this much when needed.
    """
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    cell = rnn_cell.GRUCell(size)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    self.encoder_inputs = []
    self.decoder_inputs = []
    self.decoder_targets = []
    self.target_weights = []
    for i in range(max_len):
        self.encoder_inputs.append(
            tf.placeholder(tf.float32, [batch_size, input_size],
                           name="EncoderInput%d" % i))
    for i in range(max_len + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.float32, [batch_size, input_size],
                           name="DecoderInput%d" % i))
        self.decoder_targets.append(
            tf.placeholder(tf.float32, [batch_size, max_len + 1],
                           name="DecoderTarget%d" % i))  # one hot
        self.target_weights.append(
            tf.placeholder(tf.float32, [batch_size, 1],
                           name="TargetWeight%d" % i))

    # Encoder
    # Need for attention
    encoder_outputs, final_state = rnn.rnn(cell, self.encoder_inputs, dtype=tf.float32)

    # Need a dummy output to point on it. End of decoding.
    encoder_outputs = [tf.zeros([FLAGS.batch_size, FLAGS.rnn_size])] + encoder_outputs

    # First calculate a concatenation of encoder outputs to put attention on.
    top_states = [tf.reshape(e, [-1, 1, cell.output_size]) for e in encoder_outputs]
    attention_states = tf.concat(1, top_states)

    with tf.variable_scope("decoder"):
        outputs, states, _ = pointer_decoder(self.decoder_inputs, final_state,
                                             attention_states, cell)

    with tf.variable_scope("decoder", reuse=True):
        predictions, _, inps = pointer_decoder(self.decoder_inputs, final_state,
                                               attention_states, cell,
                                               feed_prev=True)

    self.predictions = predictions
    self.outputs = outputs
    self.inps = inps
def __init__(self, vocab_size, buckets_or_sentence_length, size, num_layers,
             max_gradient_norm, batch_size, learning_rate,
             learning_rate_decay_factor, model_type, use_lstm=True,
             num_samples=512, forward_only=False):
    """Create the model.

    This constructor can be used to create an embedded or embedded-attention,
    bucketed or non-bucketed model made of single or multi-layer RNN cells.

    Args:
      vocab_size: Size of the vocabulary.
      buckets_or_sentence_length: If using buckets: A list of pairs (I, O),
        where I specifies maximum input length that will be processed in that
        bucket, and O specifies maximum output length. Training instances that
        have inputs longer than I or outputs longer than O will be pushed to
        the next bucket and padded accordingly. We assume that the list is
        sorted, e.g., [(2, 4), (8, 16)].
        Otherwise: the maximum number of words per sentence.
      size: Number of units in each layer of the model.
      num_layers: Number of layers in the model.
      max_gradient_norm: Gradients will be clipped to maximally this norm.
      batch_size: The size of the batches used during training;
        the model construction is independent of batch_size, so it can be
        changed after initialization if this is convenient, e.g., for decoding.
      learning_rate: Learning rate to start with.
      learning_rate_decay_factor: Decay learning rate by this much when needed.
      num_samples: Number of samples for sampled softmax.
      forward_only: If set, we do not construct the backward pass in the model.
    """
    # Need to determine if we're using buckets or not:
    if type(buckets_or_sentence_length) == list:
        self.buckets = buckets_or_sentence_length
    else:
        self.max_sentence_length = buckets_or_sentence_length

    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.vocab_size:
        with tf.device("/cpu:0"):
            w = tf.get_variable("proj_w", [size, self.vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.vocab_size])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device("/cpu:0"):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples, self.vocab_size)

        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = rnn_cell.GRUCell(size)
    if use_lstm:
        single_cell = rnn_cell.BasicLSTMCell(size)
    cell = single_cell  # i, j, f, o = array_ops.split(1, 4, concat)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)  # cur_inp, array_ops.concat(1, new_states)

    # The seq2seq function: we use embedding for the input and attention (if applicable).
    if model_type == 'embedding_attention':
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return seq2seq.embedding_attention_seq2seq(
                encoder_inputs, decoder_inputs, cell, vocab_size, vocab_size,
                output_projection=output_projection, feed_previous=do_decode)
    else:  # just build embedding model, I should probably change this to throw an error
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return seq2seq.embedding_rnn_seq2seq(
                encoder_inputs, decoder_inputs, cell, vocab_size, vocab_size,
                output_projection=output_projection, feed_previous=do_decode)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []

    # NOTE: If the model is not bucketed, these try blocks will throw an
    # AttributeError and execute code to build a non-bucketed model.
    try:
        encoder_range = self.buckets[-1][0]
        decoder_range = self.buckets[-1][1]
    except AttributeError:
        encoder_range, decoder_range = self.max_sentence_length, self.max_sentence_length

    for i in xrange(encoder_range):  # Last bucket is the biggest one.
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(decoder_range + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))

    # Our targets are decoder inputs shifted by one.
    targets = [self.decoder_inputs[i + 1]
               for i in xrange(len(self.decoder_inputs) - 1)]

    # Training outputs and losses.
    try:
        if forward_only:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs, self.decoder_inputs, targets,
                self.target_weights, self.buckets, self.vocab_size,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in xrange(len(self.buckets)):
                    self.outputs[b] = [
                        tf.nn.xw_plus_b(output, output_projection[0], output_projection[1])
                        for output in self.outputs[b]
                    ]
        else:
            self.outputs, self.losses = seq2seq.model_with_buckets(
                self.encoder_inputs, self.decoder_inputs, targets,
                self.target_weights, self.buckets, self.vocab_size,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)
    except AttributeError:
        if forward_only:
            self.outputs, self.states = seq2seq_f(self.encoder_inputs,
                                                  self.decoder_inputs[:-1], True)
            self.losses = seq2seq.sequence_loss(
                self.outputs, targets, self.target_weights[:-1], self.vocab_size,
                softmax_loss_function=softmax_loss_function)
            # Project outputs for decoding
            if output_projection is not None:
                self.outputs = [
                    tf.nn.xw_plus_b(output, output_projection[0], output_projection[1])
                    for output in self.outputs
                ]
        else:
            self.outputs, self.states = seq2seq_f(self.encoder_inputs,
                                                  self.decoder_inputs[:-1], False)
            self.losses = seq2seq.sequence_loss(
                self.outputs, targets, self.target_weights[:-1], self.vocab_size,
                softmax_loss_function=softmax_loss_function)

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    self.params = params  # Hold onto this for Woz
    if not forward_only:
        self.gradient_norms = []
        self.updates = []
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        try:
            for b in xrange(len(self.buckets)):
                gradients = tf.gradients(self.losses[b], params)
                clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                                 max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(opt.apply_gradients(
                    zip(clipped_gradients, params), global_step=self.global_step))
        except AttributeError:
            gradients = tf.gradients(self.losses, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             max_gradient_norm)
            self.gradient_norms = norm
            self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                               global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def __init__(self, embedding_mat, non_static, lstm_type, hidden_unit, sequence_length,
             max_pool_size, num_classes, embedding_size, filter_sizes, num_filters,
             l2_reg_lambda=0.0):
    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.batch_size = tf.placeholder(tf.int32)
    self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name="pad")
    self.real_len = tf.placeholder(tf.int32, [None], name="real_len")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Extend input to a 4D Tensor, because tf.nn.conv2d requires so.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        if not non_static:
            W = tf.constant(embedding_mat, name="W")
        else:
            W = tf.Variable(embedding_mat, name="W")
        self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        emb = tf.expand_dims(self.embedded_chars, -1)

    # CNN
    pooled_concat = []
    reduced = np.int32(np.ceil((sequence_length) * 1.0 / max_pool_size))
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Zero paddings so that the convolution output has dimension
            # batch x sequence_length x emb_size x channel
            num_prio = (filter_size - 1) // 2
            num_post = (filter_size - 1) - num_prio
            pad_prio = tf.concat(1, [self.pad] * num_prio)
            pad_post = tf.concat(1, [self.pad] * num_post)
            emb_pad = tf.concat(1, [pad_prio, emb, pad_post])

            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(emb_pad, W, strides=[1, 1, 1, 1], padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(h, ksize=[1, max_pool_size, 1, 1],
                                    strides=[1, max_pool_size, 1, 1],
                                    padding='SAME', name="pool")
            pooled = tf.reshape(pooled, [-1, reduced, num_filters])
            pooled_concat.append(pooled)

    pooled_concat = tf.concat(2, pooled_concat)
    pooled_concat = tf.nn.dropout(pooled_concat, self.dropout_keep_prob)

    # LSTM
    if lstm_type == "gru":
        lstm_cell = rnn_cell.GRUCell(num_units=hidden_unit, input_size=embedding_size)
    else:
        if lstm_type == "basic":
            lstm_cell = rnn_cell.BasicLSTMCell(num_units=hidden_unit,
                                               input_size=embedding_size)
        else:
            lstm_cell = rnn_cell.LSTMCell(num_units=hidden_unit,
                                          input_size=embedding_size,
                                          use_peepholes=True)
    lstm_cell = rnn_cell.DropoutWrapper(lstm_cell,
                                        output_keep_prob=self.dropout_keep_prob)

    self._initial_state = lstm_cell.zero_state(self.batch_size, tf.float32)
    inputs = [tf.squeeze(input_, [1])
              for input_ in tf.split(1, reduced, pooled_concat)]
    outputs, state = rnn.rnn(lstm_cell, inputs, initial_state=self._initial_state,
                             sequence_length=self.real_len)

    # Collect the appropriate last words into variable output (dimension = batch x embedding_size)
    output = outputs[0]
    with tf.variable_scope("Output"):
        tf.get_variable_scope().reuse_variables()
        one = tf.ones([1, hidden_unit], tf.float32)
        for i in range(1, len(outputs)):
            ind = self.real_len < (i + 1)
            ind = tf.to_float(ind)
            ind = tf.expand_dims(ind, -1)
            mat = tf.matmul(ind, one)
            output = tf.add(tf.mul(output, mat), tf.mul(outputs[i], 1.0 - mat))

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        self.W = tf.Variable(tf.truncated_normal([hidden_unit, num_classes], stddev=0.1),
                             name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(self.W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(output, self.W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
def __init__(self, source_vocab_size, target_vocab_size, buckets, size,
             num_layers, max_gradient_norm, batch_size, learning_rate,
             learning_rate_decay_factor, use_lstm=False,
             num_samples=512, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.target_vocab_size = target_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    output_projection = None
    softmax_loss_function = None
    if num_samples > 0 and num_samples < self.target_vocab_size:
        with tf.device("/cpu:0"):
            w = tf.get_variable("proj_w", [size, self.target_vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.target_vocab_size])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device("/cpu:0"):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples,
                                                  self.target_vocab_size)

        softmax_loss_function = sampled_loss

    single_cell = rnn_cell.GRUCell(size)
    if use_lstm:
        single_cell = rnn_cell.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([single_cell] * num_layers)

    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return seq2seq.embedding_attention_seq2seq(
            encoder_inputs, decoder_inputs, cell, source_vocab_size,
            target_vocab_size, output_projection=output_projection,
            feed_previous=do_decode)

    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(buckets[-1][0]):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1] + 1):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))

    targets = [self.decoder_inputs[i + 1]
               for i in xrange(len(self.decoder_inputs) - 1)]

    if forward_only:
        self.outputs, self.losses = seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, buckets, self.target_vocab_size,
            lambda x, y: seq2seq_f(x, y, True),
            softmax_loss_function=softmax_loss_function)
        if output_projection is not None:
            for b in xrange(len(buckets)):
                self.outputs[b] = [
                    tf.matmul(output, output_projection[0]) + output_projection[1]
                    for output in self.outputs[b]
                ]
    else:
        self.outputs, self.losses = seq2seq.model_with_buckets(
            self.encoder_inputs, self.decoder_inputs, targets,
            self.target_weights, buckets, self.target_vocab_size,
            lambda x, y: seq2seq_f(x, y, False),
            softmax_loss_function=softmax_loss_function)

    params = tf.trainable_variables()
    if not forward_only:
        self.gradient_norms = []
        self.updates = []
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        for b in xrange(len(buckets)):
            gradients = tf.gradients(self.losses[b], params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             max_gradient_norm)
            self.gradient_norms.append(norm)
            self.updates.append(opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step))

    self.saver = tf.train.Saver(tf.all_variables())
def BiRNN(self, scope):
    # input shape: (batch_size, step_size, input_dim)
    # we need to permute step_size and batch_size (swap the step and batch dimensions)
    data = tf.transpose(self.input_data, [1, 0, 2])
    # Reshape to prepare input to hidden activation:
    # (step_size*batch_size, n_input) flattens the batch and step dimensions.
    # After the above transformation, data is now (step_size*batch_size, input_dim).
    data = tf.reshape(data, [-1, self.config.input_dim + 1])

    # Define RNN cells with TensorFlow
    with tf.variable_scope(str(scope)):
        # Linear activation
        data = tf.matmul(data, self.weights['hidden']) + self.biases['hidden']
        data = tf.nn.dropout(data, self.config.dropout)

        # Define a cell
        if self.config.cell_type == 'GRU':
            lstm_fw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
            lstm_bw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
        else:
            lstm_fw_cell = rnn_cell.LSTMCell(self.config.hidden_dim,
                                             forget_bias=self.config.forget_bias,
                                             use_peepholes=self.config.use_peepholes,
                                             cell_clip=self.config.cell_clip)
            lstm_bw_cell = rnn_cell.LSTMCell(self.config.hidden_dim,
                                             forget_bias=self.config.forget_bias,
                                             use_peepholes=self.config.use_peepholes,
                                             cell_clip=self.config.cell_clip)

        # Split data because the rnn cell needs a list of inputs for the RNN inner loop
        data = tf.split(0, self.config.step_size, data)  # step_size * (batch_size, hidden_dim)

        # Get lstm cell output
        print 'running single stack Bi-directional RNN.......'
        outputs = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                        initial_state_fw=self.init_state_fw,
                                        initial_state_bw=self.init_state_bw,
                                        scope="RNN1")

        # For basic rnn prediction we are really just interested in the last state's
        # output; here we need to average them.
        total_outputs = tf.div(tf.add_n([outputs[2], outputs[1]]), 2.0)

        return [
            tf.nn.dropout(tf.matmul(total_outputs, self.weights['out1']) + self.biases['out1'],
                          self.config.dropout),
            tf.nn.dropout(tf.matmul(total_outputs, self.weights['out2']) + self.biases['out2'],
                          self.config.dropout),
            tf.nn.dropout(tf.matmul(total_outputs, self.weights['out3']) + self.biases['out3'],
                          self.config.dropout),
            tf.nn.dropout(tf.matmul(total_outputs, self.weights['out4']) + self.biases['out4'],
                          self.config.dropout),
            tf.nn.dropout(tf.matmul(total_outputs, self.weights['out5']) + self.biases['out5'],
                          self.config.dropout),
        ]