def sentence_encoder(self):
    """Encode the left context, right context and mention with separate RNNs.

    Each input sequence is run through its own cell under its own variable
    scope, in the order: left context, right context, mention.

    Returns:
        A 4-tuple ``(sentence_states, sentence_outputs,
        sentence_mention_states, sentence_mention_outputs)``:
        ``sentence_states`` is the left and right final states concatenated
        along axis 1; ``sentence_outputs`` is the list of left-context step
        outputs followed by the right-context step outputs; the last two
        items are the mention encoder's final state and step outputs.
    """

    def run_encoder(scope_name, cell, inputs):
        # Each encoder keeps its variables under its own scope.
        with tf.variable_scope(scope_name) as scope:
            return rnn.rnn(cell, inputs, scope=scope, dtype=tf.float32)

    left_outputs, left_states = run_encoder(
        self.enc_scope_left_context,
        self.enc_rnn_left_context,
        self.input_left_contexts)
    right_outputs, right_states = run_encoder(
        self.enc_scope_right_context,
        self.enc_rnn_right_context,
        self.input_right_contexts)
    mention_outputs, mention_states = run_encoder(
        self.enc_scope_mention,
        self.enc_rnn_mention,
        self.input_mentions)

    # Only the two context states are joined; the mention state is returned
    # on its own.
    sentence_states = tf.concat(1, [left_states, right_states])
    sentence_outputs = list(left_outputs) + list(right_outputs)
    return sentence_states, sentence_outputs, mention_states, mention_outputs
def Forward(self, sess):
    """Build the forward graph: an LSTM over ``self.x_in`` plus two skip paths.

    The last LSTM output goes through the hidden and output layers (with a
    lookup-table term added before the activation). Two skip connections are
    added to the result: a gather-sum over ``self.skip_layer_re.W`` and a
    linear map of the last time-step input through ``self.skip_layer_ae``.
    The sum is passed through a softmax.

    The first call (``self.i == 0``) creates the LSTM variables under the
    ``'output'`` scope; later calls reuse them. ``self.i`` is incremented on
    every call.

    Args:
        sess: Unused; kept so existing callers keep working.

    Returns:
        The softmax tensor over the summed output paths.
    """
    lstm_size = 200  # LSTM units; also the feature width used in the reshape
    num_steps = 20   # number of time steps the input is split into

    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size, forget_bias=1.0)

    # Swap the first two axes, flatten to 2-D, then split into one tensor
    # per time step (assumes self.x_in is batch-major -- TODO confirm).
    x_in_batch = tf.transpose(self.x_in, [1, 0, 2])
    x_in = tf.reshape(x_in_batch, [-1, lstm_size])
    x_in = tf.split(0, num_steps, x_in)

    # Same RNN call either way; only variable reuse differs between the
    # first call and subsequent ones.
    reuse = None if self.i == 0 else True
    with tf.variable_scope('output', reuse=reuse):
        output_lstm, _ = rnn.rnn(lstm, x_in, dtype=tf.float32)
    self.i += 1

    output_lstm = output_lstm[-1]  # last time step only
    lin_h = tf.matmul(output_lstm, self.hiddenLayer.W) + self.hiddenLayer.b
    reg_h = tf.reduce_sum(tf.gather(self.reg_lookup_table, self.reg_x), 0)
    print("reg_h is")
    print(reg_h)
    h = self.activation(lin_h + tf.cast(reg_h, tf.float32))

    lin_output_pre = tf.matmul(h, self.outputLayer.W) + self.outputLayer.b
    # NOTE(review): dropout is applied unconditionally, i.e. also when this
    # graph is used for inference -- confirm that is intended.
    lin_output = tf.nn.dropout(lin_output_pre, keep_prob=0.6)

    reg_output = tf.reduce_sum(
        tf.gather(self.skip_layer_re.W, self.reg_x), 0) + self.skip_layer_re.b
    print(reg_output)

    # Skip layer fed by the last time-step input.
    ae_output = tf.matmul(x_in[-1], self.skip_layer_ae.W) + self.skip_layer_ae.b
    ae_output = tf.nn.dropout(ae_output, keep_prob=0.5)

    output = tf.nn.softmax(lin_output + ae_output + reg_output)
    return output
def create_decoder(self):
    """Build the decoder: token embeddings fed to a projected GRU.

    All tokens except the last are embedded with a shared ``"embedding"``
    variable (placed on the CPU). The embedded steps are run through a GRU
    wrapped in an output projection of size ``self.vocab_size``. The per-step
    outputs are stored in ``self.decoder_states`` and aliased as
    ``self.logits``. The graph construction time is printed.
    """
    start_time = time.time()

    # The original wrote `"embedding" or scope`; the `or` branch could never
    # run and named an undefined variable, so only the literal is kept.
    with vs.variable_scope("embedding"):
        tokens = self.tokens[:-1]
        embeddings = []
        with tf.device("/cpu:0"):
            sqrt3 = np.sqrt(3)
            embedding = vs.get_variable(
                "embedding", [self.vocab_size, self.embedding_size],
                initializer=tf.random_uniform_initializer(-sqrt3, sqrt3))
            for token in tokens:
                # Look up and pin the static shape of each step's embedding.
                emb = embedding_ops.embedding_lookup(embedding, token)
                emb.set_shape([self.batch_size, self.embedding_size])
                embeddings.append(emb)

    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; confirm the RNN is meant to live outside the "embedding" scope.
    cell = rnn_cell.GRUCell(self.decoder_cell_size)
    cell = rnn_cell.OutputProjectionWrapper(cell, self.vocab_size)
    self.decoder_states = rnn.rnn(
        cell, embeddings, dtype=tf.float32,
        sequence_length=self.tokens_len)[0]
    self.logits = self.decoder_states

    print('create_decoder graph time %f' % (time.time() - start_time))
def RNN(x, is_training, weights, biases):
    """Two-layer LSTM classifier head over a sequence input.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_input`` and split into ``n_time_step`` per-step tensors. When
    ``is_training`` is truthy and the module-level ``keep_prob`` is below 1,
    each LSTM layer's output is wrapped in dropout.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_input])
    steps = tf.split(0, n_time_step, flat)

    first_layer = rnn_cell.LSTMCell(n_hidden_1, forget_bias=0.8)
    second_layer = rnn_cell.LSTMCell(n_hidden_2, forget_bias=0.8)
    if is_training and keep_prob < 1:
        first_layer, second_layer = (
            rnn_cell.DropoutWrapper(first_layer, output_keep_prob=keep_prob),
            rnn_cell.DropoutWrapper(second_layer, output_keep_prob=keep_prob),
        )
    stacked = rnn_cell.MultiRNNCell([first_layer, second_layer])

    step_outputs, _ = rnn.rnn(stacked, steps, dtype=tf.float32)
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=dtypes.float32, scope=None):
    """Sequence-to-sequence RNN whose encoder and decoder share parameters.

    The encoder is run with ``rnn.rnn`` over ``encoder_inputs``; its final
    state initializes ``rnn_decoder`` on ``decoder_inputs``. Both use the
    same ``cell`` under the same scope, with variable reuse switched on
    between the two calls, so the weights are tied.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        loop_function: Forwarded to ``rnn_decoder``; if not None it is applied
            to the i-th output to produce the (i+1)-th input (see
            ``rnn_decoder`` for details).
        dtype: The dtype of the initial encoder state (default: tf.float32).
        scope: Scope for the created subgraph; default: "tied_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns -- documented by the original as a
        tuple ``(outputs, state)``.
    """
    with variable_scope.variable_scope("combined_tied_rnn_seq2seq"):
        shared_scope = scope or "tied_rnn_seq2seq"
        _, final_encoder_state = rnn.rnn(
            cell, encoder_inputs, dtype=dtype, scope=shared_scope)
        # From here on, variables created by the encoder are reused.
        variable_scope.get_variable_scope().reuse_variables()
        return rnn_decoder(
            decoder_inputs,
            final_encoder_state,
            cell,
            loop_function=loop_function,
            scope=shared_scope)
def RNN(x, weights, biases):
    """Single-layer LSTM classifier head that prints each reshaping stage.

    ``x`` is printed, transposed on its first two axes, flattened to rows of
    width ``n_input`` and split into ``n_steps`` per-step tensors; the
    intermediate value is printed after every stage.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    print(x)

    # Swap the first two axes so time comes first.
    time_major = tf.transpose(x, [1, 0, 2])
    print(time_major)

    # Collapse to 2-D rows of width n_input.
    flat = tf.reshape(time_major, [-1, n_input])
    print(flat)

    # One tensor per time step, as rnn.rnn expects a list.
    steps = tf.split(0, n_steps, flat)
    print(steps)

    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.rnn(cell, steps, dtype=tf.float32)

    # Linear layer on the last step's output.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def RNN(x, init_state):
    """Run a GRU over ``x`` starting from ``init_state``.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_hidden`` and split into ``seq_len`` per-step tensors.

    Returns:
        The list of per-step GRU outputs (no projection is applied; the
        final state is discarded).
    """
    time_major = tf.transpose(x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, n_hidden])
    steps = tf.split(0, seq_len, flat)

    cell = rnn_cell.GRUCell(n_hidden)
    step_outputs, _ = rnn.rnn(
        cell, steps, dtype=tf.float32, initial_state=init_state)
    return step_outputs
def compute_states(self, emb):
    """Return a length-normalized sum of LSTM outputs for each sequence.

    ``emb`` is transposed on its first two axes and unpacked into a list of
    steps, run through an LSTM with input and output dropout
    (``self.dropout`` as keep probability), and the step outputs are summed
    over the step axis and divided by ``self.lngths``.

    Variables are created under the ``"Composition"`` scope with a Xavier
    initializer and an L2 regularizer scaled by ``self.reg``.
    """
    with tf.variable_scope(
            "Composition",
            initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=tf.contrib.layers.l2_regularizer(self.reg)):
        lstm = rnn_cell.LSTMCell(self.hidden_dim)
        lstm = rnn_cell.DropoutWrapper(
            lstm,
            output_keep_prob=self.dropout,
            input_keep_prob=self.dropout)

        # Time-major list of steps, as rnn.rnn expects.
        step_inputs = tf.unpack(tf.transpose(emb, perm=[1, 0, 2]))
        step_outputs, _ = rnn.rnn(
            lstm, step_inputs,
            sequence_length=self.lngths, dtype=tf.float32)

        summed = tf.reduce_sum(tf.pack(step_outputs), [0])
        # NOTE(review): a zero entry in self.lngths would divide by zero.
        lengths_column = tf.expand_dims(tf.to_float(self.lngths), 1)
        return tf.div(summed, lengths_column)
def RNN(x, weights, biases, type):
    """Stacked recurrent classifier head with a selectable cell type.

    Args:
        x: Input tensor; it is transposed on its first two axes, flattened to
            rows of width ``n_input`` and split into ``n_steps`` tensors.
        weights: Mapping with an ``'out'`` projection matrix.
        biases: Mapping with an ``'out'`` bias.
        type: Cell name, one of ``"LSTM"``, ``"GRU"``, ``"BasicRNN"``,
            ``"LNGRU"``, ``"LNLSTM"``. (The name shadows the builtin but is
            kept so keyword callers keep working.)

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.

    Raises:
        ValueError: If ``type`` is not a known cell name. Previously an
            unknown name produced ``None`` and failed later with an
            unrelated error.
    """
    # Map names to constructors so only the requested cell is built.
    cell_factories = {
        "LSTM": rnn_cell.BasicLSTMCell,
        "GRU": rnn_cell.GRUCell,
        "BasicRNN": rnn_cell.BasicRNNCell,
        "LNGRU": LNGRUCell,
        "LNLSTM": LNBasicLSTMCell,
    }
    if type not in cell_factories:
        raise ValueError(
            "Unknown cell type %r; expected one of %s"
            % (type, sorted(cell_factories)))

    # Swap the first two axes, flatten, then split into per-step tensors.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    base_cell = cell_factories[type](n_hidden)
    cell = rnn_cell.MultiRNNCell([base_cell] * FLAGS.layers)
    print("Using %s model" % type)

    outputs, _ = rnn.rnn(cell, x, dtype=tf.float32)

    # Linear layer on the last step's output.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def apply_lm(cell, inputs, sequence_length=None, dropout=None,
             dtype=tf.float32):
    """Run ``cell`` over ``inputs`` with ``rnn.rnn``, optionally setting dropout.

    Parameters
    ----------
    cell
        Multi-layer cell; its ``_cells`` attribute is iterated when
        ``dropout`` is given.
    inputs
        Passed through to ``rnn.rnn``.
    sequence_length
        Passed through to ``rnn.rnn``.
    dropout
        If not None, every sub-cell gets ``input_keep_prob = 1.0 - dropout``.
    dtype
        Passed through to ``rnn.rnn``.

    Returns
    -------
    The ``(outputs, state)`` pair produced by ``rnn.rnn``.
    """
    if dropout is not None:
        # NOTE(review): this only has an effect if the sub-cells read a
        # public `input_keep_prob` attribute -- confirm for the cells in use.
        for sub_cell in cell._cells:
            setattr(sub_cell, "input_keep_prob", 1.0 - dropout)

    outputs, state = rnn.rnn(
        cell=cell,
        inputs=inputs,
        sequence_length=sequence_length,
        dtype=dtype)
    return outputs, state
def __init__(self, vocabularySize, config_param):
    """Build the graph of a multi-layer basic-RNN language model.

    Creates int32 placeholders for inputs and targets of shape
    ``[batch_size, sequence_size]``, embeds the inputs, runs a stack of
    ``BasicRNNCell`` layers over them, projects every step output to the
    vocabulary, and defines the softmax predictions, the cost and the final
    state as private attributes.

    Args:
        vocabularySize: Number of rows of the embedding and width of the
            softmax layer.
        config_param: Object providing ``batch_size``, ``sequence_size``,
            ``embeddingSize``, ``hidden_size`` and ``num_layers``.
    """
    self.vocabularySize = vocabularySize
    self.config = config_param
    cfg = self.config

    self._inputX = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputsX")
    self._inputTargetsY = tf.placeholder(
        tf.int32, [cfg.batch_size, cfg.sequence_size], "InputTargetsY")

    # The embedding table and its lookup are pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [self.vocabularySize, cfg.embeddingSize])
        embedded_inputs = tf.nn.embedding_lookup(embedding, self._inputX)

    # One 2-D tensor per position in the sequence.
    per_step = tf.split(1, cfg.sequence_size, embedded_inputs)
    step_inputs = [tf.squeeze(step, [1]) for step in per_step]

    # Recurrent stack.
    base_cell = rnn_cell.BasicRNNCell(cfg.hidden_size)
    self.multilayerRNN = rnn_cell.MultiRNNCell([base_cell] * cfg.num_layers)
    self._initial_state = self.multilayerRNN.zero_state(
        cfg.batch_size, tf.float32)

    step_outputs, last_state = rnn.rnn(
        self.multilayerRNN, step_inputs, initial_state=self._initial_state)

    # Flatten all step outputs into rows of width hidden_size.
    flat_outputs = tf.reshape(
        tf.concat(1, step_outputs), [-1, cfg.hidden_size])
    softmax_w = tf.get_variable(
        "softmax_w", [cfg.hidden_size, self.vocabularySize])
    softmax_b = tf.get_variable("softmax_b", [self.vocabularySize])
    self._logits = tf.nn.xw_plus_b(flat_outputs, softmax_w, softmax_b)
    self._predictionSoftmax = tf.nn.softmax(self._logits)

    # Per-example loss with uniform weights, summed and divided by batch size.
    flat_targets = tf.reshape(self._inputTargetsY, [-1])
    uniform_weights = tf.ones([cfg.batch_size * cfg.sequence_size])
    loss = seq2seq.sequence_loss_by_example(
        [self._logits], [flat_targets], [uniform_weights],
        self.vocabularySize)
    self._cost = tf.div(tf.reduce_sum(loss), cfg.batch_size)
    self._final_state = last_state
def RNN(x, weights, biases):
    """Two-layer LSTM classifier head (``2 * n_hidden`` then ``n_hidden`` units).

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_input`` and split into ``n_steps`` per-step tensors. No dropout is
    applied.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    steps = tf.split(
        0, n_steps,
        tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, n_input]))

    wide_layer = rnn_cell.BasicLSTMCell(n_hidden * 2, forget_bias=1.0)
    narrow_layer = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    stacked = rnn_cell.MultiRNNCell([wide_layer, narrow_layer])

    step_outputs, _ = rnn.rnn(stacked, steps, dtype=tf.float32)

    # Linear layer on the last step's output.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=dtypes.float32, scope=None):
    """Sequence-to-sequence RNN with tied encoder and decoder parameters.

    ``rnn.rnn`` encodes ``encoder_inputs``; the resulting state seeds
    ``rnn_decoder`` on ``decoder_inputs``. Both run with the same ``cell``
    and scope, and variable reuse is enabled between the two calls.

    Args:
        encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        loop_function: forwarded to ``rnn_decoder``; if not None it maps the
            i-th output to the (i+1)-th input (see ``rnn_decoder``).
        dtype: The dtype of the initial encoder state (default: tf.float32).
        scope: Scope for the created subgraph; default: "tied_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns -- documented by the original as
        ``outputs`` and ``state``.
    """
    with vs.variable_scope("combined_tied_rnn_seq2seq"):
        shared_scope = scope or "tied_rnn_seq2seq"
        _, encoder_final_state = rnn.rnn(
            cell, encoder_inputs, dtype=dtype, scope=shared_scope)
        # Tie the weights: the decoder reuses the encoder's variables.
        vs.get_variable_scope().reuse_variables()
        return rnn_decoder(
            decoder_inputs,
            encoder_final_state,
            cell,
            loop_function=loop_function,
            scope=shared_scope)
def fit(self, data_function):
    """Train a stacked-LSTM model on ``data_function.train``.

    A fresh graph and session are created. The model unrolls
    ``self.num_layers`` LSTM layers over the second axis of the input,
    projects the last output to ``p`` values, and minimizes
    ``loss_dict['ce']`` with Adam using gradients clipped to
    ``self.max_grad_norm``. Training runs for ``self.n_step`` batches and
    reports the loss every 100 steps through ``PrintMessage``.

    Args:
        data_function: Object whose ``train`` attribute provides ``X`` (a
            3-D array), ``next_batch(batch_size)`` and ``epochs_completed``.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        _, s, p = data_function.train.X.shape

        # Fix: the uniform initializer built from init_scale used to be
        # created after all variables and never used, so the hyperparameter
        # had no effect. Install it on the root scope before any variable is
        # created.
        initializer = tf.random_uniform_initializer(
            -self.init_scale, self.init_scale)
        tf.get_variable_scope().set_initializer(initializer)

        X_pl = tf.placeholder(tf.float32, [self.batch_size, s, p])
        Y_pl = tf.placeholder(tf.float32, [self.batch_size, p])

        lstm_cell = rnn_cell.BasicLSTMCell(self.hidden_size)
        cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * self.num_layers)
        # One 2-D slice per step along the second axis.
        outputs, _ = rnn.rnn(
            cell, [X_pl[:, i, :] for i in range(s)], dtype=tf.float32)

        softmax_w = tf.get_variable("softmax_w", [self.hidden_size, p])
        softmax_b = tf.get_variable("softmax_b", [p])
        logits = tf.matmul(outputs[-1], softmax_w) + softmax_b
        loss = loss_dict['ce'](logits, Y_pl)

        tvars = tf.trainable_variables()
        print([i.get_shape() for i in tvars])
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), self.max_grad_norm)
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.apply_gradients(zip(grads, tvars))

        tf.initialize_all_variables().run()

        for i in range(self.n_step):
            batch_xs, batch_ys = data_function.train.next_batch(
                self.batch_size)
            feed_dict = {X_pl: batch_xs, Y_pl: batch_ys}
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            if i % 100 == 0:
                PrintMessage(
                    data_function.train.epochs_completed, loss_value, 0, 0)
def build(self, input_number, sequence_length, layers_number, units_number,
          output_number):
    """Create placeholders, output parameters and a stacked LSTM, then save.

    Sets ``self.x``, ``self.y``, ``self.sequence_length``, ``self.weights``,
    ``self.biases`` and ``self.outputs`` (the list of per-step outputs of the
    stacked LSTM), prints the configuration and calls ``self.save`` with the
    same five arguments.

    Args:
        input_number: Width of each input step.
        sequence_length: Number of steps the input is split into.
        layers_number: Number of stacked LSTM layers.
        units_number: Units per LSTM layer.
        output_number: Width of the target / output projection.
    """
    self.x = tf.placeholder("float", [None, sequence_length, input_number])
    self.y = tf.placeholder("float", [None, output_number])
    self.sequence_length = sequence_length

    self.weights = {
        'out': tf.Variable(tf.random_normal([units_number, output_number]))
    }
    self.biases = {'out': tf.Variable(tf.random_normal([output_number]))}

    # Swap the first two axes, flatten, then split into per-step tensors.
    time_major = tf.transpose(self.x, [1, 0, 2])
    flat = tf.reshape(time_major, [-1, input_number])
    steps = tf.split(0, sequence_length, flat)

    layers = [
        rnn_cell.BasicLSTMCell(
            units_number, forget_bias=1.0, state_is_tuple=True)
        for _ in range(0, layers_number)
    ]
    deep_lstm = rnn_cell.MultiRNNCell(layers, state_is_tuple=True)
    self.outputs, _ = rnn.rnn(deep_lstm, steps, dtype=tf.float32)

    print(("Build model with input_number: {}, sequence_length: {}, layers_number: {}, "
           "units_number: {}, output_number: {}").format(
               input_number, sequence_length, layers_number,
               units_number, output_number))
    self.save(input_number, sequence_length, layers_number, units_number,
              output_number)
def ops(self, input_emb, seq_length=None):
    """Run ``self.cell`` over ``input_emb`` and return the step outputs.

    Args:
        input_emb: Inputs forwarded to ``rnn`` as ``inputs``.
        seq_length: Optional value forwarded as ``sequence_length``.

    Returns:
        The outputs element of the pair returned by ``rnn``; the state is
        discarded.
    """
    result = rnn(
        self.cell,
        inputs=input_emb,
        dtype=tf.float32,
        sequence_length=seq_length)
    rnn_outputs, _ = result
    return rnn_outputs
def ops(self, input_emb):
    """Run a freshly built GRU over ``input_emb`` and return the step outputs.

    A new ``GRUCell`` of size ``self.hidden_size`` is constructed on every
    call. The final state returned by ``rnn`` is discarded.
    """
    gru = GRUCell(self.hidden_size)
    result = rnn(gru, inputs=input_emb, dtype=tf.float32)
    rnn_outputs, _ = result
    return rnn_outputs
def RNN(x, weights, biases):
    """LSTM that projects every time step's output, not just the last.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``input_dim`` and split into ``sequence_length`` per-step tensors. The
    LSTM is run with the module-level ``early_stop`` as ``sequence_length``.
    All step outputs are concatenated along axis 0 and projected.

    Args:
        x: Input tensor (batch-major per the original comments -- TODO
            confirm).
        weights: Projection tensor passed to ``tf.batch_matmul``.
        biases: Bias added to the projection.

    Returns:
        ``tf.batch_matmul(concat(outputs), weights) + biases``.
    """
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, input_dim])
    x = tf.split(0, sequence_length, x)

    lstm_cell = rnn_cell.BasicLSTMCell(
        num_units=hidden_dim, state_is_tuple=True, forget_bias=1.0)
    outputs, _ = rnn.rnn(
        lstm_cell, x, dtype=tf.float32, sequence_length=early_stop)

    # Stack all step outputs so one projection yields every prediction.
    flat = tf.concat(0, outputs)

    # Fix: the original added `bias`, which is not defined in this function,
    # while the `biases` parameter was never used.
    return tf.batch_matmul(flat, weights) + biases
def _tf_enc_embedding_attention_seq2seq(self, encoder_inputs, cell,
                                        num_encoder_symbols, embedding_size,
                                        num_heads=1, dtype=dtypes.float32,
                                        scope=None, encoder="reverse",
                                        sequence_length=None,
                                        bucket_length=None,
                                        init_backward=False,
                                        bow_emb_size=None,
                                        single_src_embedding=False):
    """Embedding sequence-to-sequence model with attention (encoder side).

    Runs the selected encoder under a variable scope opened with
    ``reuse=True``, builds the attention states from the encoder outputs and
    hands them to ``self._tf_enc_embedding_attention_decoder``.

    Args:
        encoder_inputs: Inputs passed to the chosen encoder.
        cell: Cell object; the bidirectional path calls ``get_fw_cell()`` /
            ``get_bw_cell()`` on it and the bag-of-words path calls
            ``embed(...)``.
        num_encoder_symbols: Number of embedding classes.
        embedding_size: Embedding width for the RNN encoders.
        num_heads: Forwarded to the attention decoder.
        dtype: dtype forwarded to the encoder.
        scope: Variable scope; default "embedding_attention_seq2seq".
        encoder: One of "bidirectional", "reverse" or "bow".
        sequence_length: Forwarded to the RNN encoders.
        bucket_length: Forwarded to the RNN encoders.
        init_backward: With the bidirectional encoder, use the backward
            state as the initial state.
        bow_emb_size: Embedding width for the "bow" encoder.
        single_src_embedding: With the bidirectional encoder, reuse the
            forward embedding scope for the backward cell.

    Returns:
        The result of ``self._tf_enc_embedding_attention_decoder``.

    Raises:
        ValueError: If ``encoder`` is not a supported name. Previously this
            fell through every branch and failed on an unbound local.
    """
    if encoder not in ("bidirectional", "reverse", "bow"):
        raise ValueError(
            "Unknown encoder %r; expected 'bidirectional', 'reverse' or 'bow'"
            % (encoder,))

    with tf.variable_scope(scope or "embedding_attention_seq2seq",
                           reuse=True):
        # Encoder.
        if encoder == "bidirectional":
            encoder_cell_fw = rnn_cell.EmbeddingWrapper(
                cell.get_fw_cell(),
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            embed_scope = None
            if single_src_embedding:
                logging.info(
                    "Reuse forward src embedding for backward encoder")
                with variable_scope.variable_scope(
                        "BiRNN/FW/EmbeddingWrapper") as es:
                    embed_scope = es
            encoder_cell_bw = rnn_cell.EmbeddingWrapper(
                cell.get_bw_cell(),
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size,
                embed_scope=embed_scope)
            encoder_outputs, encoder_state, encoder_state_bw = \
                rnn.bidirectional_rnn(
                    encoder_cell_fw, encoder_cell_bw, encoder_inputs,
                    dtype=dtype,
                    sequence_length=sequence_length,
                    bucket_length=bucket_length)
            logging.debug("Bidirectional state size=%d" % cell.state_size)
        elif encoder == "reverse":
            encoder_cell = rnn_cell.EmbeddingWrapper(
                cell,
                embedding_classes=num_encoder_symbols,
                embedding_size=embedding_size)
            encoder_outputs, encoder_state = rnn.rnn(
                encoder_cell, encoder_inputs,
                dtype=dtype,
                sequence_length=sequence_length,
                bucket_length=bucket_length,
                reverse=True)
            logging.debug("Unidirectional state size=%d" % cell.state_size)
        else:  # encoder == "bow"
            encoder_outputs, encoder_state = cell.embed(
                rnn_cell.Embedder, num_encoder_symbols, bow_emb_size,
                encoder_inputs, dtype=dtype)

        # Concatenate the encoder outputs along a new axis 1 to form the
        # states attention is put on.
        if encoder == "bow":
            top_states = [array_ops.reshape(e, [-1, 1, bow_emb_size])
                          for e in encoder_outputs]
        else:
            top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                          for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)

        initial_state = encoder_state
        if encoder == "bidirectional" and init_backward:
            initial_state = encoder_state_bw

        return self._tf_enc_embedding_attention_decoder(
            attention_states, initial_state, cell, num_heads=num_heads)
def RNN(x, weights, biases):
    """Five-layer GRU classifier head with input dropout.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_input`` and split into ``n_steps`` per-step tensors. One GRU cell
    with an input keep probability of 0.9 is repeated five times in a
    ``MultiRNNCell``.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    steps = tf.split(
        0, n_steps,
        tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, n_input]))

    # GRU layer with dropout on its inputs; the same wrapped cell object is
    # used for every layer of the stack.
    gru = tf.nn.rnn_cell.GRUCell(n_hidden)
    gru_with_dropout = tf.nn.rnn_cell.DropoutWrapper(gru, input_keep_prob=0.9)
    stacked = tf.nn.rnn_cell.MultiRNNCell(
        [gru_with_dropout] * 5, state_is_tuple=True)

    step_outputs, _ = rnn.rnn(stacked, steps, dtype=tf.float32)

    # Linear layer on the last step's output.
    return tf.matmul(step_outputs[-1], weights['out']) + biases['out']
def basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                      dtype=dtypes.float32, scope=None):
    """Basic RNN sequence-to-sequence model.

    ``rnn.rnn`` encodes ``encoder_inputs``; the resulting state initializes
    ``rnn_decoder`` on ``decoder_inputs``. The same ``cell`` object is
    passed to both calls.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x cell.input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        dtype: The dtype of the initial encoder state (default: tf.float32).
        scope: Scope for the created subgraph; default: "basic_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns -- documented by the original as a
        tuple ``(outputs, state)``.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with variable_scope.variable_scope(scope_name):
        _, final_encoder_state = rnn.rnn(cell, encoder_inputs, dtype=dtype)
        return rnn_decoder(decoder_inputs, final_encoder_state, cell)
def _build_graph(self, input_vars, is_training):
    """Build the graph of a stacked-LSTM language model.

    Sets ``self.initial`` (zero state sized from the runtime batch
    dimension), ``self.last_state``, ``self.prob`` (temperature-scaled
    softmax) and ``self.cost`` (mean sparse cross-entropy against the
    flattened next-token ids), and registers a histogram summary for all
    ``W`` parameters.

    Args:
        input_vars: Pair ``(input, nextinput)`` of token-id tensors.
        is_training: Not used by this method.
    """
    tokens, next_tokens = input_vars

    lstm = rnn_cell.BasicLSTMCell(num_units=param.rnn_size)
    stacked = rnn_cell.MultiRNNCell([lstm] * param.num_rnn_layer)
    initial_state = stacked.zero_state(tf.shape(tokens)[0], tf.float32)
    self.initial = initial_state

    embedding = tf.get_variable(
        'embedding', [param.vocab_size, param.rnn_size])
    embedded = tf.nn.embedding_lookup(embedding, tokens)

    # Split along axis 1 into seq_len pieces and drop the singleton axis.
    step_inputs = [
        tf.squeeze(piece, [1])
        for piece in tf.split(1, param.seq_len, embedded)
    ]

    step_outputs, last_state = rnn.rnn(
        stacked, step_inputs, initial_state, scope='rnnlm')
    self.last_state = tf.identity(last_state, 'last_state')

    # Concatenate steps side by side, then fold into rows of width rnn_size.
    flat_output = tf.reshape(
        tf.concat(1, step_outputs), [-1, param.rnn_size])
    logits = FullyConnected(
        'fc', flat_output, param.vocab_size, nl=tf.identity)
    self.prob = tf.nn.softmax(logits / param.softmax_temprature)

    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, symbolic_functions.flatten(next_tokens))
    self.cost = tf.reduce_mean(losses, name='cost')

    # Monitor histograms of all W parameters.
    summary.add_param_summary([('.*/W', ['histogram'])])
def RNN(x):
    """Two-layer basic-RNN head with a sigmoid output.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_input`` and split into ``n_steps`` per-step tensors. The output
    projection variables ``weights_out`` and ``biases_out`` are created here
    with truncated-normal initializers.

    Returns:
        ``sigmoid(outputs[-1] @ weights_out + biases_out)``.
    """
    steps = tf.split(
        0, n_steps,
        tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, n_input]))

    # Plain (non-gated) recurrent cell, repeated for two layers.
    basic_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
    stacked = rnn_cell.MultiRNNCell([basic_cell] * 2)
    step_outputs, _ = rnn.rnn(stacked, steps, dtype=tf.float32)

    weights_out = tf.get_variable(
        name="weights_out",
        shape=[n_hidden, n_classes],
        initializer=tf.truncated_normal_initializer())
    biases_out = tf.get_variable(
        name="biases_out",
        shape=[n_classes],
        initializer=tf.truncated_normal_initializer())

    projected = tf.matmul(step_outputs[-1], weights_out) + biases_out
    return tf.sigmoid(projected)
def _build_graph(self, input_vars):
    """Build the graph of a stacked-LSTM language model.

    Sets ``self.initial`` (zero state sized from the runtime batch
    dimension), ``self.last_state``, ``self.prob`` (temperature-scaled
    softmax) and ``self.cost`` (mean sparse cross-entropy against the
    flattened next-token ids), and registers a histogram summary for all
    ``W`` parameters.

    Args:
        input_vars: Pair ``(input, nextinput)`` of token-id tensors.
    """
    current_ids, next_ids = input_vars

    base_cell = rnn_cell.BasicLSTMCell(num_units=param.rnn_size)
    multi_cell = rnn_cell.MultiRNNCell([base_cell] * param.num_rnn_layer)
    zero_state = multi_cell.zero_state(tf.shape(current_ids)[0], tf.float32)
    self.initial = zero_state

    embedding_table = tf.get_variable(
        'embedding', [param.vocab_size, param.rnn_size])
    features = tf.nn.embedding_lookup(embedding_table, current_ids)

    # Split along axis 1 into seq_len pieces, then drop the singleton axis.
    pieces = tf.split(1, param.seq_len, features)
    step_inputs = [tf.squeeze(piece, [1]) for piece in pieces]

    outputs, final_state = rnn.rnn(
        multi_cell, step_inputs, zero_state, scope='rnnlm')
    self.last_state = tf.identity(final_state, 'last_state')

    # Concatenate steps side by side, then fold into rows of width rnn_size.
    rows = tf.reshape(tf.concat(1, outputs), [-1, param.rnn_size])
    logits = FullyConnected('fc', rows, param.vocab_size, nl=tf.identity)
    self.prob = tf.nn.softmax(logits / param.softmax_temprature)

    per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, symbolic_functions.flatten(next_ids))
    self.cost = tf.reduce_mean(per_token_loss, name='cost')

    # Monitor histograms of all W parameters.
    summary.add_param_summary([('.*/W', ['histogram'])])
def basic_seq2seq(encoder_inputs, decoder_inputs, cell, input_size,
                  hidden_size, output_size, dtype=dtypes.float32, scope=None,
                  feed_previous=False):
    """Basic seq2seq with input and output projections around ``cell``.

    ``cell`` is wrapped in an ``InputProjectionWrapper`` (called with
    ``hidden_size`` and ``input_size``) and an ``OutputProjectionWrapper``
    (``output_size``). The wrapped cell encodes ``encoder_inputs`` and is
    then used by ``tf.nn.seq2seq.rnn_decoder`` on ``decoder_inputs``.

    Args:
        encoder_inputs: Inputs passed to ``rnn.rnn``.
        decoder_inputs: Inputs passed to the decoder.
        cell: Cell to wrap.
        input_size: Third argument of the input projection wrapper.
        hidden_size: Second argument of the input projection wrapper.
        output_size: Output projection width.
        dtype: dtype of the initial encoder state.
        scope: Variable scope; default "basic_rnn_seq2seq".
        feed_previous: If truthy, each decoder input is derived from the
            previous output thresholded at 0.5 instead of ``decoder_inputs``.

    Returns:
        The result of ``tf.nn.seq2seq.rnn_decoder``.
    """
    with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"):
        projected_cell = tf.nn.rnn_cell.InputProjectionWrapper(
            cell, hidden_size, input_size)
        projected_cell = tf.nn.rnn_cell.OutputProjectionWrapper(
            projected_cell, output_size)
        _, encoder_state = rnn.rnn(
            projected_cell, encoder_inputs, dtype=dtype)

        loop_function = None
        if feed_previous:
            def threshold_previous(prev, _):
                # Binarize the previous output: 1.0 where prev >= 0.5.
                return tf.to_float(tf.greater_equal(prev, 0.5))

            loop_function = threshold_previous

        return tf.nn.seq2seq.rnn_decoder(
            decoder_inputs, encoder_state, projected_cell,
            loop_function=loop_function)
def RNN(x, weights):
    """Two-layer LSTM producing one prediction per time step.

    ``x`` is transposed on its first two axes, flattened to rows of width
    ``n_input`` and split into ``n_steps`` per-step tensors. For every step
    output, a projection of the module-level ``x_profile`` is added, the sum
    goes through a ReLU hidden layer, and the result is projected with
    ``weights['reluhidden_out']``.

    Args:
        x: Input tensor (batch-major per the original comments -- TODO
            confirm).
        weights: Mapping with keys ``'profile_out'``, ``'reluhidden_in'``,
            ``'reluhidden_in_biases'`` and ``'reluhidden_out'``.

    Returns:
        A list with one prediction tensor per time step.
    """
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    base_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    stacked_cell = rnn_cell.MultiRNNCell([base_cell] * 2)

    # sequence_length is deliberately not passed for now; per the original
    # note this may only make the computation slightly slower.
    outputs, _ = rnn.rnn(stacked_cell, x, dtype=tf.float32)

    # The profile projection does not depend on the time step, so build it
    # once instead of once per step.
    profile_term = tf.matmul(x_profile, weights['profile_out'])

    pred_list = []
    for output_step in outputs:
        relu_input = tf.add(profile_term, output_step)
        hidden_layer_1 = tf.nn.relu(
            tf.matmul(relu_input, weights['reluhidden_in'])
            + weights['reluhidden_in_biases'])
        pred_list.append(
            tf.matmul(hidden_layer_1, weights['reluhidden_out']))
    return pred_list
def basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                      dtype=dtypes.float32, scope=None):
    """Basic RNN sequence-to-sequence model.

    ``rnn.rnn`` encodes ``encoder_inputs``; the last element of the states
    it returns initializes ``rnn_decoder`` on ``decoder_inputs``. The same
    ``cell`` object is passed to both calls.

    Args:
        encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        dtype: The dtype of the initial encoder state (default: tf.float32).
        scope: Scope for the created subgraph; default: "basic_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns -- documented by the original as
        ``outputs`` and ``states``.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with vs.variable_scope(scope_name):
        _, encoder_states = rnn.rnn(cell, encoder_inputs, dtype=dtype)
        # Only the final encoder state seeds the decoder.
        last_encoder_state = encoder_states[-1]
        return rnn_decoder(decoder_inputs, last_encoder_state, cell)
def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
    """Run the wrapped cell over ``inputs`` with a dynamic or static RNN.

    ``inputs`` may be a Python list of per-step tensors or a single tensor
    whose first axis is unstacked into steps. The output has the same form
    as the input: a list in, a list out; a tensor in, a tensor out.

    Args:
        inputs: List of step tensors, or one tensor stacked on axis 0.
        initial_state: Forwarded to the RNN function.
        dtype: Forwarded to the RNN function.
        sequence_length: Forwarded to the RNN function.
        scope: Forwarded to the RNN function.

    Returns:
        A pair ``(outputs, state)`` from ``rnn.dynamic_rnn`` (when
        ``self._use_dynamic_rnn`` is set) or ``rnn.rnn``.
    """
    is_list = isinstance(inputs, list)
    if self._use_dynamic_rnn:
        if is_list:
            # dynamic_rnn wants one tensor; it is called with time_major=True.
            inputs = array_ops.stack(inputs)
        outputs, state = rnn.dynamic_rnn(
            self._cell, inputs,
            sequence_length=sequence_length,
            initial_state=initial_state,
            dtype=dtype,
            time_major=True,
            scope=scope)
        if is_list:
            # Convert outputs back to a list. `unstack` is the counterpart of
            # the `stack` used above; the original called the deprecated
            # `unpack` here.
            outputs = array_ops.unstack(outputs)
    else:  # non-dynamic rnn
        if not is_list:
            inputs = array_ops.unstack(inputs)
        outputs, state = rnn.rnn(
            self._cell, inputs,
            initial_state=initial_state,
            dtype=dtype,
            sequence_length=sequence_length,
            scope=scope)
        if not is_list:
            # Convert outputs back to a tensor.
            outputs = array_ops.stack(outputs)
    return outputs, state
def embedding_encoder(encoder_inputs, cell, embedding, num_symbols,
                      embedding_size, bidirectional=False, dtype=None,
                      weight_initializer=None, scope=None):
    """Embed ``encoder_inputs`` and encode them into a final state.

    Args:
        encoder_inputs: Iterable of id tensors, one per step.
        cell: Cell used for the encoder (for both directions when
            ``bidirectional`` is set).
        embedding: Existing embedding to look ids up in, or ``None`` (or
            ``False``) to create an ``"embedding"`` variable here.
        num_symbols: Rows of the embedding created here.
        embedding_size: Columns of the embedding created here.
        bidirectional: If truthy, run ``rnn.bidirectional_rnn`` and
            concatenate the forward and backward states along axis 1.
        dtype: dtype for the variable scope and the encoder.
        weight_initializer: Optional zero-argument callable returning the
            initializer for a newly created embedding.
        scope: Variable scope; default "embedding_encoder".

    Returns:
        The encoder's final state.
    """
    with variable_scope.variable_scope(
            scope or "embedding_encoder", dtype=dtype) as scope:
        dtype = scope.dtype

        # Fix: test for a missing embedding explicitly. Truth-testing a
        # tensor-like object (`not embedding`) is unreliable and can raise.
        if embedding is None or embedding is False:
            # Fix: the default weight_initializer=None used to be called
            # unconditionally, raising TypeError.
            initializer = (weight_initializer()
                           if weight_initializer is not None else None)
            embedding = variable_scope.get_variable(
                "embedding", [num_symbols, embedding_size],
                initializer=initializer)

        emb_inp = [embedding_ops.embedding_lookup(embedding, i)
                   for i in encoder_inputs]

        if bidirectional:
            _, output_state_fw, output_state_bw = rnn.bidirectional_rnn(
                cell, cell, emb_inp, dtype=dtype)
            encoder_state = tf.concat(1, [output_state_fw, output_state_bw])
        else:
            _, encoder_state = rnn.rnn(cell, emb_inp, dtype=dtype)

        return encoder_state
def basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                      dtype=dtypes.float32, scope=None):
    """A minimal, complete seq2seq model.

    The encoder and decoder are given the same ``cell``. Compared with
    ``rnn_decoder`` the only extra argument is ``encoder_inputs``; to use a
    different cell for the encoder, write the encoder yourself and pass its
    final state to ``rnn_decoder``.

    Args:
        encoder_inputs: A list of 2D Tensors [batch_size x input_size].
        decoder_inputs: A list of 2D Tensors [batch_size x input_size].
        cell: rnn_cell.RNNCell defining the cell function and size.
        dtype: The dtype of the initial encoder state (default: tf.float32).
        scope: Scope for the created subgraph; default: "basic_rnn_seq2seq".

    Returns:
        Whatever ``rnn_decoder`` returns -- documented by the original as a
        tuple ``(outputs, state)``.
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with variable_scope.variable_scope(scope_name):
        # Feed encoder_inputs through the RNN; the state at the last time
        # step is what the decoder starts from. A custom encoder would build
        # its own cell, run it through rnn.rnn and hand the final state to
        # the decoder in the same way.
        _, final_encoder_state = rnn.rnn(cell, encoder_inputs, dtype=dtype)
        return rnn_decoder(decoder_inputs, final_encoder_state, cell)
def lstm_inference(x, is_train=True):
    """Run a single-layer LSTM over `x` and project the last output.

    `x` is transposed with permutation [1, 0] (so it must be rank 2), split
    into 9 pieces along axis 0, and those pieces are fed to the LSTM as
    consecutive time steps. The last step's output is multiplied by a
    [9, 1] weight matrix and a bias is added.

    Args:
      x: rank-2 input tensor.
        NOTE(review): presumably [batch_size, 9]; confirm with the caller.
      is_train: unused in this function.

    Returns:
      `matmul(outputs[-1], weights) + biases`.
    """
    RNN_HIDDEN_UNITS = 9
    LABEL_SIZE = 1
    # Number of pieces the transposed input is cut into; each piece becomes
    # one time step of the unrolled RNN.
    NUM_STEPS = 9

    transposed = tf.transpose(x, [1, 0])
    step_inputs = tf.split(0, NUM_STEPS, transposed)

    weights = tf.Variable(tf.random_normal([RNN_HIDDEN_UNITS, LABEL_SIZE]))
    biases = tf.Variable(tf.random_normal([LABEL_SIZE]))

    lstm_cell = rnn_cell.BasicLSTMCell(RNN_HIDDEN_UNITS, forget_bias=1.0)
    step_outputs, _ = rnn.rnn(lstm_cell, step_inputs, dtype=tf.float32)

    # Only the output of the final time step feeds the linear layer.
    return tf.matmul(step_outputs[-1], weights) + biases
def network(x):
    """Single-layer LSTM classifier head with dropout on the output.

    Reads the module-level names `n_input`, `n_steps`, `n_hidden`,
    `weights`, `biases` and `dropout`.

    Args:
      x: input tensor; it is flattened to [-1, n_input] and cut into
        `n_steps` equal pieces along axis 0, one per time step.
        NOTE(review): no transpose happens before the reshape, so this
        assumes `x` is already laid out time-major; confirm.

    Returns:
      The last-step projection `outputs[-1] * weights['out'] +
      biases['out']`, passed through `tf.nn.dropout` with `dropout` as the
      keep probability.
    """
    flattened = tf.reshape(x, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], weights['out']) + biases['out']
    return tf.nn.dropout(projected, dropout)
def RNN(x, weights, biases):
    """Stacked LSTM returning all outputs, the final state and a projection.

    Reads the module-level constants `RNN_IN_DIMENS`, `SEQUENCE_LENGTH`,
    `RNN_NEURONS` and `RNN_LAYERS`.

    Args:
      x: input tensor, flattened to [-1, RNN_IN_DIMENS] and split into
        SEQUENCE_LENGTH pieces along axis 0 (one per time step).
        NOTE(review): no transpose precedes the reshape; assumes a
        time-major layout. Confirm with the caller.
      weights: dict holding the output matrix under key 'out'.
      biases: dict holding the output bias under key 'out'.

    Returns:
      Tuple (outputs, states, projection) where `projection` is
      `matmul(outputs[-1], weights['out']) + biases['out']`.
    """
    flattened = tf.reshape(x, [-1, RNN_IN_DIMENS])
    step_inputs = tf.split(0, SEQUENCE_LENGTH, flattened)

    # Both the base cell and the stack use the concatenated (non-tuple)
    # state representation.
    base_cell = rnn_cell.BasicLSTMCell(
        RNN_NEURONS, forget_bias=1.0, state_is_tuple=False)
    stacked_cell = rnn_cell.MultiRNNCell(
        [base_cell] * RNN_LAYERS, state_is_tuple=False)

    step_outputs, final_state = rnn.rnn(
        stacked_cell, step_inputs, dtype=tf.float32)
    projection = tf.matmul(step_outputs[-1], weights['out']) + biases['out']
    return (step_outputs, final_state, projection)
def __call__(self, inputs, initial_state=None, dtype=None,
             sequence_length=None, scope=None):
    """Run the wrapped cell over `inputs` with a static or dynamic RNN.

    The container type of the result mirrors the input: a list of
    per-step tensors in gives a list out, a single stacked tensor in gives
    a single stacked tensor out.

    Args:
      inputs: either a Python list of per-time-step tensors or one tensor
        stacked along the first (time) axis.
      initial_state: optional initial state forwarded to the RNN.
      dtype: dtype forwarded to the RNN.
      sequence_length: optional lengths forwarded to the RNN.
      scope: variable scope forwarded to the RNN.

    Returns:
      Tuple (outputs, state).
    """
    received_list = isinstance(inputs, list)

    if not self._use_dynamic_rnn:
        # The static RNN consumes a list of steps; unstack a tensor input.
        step_inputs = inputs if received_list else array_ops.unpack(inputs)
        outputs, state = rnn.rnn(self._cell,
                                 step_inputs,
                                 initial_state=initial_state,
                                 dtype=dtype,
                                 sequence_length=sequence_length,
                                 scope=scope)
        if not received_list:
            # Convert outputs back to a tensor.
            outputs = array_ops.pack(outputs)
        return outputs, state

    # The dynamic RNN consumes one time-major tensor; stack a list input.
    stacked_inputs = array_ops.pack(inputs) if received_list else inputs
    outputs, state = rnn.dynamic_rnn(self._cell,
                                     stacked_inputs,
                                     sequence_length=sequence_length,
                                     initial_state=initial_state,
                                     dtype=dtype,
                                     time_major=True,
                                     scope=scope)
    if received_list:
        # Convert outputs back to a list.
        outputs = array_ops.unpack(outputs)
    return outputs, state
def _rnn(self, name, enc_inputs):
    """Encode `enc_inputs` with an embedding RNN and project the result.

    Args:
      name: prefix for the projection variables. The weight is named
        `name + '-w'` and the bias `name + 'b'` (no hyphen); the names are
        kept as they are because they determine the stored variable names.
      enc_inputs: list of per-time-step inputs for the embedding-wrapped
        cell.

    Returns:
      `matmul(final_state[-1], w) + b`.
    """
    embedding_cell = rnn_cell.EmbeddingWrapper(self.cell, self.dict_size)
    _, final_state = rnn.rnn(embedding_cell, enc_inputs, dtype=tf.float32)

    weight_shape = (self.cell.state_size, self.num_outputs)
    w = tf.get_variable(
        name + '-w', weight_shape,
        initializer=tf.random_normal_initializer(stddev=0.1))
    b = tf.get_variable(
        name + 'b', (self.num_outputs,),
        initializer=tf.constant_initializer())

    # NOTE(review): the final state is indexed with [-1] before the matmul
    # while the weight's first dimension is cell.state_size. Confirm which
    # state representation (single tensor vs. tuple) this is written for.
    return tf.matmul(final_state[-1], w) + b
def model(self):
    """Build the GRU graph producing one value per time step.

    Reads `self.x`, `self.num_hid_units`, `self.num_hid_layers`,
    `self.weights_H2O`, `self.bias_H2O` and the module-level constants
    `INPUT_SIZE` and `STEPS`.

    Returns:
      Tuple (y_ns, y_pred): the affine outputs reshaped to [-1, STEPS] and
      cut to their first 783 columns, without and with a sigmoid applied.
      NOTE(review): 783 is hard-coded; presumably STEPS - 1. Confirm.
    """
    print('Building model\n')

    # self.x itself is left untouched; all reshaping works on new tensors.
    time_major = tf.transpose(self.x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, INPUT_SIZE])
    # One list entry per time step.
    step_inputs = tf.split(0, STEPS, flattened)

    cell = rnn_cell.GRUCell(self.num_hid_units)
    cell = rnn_cell.DropoutWrapper(cell, output_keep_prob=1)
    if self.num_hid_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * self.num_hid_layers)
    step_outputs, _ = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    # Swap the first two axes back, then flatten so that a single matmul
    # applies the affine map to every (example, step) pair.
    batch_major = tf.transpose(step_outputs, [1, 0, 2])
    flat_outputs = tf.reshape(batch_major, [-1, self.num_hid_units])
    affine = tf.matmul(flat_outputs, self.weights_H2O) + self.bias_H2O
    per_step = tf.reshape(affine, [-1, STEPS])

    # Return both the raw values (the sigmoid is applied in the cost
    # function) and the sigmoid predictions.
    y_ns = per_step[:, :783]
    y_pred = tf.sigmoid(per_step)[:, :783]
    return y_ns, y_pred
def embedding_encoder(encoder_inputs,
                      cell,
                      embedding,
                      num_symbols,
                      embedding_size,
                      bidirectional=False,
                      dtype=None,
                      weight_initializer=None,
                      scope=None):
    """Look up embeddings for the encoder inputs and encode them with an RNN.

    Args:
      encoder_inputs: iterable of per-time-step index tensors, each looked
        up in the embedding matrix.
      cell: RNN cell; shared by both directions when `bidirectional`.
      embedding: embedding matrix to use, or None to create a variable
        "embedding" with shape [num_symbols, embedding_size].
      num_symbols: row count of a newly created embedding.
      embedding_size: column count of a newly created embedding.
      bidirectional: when True, run `rnn.bidirectional_rnn` and concatenate
        the two final states along axis 1.
      dtype: dtype for the variable scope and the RNN.
      weight_initializer: optional zero-argument factory for the embedding
        initializer; None falls back to the scope's default initializer.
      scope: variable scope or name; defaults to "embedding_encoder".

    Returns:
      The encoder's final state.
    """
    with variable_scope.variable_scope(scope or "embedding_encoder",
                                       dtype=dtype) as scope:
        dtype = scope.dtype
        # Encoder.
        # An identity check is required here: evaluating `not embedding`
        # on a tensor or array argument raises instead of meaning
        # "no embedding supplied".
        if embedding is None:
            # Do not call a missing factory; pass None through so that
            # get_variable uses its default initializer.
            embedding_init = (weight_initializer()
                              if weight_initializer is not None else None)
            embedding = variable_scope.get_variable(
                "embedding", [num_symbols, embedding_size],
                initializer=embedding_init)
        emb_inp = [
            embedding_ops.embedding_lookup(embedding, i)
            for i in encoder_inputs
        ]
        if bidirectional:
            _, output_state_fw, output_state_bw = rnn.bidirectional_rnn(
                cell, cell, emb_inp, dtype=dtype)
            encoder_state = tf.concat(1, [output_state_fw, output_state_bw])
        else:
            _, encoder_state = rnn.rnn(cell, emb_inp, dtype=dtype)
        return encoder_state
def RNN(x, weights, biases, type, layer_norm):
    """Build a stacked RNN of the requested cell type and project its output.

    Reads the module-level names `n_input`, `n_steps`, `n_hidden` and
    `FLAGS.layers`.

    Args:
      x: input tensor that is transposed with permutation [1, 0, 2];
        per the original comments its shape is
        (batch_size, n_steps, n_input).
      weights: dict holding the output matrix under key 'out'.
      biases: dict holding the output bias under key 'out'.
      type: cell name, one of "LSTM", "GRU", "BasicRNN", "LNGRU", "LNLSTM",
        "HyperLnLSTMCell".
      layer_norm: forwarded as `is_layer_norm` to HyperLnLSTMCell.

    Returns:
      `matmul(outputs[-1], weights['out']) + biases['out']`.

    Raises:
      ValueError: if `type` is not one of the known cell names.
    """
    # rnn.rnn needs a list of n_steps tensors of shape (batch, n_input):
    # swap batch and time, flatten, then split along the first axis.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(0, n_steps, x)

    # Factories rather than instances, so only the selected cell is built.
    cell_factories = {
        "LSTM": lambda: rnn_cell.BasicLSTMCell(n_hidden),
        "GRU": lambda: rnn_cell.GRUCell(n_hidden),
        "BasicRNN": lambda: rnn_cell.BasicRNNCell(n_hidden),
        "LNGRU": lambda: LNGRUCell(n_hidden),
        "LNLSTM": lambda: LNBasicLSTMCell(n_hidden),
        'HyperLnLSTMCell': lambda: HyperLnLSTMCell(
            n_hidden, is_layer_norm=layer_norm),
    }
    # Fail early with a clear message instead of stacking `None` cells and
    # crashing later inside MultiRNNCell.
    if type not in cell_factories:
        raise ValueError("Unknown cell type %r; expected one of %s"
                         % (type, sorted(cell_factories)))
    base_cell = cell_factories[type]()
    cell = rnn_cell.MultiRNNCell([base_cell] * FLAGS.layers)
    # Parenthesised single-argument form prints identically on Python 2
    # and Python 3.
    print("Using %s model" % type)

    outputs, states = rnn.rnn(cell, x, dtype=tf.float32)
    # Linear activation on the last step's output.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def model(self):
    """Build the LSTM graph: last-step output -> ReLU layer -> output layer.

    Reads `self.x`, `self.num_hid_units`, `self.weights_H2L`,
    `self.bias_H2L`, `self.weights_L2O`, `self.bias_L2O` and the
    module-level constants `INPUT_SIZE` and `STEPS`.

    Returns:
      The output-layer activations (no final non-linearity applied).
    """
    print('Building model\n')

    # self.x is not modified; reshaping creates new tensors.
    time_major = tf.transpose(self.x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, INPUT_SIZE])
    # One list entry per time step.
    step_inputs = tf.split(0, STEPS, flattened)

    cell = rnn_cell.DropoutWrapper(
        rnn_cell.LSTMCell(self.num_hid_units), output_keep_prob=1)
    step_outputs, _ = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    # Hidden layer on the output of the final step, then the output layer.
    hidden_pre = tf.matmul(step_outputs[-1], self.weights_H2L) + self.bias_H2L
    hidden = tf.nn.relu(hidden_pre)
    return tf.matmul(hidden, self.weights_L2O) + self.bias_L2O
def RNN(x):
    """Three-layer LSTM followed by a 2-unit and a 1-unit sigmoid layer.

    Reads the module-level names `n_input`, `n_steps` and `n_hidden`.

    Args:
      x: input tensor; per the original comments its shape is
        (batch_size, n_steps, n_input).

    Returns:
      Tuple (prediction, drawing_layer): the 1-unit sigmoid output and the
      2-unit sigmoid layer it is computed from.
    """
    # rnn.rnn wants a list of n_steps tensors of shape (batch, n_input).
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    base_cell = tf.nn.rnn_cell.LSTMCell(
        n_hidden, forget_bias=1.0, state_is_tuple=True)
    stacked_cell = rnn_cell.MultiRNNCell([base_cell] * 3, state_is_tuple=True)
    step_outputs, _ = rnn.rnn(stacked_cell, step_inputs, dtype=tf.float32)

    # Variables are created in the same order as before:
    # weights_2, biases_2, weights_1, biases_1.
    weights_2 = tf.get_variable(
        name="weights_2", shape=[n_hidden, 2],
        initializer=tf.truncated_normal_initializer())
    biases_2 = tf.get_variable(
        name="biases_2", shape=[2],
        initializer=tf.truncated_normal_initializer())
    weights_1 = tf.get_variable(
        name="weights_1", shape=[2, 1],
        initializer=tf.truncated_normal_initializer())
    biases_1 = tf.get_variable(
        name="biases_1", shape=[1],
        initializer=tf.truncated_normal_initializer())

    # 2-unit layer computed from the last time step's output.
    drawing_layer = tf.sigmoid(
        tf.matmul(step_outputs[-1], weights_2) + biases_2)
    prediction = tf.sigmoid(tf.matmul(drawing_layer, weights_1) + biases_1)
    return prediction, drawing_layer
def compute_hidden_representation(self, sequences, sequence_lengths, lang):
    """Encode `sequences` with the encoder cell and return the final state.

    Args:
      sequences: index tensor looked up in `self.embed_W[lang]`.
        NOTE(review): presumably [batch, max_sequence_length]; confirm.
      sequence_lengths: per-example lengths forwarded to `rnn.rnn`.
      lang: key selecting the embedding matrix in `self.embed_W`.

    Returns:
      The final state returned by `rnn.rnn`.
    """
    embedded = tf.nn.embedding_lookup(self.embed_W[lang], sequences)
    biased = tf.add(embedded, self.embed_b)

    # Swap the first two axes, flatten to rows of embedding_size, and cut
    # into max_sequence_length pieces, one per time step.
    time_major = tf.transpose(biased, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, self.embedding_size])
    step_inputs = tf.split(0, self.max_sequence_length, flattened)

    _, final_state = rnn.rnn(self.encoder_cell,
                             step_inputs,
                             dtype=tf.float32,
                             sequence_length=sequence_lengths)
    return final_state
def embedding_attention_encoder_seq2seq(enc_inp, cell, num_encoder_symbols,
                                        embedding_size):
    """Run the embedding encoder of an attention seq2seq model.

    The graph is built under the variable scope
    "embedding_attention_seq2seq".

    Args:
      enc_inp: list of per-time-step encoder inputs for the
        embedding-wrapped cell.
      cell: RNN cell to wrap with an embedding layer.
      num_encoder_symbols: passed as `embedding_classes` to the wrapper.
      embedding_size: passed as `embedding_size` to the wrapper.

    Returns:
      Tuple (encoder_outputs, encoder_state) as returned by `rnn.rnn`.
    """
    with variable_scope.variable_scope("embedding_attention_seq2seq"):
        wrapped_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        step_outputs, final_state = rnn.rnn(
            wrapped_cell, enc_inp, dtype=dtypes.float32)
        return step_outputs, final_state
def embedding_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                          num_encoder_symbols, num_decoder_symbols,
                          embedding_size, output_projection=None,
                          feed_previous=False, dtype=dtypes.float32,
                          scope=None, beam_search=True, beam_size=10):
    """Embedding RNN sequence-to-sequence model.

    The encoder inputs are embedded by an `EmbeddingWrapper` around `cell`
    and encoded into a state by an RNN. That state initialises
    `embedding_rnn_decoder`, which is run on `decoder_inputs`.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each
        symbol.
      output_projection: None or a pair (W, B) of output projection weights
        and biases; W has shape [output_size x num_decoder_symbols] and B
        has shape [num_decoder_symbols]. When None, the decoder cell is
        wrapped in an `OutputProjectionWrapper` of size
        `num_decoder_symbols`.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the
        first of decoder_inputs will be used (the "GO" symbol), and all
        other decoder inputs will be taken from previous outputs. If False,
        decoder_inputs are used as given.
      dtype: The dtype used for the encoder RNN (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_rnn_seq2seq".
      beam_search: forwarded unchanged to `embedding_rnn_decoder`.
      beam_size: forwarded unchanged to `embedding_rnn_decoder`.

    Returns:
      Whatever `embedding_rnn_decoder` returns; the original documentation
      describes it as a tuple (outputs, state) where `outputs` is a list of
      2D Tensors of shape [batch_size x num_decoder_symbols] with the same
      length as decoder_inputs.
    """
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq"):
        # Encoder: only the final state is needed.
        embedding_encoder_cell = rnn_cell.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, final_encoder_state = rnn.rnn(
            embedding_encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder: without an explicit projection the cell itself must
        # emit num_decoder_symbols values per step.
        decoder_cell = cell
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(
                cell, num_decoder_symbols)

        return embedding_rnn_decoder(decoder_inputs,
                                     final_encoder_state,
                                     decoder_cell,
                                     num_decoder_symbols,
                                     embedding_size,
                                     output_projection=output_projection,
                                     feed_previous=feed_previous,
                                     beam_search=beam_search,
                                     beam_size=beam_size)
def single_lstm(name, incoming, n_units, use_peepholes=True,
                return_seq=False, return_state=False):
    """Run one LSTM layer over a list of per-step inputs.

    Args:
      name: name of the `tf.name_scope` the layer is built in.
        NOTE(review): a name scope does not prefix variables created with
        get_variable, so calling this twice in one variable scope may
        collide on the LSTM's variable names. Confirm this is intended.
      incoming: list of per-time-step input tensors.
      n_units: number of LSTM units.
      use_peepholes: forwarded to `LSTMCell`.
      return_seq: if True return every step's output, else only the last.
      return_state: if True also return the final cell state.

    Returns:
      The output(s), or a tuple (output(s), final_state) when
      `return_state` is True.
    """
    with tf.name_scope(name):
        cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
        step_outputs, final_state = rnn.rnn(cell, incoming, dtype=tf.float32)

        result = step_outputs if return_seq else step_outputs[-1]
        if return_state:
            return (result, final_state)
        return result
def RNN(x, weights, biases):
    """Single-layer LSTM with a linear projection of the last output.

    Reads the module-level names `n_input`, `n_steps` and `n_hidden`.

    Args:
      x: rank-3 input tensor (it is transposed with permutation [1, 0, 2]).
      weights: dict holding the output matrix under key 'out'.
      biases: dict holding the output bias under key 'out'.

    Returns:
      `matmul(outputs[-1], weights['out']) + biases['out']`.
    """
    # Swap the first two axes, flatten to rows of n_input, then cut into
    # n_steps pieces so that each list entry is one time step.
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    step_outputs, _ = rnn.rnn(cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def __inner_predict(self, features):
    """Stacked-LSTM prediction graph for `features`.

    Reads `self.n_input`, `self.n_steps`, `self.n_hidden`, `self.n_layers`,
    `self.weights['out']` and `self.biases['out']`.

    Args:
      features: rank-3 input tensor (it is transposed with permutation
        [1, 0, 2]).

    Returns:
      The linear projection of the last time step's output.
    """
    # Build the list of per-step inputs expected by rnn.rnn.
    time_major = tf.transpose(features, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, self.n_input])
    step_inputs = tf.split(0, self.n_steps, flattened)

    base_cell = rnn_cell.BasicLSTMCell(self.n_hidden, forget_bias=1.0)
    stacked_cell = rnn_cell.MultiRNNCell([base_cell] * self.n_layers)
    step_outputs, _ = rnn.rnn(stacked_cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, self.weights['out']) + self.biases['out']
def RNN(x, weights, biases):
    """Stacked LSTM with a linear projection of the last output.

    Reads the module-level names `n_input`, `n_steps`, `n_hidden` and
    `n_layers`.

    Args:
      x: rank-3 input tensor (it is transposed with permutation [1, 0, 2]).
      weights: dict holding the output matrix under key 'out'.
      biases: dict holding the output bias under key 'out'.

    Returns:
      `matmul(outputs[-1], weights['out']) + biases['out']`.
    """
    # Build the list of per-step inputs expected by rnn.rnn.
    time_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(time_major, [-1, n_input])
    step_inputs = tf.split(0, n_steps, flattened)

    base_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
    stacked_cell = rnn_cell.MultiRNNCell([base_cell] * n_layers)
    step_outputs, _ = rnn.rnn(stacked_cell, step_inputs, dtype=tf.float32)

    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def __classOptoRNN__(self,_Z1): ''' Reccurent neural network with a classifer (logistic) as output layer that tries to predicted if there was an otpogenetic stimulation in a neuron j. Input will be time serie of neuron(s) i starting at time t and output will be a binary value, where the label is whether x was stimulated or not at t-z. ''' #Defining weights self.weights = { 'classi_HO_W' : varInit([self.nhidclassi,1], 'classi_HO_W', std = 0.01 ) } self.biases = { 'classi_HO_B': varInit([1], 'classi_HO_B', std = 1) } self.masks = { } #classiCell = rnn_cell.BasicLSTMCell(self.nhidclassi) classiCell = rnn_cell.BasicRNNCell(self.nhidclassi, activation = self.actfct) #classiCell = rnn_cell.GRUCell(self.nhidclassi, activation = self.actfct) #INITIAL STATE DOES NOT WORK #initClassi = tf.zeros([self.batchSize,classiCell.state_size], dtype='float32') if self.multiLayer: #Stacking classifier cells stackCell = rnn_cell.MultiRNNCell([classiCell] * self.multiLayer) S = stackCell.zero_state(self._batchSize, tf.float32) with tf.variable_scope("") as scope: for i in range(self.seqLen): if i == 1: scope.reuse_variables() O,S = stackCell(_Z1,S) predCell = tf.matmul(O, self.weights['classi_HO_W']) + \ self.biases['classi_HO_B'] else: #classi O, S = rnn.rnn(classiCell, _Z1, dtype = tf.float32) #Output and state #classi to output layer predCell = tf.matmul(O[-1], self.weights['classi_HO_W']) + \ self.biases['classi_HO_B'] return predCell
def dialog_attention_seq2seq(encoder_inputs, decoder_inputs, cell, vocab_size,
                             num_heads=1, output_projection=None,
                             feed_previous=False, dtype=dtypes.float32,
                             scope=None, initial_state_attention=False):
    """Multi-turn attention seq2seq with a dialog-level recurrent state.

    For every turn `i` the function encodes `encoder_inputs[i]`, advances
    the dialog state ("DRNN") with the encoder's final state, decodes
    `decoder_inputs[i]` with attention over the encoder outputs starting
    from the dialog state, and finally advances the dialog state again with
    the decoder's final state.

    Args:
      encoder_inputs: list (one entry per turn) of lists of per-step
        encoder input tensors.
      decoder_inputs: list (one entry per turn) of decoder inputs; must
        have the same length as `encoder_inputs`.
      cell: RNN cell used for the encoder (embedding-wrapped), the dialog
        state update and the decoder.
      vocab_size: vocabulary size for the embedding and the output.
      num_heads: number of attention heads, forwarded to the decoder.
      output_projection: None or an output projection forwarded to the
        decoder. When None, the decoder cell is wrapped in an
        `OutputProjectionWrapper` of size `vocab_size`.
      feed_previous: forwarded to the decoder.
      dtype: dtype of the dialog zero state and the encoder RNN.
      scope: variable scope; defaults to "dialog_attention_seq2seq".
      initial_state_attention: forwarded to the decoder.

    Returns:
      Tuple (outputs, drnn_state): the list of per-turn decoder outputs and
      the dialog state after the last turn.

    Raises:
      ValueError: if the number of encoder and decoder turns differ.
    """
    if len(encoder_inputs) != len(decoder_inputs):
        # ValueError is a subclass of Exception, so callers that caught the
        # previous bare Exception keep working.
        raise ValueError(
            "encoder_inputs and decoder_inputs must contain the same number "
            "of turns, got %d and %d"
            % (len(encoder_inputs), len(decoder_inputs)))

    with variable_scope.variable_scope(scope or "dialog_attention_seq2seq"):
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, vocab_size)

        # Build the decoder cell once, under its own name. Rebinding `cell`
        # inside the loop wrapped it again on every turn and also fed the
        # wrapped cell to the "DRNN" update and to `cell.output_size` below.
        if output_projection is None:
            decoder_cell = rnn_cell.OutputProjectionWrapper(cell, vocab_size)
            output_size = vocab_size
        else:
            decoder_cell = cell
            output_size = None

        outputs = []
        fixed_batch_size = (
            encoder_inputs[0][0].get_shape().with_rank_at_least(1)[0])
        if fixed_batch_size.value:
            batch_size = fixed_batch_size.value
        else:
            batch_size = array_ops.shape(encoder_inputs[0][0])[0]
        drnn_state = cell.zero_state(batch_size, dtype)

        for i in range(len(encoder_inputs)):
            if i > 0:
                # Every turn after the first shares the first turn's
                # variables.
                variable_scope.get_variable_scope().reuse_variables()

            encoder_outputs, encoder_state = rnn.rnn(
                encoder_cell, encoder_inputs[i], dtype=dtype)

            # Concatenate the encoder outputs along a new axis 1 to form
            # the states the decoder attends over.
            top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                          for e in encoder_outputs]
            attention_states = array_ops.concat(1, top_states)

            with variable_scope.variable_scope("DRNN"):
                _, drnn_state = cell(encoder_state, drnn_state)

            # Decoder.
            answer_output, answer_state = embedding_attention_decoder(
                decoder_inputs[i], drnn_state, attention_states,
                decoder_cell, vocab_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)
            outputs.append(answer_output)

            with variable_scope.variable_scope("DRNN", reuse=True):
                _, drnn_state = cell(answer_state, drnn_state)

    return outputs, drnn_state
def recurent_neural_network(x):
    """Single-layer LSTM classifier over chunks of the input.

    Reads the module-level names `rnn_size`, `n_classes`, `chunk_size` and
    `n_chunks`.

    Args:
      x: rank-3 input tensor (it is transposed with permutation [1, 0, 2]).

    Returns:
      `matmul(outputs[-1], weights) + biases`.
    """
    # The output-layer parameters are created before the recurrent part,
    # as in the original.
    output_weights = tf.Variable(tf.random_normal([rnn_size, n_classes]))
    output_biases = tf.Variable(tf.random_normal([n_classes]))
    layer = {'weights': output_weights, 'biases': output_biases}

    # Build the list of per-chunk inputs expected by rnn.rnn.
    chunk_major = tf.transpose(x, [1, 0, 2])
    flattened = tf.reshape(chunk_major, [-1, chunk_size])
    chunk_inputs = tf.split(0, n_chunks, flattened)

    cell = rnn_cell.BasicLSTMCell(rnn_size)
    step_outputs, _ = rnn.rnn(cell, chunk_inputs, dtype=tf.float32)

    return tf.matmul(step_outputs[-1], layer['weights']) + layer['biases']
def baseline_forward(self, X, size, n_class):
    """Baseline LSTM classifier over a batch of sequences.

    Args:
      X: rank-3 tensor with a statically known shape; the original comments
        describe it as batch_size x sentence_length x word_length.
      size: number of LSTM units.
      n_class: number of output classes.

    Returns:
      `matmul(outputs[-1], W) + b`.
    """
    static_shape = X.get_shape()

    # Swap the first two axes:
    # batch x sentence_length x word_length
    #   -> sentence_length x batch x word_length
    time_major = tf.transpose(X, [1, 0, 2])
    # Flatten to (sentence_length * batch) x word_length.
    word_length = int(static_shape[2])
    flattened = tf.reshape(time_major, [-1, word_length])
    # Cut into a list of sentence_length tensors, one per position.
    sentence_length = int(static_shape[1])
    seq = tf.split(0, sentence_length, flattened)

    with tf.name_scope("LSTM"):
        cell = rnn_cell.BasicLSTMCell(size, forget_bias=1.0)
        step_outputs, _ = rnn.rnn(cell, seq, dtype=tf.float32)

    with tf.name_scope("LSTM-Classifier"):
        W = tf.Variable(tf.random_normal([size, n_class]), name="W")
        b = tf.Variable(tf.random_normal([n_class]), name="b")
        logits = tf.matmul(step_outputs[-1], W) + b

    return logits
def RNN(tensor, lens, n_hidden, n_summary, name, reuse):
    """LSTM summariser: encode a sequence and project its last output.

    Args:
      tensor: list of per-time-step input tensors for `rnn.rnn`.
      lens: per-example sequence lengths, forwarded as `sequence_length`.
      n_hidden: number of LSTM units.
      n_summary: size of the projected summary.
      name: variable scope name; also the prefix of the projection
        variables' names.
      reuse: reuse flag for the variable scope.

    Returns:
      `matmul(outputs[-1], weights['out']) + biases['out']`.
    """
    # `reuse` is passed by keyword: the position of `reuse` in
    # tf.variable_scope's signature is not the same in every TensorFlow
    # release, so a positional argument can end up bound to a different
    # parameter and be ignored.
    with tf.variable_scope(name, reuse=reuse) as scope:
        # NOTE(review): tf.Variable always creates a new variable, so these
        # projection weights are not shared between calls even when
        # reuse=True; only the LSTM variables are. Confirm this is intended.
        weights = {
            'out': tf.Variable(tf.random_normal([n_hidden, n_summary]),
                               name=name+"_weights")
        }
        biases = {
            'out': tf.Variable(tf.random_normal([n_summary]),
                               name=name+"_biases")
        }

        lstm_cell = rnn_cell.LSTMCell(n_hidden, forget_bias=1.0,
                                      state_is_tuple=True)
        outputs, states = rnn.rnn(lstm_cell, tensor, sequence_length=lens,
                                  dtype=tf.float32, scope=scope)

        # NOTE(review): with sequence_length set, outputs[-1] is the output
        # at the last unrolled step, not at each example's own last valid
        # step -- verify against the rnn.rnn documentation.
        return tf.matmul(outputs[-1], weights['out']) + biases['out']
def train(self, train_x, train_y, learning_rate=0.05, limit=1000, batch_n=1,
          seq_len=3, input_n=2, hidden_n=5, output_n=2):
    """Build an LSTM classifier graph and run `limit` Adam steps on it.

    The graph holds `batch_n` input placeholders of shape
    [seq_len, input_n]; the list of placeholders is what `rnn.rnn` unrolls
    over. The projection of the last output is trained against
    `train_y` with softmax cross-entropy.

    Side effects: sets self.input_layer, self.label_layer, self.weights,
    self.biases, self.lstm_cell, self.prediction, self.loss and
    self.trainer, initialises all variables in self.session and runs the
    training op.

    Args:
      train_x: array reshaped to (batch_n, seq_len, input_n).
      train_y: array reshaped to (seq_len, output_n).
      learning_rate: Adam learning rate.
      limit: number of training steps.
      batch_n: number of input placeholders.
      seq_len: first dimension of each placeholder.
      input_n: second dimension of each input placeholder.
      hidden_n: number of LSTM units.
      output_n: number of output classes.
    """
    self.input_layer = [tf.placeholder("float", [seq_len, input_n])
                        for _ in range(batch_n)]
    self.label_layer = tf.placeholder("float", [seq_len, output_n])
    self.weights = tf.Variable(tf.random_normal([hidden_n, output_n]))
    self.biases = tf.Variable(tf.random_normal([output_n]))
    self.lstm_cell = rnn_cell.BasicLSTMCell(hidden_n, forget_bias=1.0)
    outputs, states = rnn.rnn(self.lstm_cell, self.input_layer,
                              dtype=tf.float32)
    self.prediction = tf.matmul(outputs[-1], self.weights) + self.biases
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(self.prediction,
                                                self.label_layer))
    self.trainer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.loss)
    initer = tf.initialize_all_variables()

    train_x = train_x.reshape((batch_n, seq_len, input_n))
    train_y = train_y.reshape((seq_len, output_n))

    # Feed every input placeholder, not only the first one: the training op
    # depends on all of them, so with batch_n > 1 the remaining
    # placeholders would otherwise be left without a value. The feed does
    # not change between steps, so it is built once.
    feed_dict = {self.label_layer: train_y}
    for placeholder, values in zip(self.input_layer, train_x):
        feed_dict[placeholder] = values

    # run graph
    self.session.run(initer)
    for _ in range(limit):
        self.session.run(self.trainer, feed_dict=feed_dict)
def build_network(self):
    """Build the encoder, the latent sampling step, two decoders and the
    training op.

    Sets self.z_mean, self.z_log_var, self.z, self.output, self.z_gen,
    self.gen_output, self.inp, self.train_loss and self.train.
    """
    with tf.variable_scope('encoder'):
        # Linear maps from the encoder's final state to the latent mean and
        # the latent log-variance.
        z_mean_w = tf.Variable(self.initializer([self._enc_cell.state_size, self.n_latent]))
        z_mean_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))
        z_logvar_w = tf.Variable(self.initializer([self._enc_cell.state_size, self.n_latent]))
        z_logvar_b = tf.Variable(tf.zeros([self.n_latent], dtype=tf.float32))
        # Only the final encoder state is used.
        _, enc_state = rnn.rnn(self._enc_cell, self.inputs, dtype=tf.float32)
        self.z_mean = tf.add(tf.matmul(enc_state, z_mean_w), z_mean_b)
        self.z_log_var = tf.add(tf.matmul(enc_state, z_logvar_w), z_logvar_b)
        # Sample z = mean + sqrt(exp(log_var)) * eps with eps ~ N(0, 1).
        eps = tf.random_normal((self.batch_size, self.n_latent), 0, 1, dtype=tf.float32)
        self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_var)), eps))
    with tf.variable_scope('decoder') as scope:
        # dec_in_*: latent vector -> initial decoder state.
        # dec_out_*: decoder output -> element space.
        dec_in_w = tf.Variable(self.initializer([self.n_latent, self._dec_cell.state_size], dtype=tf.float32))
        dec_in_b = tf.Variable(tf.zeros([self._dec_cell.state_size], dtype=tf.float32))
        dec_out_w = tf.Variable(self.initializer([self.n_hidden, self.elem_num], dtype=tf.float32))
        dec_out_b = tf.Variable(tf.zeros([self.elem_num], dtype=tf.float32))
        initial_dec_state = self.transfer_func(tf.add(tf.matmul(self.z, dec_in_w), dec_in_b))
        # Reconstruction decoder: fed the real inputs, started from the
        # state derived from the sampled z.
        dec_out, _ = seq2seq.rnn_decoder(self.inputs, initial_dec_state, self._dec_cell)
        if self.reverse:
            dec_out = dec_out[::-1]
        # Stack the per-step outputs and swap the first two axes.
        dec_output = tf.transpose(tf.pack(dec_out), [1, 0, 2])
        # Tile the output matrix batch_size times so batch_matmul can apply
        # it to every example.
        batch_dec_out_w = tf.tile(tf.expand_dims(dec_out_w, 0), [self.batch_size, 1, 1])
        self.output = tf.nn.sigmoid(tf.batch_matmul(dec_output, batch_dec_out_w) + dec_out_b)
        # From here on the decoder cell's variables are reused.
        scope.reuse_variables()
        # Generation decoder: constant 0.5 inputs for step_num steps,
        # started from a latent vector fed through self.z_gen.
        dec_gen_input = [0.5 * tf.ones([self.batch_size, self.elem_num], dtype=tf.float32) for _ in range(self.step_num)]
        self.z_gen = tf.placeholder(tf.float32, [self.batch_size, self.n_latent])
        dec_gen_state = self.transfer_func( tf.add(tf.matmul(self.z_gen, dec_in_w), dec_in_b))
        dec_gen_out, _ = seq2seq.rnn_decoder( dec_gen_input, dec_gen_state, self._dec_cell)
        if self.reverse:
            dec_gen_out = dec_gen_out[::-1]
        dec_gen_output = tf.transpose(tf.pack(dec_gen_out), [1, 0, 2])
        self.gen_output = tf.nn.sigmoid(tf.batch_matmul(dec_gen_output, batch_dec_out_w) + dec_out_b)
    # Inputs stacked and transposed the same way as the decoder output.
    self.inp = tf.transpose(tf.pack(self.inputs), [1, 0, 2])
    self.train_loss = self.get_loss()
    self.train = tf.train.AdamOptimizer(self.learning_rate).minimize(self.train_loss)
def train(self, train_x, train_y, learning_rate=0.02, epochs=1, batch_n=1,
          input_n=1, hidden_n=4):
    """Build an LSTM regressor and train it on sliding windows of `train_x`.

    The `[batch_n, input_n]` input placeholder is split into `batch_n`
    rows, each reshaped to (1, input_n); those rows are the time steps fed
    to `rnn.rnn`. For every start index `idx`, the window
    `train_x[idx:idx + batch_n]` is trained against `train_y[idx]` with a
    mean-squared-error loss and plain gradient descent. Summaries are
    written to "./graph-rnn".

    Side effects: sets self.inputs, self.label_layer, self.input_layer,
    self.weights, self.biases, self.lstm_cell, self.prediction, self.loss
    and self.trainer, and runs ops in self.session.

    Args:
      train_x: sequence of input rows; its row width overrides `input_n`.
      train_y: sequence of target rows; its row width is the output size.
      learning_rate: gradient-descent learning rate.
      epochs: number of passes over the windows.
      batch_n: window length (number of unrolled steps).
      input_n: ignored; recomputed from `train_x[0]`.
      hidden_n: number of LSTM units.
    """
    seq_n = len(train_x)
    input_n = len(train_x[0])
    output_n = len(train_y[0])

    self.inputs = tf.placeholder(tf.float32, [batch_n, input_n])
    self.label_layer = tf.placeholder(tf.float32, [output_n])
    self.input_layer = [tf.reshape(row, (1, input_n))
                        for row in tf.split(0, batch_n, self.inputs)]
    self.weights = tf.Variable(tf.random_normal([hidden_n, output_n]))
    self.biases = tf.Variable(tf.random_normal([output_n]))
    self.lstm_cell = rnn_cell.BasicLSTMCell(hidden_n, forget_bias=1.0)
    outputs, states = rnn.rnn(self.lstm_cell, self.input_layer,
                              dtype=tf.float32)
    self.prediction = tf.matmul(outputs[-1], self.weights) + self.biases
    self.loss = tf.reduce_mean(tf.square(self.prediction - self.label_layer))
    self.trainer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(self.loss)
    initer = tf.global_variables_initializer()

    writer = tf.train.SummaryWriter("./graph-rnn", self.session.graph)
    # The writer is local to this call and was never closed, so buffered
    # events could be lost. Close it on every exit path.
    try:
        tf.scalar_summary("loss", self.loss)
        tf.scalar_summary("prediction", self.prediction[0][0])
        merged_summary = tf.merge_all_summaries()

        self.session.run(initer)
        total_seq = seq_n - batch_n
        for epoch in range(epochs):
            for idx in range(0, total_seq):
                input_x = train_x[idx:idx + batch_n]
                output_y = train_y[idx]
                feed_dict = {self.inputs: input_x,
                             self.label_layer: output_y}
                _, summary = self.session.run(
                    [self.trainer, merged_summary], feed_dict=feed_dict)
                # Global step = position within the whole training run.
                writer.add_summary(summary, idx + epoch * total_seq)
    finally:
        writer.close()
def my_rnn(inp, ids, cell, length, embeddings, rev=False, init_state=None):
    """Run `cell` over `length` steps and return a slice of the final state.

    The input sequence is built from `inp`, from the embeddings of `ids`,
    or from both concatenated along axis 2.

    Args:
      inp: optional input tensor, or None.
      ids: optional index tensor looked up in the embedding variable
        "E_w", or None.
      cell: RNN cell.
      length: Python int; number of pieces the input is split into along
        axis 0, one per time step.
      embeddings: initial value of the trainable embedding variable.
      rev: unused in this function.
      init_state: optional initial state. When None, a variable
        "init_state" of size cell.state_size is created and tiled once per
        batch entry.

    Returns:
      The first `cell.output_size` columns of the final state.

    Raises:
      ValueError: if both `inp` and `ids` are None.
    """
    # `ids` and `inp` are tensors when given; their truth value is not
    # usable, so presence is tested with `is (not) None`.
    if ids is not None:
        E = tf.get_variable("E_w", initializer=tf.identity(embeddings),
                            trainable=True)
        if inp is not None:
            inp = tf.concat(2, [tf.nn.embedding_lookup(E, ids), inp])
        else:
            inp = tf.nn.embedding_lookup(E, ids)
    if inp is None:
        raise ValueError("my_rnn needs at least one of `inp` or `ids`")

    if init_state is None:
        # Learn one initial state vector and repeat it for every batch
        # entry; the batch size is read from axis 1 of the input.
        init_state = tf.get_variable("init_state", [cell.state_size],
                                     tf.float32)
        batch_size = tf.gather(tf.shape(inp), [1])
        init_state = tf.tile(init_state, batch_size)
        init_state = tf.reshape(init_state, [-1, cell.state_size])

    # One tensor per time step, with the leading axis of size 1 removed.
    inps = [tf.squeeze(step, [0]) for step in tf.split(0, length, inp)]

    # NOTE(review): `lengths` is not a parameter or local of this function
    # (the parameter is `length`); it must come from an enclosing scope.
    # Confirm it exists and holds the per-example sequence lengths.
    _, final_state = rnn(cell, inps, init_state, sequence_length=lengths)
    out = tf.slice(final_state, [0, 0], [-1, cell.output_size])
    return out
def _build_rnn(self):
    """Unroll the RNN over the input and return the outputs at the step
    `max(self.input_lengths) - 1`.

    Reads `self.input`, `self.word_embs`, `self.cell`, `self.max_seq_len`
    and `self.input_lengths`.

    Returns:
      The slice of the stacked outputs at the selected time index, with
      size-1 dimensions removed by `tf.squeeze`.
      NOTE(review): `tf.squeeze` is called without explicit axes, so any
      other dimension of size 1 would be removed as well. Confirm this is
      acceptable.
    """
    # Unroll for up to the maximum sequence length: column i of the input
    # selects the word embeddings for time step i.
    word_emb_seq = [
        tf.gather(self.word_embs, _slice_column_2d(self.input, step))
        for step in range(self.max_seq_len)
    ]

    step_outputs, _ = rnn.rnn(self.cell,
                              word_emb_seq,
                              sequence_length=self.input_lengths,
                              dtype=tf.float32)

    # Stack the per-step outputs into one tensor whose first axis is time.
    stacked_outputs = tf.pack(step_outputs)

    # Select the single time index max(input_lengths) - 1 and drop the
    # leading axis.
    last_index = tf.reduce_max(self.input_lengths) - 1
    slice_begin = tf.concat(0, [tf.expand_dims(last_index, 0), [0], [0]])
    selected = tf.slice(stacked_outputs, slice_begin, [1, -1, -1])
    return tf.squeeze(selected)
def attention_enc2enc_with_embedding(encoder_inputs, decoder_inputs,
                                     embedding, cell, args, num_heads=1,
                                     dtype=dtypes.float32, scope=None):
    """Encode the embedded inputs and run an attention encoder over them.

    Parameters
    ----------
    encoder_inputs
        Inputs for the first encoder; embedded with `add_word_embedding`.
    decoder_inputs
        Inputs forwarded to `attention_encoder`.
    embedding
        Embedding passed to `add_word_embedding`.
    cell
        RNN cell used by the first encoder and by `attention_encoder`.
    args
        Object providing `time_steps`.
    num_heads
        Number of attention heads that read from the attention states.
    dtype
        dtype of the first encoder and of the attention encoder.
    scope
        Variable scope; defaults to "attention_enc2enc_with_embedding".

    Returns
    -------
    Whatever `attention_encoder` returns.
    """
    with vs.variable_scope(scope or "attention_enc2enc_with_embedding"):
        em_encoder_input = add_word_embedding(
            encoder_inputs, args.time_steps, embedding)
        encoder_outputs, encoder_state = rnn.rnn(
            cell, em_encoder_input, dtype=dtype)

        # Concatenate the encoder outputs along a new axis 1 to form the
        # states to put attention on.
        top_states = [array_ops.reshape(e, [-1, 1, cell.output_size])
                      for e in encoder_outputs]
        attention_states = array_ops.concat(1, top_states)

        # Forward the caller's dtype instead of a hard-coded float32, so
        # both stages agree when a non-default dtype is requested. The
        # scope stays None because the graph is already built inside this
        # function's own variable scope.
        return attention_encoder(decoder_inputs,
                                 encoder_state,
                                 attention_states,
                                 cell,
                                 num_heads,
                                 dtype=dtype,
                                 scope=None,
                                 initial_state_attention=False)