def bi_lstm_class(input_, n_hidden=256, n_steps=32, n_input=54, num_class=10,
                  name='class_bi_lstm'):
    with tf.variable_scope(name):
        input_x = tf.unstack(input_, n_steps, 1)
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        # Pair each step with its delta to the next step.
        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i]], 1))
        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn(
                [lstm_fw_cell], [lstm_bw_cell], x, dtype=tf.float32)
        except Exception:
            # Old TensorFlow versions only return outputs, not states.
            outputs = rnn.stack_bidirectional_rnn(
                [lstm_fw_cell], [lstm_bw_cell], x, dtype=tf.float32)
        h = tf.concat(outputs, 1)
        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)
        h, h_w, h_b = linear(h, num_class, 'd_h4_lin', with_w=True)
        return h
def bidirectional_lstm(input_, cond, n_hidden=256, n_steps=32, n_input=54,
                       name='bidirec_lstm'):
    with tf.variable_scope(name):
        print('new_lstm discrim')
        # weights = tf.get_variable('weights', [4096, 1],
        #                           initializer=tf.random_normal_initializer(stddev=0.02))
        # biases = tf.get_variable('biases', [1], initializer=tf.constant_initializer(0.0))

        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        input_x = tf.unstack(input_, n_steps, 1)

        # Calculate shifts between consecutive steps and append the condition.
        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i], cond], 1))

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn(
                [lstm_fw_cell], [lstm_bw_cell], x, dtype=tf.float32)
        except Exception:
            # Old TensorFlow versions only return outputs, not states.
            outputs = rnn.stack_bidirectional_rnn(
                [lstm_fw_cell], [lstm_bw_cell], x, dtype=tf.float32)

        h = tf.concat(outputs, 1)
        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)
        h, h_w, h_b = linear(h, 1, 'd_h4_lin', with_w=True)
        return h
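# Illustrative sketch (not from the original source): both functions above
# funnel their step list through rnn.stack_bidirectional_rnn, which takes
# *lists* of forward and backward cells, one per stacked layer. A minimal,
# self-contained call pattern, with placeholder shapes assumed to mirror
# their defaults (32 steps, 54 features, 256 hidden units):
import tensorflow as tf
from tensorflow.contrib import rnn

seq = tf.placeholder(tf.float32, [None, 32, 54])
steps = tf.unstack(seq, 32, 1)  # list of 32 tensors, each (batch, 54)
outputs, state_fw, state_bw = rnn.stack_bidirectional_rnn(
    [rnn.LSTMBlockCell(256, forget_bias=1.0)],  # forward cells, one layer
    [rnn.LSTMBlockCell(256, forget_bias=1.0)],  # backward cells, one layer
    steps, dtype=tf.float32)
# Each element of `outputs` is (batch, 2 * 256): fw and bw concatenated.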
def __call__(self, inputs, name, training=False):
    """
    Runs the bidirectional LSTM, produces outputs and saves both forward and
    backward states as well as gradients.
    :param inputs: The inputs should be a list of shape [sequence_length, batch_size, 64]
    :param name: Name to give to the tensorflow op
    :param training: Flag that indicates if this is a training or evaluation stage
    :return: Returns the LSTM outputs, as well as the forward and backward hidden states.
    """
    with tf.name_scope('bid-lstm' + name), tf.variable_scope('bid-lstm', reuse=self.reuse):
        with tf.variable_scope("encoder"):
            fw_lstm_cells_encoder = [rnn.LSTMCell(num_units=self.layer_sizes[i],
                                                  activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            bw_lstm_cells_encoder = [rnn.LSTMCell(num_units=self.layer_sizes[i],
                                                  activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells_encoder, bw_lstm_cells_encoder, inputs, dtype=tf.float32)
        print("out shape", tf.stack(outputs, axis=0).get_shape().as_list())
        with tf.variable_scope("decoder"):
            fw_lstm_cells_decoder = [rnn.LSTMCell(num_units=self.layer_sizes[i],
                                                  activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            bw_lstm_cells_decoder = [rnn.LSTMCell(num_units=self.layer_sizes[i],
                                                  activation=tf.nn.tanh)
                                     for i in range(len(self.layer_sizes))]
            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells_decoder, bw_lstm_cells_decoder, outputs, dtype=tf.float32)
    self.reuse = True
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid-lstm')
    return outputs, output_state_fw, output_state_bw
def BiRNN(x, weights, biases):
    x = tf.unstack(x, n_steps, 1)
    lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    try:
        # stack_bidirectional_rnn expects *lists* of cells, forward first.
        outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                    x, dtype=tf.float32)
    except Exception:
        # Old TensorFlow versions only return outputs, not states.
        outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                              x, dtype=tf.float32)
def BiRNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, num_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, axis=1)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.static_bidirectional_rnn(cell_fw=lstm_fw_cell,
                                                     cell_bw=lstm_bw_cell,
                                                     inputs=x, dtype=tf.float32)
    except Exception:
        # Old TensorFlow versions only return outputs, not states; note that
        # stack_bidirectional_rnn takes *lists* of cells.
        outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                              x, dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
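# Illustrative sketch (not from the original source): a possible driver for
# the BiRNN above. The MNIST-style sizes and the weights/biases dictionaries
# are assumptions, since the snippet reads module-level names it never defines.
timesteps, num_input, num_hidden, num_classes = 28, 28, 128, 10
X = tf.placeholder(tf.float32, [None, timesteps, num_input])
weights = {'out': tf.Variable(tf.random_normal([2 * num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.zeros([num_classes]))}
logits = BiRNN(X, weights, biases)  # shape (batch, num_classes)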
def __call__(self, inputs, name, training=False):
    """
    Runs the bidirectional LSTM, produces outputs and saves both forward and
    backward states as well as gradients.
    :param inputs: The inputs should be a list of shape [timestep_size, batch_size, length]
    :param name: Name to give to the tensorflow op
    :param training: Flag that indicates if this is a training or evaluation stage
    :return: Returns the LSTM outputs
    """
    # print(inputs.shape, 'lstm inputs.shape')
    with tf.name_scope('bid-lstm' + name), tf.variable_scope('bid-lstm', reuse=self.reuse):
        fw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        bw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
            fw_lstm_cells, bw_lstm_cells, inputs, dtype=tf.float32)
    # print(outputs.shape, 'lstm outputs.shape')
    self.reuse = True
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid-lstm')
    return outputs
def __common_model(self, X, timesteps, num_hidden, layers, w1, b1):
    x = tf.unstack(X, timesteps, 1)
    fw_lstm_cells_encoder = [self.__lstm_cell(num_hidden) for i in range(layers)]
    bw_lstm_cells_encoder = [self.__lstm_cell(num_hidden) for i in range(layers)]
    outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
        fw_lstm_cells_encoder, bw_lstm_cells_encoder, x, dtype=tf.float32)
    # Checking sizes
    print("outputs len:", len(outputs))
    print("outputs[0].shape:", outputs[0].shape)
    outputs = tf.reshape(outputs, [timesteps, -1, num_hidden * 2])
    print("R_outputs[0].shape:", outputs.shape)
    # Put the batch dimension first.
    sorted_outputs = tf.transpose(outputs, (1, 0, 2))
    print("sorted_outputs.shape:", sorted_outputs.shape)
    # Reshape to (batch * timesteps, num_hidden * 2) in order to multiply with the matrix.
    outputs = tf.reshape(sorted_outputs, [-1, num_hidden * 2])
    # Vector Z is calculated: (batch * timesteps, self.num_input * self.d_vector)
    return tf.matmul(outputs, w1) + b1
def g_embedding_biLSTM(self, inputs, reuse=False, layers=False, num=None):
    if layers:
        layer_sizes = [32 for i in range(num)]
        # layer_sizes = [32, 32, 32, 32]
    else:
        layer_sizes = [32]
    with tf.variable_scope('encoder', reuse=reuse) as scope:
        if reuse:
            scope.reuse_variables()
        # fw_lstm_cells_encoder = [rnn.LSTMCell(num_units=layer_sizes[i], activation=tf.nn.tanh)
        #                          for i in range(len(layer_sizes))]
        # bw_lstm_cells_encoder = [rnn.LSTMCell(num_units=layer_sizes[i], activation=tf.nn.tanh)
        #                          for i in range(len(layer_sizes))]
        fw_lstm_cells_encoder = [self._lstm_cell() for i in range(len(layer_sizes))]
        bw_lstm_cells_encoder = [self._lstm_cell() for i in range(len(layer_sizes))]
        outputs, outputs_state_fw, outputs_state_bw = rnn.stack_bidirectional_rnn(
            fw_lstm_cells_encoder, bw_lstm_cells_encoder, inputs, dtype=tf.float32)
    self.g_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='encoder')
    return outputs
def __call__(self, inputs, name, training=False):
    # Implementing __call__ when defining a class makes its instances callable,
    # effectively overloading the parentheses operator:
    #   lstm = BidirectionalLSTM()   runs __init__
    #   lstm(inputs, name)           then runs __call__(self, inputs, name, training=False)
    """
    Runs the bidirectional LSTM, produces outputs and saves both forward and
    backward states as well as gradients.
    :param inputs: The inputs should be a list of shape [sequence_length, batch_size, 64]
    :param name: Name to give to the tensorflow op
    :param training: Flag that indicates if this is a training or evaluation stage
    :return: Returns the LSTM outputs, as well as the forward and backward hidden states.
    """
    with tf.name_scope('bid-lstm' + name), tf.variable_scope('bid-lstm', reuse=self.reuse):
        fw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        bw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
            fw_lstm_cells, bw_lstm_cells, inputs, dtype=tf.float32)
    self.reuse = True
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid-lstm')
    return outputs, output_state_fw, output_state_bw
def __call__(self, inputs, name):
    """
    Implementing __call__ makes instances of this class callable (it overloads
    the parentheses operator):
      lstm = BidirectionalLSTM()   runs __init__
      lstm(inputs, name)           then runs __call__
    """
    with tf.name_scope('bid_lstm' + name), tf.variable_scope('bid_lstm', reuse=self.reuse):
        fw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        bw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                         for i in range(len(self.layer_sizes))]
        # Bidirectional LSTM: each output combines the forward (fw) and
        # backward (bw) passes at that step.
        outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
            fw_lstm_cells, bw_lstm_cells, inputs, dtype=tf.float32)
    self.reuse = True  # share the LSTM variables on later calls
    # The scope name must match the variable_scope above ('bid_lstm').
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid_lstm')
    # print(outputs.shape, output_state_fw.shape, output_state_bw.shape)
    return outputs, output_state_fw, output_state_bw
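# Illustrative sketch (not from the original source): hypothetical use of the
# callable wrappers above. The constructor signature (a layer_sizes list, with
# self.reuse initialised to False) is inferred from the attributes __call__
# reads; it is not shown in the excerpts.
lstm = BidirectionalLSTM(layer_sizes=[64, 64])
steps = tf.unstack(tf.placeholder(tf.float32, [None, 28, 64]), 28, 1)
outputs, state_fw, state_bw = lstm(steps, name='encoder')
# A second call reuses the same 'bid_lstm' variables, since self.reuse is now True.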
def simple_stack_bilstm(x, layers, num_hidden, timesteps, name):
    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, 1)
    # Define lstm cells with tensorflow
    lstm_fw_cell = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE,
                                      name=name + "_f_" + str(i)) for i in range(layers)]
    lstm_bw_cell = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE,
                                      name=name + "_b_" + str(i)) for i in range(layers)]
    outputs, _, _ = rnn.stack_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                dtype=tf.float32)
    # print(type(outputs))
    return outputs
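# Illustrative sketch (not from the original source): one possible call into
# simple_stack_bilstm; all shapes here are assumptions.
x = tf.placeholder(tf.float32, [None, 28, 28])
outs = simple_stack_bilstm(x, layers=2, num_hidden=64, timesteps=28, name='enc')
# outs is a list of 28 tensors, each (batch, 2 * 64); thanks to
# reuse=tf.AUTO_REUSE the same call can be issued again without error.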
def build_model(self, metadata_path=None, embedding_weights=None):
    self.embedding_weights, self.config = ops.embedding_layer(
        metadata_path[0], embedding_weights[0])
    self.pos_embedding_weights, self.config = ops.embedding_layer(
        metadata_path[1], embedding_weights[1], name='pos_embedding')
    self.embedded_input = tf.nn.embedding_lookup(self.embedding_weights, self.input)
    self.embedded_pos = tf.nn.embedding_lookup(self.pos_embedding_weights, self.pos)
    self.merged_input = tf.concat([self.embedded_input, self.embedded_pos], axis=-1)

    cells_fw, cells_bw = [], []
    for layer in range(self.args['rnn_layers']):
        cells_fw.append(tf.contrib.rnn.LSTMCell(self.args['hidden_units'],
                                                state_is_tuple=True))
        cells_bw.append(tf.contrib.rnn.LSTMCell(self.args['hidden_units'],
                                                state_is_tuple=True))
    self.rnn_output, _, _ = stack_bidirectional_rnn(
        cells_fw, cells_bw,
        tf.unstack(tf.transpose(self.merged_input, perm=[1, 0, 2])),
        dtype=tf.float32, sequence_length=self.input_lengths)

    weight, bias = self.weight_and_bias(2 * self.args['hidden_units'],
                                        self.args['n_classes'])
    self.rnn_output = tf.reshape(
        tf.transpose(tf.stack(self.rnn_output), perm=[1, 0, 2]),
        [-1, 2 * self.args['hidden_units']])
    self.rnn_output = dropout(self.rnn_output, keep_prob=self.args['dropout'])
    logits = tf.matmul(self.rnn_output, weight) + bias
    prediction = tf.nn.softmax(logits)
    self.prediction = tf.reshape(
        prediction, [-1, self.args.get("sequence_length"), self.args['n_classes']])
    open_targets = tf.reshape(self.output, [-1, self.args['n_classes']])

    with tf.name_scope("loss"):
        # self.loss = self.cost()
        self.loss = tf.losses.softmax_cross_entropy(open_targets, logits)
        if self.args["l2_reg_beta"] > 0.0:
            self.regularizer = ops.get_regularizer(self.args["l2_reg_beta"])
            self.loss = tf.reduce_mean(self.loss + self.regularizer)

    with tf.name_scope('accuracy'):
        self.correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                           tf.argmax(open_targets, 1))
        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32))
def biRNN():
    inputs = tf.reshape(X, [-1], name='flattened_input')
    embedded_inputs = tf.nn.embedding_lookup(word_embeddings, inputs)
    embedded_inputs = tf.reshape(embedded_inputs, [-1, time_steps, 128])
    embedded_inputs = tf.unstack(embedded_inputs, time_steps, 1)
    lstm_layer_fw = rnn.BasicLSTMCell(num_units, forget_bias=1)
    lstm_layer_bw = rnn.BasicLSTMCell(num_units, forget_bias=1)
    # stack_bidirectional_rnn expects *lists* of forward and backward cells.
    conc_outputs, final_state_fw, final_state_bw = rnn.stack_bidirectional_rnn(
        [lstm_layer_fw], [lstm_layer_bw], embedded_inputs, dtype='float32')
    # Concatenate the final hidden states of the (single) forward and backward
    # layers; tf.concat needs a list of tensors plus an axis.
    return tf.concat([final_state_fw[-1].h, final_state_bw[-1].h], 1)
def BiRNN(x, weight, biases):
    # Put the axis to split on (time) first.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    # Split along axis 0 so that x becomes a list of n_steps tensors of shape
    # (batch_size, n_input), the input format stack_bidirectional_rnn expects.
    x = tf.split(x, n_steps)
    lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # stack_bidirectional_rnn takes lists of cells: forward first, then backward.
    outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell],
                                                x, dtype=tf.float32)
    # outputs: n_steps tensors of shape (batch_size, 2 * n_hidden);
    # use the last time step's output for classification.
    return tf.matmul(outputs[-1], weight) + biases
def RNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, n_steps, 1)

    # Define a lstm cell with tensorflow
    # lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    # lstm_cell = rnn.GRUCell(n_hidden)
    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,
                                  reuse=tf.get_variable_scope().reuse)
    # lstm_cell_bk = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,
    #                                  reuse=tf.get_variable_scope().reuse)
    lstm_cell_bk = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

    # Make the deep rnn
    no_of_layers = 3  # number of layers of the deep rnn
    stacked_lstm = rnn.MultiRNNCell([
        # rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True,
        #                   reuse=tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0,
                     state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])
    stacked_lstm_bk = rnn.MultiRNNCell([
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0,
                     state_is_tuple=True, reuse=tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
    ])

    # Provide dropout for the rnn
    # lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5)       # for rnn
    stacked_lstm = rnn.DropoutWrapper(stacked_lstm, output_keep_prob=0.5)   # for deep rnn

    # Get lstm cell output
    # outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)      # single layer rnn
    # outputs, states = rnn.static_rnn(stacked_lstm, x, dtype=tf.float32)   # deep rnn
    # outputs, states, states_bk = rnn.static_bidirectional_rnn(
    #     lstm_cell, lstm_cell_bk, x, dtype=tf.float32)                     # single layer bi-rnn
    # outputs, states, states_bk = rnn.static_bidirectional_rnn(
    #     stacked_lstm, stacked_lstm_bk, x, dtype=tf.float32)               # deep bi-rnn
    outputs, states, states_bk = rnn.stack_bidirectional_rnn(
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        x, dtype=tf.float32)  # deep bidirectional rnn

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def inference(self, inputs, seq_len=None, reuse=False):
    """
    Inputs
    ------
    inputs : float, shape=[batch_size, seq_length=100, PLAYERS=11, COLS=98, ROWS=46]
        real (from data) or fake (from G)
    seq_len : temporarily not used

    Return
    ------
    decision : bool
        real (from data) or fake (from G)
    """
    with tf.variable_scope('D', reuse=reuse) as scope:
        # unstack, axis=1 -> [batch, time, feature]
        print(inputs)
        inputs = tf.transpose(inputs, perm=[0, 1, 3, 4, 2])
        print(inputs)
        inputs = tf.unstack(inputs, num=self.seq_length, axis=1)
        blstm_input = []
        output_list = []
        for time_step in range(self.seq_length):
            with tf.variable_scope('conv') as scope:
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                filters_list = [32, 64, 128, 256]
                next_input = inputs[time_step]
                for i in range(len(filters_list)):
                    with tf.variable_scope('conv' + str(i)) as scope:
                        conv = layers.conv2d(
                            inputs=next_input,
                            num_outputs=filters_list[i],
                            kernel_size=[5, 5],
                            stride=2,
                            padding='SAME',
                            activation_fn=tf.nn.relu,
                            weights_initializer=layers.xavier_initializer(uniform=False),
                            weights_regularizer=None,
                            biases_initializer=tf.zeros_initializer(),
                            reuse=scope.reuse,
                            scope=scope)
                        next_input = conv
                with tf.variable_scope('fc') as scope:
                    flat_input = layers.flatten(next_input)
                    fc = layers.fully_connected(
                        inputs=flat_input,
                        num_outputs=self.hidden_size,
                        activation_fn=tf.nn.relu,
                        weights_initializer=layers.xavier_initializer(uniform=False),
                        biases_initializer=tf.zeros_initializer(),
                        reuse=scope.reuse,
                        scope=scope)
                    blstm_input.append(fc)
        with tf.variable_scope('stack_blstm') as scope:
            stack_blstm, _, _ = rnn.stack_bidirectional_rnn(
                cells_fw=[self.__lstm_cell() for _ in range(self.rnn_layers)],
                cells_bw=[self.__lstm_cell() for _ in range(self.rnn_layers)],
                inputs=blstm_input,
                dtype=tf.float32,
                sequence_length=seq_len)
        with tf.variable_scope('output') as scope:
            for i, out_blstm in enumerate(stack_blstm):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                with tf.variable_scope('fc') as scope:
                    fc = layers.fully_connected(
                        inputs=out_blstm,
                        num_outputs=1,
                        activation_fn=self.__leaky_relu,
                        weights_initializer=layers.xavier_initializer(uniform=False),
                        biases_initializer=tf.zeros_initializer(),
                        reuse=scope.reuse,
                        scope=scope)
                    output_list.append(fc)
        # stack, axis=1 -> [batch, time, feature]
        decisions = tf.stack(output_list, axis=1)
        print('decisions', decisions)
        decision = tf.reduce_mean(decisions, axis=1)
        print('decision', decision)
        return decision
def __init__(self, args, training=True):
    self.args = args
    if not training:
        args.batch_size = 1

    if args.model == 'rnn':
        cell_fn = rnn.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn.BasicLSTMCell
    elif args.model == 'nas':
        cell_fn = rnn.NASCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # List of num_layers forward cells. Each cell is an unrollable RNN of variable length.
    self.cells_fw = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)  # rnn_size is the dimension of the hidden layer
        if training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0):
            cell = rnn.DropoutWrapper(cell,
                                      input_keep_prob=args.input_keep_prob,
                                      output_keep_prob=args.output_keep_prob)
        self.cells_fw.append(cell)  # num_layers cells stacked together

    # List of backward cells
    self.cells_bw = []
    for _ in range(args.num_layers):
        cell = cell_fn(args.rnn_size)
        if training and (args.output_keep_prob < 1.0 or args.input_keep_prob < 1.0):
            cell = rnn.DropoutWrapper(cell,
                                      input_keep_prob=args.input_keep_prob,
                                      output_keep_prob=args.output_keep_prob)
        self.cells_bw.append(cell)

    # Placeholder for input data
    self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length])

    # Define the initial hidden state of each cell as the default zero_state
    self.initial_states_fw = tuple([
        self.cells_fw[i].zero_state(args.batch_size, tf.float32)
        for i in range(args.num_layers)
    ])
    self.initial_states_bw = tuple([
        self.cells_bw[i].zero_state(args.batch_size, tf.float32)
        for i in range(args.num_layers)
    ])

    # We define an embedding: a look-up table from every item in the vocabulary
    # to an rnn_size-dimensional hidden vector. This embedding is learned over
    # time as part of back-propagation.
    embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size])
    # Look the examples up in the embedding to expand the input to rnn_size dimensions.
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)

    # Dropout beta testing: double-check which one should affect the next line
    if training and args.output_keep_prob:
        inputs = tf.nn.dropout(inputs, args.output_keep_prob)

    # Split the input items one by one: if char-level, into letters; if word-level, into words.
    inputs = tf.split(inputs, args.seq_length, 1)
    # inputs is a length-seq_length list of batch_size x rnn_size tensors
    inputs = [tf.squeeze(input_, [1]) for input_ in inputs]  # drop the size-1 axis 1

    # Define the bidirectional rnn layer.
    # outputs: seq_length tensors of shape batch_size x 2*rnn_size. Outputs at every step!
    self.outputs, self.final_state_fw, self.final_state_bw = rnn.stack_bidirectional_rnn(
        self.cells_fw, self.cells_bw, inputs,
        self.initial_states_fw, self.initial_states_bw, tf.float32,
        scope="rnnlm")
def __D(self, inputs, seq_len=None, reuse=False):
    """
    Inputs
    ------
    inputs : float, shape=[batch, length, 272]
        real (from data) or fake (from G)
    seq_len : temporarily not used

    Return
    ------
    decision : bool
        real (from data) or fake (from G)
    """
    with tf.variable_scope('D', reuse=reuse) as scope:
        # unstack, axis=1 -> [batch, time, feature]
        inputs = tf.unstack(inputs, num=self.seq_length, axis=1)
        blstm_input = []
        output_list = []
        with tf.variable_scope('fully_connect_input') as scope:
            for time_step in range(self.seq_length):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                fully_connect_input = layers.fully_connected(
                    inputs=inputs[time_step],
                    num_outputs=self.hidden_size,
                    activation_fn=self.__leaky_relu,
                    weights_initializer=layers.xavier_initializer(uniform=False),
                    biases_initializer=tf.constant_initializer(),
                    scope=scope)
                self.__summarize('fully_connect_input', fully_connect_input,
                                 collections=['D'], postfix='Activation')
                blstm_input.append(fully_connect_input)
        with tf.variable_scope('stack_bi_lstm') as scope:
            out_blstm_list, _, _ = rnn.stack_bidirectional_rnn(
                cells_fw=[self.__lstm_cell() for _ in range(self.rnn_layers)],
                cells_bw=[self.__lstm_cell() for _ in range(self.rnn_layers)],
                inputs=blstm_input,
                dtype=tf.float32,
                sequence_length=seq_len,
                scope=scope)
        with tf.variable_scope('fully_connect') as scope:
            for i, out_blstm in enumerate(out_blstm_list):
                self.__summarize('out_blstm', out_blstm,
                                 collections=['D'], postfix='Activation')
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                fconnect = layers.fully_connected(
                    inputs=out_blstm,
                    num_outputs=1,
                    activation_fn=self.__leaky_relu,
                    weights_initializer=layers.xavier_initializer(uniform=False),
                    biases_initializer=tf.zeros_initializer(),
                    scope=scope)
                self.__summarize('fconnect', fconnect,
                                 collections=['D'], postfix='Activation')
                output_list.append(fconnect)
        # print(output_list)
        # stack, axis=1 -> [batch, time, feature]
        decisions = tf.stack(output_list, axis=1)
        print('decisions', decisions)
        decision = tf.reduce_mean(decisions, axis=1)
        print('decision', decision)
        return decision
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x1,
#                                              dtype=tf.float32)
# outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell], x1,
#                                             dtype=tf.float32)
stacked_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
    stacked_bw_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
outputs, _, _ = rnn.stack_bidirectional_rnn(stacked_rnn, stacked_bw_rnn, x1,
                                            dtype=tf.float32)

pred = tf.contrib.layers.fully_connected(outputs[-1], n_classes, activation_fn=None)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
def __init__(self, conf, pre_word2vec=None, activate_fn=tf.nn.tanh):
    pretrained_w2v = conf.pretrained_w2v
    max_position = conf.max_position
    pos_dim = conf.pos_dim
    num_relation = conf.num_relation
    len_sentence = conf.len_sentence
    num_hidden = conf.num_hidden
    batch_size = conf.batch_size
    reg_weight = conf.reg_weight
    network_type = conf.network_type

    # CNN-specific config settings
    num_filters = conf.num_filters
    filter_sizes = [2, 3, 4]
    word_embedding_dim = 50  # subject to change

    # Note that the first dimensions of input_sentences and input_y differ: each row in
    # input_y is per triple, whereas each row in input_sentences corresponds to a single
    # sentence, and a triple consists of multiple sentences. We use input_triple_index to
    # align sentences with the corresponding label; for example, the label of
    # input_sentences[input_triple_index[0]:input_triple_index[1]] is input_y[0].
    self.input_sentences = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                          name='input_sentence')
    self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                     name='input_position1')
    self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                     name='input_position2')
    self.input_y = tf.placeholder(dtype=tf.int32, shape=[None, num_relation], name='input_y')
    self.input_triple_index = tf.placeholder(dtype=tf.int32, shape=[None],
                                             name='input_triple_index')
    num_sentences = self.input_triple_index[-1]

    with tf.device('/gpu:1'):
        if pretrained_w2v:
            self.word2vec = tf.get_variable(initializer=pre_word2vec, name="word_embedding")
        else:
            self.word2vec = tf.get_variable(shape=[conf.voca_size, conf.word_embedding_dim],
                                            name="word_embedding")
        self.pos2vec1 = tf.get_variable(shape=[max_position, pos_dim], name="pos2vec1")
        self.pos2vec2 = tf.get_variable(shape=[max_position, pos_dim], name="pos2vec2")

    # concatenate word embedding + position embeddings
    # input_forward.shape = [num_sentence, len_sentence, w2v_dim + 2*conf.pos_dim]
    input_forward = tf.concat([
        tf.nn.embedding_lookup(self.word2vec, self.input_sentences),
        tf.nn.embedding_lookup(self.pos2vec1, self.input_pos1),
        tf.nn.embedding_lookup(self.pos2vec2, self.input_pos2)
    ], 2)

    if network_type == 'cnn':
        with tf.device('/gpu:1'):
            # input_forward = tf.unstack(input_forward, len_sentence, 1)
            input_forward = tf.expand_dims(input_forward, -1)  # conv2d expects rank-4 input
            pooled_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.name_scope("conv-maxpool-%s" % filter_size):
                    filter_shape = [filter_size, word_embedding_dim, 1, num_filters]
                    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                    b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                    # Convolution layer
                    conv = tf.nn.conv2d(input_forward, W, strides=[1, 1, 1, 1],
                                        padding="SAME", name="conv")
                    # Activation function (ReLU) layer
                    nl = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    # Max-pooling layer
                    pooled = tf.nn.max_pool(nl,
                                            ksize=[1, len_sentence - filter_size + 1, 1, 1],
                                            strides=[1, 1, 1, 1],
                                            padding="SAME", name="pool")
                    pooled_outputs.append(pooled)
            # Combine all pooled features
            num_filters_total = num_filters * len(filter_sizes)
            self.pool = tf.concat(pooled_outputs, 3)
            sentence_embedding = tf.reshape(self.pool, [-1, num_filters_total])
            sentence_embedding = tf.nn.dropout(sentence_embedding, 0.5)
            # num_hidden = 16  # (sentence_embedding n dimension)
            h_sentence = sentence_embedding
            num_hidden = num_filters_total

    elif network_type == 'rnn':
        with tf.variable_scope("RNN"):
            def create_rnn_cells(num_units):
                """return list of rnn cells"""
                cells = [rnn.GRUCell(num_units, activation=activate_fn)
                         for _ in range(conf.num_layer)]
                if conf.dropout and conf.is_train:
                    return [rnn.DropoutWrapper(cell) for cell in cells]
                else:
                    return cells

            input_forward = tf.unstack(input_forward, len_sentence, 1)
            # construct rnn with high-level api
            if conf.bidirectional:
                output_rnn, _, _ = rnn.stack_bidirectional_rnn(
                    create_rnn_cells(num_hidden), create_rnn_cells(num_hidden),
                    input_forward, dtype=tf.float32)
                num_hidden = 2 * num_hidden  # dimension of concatenated fw-bw outputs
                output_hidden = tf.reshape(tf.concat(output_rnn, 1),
                                           [num_sentences, len_sentence, num_hidden])
            else:
                output_rnn, _ = rnn.static_rnn(create_rnn_cells(num_hidden)[0],
                                               input_forward, dtype=tf.float32)
                output_hidden = tf.reshape(tf.concat(output_rnn, 1),
                                           [num_sentences, len_sentence, num_hidden])

        # word-level attention layer: represent a sentence as a weighted sum of word vectors
        with tf.variable_scope("word-attn"):
            if conf.word_attn:
                word_attn = tf.get_variable('W', shape=[num_hidden, 1])
                word_weight = tf.matmul(
                    tf.reshape(output_hidden, [num_sentences * len_sentence, num_hidden]),
                    word_attn)
                word_weight = tf.reshape(word_weight, [num_sentences, len_sentence])
                sentence_embedding = tf.matmul(
                    tf.reshape(tf.nn.softmax(word_weight),
                               [num_sentences, 1, len_sentence]),
                    output_hidden)
                sentence_embedding = tf.reshape(sentence_embedding,
                                                [num_sentences, num_hidden])
            else:
                sentence_embedding = tf.reduce_mean(output_hidden, 1)

        with tf.variable_scope("fc-hidden"):
            h_sentence = tf.layers.dense(sentence_embedding, num_hidden,
                                         activation=activate_fn, name='fc-hidden')

    # sentence-level attention layer: represent a triple as a weighted sum of sentences
    with tf.device('/gpu:1'), tf.variable_scope("sentence-attn"):
        attn_weight = tf.get_variable("W", shape=[num_hidden, 1])
        if conf.use_multiplier:
            multiplier = tf.get_variable("A", shape=[num_hidden])
        triple_embeddings = list()
        for i in range(batch_size):
            target_sentences = h_sentence[self.input_triple_index[i]:
                                          self.input_triple_index[i + 1]]
            if conf.sent_attn:
                num_triple_sentence = (self.input_triple_index[i + 1]
                                       - self.input_triple_index[i])
                if conf.use_multiplier:
                    tmp = tf.multiply(target_sentences, multiplier)
                else:
                    tmp = target_sentences
                sentence_weight = tf.reshape(
                    tf.nn.softmax(tf.reshape(tf.matmul(tmp, attn_weight),
                                             [num_triple_sentence])),
                    [1, num_triple_sentence])
                triple_embedding = tf.squeeze(
                    tf.matmul(sentence_weight, target_sentences))  # [num_hidden]
            else:
                # use the mean vector if the sentence-level attention layer is not used
                triple_embedding = tf.squeeze(tf.reduce_mean(target_sentences, 0))
            triple_embeddings.append(triple_embedding)
        triple_embeddings = tf.reshape(triple_embeddings, [-1, num_hidden])
        triple_output = tf.layers.dense(triple_embeddings, num_relation, name='fc-output')

    # Optimization preparation step
    self.prob = tf.nn.softmax(triple_output)
    self.predictions = tf.argmax(self.prob, axis=1, name="predictions")
    self.total_loss = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=triple_output,
                                                labels=self.input_y),
        name="loss")
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.predictions, tf.argmax(self.input_y, 1)), "float"),
        name="accuracy")
    tf.summary.scalar("loss", self.total_loss)

    # regularization
    self.l2_loss = tf.contrib.layers.apply_regularization(
        regularizer=tf.contrib.layers.l2_regularizer(reg_weight),
        weights_list=tf.trainable_variables())
    self.final_loss = self.total_loss + self.l2_loss
    tf.summary.scalar("l2_loss", self.l2_loss)
    tf.summary.scalar("final_loss", self.final_loss)
n_input = 28    # MNIST data input (img: 28 * 28)
n_steps = 28    # number of sequence steps
n_hidden = 128  # number of hidden units
n_classes = 10  # total MNIST classes (0-9)

tf.reset_default_graph()

# Define placeholders
x = tf.placeholder('float', [None, n_steps, n_input])
y = tf.placeholder('float', [None, n_classes])
x1 = tf.unstack(x, n_steps, 1)
lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Backward cell
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell], [lstm_bw_cell], x1,
                                            dtype=tf.float32)
print(outputs[0].shape, len(outputs))
pred = tf.contrib.layers.fully_connected(outputs[-1], n_classes, activation_fn=None)

# Define parameters
learning_rate = 0.01
batch_size = 128
time_steps = 28

# Loss function: cross entropy (p. 107, last line)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# equivalent to tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# cost = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)
# Use a gradient-descent optimizer
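# Illustrative sketch (not from the original source): the comment above
# announces a gradient-descent optimizer that the excerpt stops short of;
# one minimal continuation, reusing cost and learning_rate as defined:
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)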
x = tf.placeholder('float', [None, n_steps * n_input])
y = tf.placeholder('float', [None, n_classes])
x1 = tf.reshape(x, [-1, 28, 28])
x1 = tf.unstack(x1, n_steps, 1)

stacked_fw_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_fw_rnn.append(BasicLSTMCell(n_hidden))
    stacked_bw_rnn.append(BasicLSTMCell(n_hidden))
fw_mcell = MultiRNNCell(stacked_fw_rnn)
bw_mcell = MultiRNNCell(stacked_bw_rnn)

outputs, _, _ = stack_bidirectional_rnn([fw_mcell], [bw_mcell], x1, dtype=tf.float32)
# output = tf.concat(outputs, 2)
pred = fully_connected(outputs[-1], n_classes, activation_fn=None)

cost = tf.reduce_mean(tf.reduce_sum(tf.square(pred - y)))
global_step = tf.Variable(0, trainable=False)
initial_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step=global_step,
                                           decay_steps=3, decay_rate=0.9)
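# Illustrative sketch (not from the original source): tf.train.exponential_decay
# only decays once global_step advances, so the training op has to increment it.
# A sketch of the step this excerpt omits, with the optimizer choice assumed:
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
    cost, global_step=global_step)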
def inference(self, inputs, conds, seq_len=None, reuse=False,
              if_log_scalar_summary=False, log_scope_name=''):
    """
    Inputs
    ------
    inputs : float, shape=[batch_size, seq_length=100, features=10]
        real (from data) or fake (from G)
    conds : float, shape=[batch_size, seq_length=100, features=13]

    Return
    ------
    score : float
        real (from data) or fake (from G)
    """
    concat_ = tf.concat([conds, inputs], axis=-1)
    inputs_ = tf.unstack(concat_, num=self.seq_length, axis=1)
    with tf.variable_scope('C_inference') as scope:
        output_list = []
        if reuse:
            tf.get_variable_scope().reuse_variables()
        with tf.variable_scope('stack_bi_lstm') as scope:
            out_blstm_list, _, _ = rnn.stack_bidirectional_rnn(
                cells_fw=[self.__lstm_cell() for _ in range(self.num_layers)],
                cells_bw=[self.__lstm_cell() for _ in range(self.num_layers)],
                inputs=inputs_,
                dtype=tf.float32,
                sequence_length=seq_len,
                scope=scope)
        for i, out_blstm in enumerate(out_blstm_list):
            if i > 0:
                tf.get_variable_scope().reuse_variables()
            with tf.variable_scope('fully_connect') as scope:
                fconnect = layers.fully_connected(
                    inputs=out_blstm,
                    num_outputs=1,
                    activation_fn=libs.leaky_relu,
                    weights_initializer=layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer(),
                    scope=scope)
            output_list.append(fconnect)
        # stack, axis=1 -> [batch, time, feature]
        decisions = tf.stack(output_list, axis=1)
        final_ = tf.reduce_mean(decisions, axis=1)
        final_ = tf.reshape(final_, shape=[self.batch_size])

    with tf.name_scope('heuristic_penalty') as scope:
        # 0. prepare data
        ball_pos = tf.reshape(conds[:, :, :2],
                              shape=[self.batch_size, self.seq_length, 1, 2])
        teamB_pos = tf.reshape(inputs,
                               shape=[self.batch_size, self.seq_length, 5, 2])
        basket_right_x = tf.constant(self.data_factory.BASKET_RIGHT[0],
                                     dtype=tf.float32,
                                     shape=[self.batch_size, self.seq_length, 1, 1])
        basket_right_y = tf.constant(self.data_factory.BASKET_RIGHT[1],
                                     dtype=tf.float32,
                                     shape=[self.batch_size, self.seq_length, 1, 1])
        basket_pos = tf.concat([basket_right_x, basket_right_y], axis=-1)
        vec_ball_2_teamB = ball_pos - teamB_pos    # [128, 100, 5, 2]
        vec_ball_2_basket = ball_pos - basket_pos  # [128, 100, 1, 2]
        b2teamB_dot_b2basket = tf.matmul(vec_ball_2_teamB, vec_ball_2_basket,
                                         transpose_b=True)  # [128, 100, 5, 1]
        b2teamB_dot_b2basket = tf.reshape(
            b2teamB_dot_b2basket, shape=[self.batch_size, self.seq_length, 5])
        dist_ball_2_teamB = tf.norm(vec_ball_2_teamB, ord='euclidean', axis=-1)
        dist_ball_2_basket = tf.norm(vec_ball_2_basket, ord='euclidean', axis=-1)
        one_sub_cosine = 1 - b2teamB_dot_b2basket / \
            (dist_ball_2_teamB * dist_ball_2_basket)
        heuristic_penalty_all = one_sub_cosine * dist_ball_2_teamB
        heuristic_penalty_min = tf.reduce_min(heuristic_penalty_all, axis=-1)
        heuristic_penalty = tf.reduce_mean(heuristic_penalty_min)

    if self.if_trainable_lambda:
        trainable_lambda = tf.get_variable(
            'trainable_heuristic_penalty_lambda', shape=[], dtype=tf.float32,
            initializer=tf.constant_initializer(value=1.0))
    else:
        trainable_lambda = tf.constant(self.heuristic_penalty_lambda)

    # logging
    if if_log_scalar_summary:
        with tf.name_scope(log_scope_name):
            tf.summary.scalar('heuristic_penalty', heuristic_penalty,
                              collections=['C'])
            tf.summary.scalar('trainable_lambda', trainable_lambda,
                              collections=['C'])

    return final_ - trainable_lambda * heuristic_penalty
def __init__(self, conf, pre_word2vec=None, activate_fn=tf.nn.tanh):
    pretrained_w2v = conf.pretrained_w2v
    max_position = conf.max_position
    pos_dim = conf.pos_dim
    num_relation = conf.num_relation
    len_sentence = conf.len_sentence
    num_hidden = conf.num_hidden
    batch_size = conf.batch_size
    reg_weight = conf.reg_weight

    # Note that the first dimensions of input_sentences and input_y differ: each row in
    # input_y is per triple, whereas each row in input_sentences corresponds to a single
    # sentence, and a triple consists of multiple sentences. We use input_triple_index to
    # align sentences with the corresponding label; for example, the label of
    # input_sentences[input_triple_index[0]:input_triple_index[1]] is input_y[0].
    self.input_sentences = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                          name='input_sentence')
    self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                     name='input_position1')
    self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence],
                                     name='input_position2')
    self.input_y = tf.placeholder(dtype=tf.int32, shape=[None, num_relation], name='input_y')
    self.input_triple_index = tf.placeholder(dtype=tf.int32, shape=[None],
                                             name='input_triple_index')
    num_sentences = self.input_triple_index[-1]

    if pretrained_w2v:
        self.word2vec = tf.get_variable(initializer=pre_word2vec, name="word_embedding")
    else:
        self.word2vec = tf.get_variable(shape=[conf.voca_size, conf.word_embedding_dim],
                                        name="word_embedding")
    self.pos2vec1 = tf.get_variable(shape=[max_position, pos_dim], name="pos2vec1")
    self.pos2vec2 = tf.get_variable(shape=[max_position, pos_dim], name="pos2vec2")

    # concatenate word embedding + position embeddings
    # input_forward.shape = [num_sentence, len_sentence, w2v_dim + 2*conf.pos_dim]
    input_forward = tf.concat([
        tf.nn.embedding_lookup(self.word2vec, self.input_sentences),
        tf.nn.embedding_lookup(self.pos2vec1, self.input_pos1),
        tf.nn.embedding_lookup(self.pos2vec2, self.input_pos2)
    ], 2)

    with tf.variable_scope("RNN"):
        def create_rnn_cells(num_units):
            """return list of rnn cells"""
            cells = [rnn.GRUCell(num_units, activation=activate_fn)
                     for _ in range(conf.num_layer)]
            if conf.dropout and conf.is_train:
                return [rnn.DropoutWrapper(cell) for cell in cells]
            else:
                return cells

        input_forward = tf.unstack(input_forward, len_sentence, 1)
        # construct rnn with high-level api
        if conf.bidirectional:
            output_rnn, _, _ = rnn.stack_bidirectional_rnn(
                create_rnn_cells(num_hidden), create_rnn_cells(num_hidden),
                input_forward, dtype=tf.float32)
            num_hidden = 2 * num_hidden  # dimension of concatenated fw-bw outputs
            output_hidden = tf.reshape(tf.concat(output_rnn, 1),
                                       [num_sentences, len_sentence, num_hidden])
        else:
            output_rnn, _ = rnn.static_rnn(create_rnn_cells(num_hidden)[0],
                                           input_forward, dtype=tf.float32)
            output_hidden = tf.reshape(tf.concat(output_rnn, 1),
                                       [num_sentences, len_sentence, num_hidden])

    # word-level attention layer: represent a sentence as a weighted sum of word vectors
    with tf.variable_scope("word-attn"):
        if conf.word_attn:
            word_attn = tf.get_variable('W', shape=[num_hidden, 1])
            word_weight = tf.matmul(
                tf.reshape(output_hidden, [num_sentences * len_sentence, num_hidden]),
                word_attn)
            word_weight = tf.reshape(word_weight, [num_sentences, len_sentence])
            sentence_embedding = tf.matmul(
                tf.reshape(tf.nn.softmax(word_weight), [num_sentences, 1, len_sentence]),
                output_hidden)
            sentence_embedding = tf.reshape(sentence_embedding,
                                            [num_sentences, num_hidden])
        else:
            sentence_embedding = tf.reduce_mean(output_hidden, 1)

    with tf.variable_scope("fc-hidden"):
        h_sentence = tf.layers.dense(sentence_embedding, num_hidden,
                                     activation=activate_fn, name='fc-hidden')

    # sentence-level attention layer: represent a triple as a weighted sum of sentences
    with tf.variable_scope("sentence-attn"):
        attn_weight = tf.get_variable("W", shape=[num_hidden, 1])
        multiplier = tf.get_variable("A", shape=[num_hidden])
        triple_embeddings = list()
        for i in range(batch_size):
            target_sentences = h_sentence[self.input_triple_index[i]:
                                          self.input_triple_index[i + 1]]
            if conf.sent_attn:
                num_triple_sentence = (self.input_triple_index[i + 1]
                                       - self.input_triple_index[i])
                tmp = tf.multiply(target_sentences, multiplier)
                sentence_weight = tf.reshape(
                    tf.nn.softmax(tf.reshape(tf.matmul(tmp, attn_weight),
                                             [num_triple_sentence])),
                    [1, num_triple_sentence])
                triple_embedding = tf.squeeze(
                    tf.matmul(sentence_weight, target_sentences))  # [num_hidden]
            else:
                # use the mean vector if the sentence-level attention layer is not used
                triple_embedding = tf.squeeze(tf.reduce_mean(target_sentences, 0))
            triple_embeddings.append(triple_embedding)
        triple_embeddings = tf.reshape(triple_embeddings, [-1, num_hidden])
        triple_output = tf.layers.dense(triple_embeddings, num_relation, name='fc-output')

    self.prob = tf.nn.softmax(triple_output)
    self.predictions = tf.argmax(self.prob, axis=1, name="predictions")
    self.total_loss = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=triple_output,
                                                labels=self.input_y),
        name="loss")
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.predictions, tf.argmax(self.input_y, 1)), "float"),
        name="accuracy")
    tf.summary.scalar("loss", self.total_loss)

    # regularization
    self.l2_loss = tf.contrib.layers.apply_regularization(
        regularizer=tf.contrib.layers.l2_regularizer(reg_weight),
        weights_list=tf.trainable_variables())
    self.final_loss = self.total_loss + self.l2_loss
    tf.summary.scalar("l2_loss", self.l2_loss)
    tf.summary.scalar("final_loss", self.final_loss)
# Define placeholders
x = tf.placeholder('float', [None, n_steps, n_input])
y = tf.placeholder('float', [None, n_classes])
x1 = tf.unstack(x, n_steps, 1)

stacked_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
    stacked_bw_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
mcell = tf.contrib.rnn.MultiRNNCell(stacked_rnn)
mcell_bw = tf.contrib.rnn.MultiRNNCell(stacked_bw_rnn)

outputs, _, _ = rnn.stack_bidirectional_rnn([mcell], [mcell_bw], x1, dtype=tf.float32)
print(outputs[0].shape, len(outputs))
pred = tf.contrib.layers.fully_connected(outputs[-1], n_classes, activation_fn=None)

# Define parameters
learning_rate = 0.01
batch_size = 128
time_steps = 28

# Loss function: cross entropy (p. 107, last line)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# equivalent to tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# cost = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)
# Use a gradient-descent optimizer