def addEncoder(self):
    """Build the bidirectional RNN encoder and store its tensors on ``self``.

    Sets ``encoder_inputs``, ``_encoder_outputs``, ``_decoder_in_state`` and
    ``attention_states``.
    """
    print('adding encoder...')

    # Embed the source tokens; dropout is only applied while training.
    embedded = tf.nn.embedding_lookup(self.embedding, self.x_placeholder)
    self.encoder_inputs = embedded
    if self.is_training:
        self.encoder_inputs = tf.nn.dropout(embedded, CONFIG.KEEPPROB)

    # One cell per direction, both sized like the word embedding.
    forward_cell = BasicRNNCell(CONFIG.DIM_WordEmbedding)
    backward_cell = BasicRNNCell(CONFIG.DIM_WordEmbedding)
    direction_outputs, _final_states = tf.nn.bidirectional_dynamic_rnn(
        forward_cell,
        backward_cell,
        self.encoder_inputs,
        dtype=tf.float32,
        sequence_length=self.x_lens)
    forward_out, backward_out = direction_outputs

    # Concatenate both directions along the feature axis (axis 2).
    self._encoder_outputs = tf.concat([forward_out, backward_out], 2)

    # NOTE(review): the decoder's initial state is derived from the backward
    # outputs only, at the position chosen by ``last_relevant`` -- confirm
    # this is intended rather than the final forward/backward states.
    last_backward = self.last_relevant(backward_out, self.x_lens)
    self._decoder_in_state = self.project_encoder_last_states(last_backward)

    self.attention_states = self._encoder_outputs
def _build_net(self):
    """Build the shared RNN trunk plus the critic and actor heads.

    Returns:
        Tuple ``(mu, sigma, v, actor_params, critic_params)``: the two actor
        outputs, the critic output, and the trainable variables collected
        under ``self.scope + "/actor"`` and ``self.scope + "/critic"``.
    """
    with tf.variable_scope("critic"):
        # NOTE(review): the source was flattened onto one line, so the exact
        # extent of the "state_input" scope is reconstructed -- confirm.
        with tf.variable_scope("state_input"):  # only the critic is tied to the input here
            # add dim, [time_step, feature] => [time_step, batch_size=1, feature]
            s = tf.expand_dims(input=self.s, axis=1, name="timely_input")
            rnn_cell = BasicRNNCell(self.cell_size)
            self.init_state = rnn_cell.zero_state(batch_size=1,
                                                  dtype=tf.float32)
            # output: [time_step, batch_size, cell_size]
            # final_state: [batch_size, cell_size]
            output, self.final_state = tf.nn.dynamic_rnn(
                cell=rnn_cell,
                inputs=s,
                initial_state=self.init_state,
                time_major=True,
                dtype=tf.float32)
            # Collapse the time and batch axes so dense layers see 2-D input.
            cell_out = tf.reshape(tensor=output,
                                  shape=[-1, self.cell_size],
                                  name="flatten_rnn_outputs")
        lc = tf.layers.dense(inputs=cell_out,
                             units=50,
                             activation=tf.nn.relu6,
                             kernel_initializer=self.w_init,
                             bias_initializer=self.b_init,
                             name="lc")
        # Critic output: a single unit with no activation.
        v = tf.layers.dense(inputs=lc,
                            units=1,
                            activation=None,
                            kernel_initializer=self.w_init,
                            bias_initializer=self.b_init,
                            name="V")
    with tf.variable_scope("actor"):
        # The actor reuses the RNN features produced under the critic scope.
        la = tf.layers.dense(inputs=cell_out,
                             units=80,
                             activation=tf.nn.relu6,
                             kernel_initializer=self.w_init,
                             bias_initializer=self.b_init,
                             name="la")
        mu = tf.layers.dense(inputs=la,
                             units=self.n_actions,
                             activation=tf.nn.tanh,
                             kernel_initializer=self.w_init,
                             bias_initializer=self.b_init,
                             name="mu")
        sigma = tf.layers.dense(inputs=la,
                                units=self.n_actions,
                                activation=tf.nn.softplus,
                                kernel_initializer=self.w_init,
                                bias_initializer=self.b_init,
                                name="sigma")
    # Collect the trainable variables of each head by scope prefix.
    actor_params = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope=self.scope + "/actor")
    critic_params = tf.get_collection(key=tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope=self.scope + "/critic")
    return mu, sigma, v, actor_params, critic_params
def make_RNN_cell(self, fn=tf.nn.relu):
    """Create one dropout-wrapped recurrent cell of the configured type.

    The cell class is chosen by ``self.config.cell_type`` and sized by
    ``self.config.Nhidden``.

    Args:
        fn: TensorFlow activation function, e.g. ``tf.nn.relu``, ``tf.nn.tanh``.

    Returns:
        The selected cell wrapped in a ``DropoutWrapper``.

    Raises:
        Exception: if ``cell_type`` is not ``'RNN'``, ``'LSTM'`` or ``'GRU'``.
    """
    kind = self.config.cell_type

    # Reject unknown types up front, then pick the matching class.
    if kind not in ('RNN', 'LSTM', 'GRU'):
        raise Exception(
            "cell_type must be RNN, LSTM or GRU. cell_type was {}".format(
                kind))
    if kind == 'RNN':
        cell_class = BasicRNNCell
    elif kind == 'LSTM':
        cell_class = LSTMCell
    else:
        cell_class = GRUCell
    base_cell = cell_class(num_units=self.config.Nhidden, activation=fn)

    # Dropout is governed by self.keep_prob (input side, variational).
    return DropoutWrapper(base_cell,
                          input_keep_prob=self.keep_prob,
                          variational_recurrent=True,
                          input_size=self.config.Nhidden,
                          dtype=tf.float32)
def __init__(self, sess, params):
    """Build the graph of a binary sequence classifier on top of a basic RNN.

    Args:
        sess: session object, stored on the instance as ``self.sess``.
        params: mapping read for the keys ``'vocab'`` (one-hot depth) and
            ``'state'`` (RNN state size).
    """
    self.sess = sess
    self.params = params

    # Inputs: integer token ids, one-hot encoded before entering the RNN.
    self.batch_ph = tf.placeholder(tf.int32, [None, None])
    one_hot_batch = tf.one_hot(self.batch_ph, depth=self.params['vocab'])
    self.init_state = tf.placeholder(shape=[None, self.params['state']],
                                     dtype=tf.float32,
                                     name='initial_state')

    # Only the final state of the recurrence is kept.
    recurrent_cell = BasicRNNCell(num_units=self.params['state'])
    _, self.final_state = rnn(recurrent_cell,
                              inputs=one_hot_batch,
                              initial_state=self.init_state,
                              dtype=tf.float32)

    # Fully connected layer producing a single logit per example.
    self.y_hat = slim.fully_connected(
        self.final_state,
        1,
        activation_fn=None,
        weights_initializer=tf.contrib.layers.xavier_initializer(),
        biases_initializer=tf.truncated_normal_initializer())

    # Loss and optimizer.
    self.target_ph = tf.placeholder(tf.float32)
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.y_hat, labels=self.target_ph)
    self.loss = tf.reduce_mean(per_example_loss)
    adam = tf.train.AdamOptimizer(learning_rate=1e-3)
    self.optimizer = adam.minimize(self.loss)

    # Accuracy: rounded sigmoid output compared with the target.
    rounded = tf.round(tf.sigmoid(self.y_hat))
    hits = tf.cast(tf.equal(rounded, self.target_ph), tf.float32)
    self.accuracy = tf.reduce_mean(hits)
def add_prediction_op(self):
    """Build the stacked-RNN prediction graph.

    Returns:
        Tensor reshaped to ``[-1, Nsteps_out, Noutputs]`` (sizes taken from
        ``self.config``).
    """
    cfg = self.config

    # One ReLU BasicRNNCell per configured layer.
    layers = []
    for _ in range(cfg.Nlayers):
        layers.append(
            BasicRNNCell(num_units=cfg.Nhidden, activation=tf.nn.relu))
    stacked_cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
    rnn_outputs, _ = tf.nn.dynamic_rnn(stacked_cell,
                                       self.X,
                                       dtype=tf.float32)

    # Flatten so one dense layer maps every hidden vector to the outputs,
    # then restore the sequence layout.
    flat_hidden = tf.reshape(rnn_outputs, [-1, cfg.Nhidden])
    flat_predictions = fully_connected(flat_hidden,
                                       cfg.Noutputs,
                                       activation_fn=None)
    return tf.reshape(flat_predictions,
                      [-1, cfg.Nsteps_out, cfg.Noutputs])
def build_cell(self, name=None):
    """Create the recurrent cell selected by ``self.hparams.cell_type``.

    ``'linear'``, ``'tanh'`` and ``'relu'`` give a ``BasicRNNCell`` with the
    matching activation; ``'gru'`` and ``'lstm'`` give the corresponding cell.

    Args:
        name: optional name forwarded to the cell constructor.

    Returns:
        The constructed cell.

    Raises:
        ValueError: if the cell type is not one of the supported values.
    """
    hp = self.hparams
    kind = hp.cell_type

    # Gated cells are returned directly.
    if kind == 'gru':
        return GRUCell(hp.hidden_units, name=name)
    if kind == 'lstm':
        return LSTMCell(hp.hidden_units, name=name, state_is_tuple=False)

    # The remaining supported types differ only in their activation.
    if kind == 'linear':
        activation = tf.identity
    elif kind == 'tanh':
        activation = tf.tanh
    elif kind == 'relu':
        activation = tf.nn.relu
    else:
        raise ValueError('Provided cell type not supported.')
    return BasicRNNCell(hp.hidden_units, activation=activation, name=name)
def build_rnn(self, in_layer, nodes, batch_size, num_layers=2, mode='RNN'):
    """Build a multi-layer recurrent network over ``in_layer``.

    Args:
        in_layer: input tensor handed to ``tf.nn.dynamic_rnn``.
        nodes: number of units in every layer.
        batch_size: batch size used to create the zero initial state.
        num_layers: number of stacked cells.
        mode: ``'RNN'`` or ``'LSTM'`` (case-insensitive).

    Returns:
        Tuple ``(initial_state, outputs, state)``.

    Raises:
        ValueError: if ``mode`` is neither ``'RNN'`` nor ``'LSTM'``.
            Previously this surfaced as an UnboundLocalError on ``cell``.
    """
    kind = mode.upper()
    if kind == 'RNN':
        cell_class = BasicRNNCell
    elif kind == 'LSTM':
        cell_class = BasicLSTMCell
    else:
        # Fail before any graph construction, naming the offending value.
        raise ValueError(
            "Unsupported mode {!r}; expected 'RNN' or 'LSTM'".format(mode))

    cell = MultiRNNCell([cell_class(nodes) for _ in range(num_layers)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    outputs, state = tf.nn.dynamic_rnn(cell,
                                       in_layer,
                                       initial_state=initial_state)
    return initial_state, outputs, state
def _build(self, inputs):
    """Compute output Tensor from input Tensor.

    Unstacks ``inputs`` along axis 1 into ``self._history_steps`` steps, runs
    a ReLU ``BasicRNNCell`` over them, and returns the last step's input
    concatenated with the last RNN output along axis 1.
    """
    steps = tf.unstack(inputs, num=self._history_steps, axis=1)
    recurrent = BasicRNNCell(self._hidden_size, activation=tf.nn.relu)
    step_outputs, _ = static_rnn(recurrent, steps, dtype=tf.float32)
    return tf.concat([steps[-1], step_outputs[-1]], axis=1)
def __init__(self, sess, vocab_size):
    """Build the graph of a single-sequence binary classifier on a basic RNN.

    Args:
        sess: session object, stored as ``self.sess``.
        vocab_size: depth of the one-hot encoding applied to the input ids.
    """
    # NOTE(review): the source was flattened onto one line; the extent of
    # each name_scope below is reconstructed -- confirm against the original.
    state_size = config.RNN.state_size.int
    self.sess = sess
    # RNN placeholders
    with tf.name_scope('input'):
        self.input_ph = tf.placeholder(tf.int32, [None], name='input_ph')
        # One-hot encode the ids and add a leading axis of size 1.
        # NOTE(review): the local name ``input`` shadows the builtin.
        input = tf.reshape(tf.one_hot(self.input_ph,
                                      depth=vocab_size,
                                      name='input_one_hot'),
                           shape=[1, -1, vocab_size])
        self.init_state_ph = tf.placeholder(shape=[None, state_size],
                                            dtype=tf.float32,
                                            name='init_state_ph')
    # cell = GRUCell(state_size)
    cell = BasicRNNCell(state_size)
    # Only the final state is kept; per-step outputs are discarded.
    _, self.final_state = rnn(cell,
                              input,
                              initial_state=tf.cast(
                                  self.init_state_ph, tf.float32))
    with tf.name_scope('prediction'):
        # Hidden layer of 10 sigmoid units followed by a single linear logit.
        hidden = slim.fully_connected(
            self.final_state,
            10,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.truncated_normal_initializer())
        self.prediction = slim.fully_connected(
            hidden,
            1,
            activation_fn=None,
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            biases_initializer=tf.truncated_normal_initializer())
    # RNN loss
    self.label_ph = tf.placeholder(tf.float32, name='label_ph')
    with tf.name_scope('rnn_loss'):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.prediction,
                                                    labels=self.label_ph))
    with tf.name_scope('rnn_optimizer'):
        # global_step is passed to minimize() below and is not trainable.
        global_step = tf.Variable(0, trainable=False)
        # lr = tf.train.exponential_decay(0.001, global_step, 1000, 0.96, staircase=True)
        lr = 1e-3
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
            self.loss, global_step=global_step)
    with tf.name_scope('accuracy'):
        # Rounded sigmoid of the logit compared with the label.
        self.accuracy = tf.reduce_mean(
            tf.cast(
                tf.equal(tf.round(tf.sigmoid(self.prediction)),
                         self.label_ph), tf.float32))
        variable_summaries(self.accuracy)
def addDecoder(self):
    """Build the attention decoder for training or inference.

    Uses ``self._encoder_outputs`` as attention memory and
    ``self._decoder_in_state`` as the initial decoder state. Depending on
    ``self.is_training`` it fills the ``*_train`` or ``*_inference``
    attributes, and always sets ``self.decoder_outputs``.
    """
    print('adding decoder...')
    decoder_cell = BasicRNNCell(2 * CONFIG.DIM_WordEmbedding)
    self.attention_states = self._encoder_outputs
    self.decoder_inputs_embedded = tf.nn.embedding_lookup(
        self.embedding, self.y_placeholder)

    # Prepare the Bahdanau attention components shared by both modes.
    prepared = seq2seq.prepare_attention(
        attention_states=self.attention_states,
        attention_option='bahdanau',
        num_units=2 * CONFIG.DIM_WordEmbedding)
    keys, values, score_fn, construct_fn = prepared
    attention_kwargs = {
        'encoder_state': self._decoder_in_state,
        'attention_keys': keys,
        'attention_values': values,
        'attention_score_fn': score_fn,
        'attention_construct_fn': construct_fn,
    }

    if self.is_training:
        # Numeric checks are added before the decoder ops are created.
        self.check_op = tf.add_check_numerics_ops()
        train_fn = seq2seq.attention_decoder_fn_train(
            name='attention_decoder', **attention_kwargs)
        train_result = seq2seq.dynamic_rnn_decoder(
            cell=decoder_cell,
            decoder_fn=train_fn,
            inputs=self.decoder_inputs_embedded,
            sequence_length=self.y_lens,
            time_major=False)
        (self.decoder_outputs_train, self.decoder_state_train,
         self.decoder_context_state_train) = train_result
        self.decoder_outputs = self.decoder_outputs_train
        return

    # Inference: decode from the start token until the stop token or until
    # CONFIG.DIM_DECODER steps.
    inference_fn = seq2seq.attention_decoder_fn_inference(
        embeddings=self.embedding,
        start_of_sequence_id=CONFIG.WORDS[CONFIG.STARTWORD],
        end_of_sequence_id=CONFIG.WORDS[CONFIG.STOPWORD],
        maximum_length=CONFIG.DIM_DECODER,
        num_decoder_symbols=CONFIG.DIM_VOCAB,
        output_fn=self.output_fn,
        **attention_kwargs)
    inference_result = seq2seq.dynamic_rnn_decoder(cell=decoder_cell,
                                                   decoder_fn=inference_fn,
                                                   time_major=False)
    (self.decoder_outputs_inference, self.decoder_state_inference,
     self.decoder_context_state_inference) = inference_result
    self.decoder_outputs = self.decoder_outputs_inference
def main1():
    """Apply a 2-unit BasicRNNCell once and dump the graph to ./debug_out."""
    rnn_cell = BasicRNNCell(2)
    inputs = tf.constant([[1.0], [2.0]])
    # NOTE(review): this state has 1 column while the cell has 2 units --
    # confirm the installed TensorFlow version accepts this shape.
    previous_state = tf.constant([[3.0], [4.0]])
    rnn_cell(inputs, previous_state)

    # Write the default graph so it can be inspected afterwards.
    graph_writer = tf.summary.FileWriter('./debug_out')
    graph_writer.add_graph(tf.get_default_graph())
    graph_writer.flush()
def main():
    """Train a basic RNN with a sigmoid cross-entropy loss.

    Hyper-parameters come from the module-level ``parser``; batches come from
    ``generate(args)``. Accuracy and loss are evaluated every
    ``args.log_interval`` steps and, when ``args.log`` is set, written as
    summaries under ``./logs/parity_test``.
    """
    args = parser.parse_args()
    batch_size = args.batch_size
    hidden_size = args.hidden_size

    # Placeholders for inputs.
    x = tf.placeholder(tf.float32,
                       [batch_size, args.ponder, 1 + args.input_size])
    y = tf.placeholder(tf.float32, [batch_size, 1])
    zeros = tf.zeros([batch_size, 1])

    # The final RNN state is projected to a single logit.
    recurrent_cell = BasicRNNCell(args.hidden_size)
    _, final_state = tf.nn.dynamic_rnn(recurrent_cell, x, dtype=tf.float32)
    softmax_w = tf.get_variable("softmax_w", [hidden_size, 1])
    softmax_b = tf.get_variable("softmax_b", [1])
    logits = tf.matmul(final_state, softmax_w) + softmax_b

    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                               logits=logits)
    loss = tf.reduce_mean(per_example_loss)
    train_step = tf.train.AdamOptimizer(args.lr).minimize(loss)

    # A positive logit is counted as a prediction of 1.
    predicted = tf.cast(tf.greater(logits, zeros), tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, y), tf.float32))

    tf.summary.scalar('Accuracy', accuracy)
    tf.summary.scalar('Loss', loss)
    merged = tf.summary.merge_all()

    # Append underscores until the log directory name is unused.
    logdir = './logs/parity_test/LR={}_Len={}_Pond={}'.format(
        args.lr, args.input_size, args.ponder)
    while os.path.isdir(logdir):
        logdir += '_'
    if args.log:
        writer = tf.summary.FileWriter(logdir)

    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=args.vram_fraction)
    session_config = tf.ConfigProto(gpu_options=gpu_options)
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        progress = trange(args.steps)
        for step in progress:
            batch = generate(args)
            feed = {x: batch[0], y: batch[1]}
            if step % args.log_interval == 0:
                # Evaluate on the current batch before training on it.
                summary, step_accuracy, step_loss = sess.run(
                    [merged, accuracy, loss], feed_dict=feed)
                if args.print_results:
                    progress.set_postfix(
                        Loss='{:0.3f}'.format(step_loss),
                        Accuracy='{:0.3f}'.format(step_accuracy))
                if args.log:
                    writer.add_summary(summary, step)
            sess.run(train_step, feed_dict=feed)
def rnn(features, mode, params):
    """Recurrent model.

    Args:
        features: mapping read for ``'feature'`` and ``'sequence_length'``.
        mode: estimator mode; dropout is active only for
            ``tf.estimator.ModeKeys.TRAIN``.
        params: hyper-parameters (``model``, ``hidden_size``, ``batch_size``,
            ``per_frame``, ``max_length``, ``feature_length``, ``dropout``,
            ``num_classes``).

    Returns:
        Logits with ``params.num_classes`` units.
    """
    # "LSTM" and "GRU" select their cells; anything else is a BasicRNNCell.
    cell_class = (BasicLSTMCell if params.model == "LSTM" else
                  GRUCell if params.model == "GRU" else BasicRNNCell)
    cell = cell_class(params.hidden_size)
    shared_rnn_args = {
        'initial_state': cell.zero_state(params.batch_size, dtype=tf.float64),
        'time_major': True,
    }

    if params.per_frame:
        # convert input from (batch_size, max_time, ...) to
        # (max_time, batch_size, ...)
        time_major_inputs = tf.transpose(features['feature'], [1, 0, 2])
        lengths = tf.reshape(features['sequence_length'],
                             shape=(params.batch_size, ))
        step_outputs, _ = tf.nn.dynamic_rnn(cell,
                                            inputs=time_major_inputs,
                                            sequence_length=lengths,
                                            **shared_rnn_args)
        # NOTE(review): the time index is taken from the first entry of
        # features['sequence_length'] and applied to the whole batch --
        # confirm all sequences in a batch share one length.
        last_outputs = step_outputs[features['sequence_length'][0] - 1]
    else:
        # reshape MFCC vector to fit in one time step
        single_step = tf.reshape(features['feature'],
                                 shape=(1, params.batch_size,
                                        params.max_length *
                                        params.feature_length))
        step_outputs, _ = tf.nn.dynamic_rnn(cell,
                                            inputs=single_step,
                                            **shared_rnn_args)
        last_outputs = tf.reshape(step_outputs,
                                  shape=(params.batch_size,
                                         params.hidden_size))

    # apply dropout
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    dropped = tf.layers.dropout(last_outputs,
                                rate=params.dropout,
                                training=is_training)
    return tf.layers.dense(dropped,
                           units=params.num_classes,
                           activation=None)
def __init__(self, n_steps, n_inputs, n_hidden, keep_prob=1.0):
    """Construct a two-layer RNN whose output width equals its input width.

    Args:
        n_steps: number of time steps in each input sequence.
        n_inputs: feature size per step (also used for the output layer).
        n_hidden: number of units in the hidden recurrent layer.
        keep_prob: keep probability for the hidden cell's input and output.
    """
    self.X = tf.placeholder(tf.float32, [None, n_steps, n_inputs],
                            name='inputs')
    with tf.variable_scope('rnn'):
        # Hidden tanh layer with dropout on both sides.
        hidden_layer = DropoutWrapper(
            BasicRNNCell(num_units=n_hidden, activation=tf.nn.tanh),
            input_keep_prob=keep_prob,
            output_keep_prob=keep_prob)
        # Output tanh layer maps back to the input width.
        output_layer = BasicRNNCell(num_units=n_inputs,
                                    activation=tf.nn.tanh)
        stacked = tf.contrib.rnn.MultiRNNCell([hidden_layer, output_layer])
        self.outputs, _ = tf.nn.dynamic_rnn(stacked,
                                            self.X,
                                            dtype=tf.float32)
def build_rnn_model(self):
    """Create the recurrent cell named by ``self.rnn_type``.

    Supported types are ``'RNN'``, ``'LSTM'``, ``'GRU'`` and ``'PhasedLSTM'``;
    the cell has ``self.num_rnn_hidden`` units. When
    ``self.dropout_keep_prob`` is not ``None`` the cell is wrapped in a
    ``DropoutWrapper`` with that output keep probability. The result is
    stored in ``self._rnn_model`` and ``self._Losses`` is reset to an empty
    list.

    Raises:
        ValueError: if ``self.rnn_type`` is not a supported type.
            Previously this surfaced as an IndexError from the array lookup.
    """
    n_hidden = self.num_rnn_hidden

    # Dictionary dispatch; the lambdas defer construction so that only the
    # requested cell is ever instantiated.
    cell_builders = {
        'RNN': lambda: BasicRNNCell(n_hidden),
        'LSTM': lambda: rnn_cell.LSTMCell(n_hidden),
        'GRU': lambda: rnn_cell.GRUCell(n_hidden),
        'PhasedLSTM': lambda: PhasedLSTMCell(n_hidden),
    }
    build_cell = cell_builders.get(self.rnn_type)
    if build_cell is None:
        raise ValueError(
            "Unsupported rnn_type {!r}; expected one of {}".format(
                self.rnn_type, sorted(cell_builders)))
    self._rnn_model = build_cell()

    if self.dropout_keep_prob is not None:
        self._rnn_model = tf.nn.rnn_cell.DropoutWrapper(
            self._rnn_model, output_keep_prob=self.dropout_keep_prob)
    self._Losses = []
def get_cell(cell_type, size, layers=1, direction='unidirectional'):
    """Return a recurrent cell or layer for ``cell_type``.

    Args:
        cell_type: identifier of the implementation to build; any value not
            listed below falls back to ``BasicLSTMCell``.
        size: number of units.
        layers: number of layers (only used by the Cudnn variants).
        direction: direction argument passed to ``CudnnLSTM`` / ``CudnnGRU``.

    Returns:
        The constructed cell/layer object.
    """
    # Cudnn variants take the layer count as well.
    if cell_type == "cudnn_lstm":
        return CudnnLSTM(layers, size, direction=direction)
    if cell_type == "cudnn_gru":
        return CudnnGRU(layers, size, direction=direction)
    if cell_type == "cudnn_rnn":
        return CudnnRNNTanh(layers, size)

    # Single-cell variants only need the size.
    if cell_type == "layer_norm_basic":
        return LayerNormBasicLSTMCell(size)
    if cell_type == "lstm_block_fused":
        return tf.contrib.rnn.LSTMBlockFusedCell(size)
    if cell_type == "lstm_block":
        return LSTMBlockCell(size)
    if cell_type == "gru_block":
        return GRUBlockCell(size)
    if cell_type == "rnn":
        return BasicRNNCell(size)

    # Default for every other value.
    return BasicLSTMCell(size)
def make_RNN_cell(self, Nneurons, fn=tf.nn.relu):
    """Return a new dropout-wrapped cell (for deep recurrent networks).

    The cell class is chosen by ``self.config.cell_type``.

    Args:
        Nneurons: number of units in the cell.
        fn: TensorFlow activation function, e.g. ``tf.nn.relu``, ``tf.nn.tanh``.

    Returns:
        The selected cell wrapped in a ``DropoutWrapper``.

    Raises:
        ValueError: if ``cell_type`` is not ``'RNN'``, ``'LSTM'`` or ``'GRU'``.
            Previously this surfaced as an UnboundLocalError on ``cell``.
    """
    # Make cell type
    cell_type = self.config.cell_type
    if cell_type == 'RNN':
        cell = BasicRNNCell(num_units=Nneurons, activation=fn)
    elif cell_type == 'LSTM':
        cell = LSTMCell(num_units=Nneurons, activation=fn)
    elif cell_type == 'GRU':
        cell = GRUCell(num_units=Nneurons, activation=fn)
    else:
        raise ValueError(
            "cell_type must be RNN, LSTM or GRU. cell_type was {}".format(
                cell_type))

    # Include dropout: when training, keep_prob is set by config, and is 1
    # in eval/predict.
    cell = DropoutWrapper(cell,
                          input_keep_prob=self.keep_prob,
                          variational_recurrent=True,
                          input_size=Nneurons,
                          dtype=tf.float32)
    return cell
def build_rnn(self, rnn_type, hidden_size, num_layes):
    """Stack ``num_layes`` recurrent cells into one ``MultiRNNCell``.

    Args:
        rnn_type: ``'lstm'`` or ``'gru'`` select those cells; any other
            truthy value yields a ``BasicRNNCell``.
        hidden_size: number of units per layer.
        num_layes: number of layers to stack.

    Returns:
        A ``MultiRNNCell`` with ``state_is_tuple=True``.

    Raises:
        NotImplementedError: if ``rnn_type`` is falsy and at least one layer
            is requested.
    """

    def make_layer():
        if rnn_type == 'lstm':
            return LSTMCell(num_units=hidden_size,
                            state_is_tuple=True,
                            initializer=tf.random_uniform_initializer(
                                -0.25, 0.25))
        if rnn_type == 'gru':
            return GRUCell(num_units=hidden_size)
        # NOTE(review): every other truthy value (including misspellings)
        # silently becomes a BasicRNNCell -- confirm this is intended.
        if not rnn_type:
            raise NotImplementedError(
                f'the rnn type is unexist: {rnn_type}')
        return BasicRNNCell(num_units=hidden_size)

    layers = [make_layer() for _ in range(num_layes)]
    return MultiRNNCell(layers, state_is_tuple=True)
def seq_predict_model(x, w, b, time_step_size, vector_size):
    """Run a 10-unit tanh RNN over ``x`` and project its last output.

    Args:
        x: input with shape [batch_size, time_step_size, vector_size].
        w: weight matrix of the linear output layer.
        b: bias of the linear output layer.
        time_step_size: number of time steps to split the input into.
        vector_size: feature size of each time step.

    Returns:
        Tuple ``(prediction, state_size)``: the linear projection of the last
        RNN output, and the cell's state size.

    Note:
        The zero initial state is sized by the module-level ``batch_size``.
    """
    # [batch_size, time_step_size, vector_size] -> time-major layout.
    time_major = tf.transpose(x, [1, 0, 2])
    # Flatten to [time_step_size * batch_size, vector_size].
    flattened = tf.reshape(time_major, [-1, vector_size])
    # Split along axis 0 into time_step_size pieces, one per time step.
    step_inputs = tf.split(flattened, time_step_size, 0)

    # num_units is the output width of the RNN cell (10 here); a following
    # layer therefore receives 10 inputs, and it also fixes the shapes of
    # the cell's parameters.
    rnn_cell = BasicRNNCell(num_units=10, activation=math_ops.tanh)
    # The initial state h0 is all zeros.
    zero_state = tf.zeros([batch_size, rnn_cell.state_size])
    # step_outputs holds the output at every time step; the final state is
    # not needed.
    step_outputs, _ = static_rnn(rnn_cell,
                                 step_inputs,
                                 initial_state=zero_state)

    # Only the last output is used, followed by a linear output layer.
    prediction = tf.matmul(step_outputs[-1], w) + b
    return prediction, rnn_cell.state_size
def prediction(self):
    """Apply one BasicRNNCell step to ``self.data`` and a softmax layer.

    Returns:
        Softmax output reshaped to ``[-1, max_length, num_classes]``, where
        both sizes are read from the static shape of ``self.target``.
    """
    data_shape = self.data.get_shape()
    hidden_units = int(data_shape[2])
    # Debug output: dimensions 2, 1 and 0 of the data, in that order.
    print(hidden_units)
    print(int(data_shape[1]))
    print(int(data_shape[0]))

    rnn_cell = BasicRNNCell(hidden_units)
    # The initial state is created for a fixed batch of 10.
    zero_state = tf.zeros([10, rnn_cell.state_size])
    cell_output, _ = rnn_cell(self.data, zero_state)

    # softmax layer
    target_shape = self.target.get_shape()
    max_length = int(target_shape[1])  # timesteps
    num_classes = int(target_shape[2])  # output size
    # weight [num_hidden, output size] bias [output size]
    weight, bias = self.weight_and_bias(hidden_units, num_classes)

    # Flatten to apply same weights to all time steps.
    # Open question from the original author: what if the total number of
    # elements is not divisible by the number of hidden units?
    flat_output = tf.reshape(cell_output, [-1, hidden_units])
    probabilities = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
    return tf.reshape(probabilities, [-1, max_length, num_classes])
def construct_rnn(self,
                  obz,
                  rnn_history_steps,
                  rnn_hid_units,
                  rnn_num_layers=1,
                  reuse=False,
                  name=""):
    """Build a basic RNN over the last ``rnn_history_steps`` observations.

    Args:
        obz: array with shape [batch_size, history, state_size].
        rnn_history_steps: number of steps ``obz`` is unstacked into.
        rnn_hid_units: number of units in the cell.
        rnn_num_layers: accepted but not used by this method.
        reuse: forwarded to ``BasicRNNCell``.
        name: label used only in the progress message.

    Returns:
        The RNN output at the last step.
    """
    message = "Generating {0} RNN that is applied to {1} states".format(
        name, rnn_history_steps)
    print(message)

    # One tensor per history step, fed through a single basic cell.
    history = tf.unstack(obz, num=rnn_history_steps, axis=1)
    recurrent = BasicRNNCell(rnn_hid_units, reuse=reuse)
    per_step_outputs, _ = static_rnn(recurrent, history, dtype=tf.float32)

    # Only the last output is returned.
    return per_step_outputs[-1]
def get_batch(batch, size=5):
    """Return a window of ``size`` samples from ``t_vals`` and ``series``.

    The window start wraps around with period ``40 - size``.
    Reads the module-level sequences ``t_vals`` and ``series``.
    """
    low = (batch * size) % (40 - size)
    high = low + size
    return t_vals[low:high], series[low:high]


# Model dimensions.
n_steps = 20
n_inputs = 1
n_neurons = 100
n_outputs = 1
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])
# Wrap the ReLU cell so each step is projected down to n_outputs values.
cell = OutputProjectionWrapper(BasicRNNCell(num_units=n_neurons,
                                            activation=tf.nn.relu),
                               output_size=n_outputs)
outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
# Mean squared error between the projected outputs and the targets.
loss = tf.reduce_mean(tf.square(outputs - y), name='loss')
loss_summary = tf.summary.scalar('loss', loss)
optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001)
training_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()
batch_size = 100
n_iterations = 20000
# NOTE(review): the body of this session block is not part of this snippet.
with tf.Session() as sess:
def make_rnn_cell():
    """Create a ReLU ``BasicRNNCell`` sized by the module-level ``n_neurous``."""
    relu_cell = BasicRNNCell(num_units=n_neurous, activation=tf.nn.relu)
    return relu_cell
def fit(self,
        X,
        Y,
        batch_sz=20,
        learning_rate=0.1,
        mu=0.9,
        activation=tf.nn.sigmoid,
        epochs=100,
        show_fig=False):
    """Train a simple RNN sequence classifier with momentum SGD.

    Args:
        X: input array of size N x T x D.
        Y: integer targets, indexed per sample and per time step.
        batch_sz: number of sequences per batch.
        learning_rate: learning rate of the momentum optimizer.
        mu: momentum coefficient.
        activation: activation of the RNN unit (stored as ``self.f``).
        epochs: maximum number of passes over the data.
        show_fig: when true, plot the cost per epoch at the end.
    """
    N, T, D = X.shape  # X is of size N x T(n) x D
    K = len(set(Y.flatten()))
    M = self.M
    self.f = activation

    # Output weights only: Wx, Wh and bh belong to the RNN unit and are
    # created by BasicRNNCell.
    self.Wo = tf.Variable(init_weight(M, K).astype(np.float32))
    self.bo = tf.Variable(np.zeros(K, dtype=np.float32))

    # tf Graph input
    tfX = tf.compat.v1.placeholder(tf.float32,
                                   shape=(batch_sz, T, D),
                                   name='inputs')
    tfY = tf.compat.v1.placeholder(tf.int64,
                                   shape=(batch_sz, T),
                                   name='targets')

    # Turn tfX into a sequence, e.g. T tensors all of size (batch_sz, D),
    # and run the simple rnn unit over it.
    step_inputs = x2sequence(tfX, T, D, batch_sz)
    rnn_unit = BasicRNNCell(num_units=self.M, activation=self.f)
    rnn_outputs, _ = get_rnn_output(rnn_unit, step_inputs, dtype=tf.float32)

    # rnn_outputs are of size (T, batch_sz, M); make them (batch_sz, T, M)
    # and then flatten to (T*batch_sz, M).
    batch_major = tf.transpose(a=rnn_outputs, perm=(1, 0, 2))
    flat_outputs = tf.reshape(batch_major, (T*batch_sz, M))

    # Linear output layer on every time step.
    logits = tf.matmul(flat_outputs, self.Wo) + self.bo
    predict_op = tf.argmax(input=logits, axis=1)
    flat_targets = tf.reshape(tfY, (T*batch_sz,))

    step_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=flat_targets)
    cost_op = tf.reduce_mean(input_tensor=step_losses)
    momentum_opt = tf.compat.v1.train.MomentumOptimizer(learning_rate,
                                                        momentum=mu)
    train_op = momentum_opt.minimize(cost_op)

    costs = []
    n_batches = N // batch_sz

    init = tf.compat.v1.global_variables_initializer()
    with tf.compat.v1.Session() as session:
        session.run(init)
        for epoch in range(epochs):
            X, Y = shuffle(X, Y)
            n_correct = 0
            cost = 0
            for start in range(0, n_batches * batch_sz, batch_sz):
                Xbatch = X[start:start + batch_sz]
                Ybatch = Y[start:start + batch_sz]

                _, c, p = session.run([train_op, cost_op, predict_op],
                                      feed_dict={tfX: Xbatch, tfY: Ybatch})
                cost += c
                # Only the prediction at the last time step of each
                # sequence counts towards the classification rate.
                n_correct += sum(p[(b + 1)*T - 1] == Ybatch[b][-1]
                                 for b in range(batch_sz))
            if epoch % 10 == 0:
                print("i:", epoch, "cost:", cost, "classification rate:",
                      (float(n_correct)/N))
            if n_correct == N:
                # Every sample is classified correctly: stop early.
                print("i:", epoch, "cost:", cost, "classification rate:",
                      (float(n_correct)/N))
                break
            costs.append(cost)

    if show_fig:
        plt.plot(costs)
        plt.show()
# Report the training data shapes.
logger.info('X.shape={}, y.shape={}'.format(X_train.shape, y_train.shape))
vocab_size = data_provider.vocab_size
# Graph inputs: one sequence of vocab_size-wide vectors per example, and a
# single vocab_size-wide target per example.
input_data = tf.placeholder(tf.float32, [batch_size, seq_length, vocab_size])
targets = tf.placeholder(tf.float32, [batch_size, vocab_size])
# The test provider is given the training provider's vocabulary.
test_data_provider = DataProvider(test_text, seq_length, batch_size, logger,
                                  data_provider.vocab)
# Select the recurrent cell from the command line; any value other than
# 'lstm', 'rnn' or 'gru' falls back to SCRNCell.
if args.cell == 'lstm':
    cell = LSTMCell(num_units=rnn_size)
elif args.cell == 'rnn':
    cell = BasicRNNCell(num_units=rnn_size)
elif args.cell == 'gru':
    cell = GRUCell(num_units=rnn_size)
else:
    cell = SCRNCell(num_units=rnn_size,
                    context_units=context_size,
                    alpha=alpha)
# initial_state = cell.zero_state(batch_size, tf.float32)
# Define weights
weights = {'out': tf.Variable(tf.random_normal([rnn_size, vocab_size]))}
biases = {'out': tf.Variable(tf.random_normal([vocab_size]))}
# Unstack along axis 1 into seq_length per-step tensors for static_rnn.
x = tf.unstack(input_data, seq_length, 1)
outputs, states = rnn.static_rnn(cell, x, dtype=tf.float32)
# Only the last step's output is projected to vocabulary logits.
logits = tf.matmul(outputs[-1], weights['out']) + biases['out']
def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, norm, update_gate, activation, lambd, layer_norm,
         zoneout, visualization_experiment):
    """Train and evaluate a recurrent model on the copying task.

    Builds the graph for the selected ``model``, trains for ``n_iter`` steps
    with RMSProp, periodically logs and checkpoints under
    ``../../train_log/copying/T<T>/<filename>``, and finally evaluates on a
    test batch. When ``visualization_experiment`` is set, loss values are
    evaluated over generated weight points, saved, and the process exits.

    Args:
        model: one of "LSTM", "GRU", "RUM", "EUNN", "GORU", "RNN".
        T: delay parameter of the copying task (sequence length is T + 20).
        n_iter: number of training steps.
        n_batch: batch size.
        n_hidden: hidden size of the recurrent cell.
        capacity, comp, FFT: options forwarded to EUNNCell / GORUCell.
        learning_rate: RMSProp learning rate (converted with ``float``).
        norm: forwarded to RUMCell as ``eta_``; also tagged in the filename.
        update_gate, lambd, layer_norm, zoneout: options forwarded to RUMCell.
        activation: "relu", "sigmoid", "tanh" or "softsign" (RUM only).
        visualization_experiment: run the loss-landscape experiment instead
            of training.

    Raises:
        ValueError: if ``model`` is unknown, or ``activation`` is unknown
            while ``model`` is "RUM".
    """
    learning_rate = float(learning_rate)

    # data params
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch
    n_steps = T + 20
    n_classes = 9

    # create data
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # graph and gradients
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_steps])

    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # input to hidden
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden,
                       kernel_initializer=tf.orthogonal_initializer())
    elif model == "RUM":
        # activation
        if activation == "relu":
            act = tf.nn.relu
        elif activation == "sigmoid":
            act = tf.nn.sigmoid
        elif activation == "tanh":
            act = tf.nn.tanh
        elif activation == "softsign":
            act = tf.nn.softsign
        else:
            # Previously this left `act` unbound and failed with a NameError.
            raise ValueError(
                "Unsupported activation {!r} for RUM".format(activation))
        if visualization_experiment:
            # placeholder
            temp_target = tf.placeholder("float32",
                                         [n_hidden + 10, n_hidden])
            temp_target_bias = tf.placeholder("float32", [n_hidden])
            temp_embed = tf.placeholder("float32", [10, n_hidden])
        cell = RUMCell(
            n_hidden,
            eta_=norm,
            update_gate=update_gate,
            lambda_=lambd,
            activation=act,
            use_layer_norm=layer_norm,
            use_zoneout=zoneout,
            visualization=visualization_experiment,
            temp_target=temp_target if visualization_experiment else None,
            temp_target_bias=temp_target_bias
            if visualization_experiment else None,
            temp_embed=temp_embed if visualization_experiment else None)
    elif model == "EUNN":
        if visualization_experiment:
            # placeholder
            # NOTE(review): these placeholders are fed during the
            # visualization loops but are not passed to EUNNCell -- confirm.
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        cell = EUNNCell(n_hidden, capacity, FFT, comp, name="eunn")
    elif model == "GORU":
        if visualization_experiment:
            # placeholder
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        # The placeholders only exist in visualization mode; pass None
        # otherwise (same convention as the RUM branch) instead of failing
        # on an unbound name.
        cell = GORUCell(
            n_hidden,
            capacity,
            FFT,
            temp_theta0=temp_theta0 if visualization_experiment else None,
            temp_theta1=temp_theta1 if visualization_experiment else None)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    else:
        # Previously this left `cell` unbound and failed just below.
        raise ValueError("Unsupported model {!r}".format(model))
    hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # hidden to output
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)

    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    # Apply the output projection to every time step separately, then
    # transpose the stacked result and add the bias.
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    tf.summary.scalar('cost', cost)
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # initialization
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # save
    # The run directory name encodes the active options. The "E" tag uses
    # `norm` (the value passed to RUMCell as eta_); the previous code
    # referenced an undefined name `eta`.
    filename = model + "_H" + str(n_hidden) + "_" + \
        ("L" + str(lambd) + "_" if lambd else "") + \
        ("E" + str(norm) + "_" if norm else "") + \
        ("A" + activation + "_" if activation else "") + \
        ("U_" if update_gate else "") + \
        ("Z_" if zoneout and model == "RUM" else "") + \
        ("ln_" if layer_norm and model == "RUM" else "") + \
        (str(capacity) if model in ["EUNN", "GORU"] else "") + \
        ("FFT_" if model in ["EUNN", "GORU"] and FFT else "") + \
        ("VE_" if model in ["EUNN", "GORU", "RUM"] and
         visualization_experiment else "") + \
        "B" + str(n_batch)
    save_path = os.path.join('../../train_log', 'copying', 'T' + str(T),
                             filename)

    file_manager(save_path)

    # what follows is task specific
    filepath = os.path.join(save_path, "eval.txt")
    if not os.path.exists(os.path.dirname(filepath)):
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            # Another process may have created the directory meanwhile.
            if exc.errno != errno.EEXIST:
                raise
    f = open(filepath, 'w')
    f.write("accuracies \n")

    # NOTE(review): `kwargs` is not defined in this function; it is
    # presumably a module-level dict of the run arguments -- confirm.
    log(kwargs, save_path)

    merged_summary = tf.summary.merge_all()

    parameters_profiler()

    # train
    saver = tf.train.Saver()
    step = 0

    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(save_path, sess.graph)

        steps = []
        losses = []
        accs = []

        while step < n_iter:
            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]

            if visualization_experiment:
                # initiative to write simpler code
                if model == "RUM":
                    number_of_weights = (n_hidden + 10) * \
                        n_hidden + n_hidden + 10 * n_hidden
                elif model in ["GORU", "EUNN"]:
                    # assuming that n_hidden is even.
                    number_of_weights = n_hidden - 1

                print(col("strating linear visualization", 'b'))
                num_points = 200

                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points)

                processed_placeholders = process_vis(weights,
                                                     num_points,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders
                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders

                # Evaluate the loss at every generated point.
                collect_losses = []
                for i in range(num_points):
                    if model == "RUM":
                        loss = sess.run(cost,
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y,
                                            temp_target: feed_temp_target[i],
                                            temp_target_bias:
                                            feed_temp_target_bias[i],
                                            temp_embed: feed_temp_embed[i]
                                        })
                    elif model in ["EUNN", "GORU"]:
                        loss = sess.run(cost,
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y,
                                            temp_theta0: feed_temp_theta0[i],
                                            temp_theta1: feed_temp_theta1[i]
                                        })
                    print(col("iter: " + str(i) + " loss: " + str(loss), 'y'))
                    collect_losses.append(loss)
                np.save(os.path.join(save_path, "linear_height"),
                        np.array(collect_losses))
                np.save(os.path.join(save_path, "linear_coord"),
                        np.array(coord))
                print(col("done with linear visualization", 'b'))

                #####################

                print(col("strating contour visualization", 'b'))
                num_points = 20
                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points, type_vis="contour")
                np.save(os.path.join(save_path, "contour_coord"),
                        np.array(coord))
                processed_placeholders = process_vis(weights,
                                                     num_points**2,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders
                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders

                # Evaluate the loss on a num_points x num_points grid; the
                # processed values are indexed as i * num_points + j.
                collect_contour = np.empty((num_points, num_points))
                for i in range(num_points):
                    for j in range(num_points):
                        if model == "RUM":
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x: batch_x,
                                    y: batch_y,
                                    temp_target:
                                    feed_temp_target[i * num_points + j],
                                    temp_target_bias:
                                    feed_temp_target_bias[i * num_points + j],
                                    temp_embed:
                                    feed_temp_embed[i * num_points + j]
                                })
                        elif model in ["GORU", "EUNN"]:
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x: batch_x,
                                    y: batch_y,
                                    temp_theta0:
                                    feed_temp_theta0[i * num_points + j],
                                    temp_theta1:
                                    feed_temp_theta1[i * num_points + j]
                                })
                        collect_contour[i, j] = loss
                        print(
                            col(
                                "iter: " + str(i) + "," + str(j) +
                                " loss: " + str(loss), 'y'))
                np.save(os.path.join(save_path, "contour_height"),
                        np.array(collect_contour))

                # The visualization experiment replaces training entirely.
                print(col("exiting visualization experiment", 'r'))
                exit()

            # Log metrics on the batch, then take one optimization step.
            summ, acc, loss = sess.run([merged_summary, accuracy, cost],
                                       feed_dict={
                                           x: batch_x,
                                           y: batch_y
                                       })
            train_writer.add_summary(summ, step)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

            print(
                col(
                    "Iter " + str(step) + ", Minibatch Loss: " +
                    "{:.6f}".format(loss) + ", Training Accuracy: " +
                    "{:.5f}".format(acc), 'g'))

            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            if step % 200 == 0:
                f.write(col("%d\t%f\t%f\n" % (step, loss, acc), 'y'))
                f.flush()

            if step % 1000 == 0:
                print(col("saving graph and metadata in " + save_path, "b"))
                saver.save(sess, os.path.join(save_path, "model"))

            step += 1

        print(col("Optimization Finished!", 'b'))

        # test
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        test_loss = sess.run(cost, feed_dict={x: test_x, y: test_y})
        f.write(
            col(
                "Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc), 'g'))
        f.close()
@desc: 同上 """ import tensorflow as tf from tensorflow.contrib.rnn import BasicRNNCell from tensorflow.contrib.rnn import static_rnn import numpy as np n_steps = 2 n_inputs = 3 n_neurons = 5 x = tf.placeholder(tf.float32, [None, n_steps, n_inputs]) x_seqs = tf.unstack(tf.transpose(x, perm=[1, 0, 2])) basic_cell = BasicRNNCell(num_units=n_neurons) output_seqs, states = static_rnn(basic_cell, x_seqs, dtype=tf.float32) outputs = tf.transpose(tf.stack(output_seqs), perm=[1, 0, 2]) x_batch = np.array([ [[0, 1, 2], [9, 8, 7]], [[3, 4, 5], [0, 0, 0]], [[6, 7, 8], [6, 5, 4]], [[9, 0, 1], [3, 2, 1]], ]) init = tf.global_variables_initializer() with tf.Session() as sess: init.run() outputs_val = outputs.eval(feed_dict={x: x_batch})
# Hyper-parameters for the sequence-regression graph built below.
# NOTE(review): n_steps, n_inputs, n_neurous and fully_connected are expected
# to be defined/imported earlier in the script -- confirm.
n_outputs = 1
learning_rate = 4e-4
n_iterations = 10000
batch_size = 50

# Inputs and per-time-step targets, both batch-major.
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

# At every time step we now have an output vector of size 100, but what we actually need is a single output value.
# The simplest solution is to wrap the cell in an OutputProjectionWrapper.
# cell = OutputProjectionWrapper(BasicRNNCell(num_units=n_neurous, activation=tf.nn.relu), output_size=n_outputs)
# Use a trick to improve speed
cell = BasicRNNCell(num_units=n_neurous, activation=tf.nn.relu)
rnn_outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Flatten the RNN outputs to [-1, n_neurous], project every row to n_outputs
# values with one linear layer (no activation), then restore the
# [-1, n_steps, n_outputs] layout.
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurous])
stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

# Mean squared error between predictions and targets, minimized with Adam.
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# 101 evenly spaced points on [0, 15]; drop the last one and add leading and
# trailing axes, giving an array of shape (1, 100, 1).
X_data = np.linspace(0, 15, 101)
X_batch = X_data[:-1][np.newaxis, :, np.newaxis]
def _ensure_dir(path):
    """Create directory *path* (with parents) unless it already exists."""
    if os.path.exists(path):
        return
    try:
        os.makedirs(path)
    except OSError as exc:
        # Guard against a race: the directory may have been created by
        # someone else between the existence check and makedirs().
        if exc.errno != errno.EEXIST:
            raise


def _global_variable(name):
    """Return the TF global variable whose ``.name`` equals *name*.

    Raises IndexError when no such variable exists.
    """
    return [v for v in tf.global_variables() if v.name == name][0]


def _save_cell_weights(sess, model, research_filename, step):
    """Dump gate (and, for RUM/ARUM, candidate) kernels and biases as .npy."""
    if model not in ("GRU", "RUM", "ARUM"):
        return
    scope = "rnn/" + model.lower() + "_cell/"

    kernel = _global_variable(scope + "gates/kernel:0")
    bias = _global_variable(scope + "gates/bias:0")
    k, b = sess.run([kernel, bias])
    np.save(research_filename + "/kernel_" + str(step), k)
    np.save(research_filename + "/bias_" + str(step), b)

    if model in ("RUM", "ARUM"):
        kernel_emb = _global_variable(scope + "candidate/kernel:0")
        bias_emb = _global_variable(scope + "candidate/bias:0")
        k_emb, b_emb = sess.run([kernel_emb, bias_emb])
        np.save(research_filename + "/kernel_emb_" + str(step), k_emb)
        np.save(research_filename + "/bias_emb_" + str(step), b_emb)


def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay, learning_rate_decay, norm, grid_name):
    """Train and evaluate a recurrent cell on the copying task.

    Builds the graph (one-hot input -> RNN -> linear read-out), trains it
    with RMSProp for ``n_iter`` mini-batches, logs progress to a text file
    under ``./output/copying/`` and TensorBoard summaries to ``./logs/``,
    periodically saves checkpoints / weight snapshots, and finally writes
    the loss and accuracy on a held-out batch.

    Args:
        model: Cell type; one of "LSTM", "GRU", "RUM", "ARUM", "EUNN",
            "GORU" or "RNN".
        T: Task parameter forwarded to ``copying_data``; sequences have
            ``T + 20`` time steps.
        n_iter: Number of training iterations (one mini-batch each).
        n_batch: Mini-batch size; also the size of the test set.
        n_hidden: Number of hidden units of the cell.
        capacity: Forwarded to ``EUNNCell`` / ``GORUCell``.
        comp: Forwarded to ``EUNNCell``.
        FFT: Forwarded to ``EUNNCell`` / ``GORUCell``.
        learning_rate: RMSProp learning rate (converted with ``float``).
        decay: RMSProp decay (converted with ``float``).
        learning_rate_decay: Accepted for interface compatibility; unused.
        norm: Passed as ``T_norm`` to ``RUMCell`` / ``ARUMCell``; when not
            None it is also appended to the log file name.
        grid_name: Optional sub-directory name for the outputs, or None.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    learning_rate = float(learning_rate)
    decay = float(decay)

    # --- Set data params ----------------
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch

    n_steps = T + 20
    n_classes = 9

    # --- Create data --------------------
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # --- Create graph and compute gradients ----------------------
    with tf.name_scope('inputs'):
        x = tf.placeholder("int32", [None, n_steps], name='x_input')
        y = tf.placeholder("int64", [None, n_steps], name='y_input')

    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden,
                       kernel_initializer=tf.orthogonal_initializer())
    elif model == "RUM":
        cell = RUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM":
        cell = ARUMCell(n_hidden, T_norm=norm)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    else:
        # Previously an unknown name surfaced later as a NameError on
        # `hidden_out`; fail with an explicit message instead.
        raise ValueError("Unknown model: %r" % (model,))
    hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # --- Hidden Layer to Output ----------------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)

    V_weights = tf.get_variable(
        "V_weights",
        shape=[n_hidden, n_classes],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable(
        "V_bias",
        shape=[n_classes],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.01))

    # Apply the read-out to every time step, then go back to batch-major.
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # --- evaluate process ----------------------
    with tf.name_scope('evaluate'):
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=output_data, labels=y))
            tf.summary.scalar('cost', cost)
        with tf.name_scope('correnct_pred'):
            correct_pred = tf.equal(tf.argmax(output_data, 2), y)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()

    print("\n###")
    sumz = 0
    for i in tf.global_variables():
        n_params = np.prod(np.array(i.get_shape().as_list()))
        print(i.name, i.shape, n_params)
        sumz += n_params
    print("# parameters: ", sumz)
    print("###\n")

    # --- save result ----------------------
    filename = "./output/copying/"
    if grid_name is not None:
        filename += grid_name + "/"
    filename += "T=" + str(T) + "/"
    run_tag = model + "_N=" + str(n_hidden) + "_lambda=" + str(
        learning_rate) + "_decay=" + str(decay)
    research_filename = filename + "researchModels" + "/" + run_tag + "/"
    filename += run_tag
    if norm is not None:
        filename += "_norm=" + str(norm)
    filename = filename + ".txt"
    checkpoint_prefix = research_filename + "/modelCheckpoint/"

    _ensure_dir(os.path.dirname(filename))
    _ensure_dir(os.path.dirname(research_filename))
    _ensure_dir(os.path.dirname(checkpoint_prefix))

    # --- Training Loop ----------------------
    saver = tf.train.Saver()
    # Build the merged summary op exactly once. Creating summary ops or
    # FileWriters inside the loop grows the graph and leaks event files.
    merged = tf.summary.merge_all()
    session_config = tf.ConfigProto(log_device_placement=False,
                                    allow_soft_placement=False)

    with open(filename, 'w') as f, \
            tf.Session(config=session_config) as sess:
        f.write("########\n\n")
        f.write("## \tModel: %s with N=%d" % (model, n_hidden))
        f.write("\n\n")
        f.write("########\n\n")

        writer = tf.summary.FileWriter("./logs/", sess.graph)
        try:
            sess.run(init)

            step = 0
            while step < n_iter:
                feed = {
                    x: train_x[step * n_batch:(step + 1) * n_batch],
                    y: train_y[step * n_batch:(step + 1) * n_batch],
                }

                sess.run(optimizer, feed_dict=feed)
                # One forward pass yields the summaries and both metrics
                # (evaluated after the parameter update, as before).
                summary, acc, loss = sess.run([merged, accuracy, cost],
                                              feed_dict=feed)
                writer.add_summary(summary, step)

                print("Iter " + str(step) + ", Minibatch Loss= " +
                      "{:.6f}".format(loss) + ", Training Accuracy= " +
                      "{:.5f}".format(acc))

                if step == 0:
                    f.write("%d\t%f\t%f\n" % (step, loss, acc))

                step += 1

                if step % 200 == 199:
                    f.write("%d\t%f\t%f\n" % (step, loss, acc))

                if step % 10000 == 0:
                    saver.save(sess, checkpoint_prefix)

                if step % 1000 == 0:
                    _save_cell_weights(sess, model, research_filename, step)

            print("Optimization Finished!")

            # --- test ----------------------
            test_acc, test_loss = sess.run([accuracy, cost],
                                           feed_dict={x: test_x, y: test_y})
            f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                    ", Accuracy= " + "{:.5f}".format(test_acc))
        finally:
            # Flush and release the event file even if training fails.
            writer.close()
import tensorflow as tf
from tensorflow.contrib.rnn import BasicRNNCell

# Experiment: call a BasicRNNCell directly on placeholders with different
# (static or unknown) batch sizes and print the shapes it returns.

# number of neurons
dim = 12
# the first element is the batch size
x = tf.placeholder(tf.float32, shape=[None, dim])
y = tf.placeholder(tf.float32, shape=[4, dim])
z = tf.placeholder(tf.float32, shape=[None, dim + 1])
print('x, y, z:', x.shape, y.shape, z.shape)

# exp1: without ProjectionWrapper
print("exp1")
cell = BasicRNNCell(dim)
# Zero initial states built for two different fixed batch sizes.
state1 = cell.zero_state(batch_size=4, dtype=tf.float32)
state2 = cell.zero_state(batch_size=8, dtype=tf.float32)

# output, state
# Unknown-batch input with the batch-4 state.
out1, out2 = cell(x, state1)
print(out1.shape, out2.shape)
# Same input with the batch-8 state.
out1, out2 = cell(x, state2)
print(out1.shape, out2.shape)
# Fixed batch-4 input with the batch-4 state.
out1, out2 = cell(y, state1)
print(out1.shape, out2.shape)

# exp2: with ProjectionWrapper
print("exp2")