def trainModel(train_op, int_text, train_graph, initial_state, input_text, targets):
    # Get all batches used for training
    batches = get_batches(int_text, batch_size, seq_length)

    # Open a session and start training, passing the graph built above to the session
    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(num_epochs):
            state = sess.run(initial_state, {input_text: batches[0][0]})

            for batch_i, (x, y) in enumerate(batches):
                feed = {
                    input_text: x,
                    targets: y,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run([cost, final_state, train_op], feed)

                # Print training progress
                if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                    print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                        epoch_i, batch_i, len(batches), train_loss))

        # Save the model
        saver = tf.train.Saver()
        saver.save(sess, save_dir)
        print('Model Trained and Saved')
        helper.save_params((seq_length, save_dir))
def train_model():
    batches = get_batches(int_text, batch_size, seq_length)

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(num_epochs):
            state = sess.run(initial_state, {input_text: batches[0][0]})

            for batch_i, (x, y) in enumerate(batches):
                feed = {
                    input_text: x,
                    targets: y,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run([cost, final_state, train_op], feed)

                # Show every <show_every_n_batches> batches
                if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                    print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                        epoch_i, batch_i, len(batches), train_loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_dir)
        print('Model Trained and Saved')

        # Save parameters for checkpoint
        helper.save_params((seq_length, save_dir))
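# The training loops above call get_batches(int_text, batch_size, seq_length), which is not
# shown in these snippets. A minimal sketch of what such a helper might look like for this
# RNN setup -- an assumption, not the project's actual helper: it returns an array of shape
# (n_batches, 2, batch_size, seq_length), with inputs in [:, 0] and shifted targets in [:, 1].
import numpy as np

def get_batches(int_text, batch_size, seq_length):
    words_per_batch = batch_size * seq_length
    n_batches = len(int_text) // words_per_batch
    # Keep only enough tokens to fill complete batches
    xdata = np.array(int_text[:n_batches * words_per_batch])
    ydata = np.roll(xdata, -1)  # targets are the inputs shifted by one position
    x_batches = np.split(xdata.reshape(batch_size, -1), n_batches, axis=1)
    y_batches = np.split(ydata.reshape(batch_size, -1), n_batches, axis=1)
    return np.array(list(zip(x_batches, y_batches)))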
def createModel(int_to_vocab):
    train_graph = tf.Graph()
    with train_graph.as_default():
        # Total vocabulary size
        vocab_size = len(int_to_vocab)
        # Input, target and learning-rate nodes of the model; all of them are tf placeholders
        input_text, targets, lr = get_inputs()
        # Shape of the input data
        input_data_shape = tf.shape(input_text)
        # Build the RNN cell and its initial-state node; the cell already includes LSTM and dropout.
        # rnn_size is the number of units inside each LSTM cell.
        cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)
        # Nodes that compute the logits (used by the loss) and the final state
        logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size, embed_dim)

        # Softmax turns the logits into the final prediction probabilities
        probs = tf.nn.softmax(logits, name='probs')

        # Compute the loss
        cost = seq2seq.sequence_loss(
            logits,
            targets,
            tf.ones([input_data_shape[0], input_data_shape[1]]))

        # Adam gradient descent
        optimizer = tf.train.AdamOptimizer(lr)

        # Clip the gradients so every gradient stays in the range [-1, 1]
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                            for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)
        # return train_op, train_graph, initial_state, input_text, targets

    # Get all batches used for training
    batches = get_batches(int_text, batch_size, seq_length)

    # Open a session and start training, passing the graph built above to the session
    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(num_epochs):
            state = sess.run(initial_state, {input_text: batches[0][0]})

            for batch_i, (x, y) in enumerate(batches):
                feed = {
                    input_text: x,
                    targets: y,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run(
                    [cost, final_state, train_op], feed)

                # Print training progress
                if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                    print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                        epoch_i, batch_i, len(batches), train_loss))

        # Save the model
        saver = tf.train.Saver()
        saver.save(sess, save_dir)
        print('Model Trained and Saved')
        helper.save_params((seq_length, save_dir))
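# get_inputs, get_init_cell and build_nn are referenced above but not defined in these
# snippets. A minimal sketch of what they might look like with the TF 1.x contrib API;
# the tensor names ('inputs', 'init_state', 'final_state') are assumptions chosen to match
# the get_tensors() lookups shown further below, and the dropout wrapper is omitted.
import tensorflow as tf

def get_inputs():
    input_text = tf.placeholder(tf.int32, [None, None], name='inputs')
    targets = tf.placeholder(tf.int32, [None, None], name='targets')
    lr = tf.placeholder(tf.float32, name='learning_rate')
    return input_text, targets, lr

def get_init_cell(batch_size, rnn_size, num_layers=2):
    cells = [tf.contrib.rnn.BasicLSTMCell(rnn_size) for _ in range(num_layers)]
    cell = tf.contrib.rnn.MultiRNNCell(cells)
    # tf.identity packs the zero state into a single named tensor so it can be fetched/fed
    initial_state = tf.identity(cell.zero_state(batch_size, tf.float32), name='init_state')
    return cell, initial_state

def build_nn(cell, rnn_size, input_text, vocab_size, embed_dim):
    embed = tf.contrib.layers.embed_sequence(input_text, vocab_size, embed_dim)
    outputs, final_state = tf.nn.dynamic_rnn(cell, embed, dtype=tf.float32)
    final_state = tf.identity(final_state, name='final_state')
    logits = tf.contrib.layers.fully_connected(outputs, vocab_size, activation_fn=None)
    return logits, final_state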
def minimize(self, space, ncalls, minimize_seed, path_params='best_params.json'):
    exp_name = self.exp_name + '_{}'.format(datetime.datetime.now())
    mlflow.create_experiment(exp_name)
    mlflow.set_experiment(exp_name)
    best_params = forest_minimize(self.objective, space,
                                  n_calls=ncalls,
                                  random_state=minimize_seed)['x']
    save_params(best_params, path_params=path_params)
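# A hedged usage sketch for minimize() above: forest_minimize comes from scikit-optimize,
# so `space` would be a list of skopt dimensions and self.objective a callable evaluated at
# each sampled point. The dimension names and the wrapper class name are illustrative
# assumptions, not part of the original code.
from skopt.space import Integer, Real

space = [
    Integer(2, 10, name='max_depth'),
    Real(1e-3, 1e-1, prior='log-uniform', name='learning_rate'),
]
# tuner = HyperTuner(exp_name='my_experiment')   # hypothetical class holding minimize()
# tuner.minimize(space, ncalls=50, minimize_seed=42, path_params='best_params.json')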
                        epoch_i, batch_i, len(batches), train_loss))

    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_dir)
    print('Model Trained and Saved')


# ## Save Parameters
# Save `seq_length` and `save_dir` for generating a new TV script.

# In[59]:

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
# Save parameters for checkpoint
helper.save_params((seq_length, save_dir))


# # Checkpoint

# In[60]:

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import tensorflow as tf
import numpy as np
import helper
import problem_unittests as tests

_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
seq_length, load_dir = helper.load_params()
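# helper.save_params() / helper.load_params() are used throughout these snippets but not
# shown. In these project helpers they are typically thin pickle wrappers; a minimal sketch
# under that assumption (the 'params.p' filename is an assumption as well):
import pickle

def save_params(params, path='params.p'):
    """Persist the parameters (e.g. the (seq_length, save_dir) tuple) for later inference."""
    with open(path, 'wb') as f:
        pickle.dump(params, f)

def load_params(path='params.p'):
    """Load the previously saved parameters."""
    with open(path, 'rb') as f:
        return pickle.load(f)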
def train_model():
    # Split data into training and validation sets
    train_source = source_int_text[batch_size:]
    train_target = target_int_text[batch_size:]
    valid_source = source_int_text[:batch_size]
    valid_target = target_int_text[:batch_size]
    (valid_sources_batch, valid_targets_batch,
     valid_sources_lengths, valid_targets_lengths) = next(
        get_batches(valid_source, valid_target, batch_size,
                    source_vocab_to_int['<PAD>'],
                    target_vocab_to_int['<PAD>']))

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(epochs):
            for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                    get_batches(train_source, train_target, batch_size,
                                source_vocab_to_int['<PAD>'],
                                target_vocab_to_int['<PAD>'])):
                _, loss = sess.run(
                    [train_op, cost],
                    {input_data: source_batch,
                     targets: target_batch,
                     lr: learning_rate,
                     target_sequence_length: targets_lengths,
                     source_sequence_length: sources_lengths,
                     keep_prob: keep_probability})

                if batch_i % display_step == 0 and batch_i > 0:
                    batch_train_logits = sess.run(
                        inference_logits,
                        {input_data: source_batch,
                         source_sequence_length: sources_lengths,
                         target_sequence_length: targets_lengths,
                         keep_prob: 1.0})
                    batch_valid_logits = sess.run(
                        inference_logits,
                        {input_data: valid_sources_batch,
                         source_sequence_length: valid_sources_lengths,
                         target_sequence_length: valid_targets_lengths,
                         keep_prob: 1.0})

                    train_acc = get_accuracy(target_batch, batch_train_logits)
                    valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)

                    print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, '
                          'Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                          .format(epoch_i, batch_i,
                                  len(source_int_text) // batch_size,
                                  train_acc, valid_acc, loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')

    helper.save_params(save_path)


helper.preprocess_and_save_data(source_path, target_path, text_to_ids)
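# The seq2seq training loops above rely on a get_batches(sources, targets, batch_size,
# source_pad_int, target_pad_int) generator that pads every batch and also yields the
# per-sentence lengths. A minimal sketch of that contract -- an assumption, not the
# project's exact helper:
import numpy as np

def pad_sentence_batch(sentence_batch, pad_int):
    """Pad every sentence in the batch to the longest sentence in that batch."""
    max_sentence = max([len(sentence) for sentence in sentence_batch])
    return [sentence + [pad_int] * (max_sentence - len(sentence))
            for sentence in sentence_batch]

def get_batches(sources, targets, batch_size, source_pad_int, target_pad_int):
    """Yield (padded sources, padded targets, source lengths, target lengths)."""
    for batch_i in range(0, len(sources) // batch_size):
        start_i = batch_i * batch_size
        sources_batch = sources[start_i:start_i + batch_size]
        targets_batch = targets[start_i:start_i + batch_size]
        pad_sources_batch = np.array(pad_sentence_batch(sources_batch, source_pad_int))
        pad_targets_batch = np.array(pad_sentence_batch(targets_batch, target_pad_int))
        source_lengths = [len(sentence) for sentence in sources_batch]
        target_lengths = [len(sentence) for sentence in targets_batch]
        yield pad_sources_batch, pad_targets_batch, source_lengths, target_lengths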
                            input_flags=par.input_flags)  # Encoder(par)
mapNet_model.cuda()
mapNet_model.train()

optimizer = optim.Adam(mapNet_model.parameters(), lr=par.lr_rate)
scheduler = StepLR(optimizer, step_size=par.step_size, gamma=par.gamma)

# Load the dataset
print("Loading the training data...")
mp3d = Habitat_MP3D(par, seq_len=par.seq_len, config_file=par.train_config)
'''
# save sampled data to reproduce validation results
avd_file = open(par.model_dir+"mp3d_data.pkl", 'wb')
pickle.dump(mp3d, avd_file)
'''

log = open(par.model_dir + "train_log_" + par.model_id + ".txt", 'w')
hl.save_params(par, par.model_dir, name="mapNet")

loss_list = []
# all_ids = list(range(len(mp3d)))
# test_ids = all_ids[::100]  # select a small subset for testing
# train_ids = list(set(all_ids) - set(test_ids))  # the rest for training
# nData = len(train_ids)
# iters_per_epoch = int(nData / float(par.batch_size))
iters_per_epoch = 1000
log.write("Iters_per_epoch:" + str(iters_per_epoch) + "\n")
print("Iters per epoch:", iters_per_epoch)

for ep in range(par.nEpochs):
    # random.shuffle(train_ids)
    for i in range(iters_per_epoch):
                    input_text: x,
                    targets: y,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run([cost, final_state, train_op], feed)

                # Print training progress
                if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                    print('Epoch=' + str(batch_i) + ' Batch=' + str(len(batches)) +
                          ' train_loss=' + str(train_loss))

        # Save the model
        saver = tf.train.Saver()
        saver.save(sess, save_dir)
        print('Model Trained and Saved')
        # Save the variables we used so they can be loaded directly next time
        helper.save_params((seq_length, save_dir))


# Use the trained model to generate our own novel

# Length of the generated text
gen_length = 1000
# First character of the text; specify just one, and it must appear in the training vocabulary
prime_word = '希'

import getsave

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    # Load the saved session
    loader = tf.train.import_meta_graph(getsave.load_dir + '.meta')
    loader.restore(sess, getsave.load_dir)
    capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                        for grad, var in gradients if grad is not None]
    train_op = optimizer.apply_gradients(capped_gradients)


### Train Network
batches = get_batches(int_text, batch_size, seq_length)

with tf.Session(graph=train_graph) as s:
    s.run(tf.global_variables_initializer())

    for e in range(num_epochs):
        state = s.run(initial_state, {input_text: batches[0][0]})

        for b, (x, y) in enumerate(batches):
            feed = {
                input_text: x,
                targets: y,
                initial_state: state,
                lr: learning_rate}
            train_loss, state, _ = s.run([cost, final_state, train_op], feed)

            if (e * len(batches) + b) % every_n_batches == 0:
                print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                    (e + 1), b, len(batches), train_loss))

    saver = tf.train.Saver()
    saver.save(s, './save')
    print('Model Trained and Saved')

helper.save_params((seq_length, './save'))
import tensorflow as tf
import numpy as np
import helper
import re
import problem_unittests as tests

_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
seq_length, load_dir = helper.load_params()

# Sequence length (training step size)
seq_length = 30
helper.save_params((seq_length, "module/bible"))


def get_tensors(loaded_graph):
    inputs = loaded_graph.get_tensor_by_name("inputs:0")
    initial_state = loaded_graph.get_tensor_by_name("init_state:0")
    final_state = loaded_graph.get_tensor_by_name("final_state:0")
    probs = loaded_graph.get_tensor_by_name("probs:0")
    return inputs, initial_state, final_state, probs


def pick_word(probabilities, int_to_vocab):
    chances = []
    for idx, prob in enumerate(probabilities):
        if prob >= 0.01:
            chances.append(int_to_vocab[idx])
    if len(chances) == 0:
        return '帝'
    else:
        rand = np.random.randint(0, len(chances))
        return str(chances[rand])
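# A sketch of how get_tensors() and pick_word() above would typically be used to generate
# text from the saved checkpoint. It mirrors the earlier generation snippet; the loop
# structure and the prime word are assumptions for illustration.
gen_length = 200
prime_word = '帝'

loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    loader = tf.train.import_meta_graph(load_dir + '.meta')
    loader.restore(sess, load_dir)

    input_text, initial_state, final_state, probs = get_tensors(loaded_graph)

    gen_sentences = [prime_word]
    prev_state = sess.run(initial_state, {input_text: np.array([[1]])})

    for _ in range(gen_length):
        # Feed the last seq_length generated tokens back into the network
        dyn_input = [[vocab_to_int[word] for word in gen_sentences[-seq_length:]]]
        probabilities, prev_state = sess.run(
            [probs, final_state],
            {input_text: dyn_input, initial_state: prev_state})
        # probabilities has shape (1, dyn_seq_length, vocab_size); sample from the last step
        pred_word = pick_word(probabilities[0][-1], int_to_vocab)
        gen_sentences.append(pred_word)

    print(''.join(gen_sentences))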
optimizer = optim.Adam(all_params, lr=parIL.lr_rate)
scheduler = StepLR(optimizer, step_size=parIL.step_size, gamma=parIL.gamma)

if parIL.use_ego_obsv:
    ego_encoder = Encoder()
    ego_encoder.cuda()
    ego_encoder.eval()

# Collect the training episodes
print("Loading training episodes...")
mp3d = Habitat_MP3D_IL(par=parIL, seq_len=parIL.seq_len,
                       config_file=parIL.train_config,
                       action_list=parIL.action_list)

hl.save_params(parIL, parIL.model_dir, name="IL")
hl.save_params(parMapNet, parIL.model_dir, name="mapNet")
log = open(parIL.model_dir + "train_log_" + parIL.model_id + ".txt", 'w')

# nData = len(train_ids)
# iters_per_epoch = int(nData / float(parIL.batch_size))
iters_per_epoch = 1000
log.write("Iters_per_epoch:" + str(iters_per_epoch) + "\n")
print("Iters per epoch:", iters_per_epoch)
loss_list = []

# mapNet_batch, IL_batch = get_minibatch(batch_size=parIL.batch_size, tvec_dim=parIL.nTargets,
#                                        seq_len=parIL.seq_len, nActions=len(action_list), data=mp3d)  # **** temp

for ep in range(parIL.nEpochs):
    data_index = 0
def Train(embed_dim=512, num_epochs=20, learning_rate=0.01,
          seq_length=10, rnn_size=700, batch_size=100):
    data_dir = './data/simpsons/moes_tavern_lines.txt'
    text = helper.load_data(data_dir)
    # Ignore notice, since we don't use it for analysing the data
    text = text[81:]

    helper.preprocess_and_save_data(data_dir, token_lookup, create_lookup_tables)
    int_text, _, int_to_vocab, _ = helper.load_preprocess()

    show_every_n_batches = 50

    train_graph = tf.Graph()
    with train_graph.as_default():
        vocab_size = len(int_to_vocab)
        input_text, targets, lr = get_inputs()
        input_data_shape = tf.shape(input_text)
        cell, initial_state = get_init_cell(input_data_shape[0], rnn_size)
        logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size, embed_dim)

        # Probabilities for generating words
        tf.nn.softmax(logits, name='probs')

        # Loss function
        cost = seq2seq.sequence_loss(
            logits,
            targets,
            tf.ones([input_data_shape[0], input_data_shape[1]]))

        # Optimizer
        optimizer = tf.train.AdamOptimizer(lr)

        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                            for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

    batches = get_batches(int_text, batch_size, seq_length)

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(num_epochs):
            state = sess.run(initial_state, {input_text: batches[0][0]})

            for batch_i, (x, y) in enumerate(batches):
                feed = {
                    input_text: x,
                    targets: y,
                    initial_state: state,
                    lr: learning_rate}
                train_loss, state, _ = sess.run([cost, final_state, train_op], feed)

                # Show every <show_every_n_batches> batches
                if (epoch_i * len(batches) + batch_i) % show_every_n_batches == 0:
                    print('Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                        epoch_i, batch_i, len(batches), train_loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, "./save")
        print('Model Trained and Saved')

    # Save parameters for checkpoint
    helper.save_params((seq_length, "./save"))
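# Example invocation of Train() above; the hyperparameter values are illustrative only:
if __name__ == '__main__':
    Train(embed_dim=256, num_epochs=10, learning_rate=0.005,
          seq_length=12, rnn_size=512, batch_size=128)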
def train():
    (source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = helper.load_preprocess()

    # Check TensorFlow Version
    assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), \
        'Please use TensorFlow version 1.0 or newer'
    print('TensorFlow Version: {}'.format(tf.__version__))

    # Check for a GPU
    if not tf.test.gpu_device_name():
        warnings.warn('No GPU found. Please use a GPU to train your neural network.')
    else:
        print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))

    epochs = 3
    # Batch Size
    batch_size = 128
    # RNN Size
    rnn_size = 256
    # Number of Layers
    num_layers = 2
    # Embedding Size
    encoding_embedding_size = 200
    decoding_embedding_size = 200
    # Learning Rate
    learning_rate = 0.001
    # Dropout Keep Probability
    keep_probability = 0.5

    # ### Build the Graph
    save_path = 'ckpt'
    (source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = helper.load_preprocess()
    max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

    train_graph = tf.Graph()
    with train_graph.as_default():
        input_data, targets, lr, keep_prob = model_inputs()
        sequence_length = tf.placeholder_with_default(
            max_target_sentence_length, None, name='sequence_length')
        input_shape = tf.shape(input_data)

        train_logits, inference_logits = seq2seq_model(
            tf.reverse(input_data, [-1]), targets, keep_prob, batch_size,
            sequence_length, len(source_vocab_to_int), len(target_vocab_to_int),
            encoding_embedding_size, decoding_embedding_size, rnn_size,
            num_layers, target_vocab_to_int)

        tf.identity(inference_logits, 'logits')

        with tf.name_scope("optimization"):
            # Loss function
            cost = tf.contrib.seq2seq.sequence_loss(
                train_logits,
                targets,
                tf.ones([input_shape[0], sequence_length]))

            # Optimizer
            optimizer = tf.train.AdamOptimizer(lr)

            # Gradient Clipping
            gradients = optimizer.compute_gradients(cost)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                                for grad, var in gradients if grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)

    def get_accuracy(target, logits):
        """Calculate accuracy by padding target and logits to a common length."""
        max_seq = max(target.shape[1], logits.shape[1])
        if max_seq - target.shape[1]:
            target = np.pad(
                target,
                [(0, 0), (0, max_seq - target.shape[1])],
                'constant')
        if max_seq - logits.shape[1]:
            logits = np.pad(
                logits,
                [(0, 0), (0, max_seq - logits.shape[1]), (0, 0)],
                'constant')
        return np.mean(np.equal(target, np.argmax(logits, 2)))

    train_source = source_int_text[batch_size:]
    train_target = target_int_text[batch_size:]
    valid_source = helper.pad_sentence_batch(source_int_text[:batch_size])
    valid_target = helper.pad_sentence_batch(target_int_text[:batch_size])

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(epochs):
            for batch_i, (source_batch, target_batch) in enumerate(
                    helper.batch_data(train_source, train_target, batch_size)):
                start_time = time.time()

                _, loss = sess.run(
                    [train_op, cost],
                    {input_data: source_batch,
                     targets: target_batch,
                     lr: learning_rate,
                     sequence_length: target_batch.shape[1],
                     keep_prob: keep_probability})

                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch, keep_prob: 1.0})
                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_source, keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(np.array(valid_target), batch_valid_logits)
                end_time = time.time()

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                      'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'
                      .format(epoch_i, batch_i,
                              len(source_int_text) // batch_size,
                              train_acc, valid_acc, loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')

    helper.save_params(save_path)
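# train() above also iterates helper.batch_data(train_source, train_target, batch_size),
# which is expected to yield padded (source, target) numpy batch pairs. A minimal sketch of
# that contract, reusing the batch-padding idea shown earlier; the pad id default of 0 is an
# assumption, since the real helper looks it up from the vocabulary.
import numpy as np

def batch_data(source, target, batch_size, pad_int=0):
    """Yield padded (source_batch, target_batch) pairs of size batch_size."""
    for batch_i in range(0, len(source) // batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        max_src = max(len(s) for s in source_batch)
        max_tgt = max(len(t) for t in target_batch)
        yield (np.array([s + [pad_int] * (max_src - len(s)) for s in source_batch]),
               np.array([t + [pad_int] * (max_tgt - len(t)) for t in target_batch]))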
def train():
    # Number of Epochs
    epochs = 8
    # Batch Size
    batch_size = 512
    # RNN Size
    rnn_size = 512
    # Number of Layers
    num_layers = 2
    # Embedding Size
    encoding_embedding_size = 200
    decoding_embedding_size = 200
    # Learning Rate
    learning_rate = 0.01
    # Dropout Keep Probability
    keep_probability = 0.5
    display_step = 1

    save_path = 'checkpoints/dev'
    (source_int_text, target_int_text), (source_vocab_to_int, target_vocab_to_int), _ = helper.load_preprocess()
    max_target_sentence_length = max([len(sentence) for sentence in source_int_text])

    train_graph = tf.Graph()
    with train_graph.as_default():
        (input_data, targets, lr, keep_prob, target_sequence_length,
         max_target_sequence_length, source_sequence_length) = model_inputs()
        # sequence_length = tf.placeholder_with_default(max_target_sentence_length, None, name='sequence_length')
        input_shape = tf.shape(input_data)

        train_logits, inference_logits = seq2seq_model(
            tf.reverse(input_data, [-1]), targets, keep_prob, batch_size,
            source_sequence_length, target_sequence_length,
            max_target_sequence_length, len(source_vocab_to_int),
            len(target_vocab_to_int), encoding_embedding_size,
            decoding_embedding_size, rnn_size, num_layers, target_vocab_to_int)

        training_logits = tf.identity(train_logits.rnn_output, name='logits')
        inference_logits = tf.identity(inference_logits.sample_id, name='predictions')

        masks = tf.sequence_mask(target_sequence_length, max_target_sequence_length,
                                 dtype=tf.float32, name='masks')

        with tf.name_scope("optimization"):
            # Loss function
            cost = tf.contrib.seq2seq.sequence_loss(
                training_logits,
                targets,
                masks)

            # Optimizer
            optimizer = tf.train.AdamOptimizer(lr)

            # Gradient Clipping
            gradients = optimizer.compute_gradients(cost)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                                for grad, var in gradients if grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)

    # Split data into training and validation sets
    train_source = source_int_text[batch_size:]
    train_target = target_int_text[batch_size:]
    valid_source = source_int_text[:batch_size]
    valid_target = target_int_text[:batch_size]
    (valid_sources_batch, valid_targets_batch,
     valid_sources_lengths, valid_targets_lengths) = next(
        get_batches(valid_source, valid_target, batch_size,
                    source_vocab_to_int['<PAD>'],
                    target_vocab_to_int['<PAD>']))

    with tf.Session(graph=train_graph) as sess:
        sess.run(tf.global_variables_initializer())

        for epoch_i in range(epochs):
            for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                    get_batches(train_source, train_target, batch_size,
                                source_vocab_to_int['<PAD>'],
                                target_vocab_to_int['<PAD>'])):
                _, loss = sess.run(
                    [train_op, cost],
                    {input_data: source_batch,
                     targets: target_batch,
                     lr: learning_rate,
                     target_sequence_length: targets_lengths,
                     source_sequence_length: sources_lengths,
                     keep_prob: keep_probability})

                if batch_i % display_step == 0 and batch_i > 0:
                    batch_train_logits = sess.run(
                        inference_logits,
                        {input_data: source_batch,
                         source_sequence_length: sources_lengths,
                         target_sequence_length: targets_lengths,
                         keep_prob: 1.0})
                    batch_valid_logits = sess.run(
                        inference_logits,
                        {input_data: valid_sources_batch,
                         source_sequence_length: valid_sources_lengths,
                         target_sequence_length: valid_targets_lengths,
                         keep_prob: 1.0})

                    train_acc = get_accuracy(target_batch, batch_train_logits)
                    valid_acc = get_accuracy(valid_targets_batch, batch_valid_logits)

                    print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, '
                          'Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                          .format(epoch_i, batch_i,
                                  len(source_int_text) // batch_size,
                                  train_acc, valid_acc, loss))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')

    helper.save_params(save_path)
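# get_accuracy() used in the two training loops above is not defined in these snippets.
# Because inference_logits here is tf.identity(...sample_id), the "logits" passed in are
# already predicted token ids, so a sketch only needs to pad both arrays to a common length
# and compare them (an assumption about the helper's behaviour):
import numpy as np

def get_accuracy(target, logits):
    """Compare padded targets with padded predicted ids."""
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(target, [(0, 0), (0, max_seq - target.shape[1])], 'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(logits, [(0, 0), (0, max_seq - logits.shape[1])], 'constant')
    return np.mean(np.equal(target, logits))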
        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')


# ### Save Parameters
#
# Save the `batch_size` and `save_path` parameters for inference.

# In[17]:

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
# Save parameters for checkpoint
helper.save_params(save_path)


# # Checkpoint

# In[18]:

"""
DON'T MODIFY ANYTHING IN THIS CELL
"""
import tensorflow as tf
import numpy as np
import helper
import problem_unittests as tests

_, (source_vocab_to_int, target_vocab_to_int), (source_int_to_vocab, target_int_to_vocab) = helper.load_preprocess()
                     targets: target_batch,
                     lr: learning_rate,
                     sequence_length: target_batch.shape[1],
                     keep_prob: keep_probability})

                batch_train_logits = sess.run(
                    inference_logits,
                    {input_data: source_batch, keep_prob: 1.0})
                batch_valid_logits = sess.run(
                    inference_logits,
                    {input_data: valid_source, keep_prob: 1.0})

                train_acc = get_accuracy(target_batch, batch_train_logits)
                valid_acc = get_accuracy(np.array(valid_target), batch_valid_logits)
                end_time = time.time()

                if batch_i % 5 == 0:
                    print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                          'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f} '
                          'Time elapsed: {:>6.3f} s'
                          .format(epoch_i, batch_i,
                                  len(source_int_text) // batch_size,
                                  train_acc, valid_acc, loss,
                                  end_time - start_time))

        # Save Model
        saver = tf.train.Saver()
        saver.save(sess, save_path)
        print('Model Trained and Saved')

    helper.save_params(save_path, PARAM_PATH)
def run(self, epochs=3, batch_size=512, rnn_size=128, num_layers=1,
        encoding_embedding_size=200, decoding_embedding_size=200,
        learning_rate=0.01, k_p=.75):
    max_target_sent_length = max([len(sent) for sent in self.source_int_text])

    train_graph = tf.Graph()
    with train_graph.as_default():
        input_data, targets, lr, keep_prob = self.model_placeholders()
        sequence_length = tf.placeholder_with_default(
            max_target_sent_length, None, name='sequence_length')
        input_shape = tf.shape(input_data)

        train, infer = self.seq2seq_model(
            tf.reverse(input_data, [-1]), targets, keep_prob, batch_size,
            sequence_length, len(self.source_vocab_to_int),
            len(self.target_vocab_to_int), encoding_embedding_size,
            decoding_embedding_size, rnn_size, num_layers,
            self.target_vocab_to_int)

        tf.identity(infer, 'logits')

        with tf.name_scope('optimization'):
            cost = tf.contrib.seq2seq.sequence_loss(
                train,
                targets,
                tf.ones([input_shape[0], sequence_length]))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            gradients = optimizer.compute_gradients(cost)
            capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var)
                                for grad, var in gradients if grad is not None]
            train_op = optimizer.apply_gradients(capped_gradients)

    train_source = self.source_int_text[batch_size:]
    train_target = self.target_int_text[batch_size:]
    valid_source = h.pad_sentence_batch(self.source_int_text[:batch_size])
    valid_target = h.pad_sentence_batch(self.target_int_text[:batch_size])

    with tf.Session(graph=train_graph) as s:
        s.run(tf.global_variables_initializer())

        for e in range(1, epochs + 1):
            for idx, (source_batch, target_batch) in enumerate(
                    h.batch_data(train_source, train_target, batch_size)):
                _, loss = s.run(
                    [train_op, cost],
                    feed_dict={
                        input_data: source_batch,
                        targets: target_batch,
                        lr: learning_rate,
                        sequence_length: target_batch.shape[1],
                        keep_prob: k_p})

                batch_train_logits = s.run(
                    infer, feed_dict={input_data: source_batch, keep_prob: 1.0})
                batch_valid_logits = s.run(
                    infer, feed_dict={input_data: valid_source, keep_prob: 1.0})

                train_acc = self.get_accuracy(target_batch, batch_train_logits)
                valid_acc = self.get_accuracy(np.array(valid_target), batch_valid_logits)

                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.3f}, '
                      'Validation Accuracy: {:>6.3f}, Loss: {:>6.3f}'.format(
                          e, idx, len(self.source_int_text) // batch_size,
                          train_acc, valid_acc, loss))

        saver = tf.train.Saver()
        saver.save(s, 'checkpoints/dev')
        h.save_params('checkpoints/dev')
        print('Done')
def optimize(self, early_stop=True):
    # Return training report
    start_time = time.time()

    # Set the epoch count high to try to trigger early_stop conditions.
    if early_stop:
        num_epochs = 90
    else:
        num_epochs = 10

    # Get preprocessing data - also use helper.load_preprocess()
    source_text = '\n'.join(self.training_set[0])
    target_text = '\n'.join(self.training_set[1])
    source_validation_text = '\n'.join(self.validation_set[0])
    target_validation_text = '\n'.join(self.validation_set[1])

    # Join the training and validation text for creating lookup tables
    source_vocab_to_int, source_int_to_vocab = self.create_lookup_tables(
        '\n'.join([source_text, source_validation_text]))
    target_vocab_to_int, target_int_to_vocab = self.create_lookup_tables(
        '\n'.join([target_text, target_validation_text]))

    source_text_ids, target_text_ids = Lexicon.text_to_ids(
        source_text, target_text, source_vocab_to_int, target_vocab_to_int)

    # Build Graph
    self.train_graph = self.build_graph(source_vocab_to_int, target_vocab_to_int)

    # (val_source_vocab_to_int,
    #  val_source_int_to_vocab) = self.create_lookup_tables(source_validation_text)
    # (val_target_vocab_to_int,
    #  val_target_int_to_vocab) = self.create_lookup_tables(target_validation_text)

    source_validation_text_ids, target_validation_text_ids = Lexicon.text_to_ids(
        source_validation_text, target_validation_text,
        source_vocab_to_int, target_vocab_to_int)

    (valid_sources_batch, valid_targets_batch,
     valid_sources_lengths, valid_targets_lengths) = next(
        self.get_batches(source_validation_text_ids, target_validation_text_ids,
                         source_vocab_to_int['<PAD>'],
                         target_vocab_to_int['<PAD>']))

    with tf.Session(graph=self.train_graph) as sess:
        init = tf.global_variables_initializer()
        self.model.sess = sess
        # Launch the session
        sess.run(init)

        for epoch_i in range(num_epochs):
            self.shuffle_training_data()
            early_stopping = False

            for batch_i, (source_batch, target_batch, sources_lengths, targets_lengths) in enumerate(
                    self.get_batches(source_text_ids, target_text_ids,
                                     source_vocab_to_int['<PAD>'],
                                     target_vocab_to_int['<PAD>'])):
                total_iterations = int((epoch_i + 1) * (batch_i + 1))

                _, loss = sess.run(
                    [self.train_op, self.loss],
                    {self.input_data_ph: source_batch,
                     self.targets_ph: target_batch,
                     self.lr_ph: self.model.learning_rate,
                     self.target_sequence_length_ph: targets_lengths,
                     self.source_sequence_length_ph: sources_lengths,
                     self.keep_prob_ph: 0.75})

                if batch_i % self.model._display_step == 0 and batch_i > 0:
                    batch_train_logits = sess.run(
                        self.inference_logits,
                        {self.input_data_ph: source_batch,
                         self.source_sequence_length_ph: sources_lengths,
                         self.target_sequence_length_ph: targets_lengths,
                         self.keep_prob_ph: 1.0})
                    batch_valid_logits = sess.run(
                        self.inference_logits,
                        {self.input_data_ph: valid_sources_batch,
                         self.source_sequence_length_ph: valid_sources_lengths,
                         self.target_sequence_length_ph: valid_targets_lengths,
                         self.keep_prob_ph: 1.0})

                    train_acc = Lexicon.get_accuracy(target_batch, batch_train_logits)
                    valid_acc = Lexicon.get_accuracy(valid_targets_batch, batch_valid_logits)

                    if valid_acc > self.best_validation_accuracy:
                        # Update the best-known validation accuracy.
                        self.best_validation_accuracy = valid_acc
                        # Set the iteration for the last improvement to current.
                        self.last_improvement = total_iterations
                        # A string to be printed below; shows improvement was found.
                        improved_str = '**'
                        # Save model
                        checkpoint_dir = os.path.join(self.cache_dir, 'checkpoints')
                        checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
                        self.model.save_model(sess, checkpoint_path, total_iterations)
                    elif self.current_validation_accuracy < valid_acc:
                        improved_str = '*'
                        # Set the iteration for the last improvement to current.
                        self.last_improvement = total_iterations
                    else:
                        # An empty string printed below shows that no improvement was found.
                        improved_str = ''

                    if (valid_acc < self.best_validation_accuracy) and \
                            (total_iterations - self.last_improvement) > self.require_improvement:
                        print("No improvement found in a while, stopping optimization.")
                        # Break out from the for-loop.
                        early_stopping = True
                        break

                    # Set current validation accuracy
                    self.current_validation_accuracy = valid_acc

                    # Status message for printing.
                    print('Epoch {0:>3} Batch {1:>4}/{2} - Train Accuracy: {3:>6.4f}, '
                          'Validation Accuracy: {4:>6.4f}, Loss: {5:>6.4f} Improved?: {6}'
                          .format(epoch_i, batch_i,
                                  len(source_text_ids) // self.model.batch_size,
                                  train_acc, self.current_validation_accuracy,
                                  loss, improved_str))

            # Save Model
            checkpoint_dir = os.path.join(self.cache_dir, 'checkpoints')
            checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
            self.model.save_model(sess, checkpoint_path, total_iterations)
            # print("Model saved in file: %s" % checkpoint_path)
            helper.save_params(checkpoint_path)

            if early_stopping is True:
                break

    end_time = time.time()
    # Difference between start and end times.
    time_dif = end_time - start_time
    # Print the time usage.
    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))