# Common imports assumed by all of the snippets below (not present in the
# original excerpt). The code targets the TF 1.x-era API (tf.Session,
# tf.assign); print_function keeps the mixed print styles consistent.
from __future__ import print_function

import os
import pickle
import time

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf


def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/en.p', max_vocab_size=5000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/sp.p', max_vocab_size=5000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_lens, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_lens = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
                   train_ratio=0.8)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH: %i" % epoch_num)

            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate *
                               (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):
                loss, _ = model.step(sess, FLAGS,
                                     batch_encoder_inputs,
                                     batch_decoder_inputs, batch_targets,
                                     batch_en_seq_lens, batch_sp_seq_lens,
                                     FLAGS.dropout)
                batch_loss.append(loss)
            losses.append(np.mean(batch_loss))

        plt.plot(losses, label='loss')
        plt.legend()
        plt.show()
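# NOTE: `generate_epoch` is not defined in this excerpt. Below is a minimal
# sketch of the shuffle-and-batch generator the loop above appears to assume:
# it yields one batch iterator per epoch, and each batch is a tuple of
# aligned slices across all arrays. The other snippets call variants of the
# same pattern with different argument lists; the per-epoch reshuffling is
# an assumption, not the original implementation.
def generate_epoch(encoder_inputs, decoder_inputs, targets,
                   en_seq_lens, sp_seq_lens, num_epochs, batch_size):
    data = [np.asarray(d) for d in (encoder_inputs, decoder_inputs,
                                    targets, en_seq_lens, sp_seq_lens)]
    num_batches = len(data[0]) // batch_size

    def batches(shuffled):
        for b in range(num_batches):
            s = slice(b * batch_size, (b + 1) * batch_size)
            yield tuple(d[s] for d in shuffled)

    for _ in range(num_epochs):
        # Reshuffle every epoch so batches differ between epochs.
        order = np.random.permutation(len(data[0]))
        yield batches([d[order] for d in data])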
def train(params):

    # Load the data (Hindi -> Bengali)
    hindi_token_ids, hindi_seq_lens, hindi_vocab_dict, hindi_rev_vocab_dict = \
        process_data('../data/hindi_dump.p', max_vocab_size=100000,
                     target_lang=False)
    bengali_token_ids, bengali_seq_lens, bengali_vocab_dict, \
        bengali_rev_vocab_dict = \
        process_data('../data/bengali_dump.p', max_vocab_size=100000,
                     target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_hindi_seq_lens, train_bengali_seq_lens, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_hindi_seq_lens, valid_bengali_seq_lens = \
        split_data(hindi_token_ids, bengali_token_ids,
                   hindi_seq_lens, bengali_seq_lens, train_ratio=0.8)

    # Update parameters
    params.hindi_vocab_size = len(hindi_vocab_dict)
    params.bengali_vocab_size = len(bengali_vocab_dict)
    print(params.hindi_vocab_size, params.bengali_vocab_size)

    with tf.Session() as sess:

        # Build the model and initialize its variables
        _model = model(params)
        sess.run(tf.global_variables_initializer())

        losses = []
        accs = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_hindi_seq_lens,
                               train_bengali_seq_lens,
                               params.num_epochs, params.batch_size)):
            print("EPOCH:", epoch_num)

            # Decay learning rate
            sess.run(tf.assign(_model.lr, 0.01 * (0.99 ** epoch_num)))

            batch_loss = []
            batch_acc = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_hindi_seq_lens,
                            batch_bengali_seq_lens) in enumerate(epoch):
                loss, _, acc = _model.step(sess, params,
                                           batch_encoder_inputs,
                                           batch_decoder_inputs,
                                           batch_targets,
                                           batch_hindi_seq_lens,
                                           batch_bengali_seq_lens,
                                           params.dropout)
                batch_loss.append(loss)
                batch_acc.append(acc)
            losses.append(np.mean(batch_loss))
            accs.append(np.mean(batch_acc))
            print("Training Loss:", losses[-1])
            print("Training Accuracy:", accs[-1])

        plt.plot(losses, label='loss')
        plt.legend()
        plt.title('Plot for Training Error versus Epochs',
                  fontsize=20, style='oblique')
        plt.xlabel('Epochs', fontsize=16, color='green')
        plt.ylabel('Training Error', fontsize=16, color='green')
        plt.savefig('../output/plot.png')
        plt.show()

        # Evaluate on the validation set
        acc = _model.test(sess, params, valid_encoder_inputs,
                          valid_decoder_inputs, valid_targets,
                          valid_hindi_seq_lens, valid_bengali_seq_lens,
                          params.dropout)
        print(acc)
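# NOTE: `process_data` is not defined in this excerpt. A heavily simplified
# sketch of what the calls above appear to assume: read the dumped corpus,
# build a frequency-capped vocabulary, and return padded token ids with true
# sequence lengths plus both vocabulary mappings. The special tokens, the
# pickled input format, and the (unused here) target_lang handling are all
# assumptions.
from collections import Counter

def process_data(path, max_vocab_size, target_lang):
    with open(path, 'rb') as f:
        sentences = pickle.load(f)  # assumed: a list of token lists
    counts = Counter(tok for sent in sentences for tok in sent)
    vocab = ['_PAD', '_GO', '_EOS', '_UNK'] + \
        [w for w, _ in counts.most_common(max_vocab_size - 4)]
    vocab_dict = {w: i for i, w in enumerate(vocab)}
    rev_vocab_dict = {i: w for w, i in vocab_dict.items()}
    unk = vocab_dict['_UNK']
    token_ids = [[vocab_dict.get(w, unk) for w in sent] for sent in sentences]
    seq_lens = [len(t) for t in token_ids]
    max_len = max(seq_lens)
    token_ids = np.asarray([t + [vocab_dict['_PAD']] * (max_len - len(t))
                            for t in token_ids])
    return token_ids, np.asarray(seq_lens), vocab_dict, rev_vocab_dict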
def train(FLAGS):
    """
    Train the model on the associative retrieval task.
    """

    # Load the train/valid datasets
    print("Loading datasets:")
    with open(os.path.join(FLAGS.data_dir, 'train.p'), 'rb') as f:
        train_X, train_y = pickle.load(f)
    print("train_X:", np.shape(train_X), ", train_y:", np.shape(train_y))
    with open(os.path.join(FLAGS.data_dir, 'valid.p'), 'rb') as f:
        valid_X, valid_y = pickle.load(f)
    print("valid_X:", np.shape(valid_X), ", valid_y:", np.shape(valid_y))

    with tf.Session() as sess:

        # Load the model
        model = create_model(sess, FLAGS)
        start_time = time.time()

        # Start training
        train_epoch_loss = []
        valid_epoch_loss = []
        train_epoch_accuracy = []
        valid_epoch_accuracy = []
        train_epoch_gradient_norm = []

        for train_epoch_num, train_epoch in enumerate(
                generate_epoch(train_X, train_y,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH:", train_epoch_num)

            # Assign the learning rate (kept constant here; the decay
            # schedules below were tried and are left for reference)
            sess.run(tf.assign(model.lr, FLAGS.learning_rate))

            # Exponential decay:
            # sess.run(tf.assign(model.lr, FLAGS.learning_rate *
            #     (FLAGS.learning_rate_decay_factor ** train_epoch_num)))

            # Slow down after 1000 epochs:
            # if train_epoch_num < 1000:
            #     sess.run(tf.assign(model.lr, FLAGS.learning_rate))
            # else:
            #     sess.run(tf.assign(model.lr, 1e-4))

            # Custom decay (empirically decided):
            # if train_epoch_num % 1000 == 0:
            #     sess.run(tf.assign(model.lr,
            #         FLAGS.learning_rate / (10 ** (train_epoch_num // 1000))))

            # Train set
            train_batch_loss = []
            train_batch_accuracy = []
            train_batch_gradient_norm = []
            for train_batch_num, (batch_X, batch_y) in enumerate(train_epoch):
                loss, accuracy, norm, _ = model.step(sess, batch_X, batch_y,
                                                     FLAGS.l, FLAGS.e,
                                                     forward_only=False)
                train_batch_loss.append(loss)
                train_batch_accuracy.append(accuracy)
                train_batch_gradient_norm.append(norm)
            train_epoch_loss.append(np.mean(train_batch_loss))
            train_epoch_accuracy.append(np.mean(train_batch_accuracy))
            train_epoch_gradient_norm.append(np.mean(train_batch_gradient_norm))

            print('Epoch: [%i/%i] time: %.4f, loss: %.7f,'
                  ' acc: %.7f, norm: %.7f' % (
                      train_epoch_num, FLAGS.num_epochs,
                      time.time() - start_time,
                      train_epoch_loss[-1], train_epoch_accuracy[-1],
                      train_epoch_gradient_norm[-1]))

            # Validation set
            valid_batch_loss = []
            valid_batch_accuracy = []
            for valid_epoch_num, valid_epoch in enumerate(
                    generate_epoch(valid_X, valid_y,
                                   num_epochs=1, batch_size=FLAGS.batch_size)):
                for valid_batch_num, (batch_X, batch_y) in enumerate(valid_epoch):
                    loss, accuracy = model.step(sess, batch_X, batch_y,
                                                FLAGS.l, FLAGS.e,
                                                forward_only=True)
                    valid_batch_loss.append(loss)
                    valid_batch_accuracy.append(accuracy)
            valid_epoch_loss.append(np.mean(valid_batch_loss))
            valid_epoch_accuracy.append(np.mean(valid_batch_accuracy))

            # Save the model
            if (train_epoch_num % FLAGS.save_every == 0 or
                    train_epoch_num == (FLAGS.num_epochs - 1)) and \
                    (train_epoch_num > 0):
                if not os.path.isdir(FLAGS.ckpt_dir):
                    os.makedirs(FLAGS.ckpt_dir)
                checkpoint_path = os.path.join(FLAGS.ckpt_dir,
                                               "%s.ckpt" % FLAGS.model_name)
                print("Saving the model.")
                model.saver.save(sess, checkpoint_path,
                                 global_step=model.global_step)

        plt.plot(train_epoch_accuracy, label='train accuracy')
        plt.plot(valid_epoch_accuracy, label='valid accuracy')
        plt.legend(loc=4)
        plt.title('%s_Accuracy' % FLAGS.model_name)
        plt.show()

        plt.plot(train_epoch_loss, label='train loss')
        plt.plot(valid_epoch_loss, label='valid loss')
        plt.legend(loc=3)
        plt.title('%s_Loss' % FLAGS.model_name)
        plt.show()

        plt.plot(train_epoch_gradient_norm, label='gradient norm')
        plt.legend(loc=4)
        plt.title('%s_Gradient Norm' % FLAGS.model_name)
        plt.show()

        # Store results for global plot
        with open('%s_results.p' % FLAGS.model_name, 'wb') as f:
            pickle.dump([train_epoch_accuracy, valid_epoch_accuracy,
                         train_epoch_loss, valid_epoch_loss,
                         train_epoch_gradient_norm], f)
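# NOTE: a minimal sketch of how the results pickle written above could be
# read back for the "global plot" across models. The list of model names is
# a placeholder, not from the original code; the unpacking order matches the
# dump order above.
def plot_all_results(model_names):
    for name in model_names:
        with open('%s_results.p' % name, 'rb') as f:
            (train_acc, valid_acc, train_loss,
             valid_loss, grad_norm) = pickle.load(f)
        plt.plot(valid_acc, label='%s valid accuracy' % name)
    plt.legend(loc=4)
    plt.title('Validation accuracy across models')
    plt.show()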
def test(FLAGS):
    """
    Sample inputs of your own.
    """

    # Corpus for indexing
    corpus = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
              'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
              'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
              '?']

    # Render the sample to proper input format. The sample presumably encodes
    # the pairs (g,5), (o,8), (k,1) followed by the '??' separator and the
    # query key 'g', so the correct retrieval would be '5'.
    sample = 'g5o8k1??g'
    X = [corpus.index(item) for item in sample]
    X_one_hot = np.eye(26 + 10 + 1)[np.array(X).astype('int')]

    with tf.Session() as sess:

        if FLAGS.model_name == 'RNN-LN-FW':
            # Inputs need to be real inputs of batch_size 128 because we
            # use A(t), which updates even during testing.

            # Load the model
            model = create_model(sess, FLAGS)

            # Load real samples and replace the first one with ours
            with open(os.path.join(FLAGS.data_dir, 'train.p'), 'rb') as f:
                train_X, train_y = pickle.load(f)
            for train_epoch_num, train_epoch in enumerate(
                    generate_epoch(train_X, train_y, 1, FLAGS.batch_size)):
                for train_batch_num, (batch_X, batch_y) in enumerate(train_epoch):
                    batch_X[0] = X_one_hot
                    logits = model.logits.eval(feed_dict={
                        model.X: batch_X,
                        model.l: FLAGS.l,
                        model.e: FLAGS.e})
                    print("INPUT:", sample)
                    print("PREDICTION:", corpus[np.argmax(logits[0])])
                    return
        else:
            # Reset from train sizes to sample sizes
            FLAGS.batch_size = 1

            # Load the model
            model = create_model(sess, FLAGS)

            logits = model.logits.eval(feed_dict={
                model.X: [X_one_hot],
                model.l: FLAGS.l,
                model.e: FLAGS.e})
            print("INPUT:", sample)
            print("PREDICTION:", corpus[np.argmax(logits)])
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/en.p', max_vocab_size=5000, target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/sp.p', max_vocab_size=5000, target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_lens, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_lens = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
                   train_ratio=0.8)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)
    FLAGS.sp_max_len = max(sp_seq_lens) + 1  # +1 for the GO token

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS, forward_only=False)

        # Training begins
        train_losses = []
        valid_losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH: %i" % epoch_num)

            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate *
                               (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):
                y_pred, loss, _ = model.step(sess, FLAGS,
                                             batch_encoder_inputs,
                                             batch_decoder_inputs,
                                             batch_targets,
                                             batch_en_seq_lens,
                                             batch_sp_seq_lens,
                                             FLAGS.dropout,
                                             forward_only=False)
                batch_loss.append(loss)
            train_losses.append(np.mean(batch_loss))

            # Validation set (no dropout, forward pass only)
            for valid_epoch_num, valid_epoch in enumerate(
                    generate_epoch(valid_encoder_inputs, valid_decoder_inputs,
                                   valid_targets, valid_en_seq_lens,
                                   valid_sp_seq_lens,
                                   num_epochs=1, batch_size=FLAGS.batch_size)):
                batch_loss = []
                for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                                batch_targets, batch_en_seq_lens,
                                batch_sp_seq_lens) in enumerate(valid_epoch):
                    loss = model.step(sess, FLAGS,
                                      batch_encoder_inputs,
                                      batch_decoder_inputs, batch_targets,
                                      batch_en_seq_lens, batch_sp_seq_lens,
                                      dropout=0.0, forward_only=True,
                                      sampling=False)
                    batch_loss.append(loss)
                valid_losses.append(np.mean(batch_loss))

            # Save checkpoint.
            if not os.path.isdir(FLAGS.ckpt_dir):
                os.makedirs(FLAGS.ckpt_dir)
            checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
            print("Saving the model.")
            model.saver.save(sess, checkpoint_path,
                             global_step=model.global_step)

        plt.plot(train_losses, label='train_loss')
        plt.plot(valid_losses, label='valid_loss')
        plt.legend()
        plt.show()
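# NOTE: `create_model` is not defined in this excerpt. A minimal sketch of
# the usual create-or-restore pattern that the "Create new model or load old
# one" comments appear to describe; the Seq2SeqModel class name and its
# constructor arguments are placeholders, not from the original code.
def create_model(sess, FLAGS, forward_only=False):
    model = Seq2SeqModel(FLAGS, forward_only)  # hypothetical model class
    ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        # Reload saved parameters into the freshly built graph
        print("Restoring model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        # No checkpoint found: start from random initialization
        print("Creating a new model.")
        sess.run(tf.global_variables_initializer())
    return model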
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/tst2013.en', max_vocab_size=30000,
                     target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/tst2013.tr', max_vocab_size=30000,
                     target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_lens, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_lens = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
                   train_ratio=0.8)

    # Save the vocabularies for decoding later
    with open('data/vocab_en.pkl', 'wb') as output:
        pickle.dump(en_vocab_dict, output)
    with open('data/vocab_sp.pkl', 'wb') as output:
        pickle.dump(sp_vocab_dict, output)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)
    print('len(en_vocab_dict)', len(en_vocab_dict))
    print('len(sp_vocab_dict)', len(sp_vocab_dict))

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one. checkpoint_path was never
        # defined in the original (and a leftover debug print/exit() made
        # the rest of the function unreachable); a location under
        # FLAGS.ckpt_dir is assumed here.
        checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
        if os.path.isfile(checkpoint_path + ".index"):
            model = restore_model(sess)
        else:
            model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH: %i" % epoch_num)

            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate *
                               (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):
                loss, _ = model.step(sess, FLAGS,
                                     batch_encoder_inputs,
                                     batch_decoder_inputs, batch_targets,
                                     batch_en_seq_lens, batch_sp_seq_lens,
                                     FLAGS.dropout)
                print(loss)
                batch_loss.append(loss)
            losses.append(np.mean(batch_loss))
            print('mean:', losses[-1])

            print("Saving the model.")
            model.saver.save(sess, checkpoint_path)
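# NOTE: `restore_model` is not defined in this excerpt. A minimal sketch of
# what the call above appears to assume: rebuild the graph and load the saved
# weights. The extra arguments are assumptions; the original calls it with
# the session only, presumably closing over FLAGS and checkpoint_path.
def restore_model(sess, FLAGS=None, checkpoint_path='checkpoints/model.ckpt'):
    model = create_model(sess, FLAGS)           # rebuild the same graph
    model.saver.restore(sess, checkpoint_path)  # then overwrite the weights
    return model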
def train(FLAGS):

    # Load the data
    en_token_ids, en_seq_lens, en_vocab_dict, en_rev_vocab_dict = \
        process_data('data/my_en.txt', max_vocab_size=5000,
                     target_lang=False)
    sp_token_ids, sp_seq_lens, sp_vocab_dict, sp_rev_vocab_dict = \
        process_data('data/my_sp.txt', max_vocab_size=5000,
                     target_lang=True)

    # Split into train and validation sets
    train_encoder_inputs, train_decoder_inputs, train_targets, \
        train_en_seq_lens, train_sp_seq_lens, \
        valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
        valid_en_seq_lens, valid_sp_seq_lens = \
        split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
                   train_ratio=0.8)

    # Save the vocabularies for decoding later
    with open('data/vocab_en.pkl', 'wb') as output:
        pickle.dump(en_vocab_dict, output)
    with open('data/vocab_sp.pkl', 'wb') as output:
        pickle.dump(sp_vocab_dict, output)

    # Update parameters
    FLAGS.en_vocab_size = len(en_vocab_dict)
    FLAGS.sp_vocab_size = len(sp_vocab_dict)
    print('len(en_vocab_dict)', len(en_vocab_dict))
    print('len(sp_vocab_dict)', len(sp_vocab_dict))

    # Start session
    with tf.Session() as sess:

        # Create new model or load old one
        model = create_model(sess, FLAGS)

        # Training begins
        losses = []
        for epoch_num, epoch in enumerate(
                generate_epoch(train_encoder_inputs, train_decoder_inputs,
                               train_targets, train_en_seq_lens,
                               train_sp_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH: %i" % epoch_num)

            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate *
                               (FLAGS.learning_rate_decay_factor ** epoch_num)))

            batch_loss = []
            for batch_num, (batch_encoder_inputs, batch_decoder_inputs,
                            batch_targets, batch_en_seq_lens,
                            batch_sp_seq_lens) in enumerate(epoch):
                loss, _ = model.step(sess, FLAGS,
                                     batch_encoder_inputs,
                                     batch_decoder_inputs, batch_targets,
                                     batch_en_seq_lens, batch_sp_seq_lens,
                                     FLAGS.dropout)
                batch_loss.append(loss)
            losses.append(np.mean(batch_loss))

        checkpoint_path = "/tmp/model.ckpt"
        print("Saving the model.")
        model.saver.save(sess, checkpoint_path)

        plt.plot(losses, label='loss')
        plt.legend()
        plt.savefig('seq_01.png')
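# NOTE: `split_data` is not defined in this excerpt. A minimal sketch of the
# helper the seq2seq trainers above appear to assume: the decoder input is
# the target sentence prefixed with a GO token, the target is the sentence
# itself, and everything is split by train_ratio. GO_ID is a placeholder for
# whatever id the vocabulary assigns to the GO token.
GO_ID = 1  # assumed special-token id

def split_data(en_token_ids, sp_token_ids, en_seq_lens, sp_seq_lens,
               train_ratio=0.8):
    encoder_inputs = np.asarray(en_token_ids)
    decoder_inputs = np.asarray([[GO_ID] + list(sent[:-1])
                                 for sent in sp_token_ids])
    targets = np.asarray(sp_token_ids)
    en_lens = np.asarray(en_seq_lens)
    sp_lens = np.asarray(sp_seq_lens)

    split = int(train_ratio * len(encoder_inputs))
    train = (encoder_inputs[:split], decoder_inputs[:split], targets[:split],
             en_lens[:split], sp_lens[:split])
    valid = (encoder_inputs[split:], decoder_inputs[split:], targets[split:],
             en_lens[split:], sp_lens[split:])
    return train + valid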
def train():

    # Load the data and build the vocabulary
    X, y = load_data_and_labels()
    vocab_list, vocab_dict, rev_vocab_dict = create_vocabulary(
        X, FLAGS.en_vocab_size)
    X, seq_lens = data_to_token_ids(X, vocab_dict)
    train_X, train_y, train_seq_lens, valid_X, valid_y, valid_seq_lens = \
        split_data(X, y, seq_lens)
    FLAGS.max_sequence_length = len(train_X[0])

    with tf.Session() as sess:

        # Load old model or create new one
        model = create_model(sess, FLAGS)

        # Train results
        for epoch_num, epoch in enumerate(
                generate_epoch(train_X, train_y, train_seq_lens,
                               FLAGS.num_epochs, FLAGS.batch_size)):
            print("EPOCH:", epoch_num)

            # Decay learning rate
            sess.run(tf.assign(model.lr, FLAGS.learning_rate *
                               (FLAGS.learning_rate_decay_factor ** epoch_num)))

            train_loss = []
            train_accuracy = []
            for batch_num, (batch_X, batch_y, batch_seq_lens) in enumerate(epoch):
                _, loss, accuracy = model.step(
                    sess, batch_X, batch_seq_lens, batch_y,
                    dropout_keep_prob=FLAGS.dropout_keep_prob,
                    forward_only=False, sampling=False)
                train_loss.append(loss)
                train_accuracy.append(accuracy)

            print()
            print("EPOCH %i SUMMARY" % epoch_num)
            print("Training loss %.3f" % np.mean(train_loss))
            print("Training accuracy %.3f" % np.mean(train_accuracy))
            print("----------------------")

            # Validation results
            for valid_epoch_num, valid_epoch in enumerate(
                    generate_epoch(valid_X, valid_y, valid_seq_lens,
                                   num_epochs=1, batch_size=FLAGS.batch_size)):
                valid_loss = []
                valid_accuracy = []
                for valid_batch_num, (valid_batch_X, valid_batch_y,
                                      valid_batch_seq_lens) in \
                        enumerate(valid_epoch):
                    loss, accuracy = model.step(
                        sess, valid_batch_X, valid_batch_seq_lens,
                        valid_batch_y, dropout_keep_prob=1.0,
                        forward_only=True, sampling=False)
                    valid_loss.append(loss)
                    valid_accuracy.append(accuracy)

                print("Validation loss %.3f" % np.mean(valid_loss))
                print("Validation accuracy %.3f" % np.mean(valid_accuracy))
                print("----------------------")

            # Save checkpoint every epoch.
            if not os.path.isdir(FLAGS.ckpt_dir):
                os.makedirs(FLAGS.ckpt_dir)
            checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt")
            print("Saving the model.")
            model.saver.save(sess, checkpoint_path,
                             global_step=model.global_step)
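# NOTE: `create_vocabulary` and `data_to_token_ids` are not defined in this
# excerpt. A minimal sketch of what the calls above appear to assume: keep
# the max_vocab_size most frequent tokens, reserve ids for PAD/UNK, and map
# each sentence to padded token ids plus its true length. The special-token
# ids and the whitespace tokenization are assumptions.
from collections import Counter

PAD_ID, UNK_ID = 0, 1

def create_vocabulary(X, max_vocab_size):
    counts = Counter(token for sentence in X for token in sentence.split())
    vocab_list = ['_PAD', '_UNK'] + \
        [w for w, _ in counts.most_common(max_vocab_size - 2)]
    vocab_dict = {w: i for i, w in enumerate(vocab_list)}
    rev_vocab_dict = {i: w for w, i in vocab_dict.items()}
    return vocab_list, vocab_dict, rev_vocab_dict

def data_to_token_ids(X, vocab_dict):
    tokenized = [[vocab_dict.get(w, UNK_ID) for w in sentence.split()]
                 for sentence in X]
    seq_lens = [len(t) for t in tokenized]
    max_len = max(seq_lens)
    # Right-pad every sentence to the longest one in the corpus
    padded = [t + [PAD_ID] * (max_len - len(t)) for t in tokenized]
    return np.asarray(padded), np.asarray(seq_lens)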