def evaluate(hparams, ckpt):
    if hparams.model_architecture == "rnn-model":
        model_creator = model.RNN
    else:
        raise ValueError("Unknown model architecture. Only simple_rnn is supported so far.")

    if hparams.val_target_path:
        eval_model = model_helper.create_eval_model(model_creator, hparams,
                                                    tf.contrib.learn.ModeKeys.EVAL)
        eval_sess = tf.Session(config=utils.get_config_proto(), graph=eval_model.graph)
        with eval_model.graph.as_default():
            loaded_eval_model = model_helper.load_model(eval_model.model, eval_sess,
                                                        "evaluation", ckpt)
        iterator_feed_dict = {
            eval_model.input_file_placeholder: hparams.eval_input_path,
            eval_model.output_file_placeholder: hparams.eval_target_path
        }
        eval_loss = eval(loaded_eval_model, eval_sess, eval_model.iterator,
                         iterator_feed_dict)
        print("Eval loss: %.3f" % eval_loss)

    print("Starting predictions:")
    prediction_model = model_helper.create_infer_model(model_creator, hparams,
                                                       tf.contrib.learn.ModeKeys.INFER)
    prediction_sess = tf.Session(config=utils.get_config_proto(),
                                 graph=prediction_model.graph)
    with prediction_model.graph.as_default():
        loaded_prediction_model = model_helper.load_model(prediction_model.model,
                                                          prediction_sess, "prediction", ckpt)
    iterator_feed_dict = {
        prediction_model.input_file_placeholder: hparams.val_input_path,
    }
    predictions = predict(loaded_prediction_model, prediction_sess,
                          prediction_model.iterator, iterator_feed_dict)
    np.savetxt(os.path.join(hparams.eval_output_folder, "classes.txt"),
               predictions["classes"])
    np.savetxt(os.path.join(hparams.eval_output_folder, "probabilities.txt"),
               predictions["probabilities"])
def start_sess_and_load_model(infer_model, ckpt_path):
    """Start session and load model."""
    sess = tf.Session(graph=infer_model.graph, config=utils.get_config_proto())
    with infer_model.graph.as_default():
        loaded_infer_model = model_helper.load_model(infer_model.model, ckpt_path,
                                                     sess, "infer")
    return sess, loaded_infer_model
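# utils.get_config_proto() is called by every script in this section but its body is not
# shown here. Below is a minimal sketch of what such a helper could look like, assuming it
# only wraps tf.ConfigProto with soft placement and on-demand GPU memory growth; the actual
# utils implementation may differ. The function name "_sketch" marks it as hypothetical.
import tensorflow as tf

def get_config_proto_sketch(log_device_placement=False, allow_soft_placement=True):
    config_proto = tf.ConfigProto(
        log_device_placement=log_device_placement,
        allow_soft_placement=allow_soft_placement)
    # Grow GPU memory as needed instead of reserving it all up front.
    config_proto.gpu_options.allow_growth = True
    return config_proto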
def main(args):
    mnist = utils.read_data_sets(args.train_dir)
    config_proto = utils.get_config_proto()

    with tf.device('/gpu:0'):
        if not os.path.exists("../saves"):
            os.mkdir("../saves")
        sess = tf.Session(config=config_proto)
        model = gan.GAN(args, sess)
        total_batch = mnist.train.num_examples // args.batch_size

        for epoch in range(1, args.nb_epochs + 1):
            for i in range(1, total_batch + 1):
                global_step = sess.run(model.global_step)
                x_batch, _ = mnist.train.next_batch(args.batch_size)
                noise = np.random.normal(size=[args.batch_size, args.noise_dim])
                D_loss = model.d_batch_fit(x_batch, noise)
                G_loss = model.g_batch_fit(noise)
                if i % args.log_period == 0:
                    print "Epoch: ", '%02d' % epoch, "Batch: ", '%04d' % i, \
                        "D_loss: ", '%9.9f' % D_loss, "G_loss: ", '%9.9f' % G_loss

            if epoch % 50 == 0:
                print "- " * 50

            if epoch % args.save_period == 0:
                if not os.path.exists("../saves/imgs"):
                    os.mkdir("../saves/imgs")
                z = np.random.normal(size=[100, args.noise_dim])
                gen_images = np.reshape(model.generate(z), (100, 28, 28, 1))
                utils.save_images(gen_images, [10, 10],
                                  os.path.join(args.save_dir, "imgs/sample%s.jpg" % epoch))
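# utils.save_images(images, [rows, cols], path) is used by the GAN/VAE scripts above and
# below to dump a grid of generated samples, but is defined elsewhere. A minimal sketch of
# such a helper, assuming images arrive as a [N, H, W, C] float array in [0, 1] and that
# imageio is available; the repo's actual version may normalize or tile differently.
import numpy as np
import imageio

def save_images_sketch(images, grid, path):
    rows, cols = grid
    n, h, w, c = images.shape
    canvas = np.zeros((rows * h, cols * w, c), dtype=np.float32)
    for idx in range(min(n, rows * cols)):
        r, col = idx // cols, idx % cols
        canvas[r * h:(r + 1) * h, col * w:(col + 1) * w, :] = images[idx]
    # Squeeze the channel axis for grayscale images and convert to uint8 before writing.
    out = (np.squeeze(canvas) * 255).astype(np.uint8)
    imageio.imwrite(path, out)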
def main(args):
    save_dir = os.path.join(args.save_dir, args.model_type)
    img_dir = os.path.join(args.img_dir, args.model_type)
    log_dir = os.path.join(args.log_dir, args.model_type)
    train_dir = args.train_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    mnist = utils.read_data_sets(args.train_dir)
    summary_writer = tf.summary.FileWriter(log_dir)

    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    if args.model_type == "vae":
        model = VAE(args, sess, name="vae")
    elif args.model_type == "dcvae":
        model = DCVAE(args, sess, name="dcvae")

    total_batch = mnist.train.num_examples // args.batch_size

    for epoch in range(1, args.nb_epoch + 1):
        print "Epoch %d start with learning rate %f" % (epoch, model.learning_rate.eval(sess))
        print "- " * 50
        epoch_start_time = time.time()
        step_start_time = epoch_start_time
        for i in range(1, total_batch + 1):
            x_batch, y_batch = mnist.train.next_batch(args.batch_size)
            _, loss, loss_rec, loss_kl, global_step, summaries = model.train(x_batch)
            summary_writer.add_summary(summaries, global_step)
            step_start_time = time.time()

            if global_step % args.log_period == 0:
                print "global step %d, loss %.9f, loss_rec %.9f, loss_kl %.9f, time %.2fs" \
                    % (global_step, loss, loss_rec, loss_kl, time.time() - step_start_time)
                step_start_time = time.time()

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.learning_rate_decay_op)

        if epoch % args.save_period == 0:
            z = np.random.normal(size=[100, args.latent_dim])
            if args.model_type == "vae":
                gen_images = np.reshape(model.generate(z), (100, 28, 28, 1))
            elif args.model_type == "dcvae":
                gen_images = np.reshape(model.generate(z, 100), (100, 28, 28, 1))
            utils.save_images(gen_images, [10, 10],
                              os.path.join(img_dir, "sample%s.jpg" % epoch))

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print "Model stored...."
def main(args):
    save_dir = os.path.join(args.save_dir, args.model_type)
    img_dir = os.path.join(args.img_dir, args.model_type)
    log_dir = os.path.join(args.log_dir, args.model_type)
    train_dir = args.train_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    mnist = utils.read_data_sets(args.train_dir)
    summary_writer = tf.summary.FileWriter(log_dir)

    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = VQVAE(args, sess, name="vqvae")
    total_batch = mnist.train.num_examples // args.batch_size

    for epoch in range(1, args.nb_epoch + 1):
        print "Epoch %d start with learning rate %f" % (epoch, model.learning_rate.eval(sess))
        print "- " * 50
        epoch_start_time = time.time()
        step_start_time = epoch_start_time
        for i in range(1, total_batch + 1):
            global_step = sess.run(model.global_step)
            x_batch, y_batch = mnist.train.next_batch(args.batch_size)
            _, loss, rec_loss, vq, commit, global_step, summaries = model.train(x_batch)
            summary_writer.add_summary(summaries, global_step)

            if i % args.print_step == 0:
                print "epoch %d, step %d, loss %f, rec_loss %f, vq_loss %f, commit_loss %f, time %.2fs" \
                    % (epoch, global_step, loss, rec_loss, vq, commit, time.time() - step_start_time)
                step_start_time = time.time()

        if epoch % 50 == 0:
            print "- " * 5

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.lr_decay_op)

        if epoch % args.save_epoch == 0:
            x_batch, y_batch = mnist.test.next_batch(100)
            x_recon = model.reconstruct(x_batch)
            utils.save_images(x_batch.reshape(-1, 28, 28, 1), [10, 10],
                              os.path.join(img_dir, "rawImage%s.jpg" % epoch))
            utils.save_images(x_recon, [10, 10],
                              os.path.join(img_dir, "reconstruct%s.jpg" % epoch))

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print "Model stored...."
def test(args):
    if not op.exists(args.test_save):
        os.makedirs(args.test_save)

    latest_ckpt = tf.train.latest_checkpoint(args.save_dir)
    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = DarkEnhance(args, sess, name="darkenhance")
    model.saver.restore(sess, latest_ckpt)

    print('start testing ...')
    start_time = time.time()
    evaluation(args, model)
    end_time = time.time()
def main(args):
    save_dir = args.save_dir
    log_dir = args.log_dir
    train_dir = args.data_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = \
        utils.load_data(args.data_type)
    features = utils.preprocess_features(features)
    support = [utils.preprocess_adj(adj)]
    args.num_supports = 1
    args.input_size, args.features_size = features[2][1], features[2]
    args.output_size = y_train.shape[1]

    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = GCN(args, sess, name="gcn")
    summary_writer = tf.summary.FileWriter(log_dir)

    for epoch in range(1, args.nb_epoch + 1):
        epoch_start_time = time.time()
        feed_dict = utils.construct_feed_dict(model, features, support, y_train, train_mask)
        _, train_loss, train_acc, summaries = model.train(feed_dict)

        if epoch % args.summary_epoch == 0:
            summary_writer.add_summary(summaries, epoch)

        if epoch % args.print_epoch == 0:
            feed_dict_val = utils.construct_feed_dict(model, features, support, y_val, val_mask)
            val_loss, val_acc = model.evaluate(feed_dict_val)
            print "epoch %d, train_loss %f, train_acc %f, val_loss %f, val_acc %f, time %.5fs" % \
                (epoch, train_loss, train_acc, val_loss, val_acc, time.time() - epoch_start_time)

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.lr_decay_op)

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print "Model stored...."
def main(args):
    save_dir = os.path.join(args.save_dir)
    log_dir = os.path.join(args.log_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    summary_writer = tf.summary.FileWriter(log_dir)
    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    models = get_multi_gpu_models(args, sess)
    # trainer = MultiGPU(args, models, sess)
    model = models[0]
    saver = tf.train.Saver(max_to_keep=1)
    train_loss = model.total_loss
    train_step = tf.train.AdamOptimizer(args.learning_rate).minimize(train_loss)
    sess.run(tf.global_variables_initializer())

    if args.use_pretrained:
        models = restore_models(args, sess, models)
        model = models[0]

    for step in range(1, args.train_steps + 1):
        step_start_time = time.time()
        train_images, train_labels = utils.read_to_batch(
            sess, filename='3D-Block-2016-05.npy',
            batch_size=args.batch_size * args.num_gpu)
        images_batch, labels_batch = sess.run([train_images, train_labels])
        # print(type(train_images), type(train_labels))
        feed_dict = model.build_feed_dict(images_batch, labels_batch, True)
        _, loss_value = sess.run([train_step, train_loss], feed_dict=feed_dict)
        # _, loss, summaries = trainer.train(sess, images_batch, labels_batch)
        # _, loss, accuracy, summaries = trainer.train(sess, train_images, train_labels)
        summaries = sess.run(model.summary, feed_dict=feed_dict)
        summary_writer.add_summary(summaries, step)

        if step % args.log_step == 0:
            print("step %d, loss %.5f, time %.2fs"
                  % (step, loss_value, time.time() - step_start_time))

        # if step % args.eval_step == 0:
        #     val_accuracy, test_time = valid(args, 'list/testlist.txt', trainer, sess)
        #     print("test accuracy: %.5f, test time: %.5f" % (val_accuracy, test_time))

    saver.save(sess, './save/StepNetwork.ckpt', global_step=args.train_steps)
def main(args):
    utils.print_out("Sequence-to-Sequence dialogue")
    utils.print_out("- " * 50)
    queries, answers, index2word, word2index = prepare_quries_answers(args)
    batch_data = utils.get_batches(queries, answers, args.batch_size)
    config_proto = utils.get_config_proto()

    with tf.device("/gpu:0"):
        if not os.path.exists("../saves"):
            os.mkdir("../saves")
        save_dir = args.save_dir

        train_model = helper.build_train_model(args)
        eval_model = helper.build_eval_model(args)
        infer_model = helper.build_infer_model(args)

        train_sess = tf.Session(config=config_proto, graph=train_model.graph)
        eval_sess = tf.Session(config=config_proto, graph=eval_model.graph)
        infer_sess = tf.Session(config=config_proto, graph=infer_model.graph)

        with train_model.graph.as_default():
            new_train_model, global_step = helper.create_or_load_model(
                train_model.model, args.save_dir, train_sess, name="train")

        for epoch in range(1, args.nb_epoch + 1):
            utils.print_out("Epoch: %d start" % epoch)
            utils.print_out("- " * 50)
            loss = 0.0
            start_train_time = time.time()
            for idx, batch in enumerate(batch_data):
                queries, qry_lens, answers, ans_lens = batch
                loss_t = new_train_model.train(queries, qry_lens, answers, ans_lens, train_sess)
                if idx % 100 == 0:
                    print "Epoch: ", '%01d' % epoch, "Batch: ", '%04d' % idx, \
                        "Loss: ", '%9.9f' % loss_t
            # saver.save(session, save_dir, global_step=new_train_model.global_step)

        infer_test(infer_model, infer_sess, args, queries, index2word)
def main(args):
    save_dir = os.path.join(args.save_dir)
    log_dir = os.path.join(args.log_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    summary_writer = tf.summary.FileWriter(log_dir)
    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    models = get_multi_gpu_models(args, sess)
    trainer = MultiGPU(args, models)
    sess.run(tf.global_variables_initializer())

    if args.use_pretrained:
        models = restore_models(args, sess, models)

    for step in range(1, args.nb_steps + 1):
        step_start_time = time.time()
        train_images, train_labels, _, _, _ = utils.read_clip_and_label(
            filename='list/trainlist.txt',
            batch_size=args.batch_size * args.num_gpu,
            num_frames_per_clip=args.frame_size,
            crop_size=args.img_h,
            shuffle=True)
        _, loss, accuracy, summaries = trainer.train(sess, train_images, train_labels)
        summary_writer.add_summary(summaries, step)

        if step % args.log_step == 0:
            print("step %d, loss %.5f, accuracy %.5f, time %.2fs"
                  % (step, loss, accuracy, time.time() - step_start_time))

        if step % args.eval_step == 0:
            val_accuracy, test_time = valid(args, 'list/testlist.txt', trainer, sess)
            print("test accuracy: %.5f, test time: %.5f" % (val_accuracy, test_time))
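# get_multi_gpu_models() and the MultiGPU trainer used above are defined elsewhere in the
# repo. Below is a minimal sketch of the standard tower / gradient-averaging pattern such a
# trainer could follow, assuming each per-tower model exposes a `total_loss` tensor and that
# the towers share variables; this is an illustration, not the repo's actual implementation.
import tensorflow as tf

class MultiGPUSketch(object):
    def __init__(self, args, models):
        opt = tf.train.AdamOptimizer(args.learning_rate)
        tower_grads = []
        for gpu_id, m in enumerate(models):
            with tf.device("/gpu:%d" % gpu_id):
                tower_grads.append(opt.compute_gradients(m.total_loss))
        # Average each variable's gradient across towers (assumes dense gradients).
        avg_grads = []
        for grads_and_vars in zip(*tower_grads):
            grads = [g for g, _ in grads_and_vars if g is not None]
            avg_grads.append((tf.reduce_mean(tf.stack(grads), axis=0),
                              grads_and_vars[0][1]))
        self.loss = tf.reduce_mean([m.total_loss for m in models])
        self.train_op = opt.apply_gradients(avg_grads)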
def main(args):
    save_dir = os.path.join(args.save_dir, args.model_type)
    img_dir = os.path.join(args.img_dir, args.model_type)
    log_dir = os.path.join(args.log_dir, args.model_type)
    train_dir = args.train_dir
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    summary_writer = tf.summary.FileWriter(log_dir)
    config_proto = utils.get_config_proto()
    sess = tf.Session(config=config_proto)
    model = VQVAE(args, sess, name="vqvae")

    img_paths = glob.glob('data/img_align_celeba/*.jpg')
    train_paths, test_paths = train_test_split(img_paths, test_size=0.1,
                                               random_state=args.random_seed)
    celeba = utils.DiskImageData(sess, train_paths, args.batch_size, shape=[218, 178, 3])
    total_batch = celeba.num_examples // args.batch_size

    for epoch in range(1, args.nb_epoch + 1):
        print "Epoch %d start with learning rate %f" % (epoch, model.learning_rate.eval(sess))
        print "- " * 50
        epoch_start_time = time.time()
        step_start_time = epoch_start_time
        for i in range(1, total_batch + 1):
            global_step = sess.run(model.global_step)
            x_batch = celeba.next_batch()
            _, loss, rec_loss, vq, commit, global_step, summaries = model.train(x_batch)
            summary_writer.add_summary(summaries, global_step)

            if i % args.print_step == 0:
                print "epoch %d, step %d, loss %f, rec_loss %f, vq_loss %f, commit_loss %f, time %.2fs" \
                    % (epoch, global_step, loss, rec_loss, vq, commit, time.time() - step_start_time)
                step_start_time = time.time()

        if args.anneal and epoch >= args.anneal_start:
            sess.run(model.lr_decay_op)

        if epoch % args.save_epoch == 0:
            x_batch = celeba.next_batch()
            x_recon = model.reconstruct(x_batch)
            utils.save_images(x_batch, [10, 10],
                              os.path.join(img_dir, "rawImage%s.jpg" % epoch))
            utils.save_images(x_recon, [10, 10],
                              os.path.join(img_dir, "reconstruct%s.jpg" % epoch))

    model.saver.save(sess, os.path.join(save_dir, "model.ckpt"))
    print "Model stored...."
def train(hparams, scope=None, target_session=""):
    """Train a translation model."""
    log_device_placement = hparams.log_device_placement
    out_dir = hparams.out_dir
    num_train_steps = hparams.num_train_steps
    steps_per_stats = hparams.steps_per_stats
    steps_per_external_eval = hparams.steps_per_external_eval
    steps_per_eval = 10 * steps_per_stats
    if not steps_per_external_eval:
        steps_per_external_eval = 5 * steps_per_eval

    # Create model
    model_creator = get_model_creator(hparams)
    train_model = model_helper.create_train_model(model_creator, hparams, scope)
    eval_model = model_helper.create_eval_model(model_creator, hparams, scope)
    infer_model = model_helper.create_infer_model(model_creator, hparams, scope)

    # Preload data for sample decoding.
    dev_src_file = "%s.%s" % (hparams.dev_prefix, hparams.src)
    dev_tgt_file = "%s.%s" % (hparams.dev_prefix, hparams.tgt)
    dev_lbl_file = "%s.%s" % (hparams.dev_prefix, hparams.lbl)
    sample_src_data = inference.load_data(dev_src_file)
    sample_tgt_data = inference.load_data(dev_tgt_file)
    sample_lbl_data = inference.load_data(dev_lbl_file)

    summary_name = "train_log"
    model_dir = hparams.out_dir

    # Log and output files
    log_file = os.path.join(out_dir, "log_%d" % time.time())
    log_f = tf.gfile.GFile(log_file, mode="a")
    utils.print_out("\n# log_file=%s" % log_file, log_f)

    # TensorFlow model
    config_proto = utils.get_config_proto(log_device_placement=log_device_placement)
    train_sess = tf.Session(target=target_session, config=config_proto, graph=train_model.graph)
    eval_sess = tf.Session(target=target_session, config=config_proto, graph=eval_model.graph)
    infer_sess = tf.Session(target=target_session, config=config_proto, graph=infer_model.graph)

    with train_model.graph.as_default():
        loaded_train_model, global_step = model_helper.create_or_load_model(
            train_model.model, model_dir, train_sess, "train")

    # Summary writer
    summary_writer = tf.summary.FileWriter(os.path.join(out_dir, summary_name),
                                           train_model.graph)

    # First evaluation
    run_full_eval(model_dir, infer_model, infer_sess, eval_model, eval_sess,
                  hparams, summary_writer, sample_src_data, sample_tgt_data,
                  sample_lbl_data)

    last_stats_step = global_step
    last_eval_step = global_step
    last_external_eval_step = global_step

    # This is the training loop.
    stats, info, start_train_time = before_train(loaded_train_model, train_model,
                                                 train_sess, global_step, hparams, log_f)
    while global_step < num_train_steps:
        ### Run a step ###
        start_time = time.time()
        try:
            step_result = loaded_train_model.train(train_sess)
            hparams.epoch_step += 1
        except tf.errors.OutOfRangeError:
            # Finished going through the training dataset. Go to next epoch.
            hparams.epoch_step = 0
            utils.print_out("# Finished an epoch, step %d." % global_step)
            # run_sample_decode(infer_model, infer_sess, model_dir, hparams,
            #                   summary_writer, sample_src_data, sample_tgt_data)
            # run_external_eval(infer_model, infer_sess, model_dir, hparams,
            #                   summary_writer)
            train_sess.run(train_model.iterator.initializer,
                           feed_dict={train_model.skip_count_placeholder: 0})
            continue

        # Process step_result, accumulate stats, and write summary
        global_step, info["learning_rate"], step_summary = update_stats(
            hparams, stats, start_time, step_result)
        summary_writer.add_summary(step_summary, global_step)

        # Once in a while, we print statistics.
        if global_step - last_stats_step >= steps_per_stats:
            last_stats_step = global_step
            is_overflow = process_stats(hparams, stats, info, global_step,
                                        steps_per_stats, log_f)
            print_step_info(" ", global_step, info, get_best_results(hparams), log_f)
            if is_overflow:
                break
            # Reset statistics
            stats = init_stats()

        if global_step - last_eval_step >= steps_per_eval:
            last_eval_step = global_step
            utils.print_out("# Save eval, global step %d" % global_step)
            add_info_summaries(summary_writer, global_step, info)

            # Save checkpoint
            loaded_train_model.saver.save(train_sess,
                                          os.path.join(out_dir, "translate.ckpt"),
                                          global_step=global_step)

            # Evaluate on dev/test
            run_sample_decode(infer_model, infer_sess, model_dir, hparams,
                              summary_writer, sample_src_data, sample_tgt_data,
                              sample_lbl_data)
            run_external_eval(infer_model, infer_sess, model_dir, hparams,
                              summary_writer)

    # Done training
    loaded_train_model.saver.save(train_sess,
                                  os.path.join(out_dir, "translate.ckpt"),
                                  global_step=global_step)

    (result_summary, _, final_eval_metrics) = run_full_eval(
        model_dir, infer_model, infer_sess, eval_model, eval_sess, hparams,
        summary_writer, sample_src_data, sample_tgt_data, sample_lbl_data)
    print_step_info("# Final, ", global_step, info, result_summary, log_f)
    utils.print_time("# Done training!", start_train_time)

    summary_writer.close()

    utils.print_out("# Start evaluating saved best models.")
    for metric in hparams.metrics:
        best_model_dir = getattr(hparams, "best_" + metric + "_dir")
        summary_writer = tf.summary.FileWriter(
            os.path.join(best_model_dir, summary_name), infer_model.graph)
        result_summary, best_global_step, _ = run_full_eval(
            best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
            hparams, summary_writer, sample_src_data, sample_tgt_data,
            sample_lbl_data)
        print_step_info("# Best %s, " % metric, best_global_step, info,
                        result_summary, log_f)
        summary_writer.close()

    return final_eval_metrics, global_step
def main(args):
    save_dir = args.save_dir
    train_data = utils.load_data(args.train_dir)
    eval_data = utils.load_data(args.eval_dir)
    infer_data = utils.load_data(args.infer_dir)
    index2word, word2index = utils.load_vocab_from_file(args.vocab_dir)
    train_data_vec = utils.vectorize(train_data, word2index)
    eval_data_vec = utils.vectorize(eval_data, word2index)
    infer_data_vec = utils.vectorize(infer_data, word2index)
    train_data_size = len(train_data_vec)

    train_model = model_helper.build_train_model(args, use_attention=False)
    eval_model = model_helper.build_eval_model(args, use_attention=False)
    infer_model = model_helper.build_infer_model(args, use_attention=False)

    config_proto = utils.get_config_proto()
    train_sess = tf.Session(config=config_proto, graph=train_model.graph)
    eval_sess = tf.Session(config=config_proto, graph=eval_model.graph)
    infer_sess = tf.Session(config=config_proto, graph=infer_model.graph)

    with train_model.graph.as_default():
        loaded_train_model, global_step = model_helper.create_or_load_model(
            train_model.model, args.save_dir, train_sess, name="train")

    for epoch in range(1, args.max_epoch + 1):
        print "Epoch %d start with learning rate %f" % \
            (epoch, loaded_train_model.learning_rate.eval(train_sess))
        print "- " * 50
        epoch_start_time = time.time()
        all_batch = utils.get_batches(train_data_vec, args.batch_size)
        step_start_time = epoch_start_time
        for idx, batch in enumerate(all_batch):
            train_loss, train_ppl, global_step, predict_count, batch_size = \
                loaded_train_model.train(train_sess, batch.user, batch.product,
                                         batch.rating, batch.review_input,
                                         batch.review_output, batch.review_length)

            if global_step % args.print_step == 0:
                print "global step: %d, loss: %.9f, ppl: %.2f, time %.2fs" % \
                    (global_step, train_loss, train_ppl, time.time() - step_start_time)
                step_start_time = time.time()

            if global_step % args.eval_step == 0:
                loaded_train_model.saver.save(
                    train_sess, os.path.join(args.save_dir, "gen_review.ckpt"),
                    global_step=global_step)
                eval_start_time = time.time()
                eval_avg_loss, eval_ppl = run_internal_eval(
                    args, eval_model, eval_sess, args.save_dir, eval_data_vec)
                print "eval loss: %f, eval ppl: %.2f after training of step %d, time %.2fs" % \
                    (eval_avg_loss, eval_ppl, global_step, time.time() - eval_start_time)
                run_sample_decode(args, infer_model, infer_sess, args.save_dir, infer_data_vec)
                step_start_time = time.time()

            if args.anneal and global_step > (train_data_size / batch_size) * args.anneal_start:
                train_sess.run(train_model.learning_rate_decay_op)

        print "one epoch finish, time %.2fs" % (time.time() - epoch_start_time)
def train(hparams):
    num_epochs = hparams.num_epochs
    num_ckpt_epochs = hparams.num_ckpt_epochs
    summary_name = "train_log"
    out_dir = hparams.out_dir
    model_dir = out_dir
    log_device_placement = hparams.log_device_placement
    input_emb_weights = np.loadtxt(hparams.input_emb_file, delimiter=' ') \
        if hparams.input_emb_file else None

    if hparams.model_architecture == "rnn-model":
        model_creator = model.RNN
    else:
        raise ValueError("Unknown model architecture. Only simple_rnn is supported so far.")

    # Create 2 models in 2 graphs for train and evaluation, with 2 sessions sharing the same variables.
    train_model = model_helper.create_train_model(
        model_creator, hparams, hparams.train_input_path, hparams.train_target_path,
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    eval_model = model_helper.create_eval_model(model_creator, hparams,
                                                tf.contrib.learn.ModeKeys.EVAL)

    # Some configuration of GPU logging.
    config_proto = utils.get_config_proto(
        log_device_placement=log_device_placement, allow_soft_placement=True)

    # Create two separate sessions for train/eval.
    train_sess = tf.Session(config=config_proto, graph=train_model.graph)
    eval_sess = tf.Session(config=config_proto, graph=eval_model.graph)

    # Create a new train model by initializing all variables of the train graph in the train_sess,
    # or, using the latest checkpoint in the model_dir, load all variables of the train graph in
    # the train_sess. Note that at this point, the eval graph variables are not initialized.
    with train_model.graph.as_default():
        loaded_train_model = model_helper.create_or_load_model(
            train_model.model, train_sess, "train", model_dir, input_emb_weights)

    # Create a log file with name summary_name in out_dir. The file is written asynchronously
    # during the training process. We also pass the train graph in order to be able to display
    # it in TensorBoard.
    summary_writer = tf.summary.FileWriter(os.path.join(out_dir, summary_name),
                                           train_model.graph)

    # Run a first evaluation before starting training.
    dev_loss = run_evaluation(eval_model, eval_sess, model_dir, hparams.val_input_path,
                              hparams.val_target_path, input_emb_weights, summary_writer)
    train_loss = run_evaluation(eval_model, eval_sess, model_dir, hparams.train_input_path,
                                hparams.train_target_path, input_emb_weights, summary_writer)
    print("Dev loss before training: %.3f" % dev_loss)
    print("Train loss before training: %.3f" % train_loss)

    # Start training.
    start_train_time = time.time()
    epoch_time = 0.0
    batch_loss, epoch_loss = 0.0, 0.0
    batch_count = 0.0

    # Initialize the train iterator in train_sess.
    train_sess.run(train_model.iterator.initializer)

    # Keep lists of train/val losses for all epochs.
    train_losses = []
    dev_losses = []

    # Train the model for num_epochs. One epoch means a pass through the whole train dataset,
    # i.e., through all the batches.
    for epoch in range(num_epochs):
        # Go through all batches for the current epoch.
        while True:
            start_batch_time = 0.0
            try:
                # This call will run operations of the train graph in train_sess.
                step_result = loaded_train_model.train(train_sess)
                (_, batch_loss, batch_summary, global_step, learning_rate,
                 batch_size, inputs, targets) = step_result
                epoch_time += (time.time() - start_batch_time)
                epoch_loss += batch_loss
                batch_count += 1
            except tf.errors.OutOfRangeError:
                # When the iterator of the train batches reaches the end, break the loop
                # and reinitialize the iterator to start from the beginning of the train data.
                train_sess.run(train_model.iterator.initializer)
                break

        # Average epoch loss and epoch time over batches.
        epoch_loss /= batch_count
        epoch_time /= batch_count
        batch_count = 0.0

        # Save a checkpoint and print results if the current epoch is a checkpoint epoch.
        if (epoch + 1) % num_ckpt_epochs == 0:
            print("Saving checkpoint...")
            model_helper.add_summary(summary_writer, "train_loss", epoch_loss)
            # Save checkpoint. We save the values of the variables of the train graph.
            # train_sess is the session in which the train graph was launched.
            # The global_step parameter is optional and is appended to the name of the checkpoint.
            loaded_train_model.saver.save(train_sess, os.path.join(out_dir, "rnn.ckpt"),
                                          global_step=epoch)
            print("Results: ")
            dev_loss = run_evaluation(eval_model, eval_sess, model_dir,
                                      hparams.val_input_path, hparams.val_target_path,
                                      input_emb_weights, summary_writer)
            # tr_loss = run_evaluation(eval_model, eval_sess, model_dir, hparams.train_input_path,
            #                          hparams.train_target_path, input_emb_weights, summary_writer)
            # print("check %.3f:" % tr_loss)
            print(" epoch %d lr %g "
                  "train_loss %.3f, dev_loss %.3f" %
                  (epoch, loaded_train_model.learning_rate.eval(session=train_sess),
                   epoch_loss, dev_loss))

        train_losses.append(epoch_loss)
        dev_losses.append(dev_loss)

    # Save the final model.
    loaded_train_model.saver.save(train_sess, os.path.join(out_dir, "rnn.ckpt"),
                                  global_step=num_epochs)
    print("Done training in %.2fK" % (time.time() - start_train_time))

    min_dev_loss = np.min(dev_losses)
    min_dev_idx = np.argmin(dev_losses)
    print("Min val loss: %.3f at epoch %d" % (min_dev_loss, min_dev_idx))

    summary_writer.close()
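# run_evaluation() is called throughout train() above but its body is not shown here. A
# plausible sketch, assuming it loads the latest checkpoint from model_dir into the eval
# graph via model_helper.load_model and reuses an eval-loss helper like the eval() call in
# evaluate() at the top of this section; the real helper and its signature may differ, and
# input_emb_weights is ignored here for brevity.
import tensorflow as tf

def run_evaluation_sketch(eval_model, eval_sess, model_dir, input_path, target_path,
                          input_emb_weights, summary_writer):
    ckpt = tf.train.latest_checkpoint(model_dir)
    with eval_model.graph.as_default():
        loaded_eval_model = model_helper.load_model(eval_model.model, eval_sess,
                                                    "evaluation", ckpt)
    iterator_feed_dict = {
        eval_model.input_file_placeholder: input_path,
        eval_model.output_file_placeholder: target_path,
    }
    # eval() here is the loss-computation helper used in evaluate() above, not the builtin.
    loss = eval(loaded_eval_model, eval_sess, eval_model.iterator, iterator_feed_dict)
    model_helper.add_summary(summary_writer, "eval_loss", loss)
    return loss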
def main(args): print "loadding reviews and labels from dataset" data = pd.read_csv('data/labeledTrainData.tsv.zip', compression='zip', delimiter='\t', header=0, quoting=3) reviews = data["review"] labels = list(data['sentiment']) sentences = [] for review in reviews: if len(review) > 0: sentences.append( utils.review_to_wordlist(review.decode('utf8').strip(), remove_stopwords=True)) print "loaded %d reviews from dataset" % len(sentences) word_dict = utils.build_vocab(sentences, max_words=10000) vec_reviews = utils.vectorize(sentences, word_dict, verbose=True) train_x = vec_reviews[0:20000] train_y = labels[0:20000] train_y = utils.one_hot(train_y, args.nb_classes) test_x = vec_reviews[20000:] test_y = labels[20000:] test_y = utils.one_hot(test_y, args.nb_classes) save_dir = args.save_dir log_dir = args.log_dir if not os.path.exists(save_dir): os.makedirs(save_dir) if not os.path.exists(log_dir): os.makedirs(log_dir) with tf.Graph().as_default(): config_proto = utils.get_config_proto() sess = tf.Session(config=config_proto) if args.model_type == "cnn": model = TextCNN(args, "TextCNN") test_batch = utils.get_batches(test_x, test_y, args.max_size) elif args.model_type in ["rnn", "bi_rnn"]: model = TextRNN(args, "TextRNN") test_batch = utils.get_batches(test_x, test_y, args.max_size, type="rnn") sess.run(tf.global_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) for epoch in range(1, args.nb_epochs + 1): print "epoch %d start" % epoch print "- " * 50 loss = 0. total_reviews = 0 accuracy = 0. if args.model_type == "cnn": train_batch = utils.get_batches(train_x, train_y, args.batch_size) elif args.model_type in ["rnn", "bi_rnn"]: train_batch = utils.get_batches(train_x, train_y, args.batch_size, type="rnn") epoch_start_time = time.time() step_start_time = epoch_start_time for idx, batch in enumerate(train_batch): reviews, reviews_length, labels = batch _, loss_t, accuracy_t, global_step, batch_size, summaries = model.train( sess, reviews, reviews_length, labels, args.keep_prob) loss += loss_t * batch_size total_reviews += batch_size accuracy += accuracy_t * batch_size summary_writer.add_summary(summaries, global_step) if global_step % 50 == 0: print "epoch %d, step %d, loss %f, accuracy %.4f, time %.2fs" % \ (epoch, global_step, loss_t, accuracy_t, time.time() - step_start_time) step_start_time = time.time() epoch_time = time.time() - epoch_start_time print "%.2f seconds in this epoch" % (epoch_time) print "train loss %f, train accuracy %.4f" % ( loss / total_reviews, accuracy / total_reviews) total_reviews = 0 accuracy = 0. for batch in test_batch: reviews, reviews_length, labels = batch accuracy_t, batch_size = model.test(sess, reviews, reviews_length, labels, 1.0) total_reviews += batch_size accuracy += accuracy_t * batch_size print "accuracy %.4f in %d test reviews" % ( accuracy / total_reviews, total_reviews)
def main(args): print "loadding data and labels from dataset" train = pd.read_csv(args.train_dir) ch_train = pd.read_csv(args.chtrain_dir) x_train = train["comment_text"] x_chtrain = ch_train["comment_text"] target_cols = [ 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate' ] x = [] x_ch = [] for line in x_train: if len(line) > 0: x.append(utils.review_to_wordlist(line.strip())) print "loaded %d comments from dataset" % len(x) for line in x_chtrain: if len(line) > 0: x_ch.append(utils.review_to_wordlist_char(line.strip())) print "loaded %d comments from dataset" % len(x) y = train[target_cols].values index2word, word2index = utils.load_vocab(args.vocab_dir) index2char, char2index = utils.load_char(args.char_dir) x_vector = utils.vectorize(x, word2index, verbose=False) x_vector = np.array(x_vector) char_vector = utils.vectorize_char(x_ch, char2index, verbose=False) char_vector = np.array(char_vector) print char_vector[0] save_dir = os.path.join(args.save_dir, args.model_type) if not os.path.exists(save_dir): os.makedirs(save_dir) if args.model_type in ["cnn", "cnnfe", "chcnn", "chcnn2"]: max_step = args.max_step_cnn max_size = args.max_size_cnn nb_epochs = args.nb_epochs_cnn elif args.model_type in [ "rnn", "rnnfe", "rnnfe2", "chrnn", "chrnnfe", "rcnn" ]: max_step = args.max_step_rnn max_size = args.max_size_rnn nb_epochs = args.nb_epochs_rnn ex_features = add_features("../data/train.csv") nfolds = args.nfolds skf = KFold(n_splits=nfolds, shuffle=True, random_state=2018) test_prob = [] stack_logits = np.zeros((len(x_vector), len(target_cols))) for (f, (train_index, test_index)) in enumerate(skf.split(x_vector)): x_train, x_eval = x_vector[train_index], x_vector[test_index] char_train, char_eval = char_vector[train_index], char_vector[ test_index] y_train, y_eval = y[train_index], y[test_index] with tf.Graph().as_default(): config_proto = utils.get_config_proto() sess = tf.Session(config=config_proto) if args.model_type == "cnn": model = TextCNN(args, "TextCNN") elif args.model_type == "cnnfe": model = TextCNNFE(args, "TextCNNFE") elif args.model_type == "rnn": model = TextRNN(args, "TextRNN") elif args.model_type == "rnnfe": model = TextRNNFE(args, "TextRNNFE") elif args.model_type == "rcnn": model = TextRCNN(args, "TextRCNN") elif args.model_type == "attention": model = RNNWithAttention(args, "Attention") elif args.model_type == "chrnn": model = TextRNNChar(args, "TextRNNChar") elif args.model_type == "chcnn": model = TextCNNChar(args, "TextCNNChar") elif args.model_type == "chcnn2": model = TextCNNChar(args, "TextCNNChar2") elif args.model_type == "rnnfe2": model = TextRNNFE2(args, "TextCNNCharFE2") elif args.model_type == "chrnnfe": model = TextRNNCharFE(args, "TextCNNCharFE") else: raise ValueError("Unknown model_type %s" % args.model_type) sess.run(tf.global_variables_initializer()) if args.use_ft: pretrain_dir = args.ft_dir print "use FastText word vector" embedding = utils.load_fasttext(pretrain_dir, index2word) if not args.use_ft: pretrain_dir = args.glove_dir print "use Glove word vector" embedding = utils.load_glove(pretrain_dir, index2word) sess.run(model.embedding_init, {model.embedding_placeholder: embedding}) for line in model.tvars: print line print "training %s model for toxic comments classification" % ( args.model_type) print "%d fold start training" % f for epoch in range(1, nb_epochs + 1): print "epoch %d start with lr %f" % ( epoch, model.learning_rate.eval(session=sess)), "\n", "- " * 50 loss, total_comments = 0.0, 0 if args.model_type in ["cnn", 
"rnn", "rcnn"]: train_batch = utils.get_batches(x_train, y_train, args.batch_size, args.max_len) valid_batch = utils.get_batches(x_eval, y_eval, max_size, args.max_len, False) elif args.model_type in ["chrnn", "chcnn", "chcnn2"]: train_batch = utils.get_batches_with_char( x_train, char_train, y_train, args.batch_size, args.max_len) valid_batch = utils.get_batches_with_char( x_eval, char_eval, y_eval, max_size, args.max_len, False) elif args.model_type in ["rnnfe", "cnnfe", "rnnfe2"]: train_batch = utils.get_batches_with_fe( x_train, y_train, ex_features, args.batch_size, args.max_len) valid_batch = utils.get_batches_with_fe( x_eval, y_eval, ex_features, max_size, args.max_len, False) elif args.model_type in ["chrnnfe"]: train_batch = utils.get_batches_with_charfe( x_train, char_train, y_train, ex_features, args.batch_size, args.max_len) valid_batch = utils.get_batches_with_charfe( x_eval, char_eval, y_eval, ex_features, max_size, args.max_len, False) epoch_start_time = time.time() step_start_time = epoch_start_time for idx, batch in enumerate(train_batch): if args.model_type in ["cnn", "rnn", "rcnn"]: comments, comments_length, labels = batch _, loss_t, global_step, batch_size = model.train( sess, comments, comments_length, labels) elif args.model_type in ["chrnn", "chcnn", "chcnn2"]: comments, comments_length, chs, labels = batch _, loss_t, global_step, batch_size = model.train( sess, comments, comments_length, chs, labels) elif args.model_type in ["rnnfe", "cnnfe", "rnnfe2"]: comments, comments_length, exs, labels = batch _, loss_t, global_step, batch_size = model.train( sess, comments, comments_length, labels, exs) elif args.model_type in ["chrnnfe"]: comments, comments_length, chs, exs, labels = batch _, loss_t, global_step, batch_size = model.train( sess, comments, comments_length, chs, labels, exs) loss += loss_t * batch_size total_comments += batch_size if global_step % 200 == 0: print "epoch %d step %d loss %f time %.2fs" % ( epoch, global_step, loss_t, time.time() - step_start_time) if global_step % 200 == 0: _ = run_valid(valid_batch, model, sess, args.model_type) # model.saver.save(sess, os.path.join(save_dir, "model.ckpt"), global_step=global_step) step_start_time = time.time() epoch_time = time.time() - epoch_start_time sess.run(model.learning_rate_decay_op) print "%.2f seconds in this epoch with train loss %f" % ( epoch_time, loss / total_comments) test_prob.append(run_test(args, model, sess)) stack_logits[test_index] = run_valid(valid_batch, model, sess, args.model_type) preds = np.zeros((test_prob[0].shape[0], len(target_cols))) for prob in test_prob: preds += prob print prob[0] preds /= len(test_prob) print len(test_prob) write_predict(stack_logits, args.model_type) write_results(preds, args.model_type)