def main():
    """Entry point for the PyTorch Transformer example.

    Loads the train/dev/test data and the pickled id-to-word vocabulary,
    builds the model, optimizer and LR scheduler, and then dispatches on
    ``args.run_mode``:

    * ``'train_and_evaluate'``: optionally restores an existing checkpoint
      (and runs one test pass with it), then trains for
      ``config_data.max_train_epoch`` epochs, evaluating after each epoch.
    * ``'eval'``: restores the checkpoint and evaluates on the dev set.
    * ``'test'``: restores the checkpoint and writes test-set outputs.

    Raises:
        ValueError: If ``args.run_mode`` is not one of the modes above.
    """
    # Load data
    train_data, dev_data, test_data = data_utils.load_data_numpy(
        config_data.input_dir, config_data.filename_prefix)
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use vocabulary files from a trusted source.
    with open(config_data.vocab_file, 'rb') as f:
        id2w = pickle.load(f)

    beam_width = getattr(config_model, "beam_width", 1)

    # Create logging
    tx.utils.maybe_create_dir(args.model_dir)
    logging_file = os.path.join(args.model_dir, 'logging.txt')
    logger = utils.get_logger(logging_file)
    print(f"logging file is saved in: {logging_file}")

    model = Transformer(config_model, config_data)
    if torch.cuda.is_available():
        model = model.cuda()
        device = torch.cuda.current_device()
    else:
        device = None

    # Best dev BLEU seen so far; a checkpoint is saved whenever it improves.
    best_results = {'score': 0, 'epoch': -1}

    lr_config = config_model.lr_config
    if lr_config["learning_rate_schedule"] == "static":
        init_lr = lr_config["static_lr"]
        scheduler_lambda = lambda x: 1.0
    else:
        init_lr = lr_config["lr_constant"]
        scheduler_lambda = functools.partial(
            utils.get_lr_multiplier, warmup_steps=lr_config["warmup_steps"])
    optim = torch.optim.Adam(
        model.parameters(), lr=init_lr, betas=(0.9, 0.997), eps=1e-9)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optim, scheduler_lambda)

    def _eval_epoch(epoch, mode):
        """Decode the dev (``'eval'``) or test (``'test'``) set.

        In ``'eval'`` mode a token-id BLEU is computed and the model is
        checkpointed when the score improves. In ``'test'`` mode the decoded
        text is written to ``test.output`` files under ``args.model_dir``.

        Raises:
            ValueError: If ``mode`` is neither ``'eval'`` nor ``'test'``.
        """
        torch.cuda.empty_cache()
        if mode == 'eval':
            eval_data = dev_data
        elif mode == 'test':
            eval_data = test_data
        else:
            raise ValueError("`mode` should be either \"eval\" or \"test\".")

        references, hypotheses = [], []
        bsize = config_data.test_batch_size
        for i in tqdm(range(0, len(eval_data), bsize)):
            sources, targets = zip(*eval_data[i:i + bsize])
            with torch.no_grad():
                x_block = data_utils.source_pad_concat_convert(
                    sources, device=device)
                predictions = model(
                    encoder_input=x_block,
                    is_train_mode=False,
                    beam_width=beam_width)
                if beam_width == 1:
                    decoded_ids = predictions[0].sample_id
                else:
                    # Keep only the first beam of the beam-search output.
                    decoded_ids = predictions["sample_id"][:, :, 0]

            hypotheses.extend(h.tolist() for h in decoded_ids)
            references.extend(r.tolist() for r in targets)

        # EOS stripping is applied once to the full lists rather than
        # re-processing everything accumulated so far on every batch.
        # NOTE(review): `eos_token_id` is not defined in this function;
        # presumably a module-level constant -- confirm.
        hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
        references = utils.list_strip_eos(references, eos_token_id)

        if mode == 'eval':
            # Writes results to files to evaluate BLEU
            # For 'eval' mode, the BLEU is based on token ids (rather than
            # text tokens) and serves only as a surrogate metric to monitor
            # the training process
            # TODO: Use texar.evals.bleu
            fname = os.path.join(args.model_dir, 'tmp.eval')
            hwords, rwords = [], []
            for hyp, ref in zip(hypotheses, references):
                hwords.append([str(y) for y in hyp])
                rwords.append([str(y) for y in ref])
            hwords = tx.utils.str_join(hwords)
            rwords = tx.utils.str_join(rwords)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hwords, rwords, fname, mode='s',
                src_fname_suffix='hyp', tgt_fname_suffix='ref')
            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
            eval_bleu = 100. * eval_bleu
            logger.info("epoch: %d, eval_bleu %.4f", epoch, eval_bleu)
            print(f"epoch: {epoch:d}, eval_bleu {eval_bleu:.4f}")

            if eval_bleu > best_results['score']:
                logger.info("epoch: %d, best bleu: %.4f", epoch, eval_bleu)
                best_results['score'] = eval_bleu
                best_results['epoch'] = epoch
                model_path = os.path.join(args.model_dir, args.model_fn)
                logger.info("Saving model to %s", model_path)
                print(f"Saving model to {model_path}")

                # Save optimizer and scheduler too so training can resume.
                states = {
                    'model': model.state_dict(),
                    'optimizer': optim.state_dict(),
                    'scheduler': scheduler.state_dict(),
                }
                torch.save(states, model_path)

        elif mode == 'test':
            # For 'test' mode, together with the cmds in README.md, BLEU
            # is evaluated based on text tokens, which is the standard metric.
            fname = os.path.join(args.model_dir, 'test.output')
            hwords, rwords = [], []
            for hyp, ref in zip(hypotheses, references):
                hwords.append([id2w[y] for y in hyp])
                rwords.append([id2w[y] for y in ref])
            hwords = tx.utils.str_join(hwords)
            rwords = tx.utils.str_join(rwords)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hwords, rwords, fname, mode='s',
                src_fname_suffix='hyp', tgt_fname_suffix='ref')
            logger.info("Test output written to file: %s", hyp_fn)
            print(f"Test output written to file: {hyp_fn}")

    def _train_epoch(epoch: int):
        """Run one pass over the shuffled training data.

        Logs the loss every ``config_data.display_steps`` steps and runs a
        dev-set evaluation every ``config_data.eval_steps`` steps.
        """
        torch.cuda.empty_cache()
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            config_data.batch_size,
            key=lambda x: (len(x[0]), len(x[1])),
            # key is not used if sort_within_batch is False by default
            batch_size_fn=utils.batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())

        for train_batch in tqdm(train_iter):
            optim.zero_grad()
            in_arrays = data_utils.seq2seq_pad_concat_convert(
                train_batch, device=device)
            loss = model(
                encoder_input=in_arrays[0],
                is_train_mode=True,
                decoder_input=in_arrays[1],
                labels=in_arrays[2],
            )
            loss.backward()

            optim.step()
            scheduler.step()

            # The scheduler's step counter doubles as the global step.
            step = scheduler.last_epoch
            if step % config_data.display_steps == 0:
                logger.info('step: %d, loss: %.4f', step, loss)
                lr = optim.param_groups[0]['lr']
                print(f"lr: {lr} step: {step}, loss: {loss:.4}")
            if step and step % config_data.eval_steps == 0:
                _eval_epoch(epoch, mode='eval')

    if args.run_mode == 'train_and_evaluate':
        logger.info("Begin running with train_and_evaluate mode")
        model_path = os.path.join(args.model_dir, args.model_fn)
        if os.path.exists(model_path):
            # Fixed: the message previously had no `%s` placeholder, so
            # logging failed to format the record and never showed the path.
            logger.info("Restore latest checkpoint in %s", model_path)
            ckpt = torch.load(model_path)
            model.load_state_dict(ckpt['model'])
            optim.load_state_dict(ckpt['optimizer'])
            scheduler.load_state_dict(ckpt['scheduler'])
            _eval_epoch(0, mode='test')

        for epoch in range(config_data.max_train_epoch):
            _train_epoch(epoch)
            _eval_epoch(epoch, mode='eval')

    elif args.run_mode == 'eval':
        logger.info("Begin running with evaluate mode")
        model_path = os.path.join(args.model_dir, args.model_fn)
        logger.info("Restore latest checkpoint in %s", model_path)
        ckpt = torch.load(model_path)
        model.load_state_dict(ckpt['model'])
        _eval_epoch(0, mode='eval')

    elif args.run_mode == 'test':
        logger.info("Begin running with test mode")
        model_path = os.path.join(args.model_dir, args.model_fn)
        # Fixed: missing `%s` placeholder (see train_and_evaluate branch).
        logger.info("Restore latest checkpoint in %s", model_path)
        ckpt = torch.load(model_path)
        model.load_state_dict(ckpt['model'])
        _eval_epoch(0, mode='test')

    else:
        raise ValueError(f"Unknown mode: {args.run_mode}")
def main():
    """Entrypoint for the TensorFlow Transformer example.

    Loads the data and vocabulary, builds the training and inference graph
    (shared word embedder, Transformer encoder/decoder, label-smoothed MLE
    loss), and then dispatches on ``FLAGS.run_mode``:

    * ``'train_and_evaluate'``: trains for ``config_data.max_train_epoch``
      epochs with periodic dev-set evaluation.
    * ``'test'``: restores the latest checkpoint and writes test outputs.

    Raises:
        ValueError: If ``FLAGS.run_mode`` is not one of the modes above.
    """
    # Load data
    print('Loading data ...')
    train_data, dev_data, test_data = data_utils.load_data_numpy(
        config_data.input_dir, config_data.filename_prefix)
    print('Load data done')
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use vocabulary files from a trusted source.
    with open(config_data.vocab_file, 'rb') as f:
        id2w = pickle.load(f)
    vocab_size = len(id2w)
    print('vocab_size {}'.format(vocab_size))
    bos_token_id, eos_token_id = 1, 2

    beam_width = config_model.beam_width

    # Create logging
    tx.utils.maybe_create_dir(FLAGS.model_dir)
    logging_file = os.path.join(FLAGS.model_dir, 'logging.txt')
    logger = utils.get_logger(logging_file)
    # Fixed: the path was passed as a second print() argument, so a literal
    # "%s" was printed instead of being interpolated.
    print('logging file is saved in: %s' % logging_file)

    # Build model graph
    encoder_input = tf.placeholder(tf.int64, shape=(None, None))
    decoder_input = tf.placeholder(tf.int64, shape=(None, None))
    # (text sequence length excluding padding)
    encoder_input_length = tf.reduce_sum(
        1 - tf.to_int32(tf.equal(encoder_input, 0)), axis=1)
    decoder_input_length = tf.reduce_sum(
        1 - tf.to_int32(tf.equal(decoder_input, 0)), axis=1)

    labels = tf.placeholder(tf.int64, shape=(None, None))
    # Mask that is 1.0 where the label id is non-zero.
    is_target = tf.to_float(tf.not_equal(labels, 0))

    global_step = tf.Variable(0, dtype=tf.int64, trainable=False)
    learning_rate = tf.placeholder(tf.float64, shape=(), name='lr')

    embedder = tx.modules.WordEmbedder(
        vocab_size=vocab_size, hparams=config_model.emb)
    encoder = TransformerEncoder(hparams=config_model.encoder)

    encoder_output = encoder(inputs=embedder(encoder_input),
                             sequence_length=encoder_input_length)

    # The decoder ties the input word embedding with the output logit layer.
    # As the decoder masks out <PAD>'s embedding, which in effect means
    # <PAD> has all-zero embedding, so here we explicitly set <PAD>'s embedding
    # to all-zero.
    tgt_embedding = tf.concat(
        [tf.zeros(shape=[1, embedder.dim]), embedder.embedding[1:, :]],
        axis=0)
    decoder = TransformerDecoder(embedding=tgt_embedding,
                                 hparams=config_model.decoder)

    # For training
    outputs = decoder(
        memory=encoder_output,
        memory_sequence_length=encoder_input_length,
        inputs=embedder(decoder_input),
        sequence_length=decoder_input_length,
        decoding_strategy='train_greedy',
        mode=tf.estimator.ModeKeys.TRAIN)

    mle_loss = transformer_utils.smoothing_cross_entropy(
        outputs.logits, labels, vocab_size, config_model.loss_label_confidence)
    # Average the loss over positions selected by the `is_target` mask.
    mle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)

    train_op = tx.core.get_train_op(
        mle_loss,
        learning_rate=learning_rate,
        global_step=global_step,
        hparams=config_model.opt)

    tf.summary.scalar('lr', learning_rate)
    tf.summary.scalar('mle_loss', mle_loss)
    summary_merged = tf.summary.merge_all()

    # For inference
    start_tokens = tf.fill([tx.utils.get_batch_size(encoder_input)],
                           bos_token_id)
    predictions = decoder(
        memory=encoder_output,
        memory_sequence_length=encoder_input_length,
        decoding_strategy='infer_greedy',
        beam_width=beam_width,
        alpha=config_model.alpha,
        start_tokens=start_tokens,
        end_token=eos_token_id,
        max_decoding_length=config_data.max_decoding_length,
        mode=tf.estimator.ModeKeys.PREDICT)
    if beam_width <= 1:
        inferred_ids = predictions[0].sample_id
    else:
        # Uses the best sample by beam search
        inferred_ids = predictions['sample_id'][:, :, 0]

    saver = tf.train.Saver(max_to_keep=5)
    # Best dev BLEU seen so far; a checkpoint is saved whenever it improves.
    best_results = {'score': 0, 'epoch': -1}

    def _eval_epoch(sess, epoch, mode):
        """Decode the dev (``'eval'``) or test (``'test'``) set.

        In ``'eval'`` mode a token-id BLEU is computed and the model is
        saved when the score improves. In ``'test'`` mode the decoded text
        is written to ``test.output`` files under ``FLAGS.model_dir``.

        Raises:
            ValueError: If ``mode`` is neither ``'eval'`` nor ``'test'``.
        """
        if mode == 'eval':
            eval_data = dev_data
        elif mode == 'test':
            eval_data = test_data
        else:
            raise ValueError('`mode` should be either "eval" or "test".')

        references, hypotheses = [], []
        bsize = config_data.test_batch_size
        for i in range(0, len(eval_data), bsize):
            sources, targets = zip(*eval_data[i:i + bsize])
            x_block = data_utils.source_pad_concat_convert(sources)
            feed_dict = {
                encoder_input: x_block,
                tx.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            fetches = {
                'inferred_ids': inferred_ids,
            }
            fetches_ = sess.run(fetches, feed_dict=feed_dict)

            hypotheses.extend(h.tolist() for h in fetches_['inferred_ids'])
            references.extend(r.tolist() for r in targets)

        # EOS stripping is applied once to the full lists rather than
        # re-processing everything accumulated so far on every batch.
        hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
        references = utils.list_strip_eos(references, eos_token_id)

        if mode == 'eval':
            # Writes results to files to evaluate BLEU
            # For 'eval' mode, the BLEU is based on token ids (rather than
            # text tokens) and serves only as a surrogate metric to monitor
            # the training process
            fname = os.path.join(FLAGS.model_dir, 'tmp.eval')
            hypotheses = tx.utils.str_join(hypotheses)
            references = tx.utils.str_join(references)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hypotheses, references, fname, mode='s')
            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
            eval_bleu = 100. * eval_bleu
            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)
            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))

            if eval_bleu > best_results['score']:
                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)
                best_results['score'] = eval_bleu
                best_results['epoch'] = epoch
                model_path = os.path.join(FLAGS.model_dir, 'best-model.ckpt')
                logger.info('saving model to %s', model_path)
                print('saving model to %s' % model_path)
                saver.save(sess, model_path)

        elif mode == 'test':
            # For 'test' mode, together with the cmds in README.md, BLEU
            # is evaluated based on text tokens, which is the standard metric.
            fname = os.path.join(FLAGS.model_dir, 'test.output')
            hwords, rwords = [], []
            for hyp, ref in zip(hypotheses, references):
                hwords.append([id2w[y] for y in hyp])
                rwords.append([id2w[y] for y in ref])
            hwords = tx.utils.str_join(hwords)
            rwords = tx.utils.str_join(rwords)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hwords, rwords, fname, mode='s')
            logger.info('Test output writtn to file: %s', hyp_fn)
            print('Test output writtn to file: %s' % hyp_fn)

    def _train_epoch(sess, epoch, step, smry_writer):
        """Run one pass over the shuffled training data.

        Args:
            sess: Session used to run the training op.
            epoch: Current epoch index, forwarded to ``_eval_epoch``.
            step: Step used to compute the learning rate for the first batch;
                afterwards it is replaced by the fetched ``global_step``.
            smry_writer: Summary writer that receives the merged summaries.

        Returns:
            The most recent step value.
        """
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            config_data.batch_size,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=utils.batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())

        for train_batch in train_iter:
            if len(train_batch) == 0:
                continue
            in_arrays = data_utils.seq2seq_pad_concat_convert(train_batch)
            feed_dict = {
                encoder_input: in_arrays[0],
                decoder_input: in_arrays[1],
                labels: in_arrays[2],
                learning_rate: utils.get_lr(step, config_model.lr)
            }
            fetches = {
                'step': global_step,
                'train_op': train_op,
                'smry': summary_merged,
                'loss': mle_loss,
            }

            fetches_ = sess.run(fetches, feed_dict=feed_dict)

            step, loss = fetches_['step'], fetches_['loss']
            if step and step % config_data.display_steps == 0:
                logger.info('step: %d, loss: %.4f', step, loss)
                print('step: %d, loss: %.4f' % (step, loss))
                smry_writer.add_summary(fetches_['smry'], global_step=step)

            if step and step % config_data.eval_steps == 0:
                _eval_epoch(sess, epoch, mode='eval')
        return step

    # Run the graph
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        smry_writer = tf.summary.FileWriter(FLAGS.model_dir, graph=sess.graph)

        if FLAGS.run_mode == 'train_and_evaluate':
            step = 0
            for epoch in range(config_data.max_train_epoch):
                step = _train_epoch(sess, epoch, step, smry_writer)

        elif FLAGS.run_mode == 'test':
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.model_dir))
            _eval_epoch(sess, 0, mode='test')

        else:
            # Previously an unrecognized mode silently did nothing.
            raise ValueError('Unknown mode: {}'.format(FLAGS.run_mode))
def main():
    """Entrypoint for the TensorFlow Transformer example with an OT loss.

    Loads the data and vocabulary, builds the training graph (word and
    sinusoid position embeddings, Transformer encoder/decoder, label-smoothed
    MLE loss plus a weighted ``OT.FGW_distance`` term between source and
    target word embeddings) and the beam-search inference graph, and then
    dispatches on ``FLAGS.run_mode``:

    * ``'train_and_evaluate'``: restores the latest checkpoint if one
      exists, then trains for ``config_data.max_train_epoch`` epochs.
    * ``'test'``: restores the latest checkpoint and writes test outputs.

    Raises:
        ValueError: If ``FLAGS.run_mode`` is not one of the modes above.
    """
    # Load data
    train_data, dev_data, test_data = data_utils.load_data_numpy(
        config_data.input_dir, config_data.filename_prefix)
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use vocabulary files from a trusted source.
    with open(config_data.vocab_file, 'rb') as f:
        id2w = pickle.load(f)
    vocab_size = len(id2w)

    beam_width = config_model.beam_width

    # Create logging
    tx.utils.maybe_create_dir(FLAGS.model_dir)
    logging_file = os.path.join(FLAGS.model_dir, 'logging.txt')
    logger = utils.get_logger(logging_file)
    # Fixed: the path was passed as a second print() argument, so a literal
    # "%s" was printed instead of being interpolated.
    print('logging file is saved in: %s' % logging_file)

    # Build model graph
    encoder_input = tf.placeholder(tf.int64, shape=(None, None))
    decoder_input = tf.placeholder(tf.int64, shape=(None, None))
    batch_size = tf.shape(encoder_input)[0]
    # (text sequence length excluding padding)
    encoder_input_length = tf.reduce_sum(
        1 - tf.cast(tf.equal(encoder_input, 0), tf.int32), axis=1)

    labels = tf.placeholder(tf.int64, shape=(None, None))
    # Mask that is 1.0 where the label id is non-zero.
    is_target = tf.cast(tf.not_equal(labels, 0), tf.float32)

    global_step = tf.Variable(0, dtype=tf.int64, trainable=False)
    learning_rate = tf.placeholder(tf.float64, shape=(), name='lr')

    # Source word embedding
    src_word_embedder = tx.modules.WordEmbedder(
        vocab_size=vocab_size, hparams=config_model.emb)
    src_word_embeds = src_word_embedder(encoder_input)
    src_word_embeds = src_word_embeds * config_model.hidden_dim**0.5

    # Position embedding (shared b/w source and target)
    pos_embedder = tx.modules.SinusoidsPositionEmbedder(
        position_size=config_data.max_decoding_length,
        hparams=config_model.position_embedder_hparams)
    src_seq_len = tf.ones([batch_size], tf.int32) * tf.shape(encoder_input)[1]
    src_pos_embeds = pos_embedder(sequence_length=src_seq_len)

    src_input_embedding = src_word_embeds + src_pos_embeds

    encoder = TransformerEncoder(hparams=config_model.encoder)
    encoder_output = encoder(inputs=src_input_embedding,
                             sequence_length=encoder_input_length)

    # The decoder ties the input word embedding with the output logit layer.
    # As the decoder masks out <PAD>'s embedding, which in effect means
    # <PAD> has all-zero embedding, so here we explicitly set <PAD>'s embedding
    # to all-zero.
    tgt_embedding = tf.concat([
        tf.zeros(shape=[1, src_word_embedder.dim]),
        src_word_embedder.embedding[1:, :]
    ], axis=0)
    tgt_embedder = tx.modules.WordEmbedder(tgt_embedding)
    tgt_word_embeds = tgt_embedder(decoder_input)
    tgt_word_embeds = tgt_word_embeds * config_model.hidden_dim**0.5

    tgt_seq_len = tf.ones([batch_size], tf.int32) * tf.shape(decoder_input)[1]
    tgt_pos_embeds = pos_embedder(sequence_length=tgt_seq_len)

    tgt_input_embedding = tgt_word_embeds + tgt_pos_embeds

    # Transposed target embedding matrix, passed as the decoder output layer.
    _output_w = tf.transpose(tgt_embedder.embedding, (1, 0))

    decoder = TransformerDecoder(vocab_size=vocab_size,
                                 output_layer=_output_w,
                                 hparams=config_model.decoder)
    # For training
    outputs = decoder(
        memory=encoder_output,
        memory_sequence_length=encoder_input_length,
        inputs=tgt_input_embedding,
        decoding_strategy='train_greedy',
        mode=tf.estimator.ModeKeys.TRAIN)

    # Graph matching in Transformer
    _tgt_embedding = tgt_embedder(soft_ids=outputs.logits)
    src_words = tf.nn.l2_normalize(src_word_embeds, 2, epsilon=1e-12)
    tgt_words = tf.nn.l2_normalize(_tgt_embedding, 2, epsilon=1e-12)

    # Pairwise (1 - cosine similarity) between source and target words.
    cosine_cost = 1 - tf.einsum(
        'aij,ajk->aik', src_words, tf.transpose(tgt_words, [0, 2, 1]))
    # NOTE: prune
    # Costs below min + _beta * (max - min) are clipped to zero.
    _beta = 0.2
    minval = tf.reduce_min(cosine_cost)
    maxval = tf.reduce_max(cosine_cost)
    threshold = minval + _beta * (maxval - minval)
    cosine_cost = tf.nn.relu(cosine_cost - threshold)

    # TODO: Gromov wasserstein distance
    # Intra-sequence cost matrices for the source and target sides.
    Cs = 1 - tf.einsum(
        'aij,ajk->aik', src_words, tf.transpose(src_words, [0, 2, 1]))
    Ct = 1 - tf.einsum(
        'aij,ajk->aik', tgt_words, tf.transpose(tgt_words, [0, 2, 1]))
    Css = OT.prune(Cs)
    Ctt = OT.prune(Ct)

    # OT_loss = tf.reduce_mean(OT.IPOT_distance2(cosine_cost))
    # GW_loss = tf.reduce_mean(OT.GW_distance(Css, Ctt))
    GW_loss, W_loss = OT.FGW_distance(Css, Ctt, cosine_cost)
    FGW_loss = tf.reduce_mean(0.1 * GW_loss + 1 * W_loss)

    mle_loss = transformer_utils.smoothing_cross_entropy(
        outputs.logits, labels, vocab_size, config_model.loss_label_confidence)
    # Average the loss over positions selected by the `is_target` mask.
    mle_loss = tf.reduce_sum(mle_loss * is_target) / tf.reduce_sum(is_target)

    total_loss = mle_loss + FGW_loss * 0.1

    train_op = tx.core.get_train_op(
        total_loss,
        learning_rate=learning_rate,
        global_step=global_step,
        hparams=config_model.opt)

    tf.summary.scalar('lr', learning_rate)
    tf.summary.scalar('mle_loss', mle_loss)
    summary_merged = tf.summary.merge_all()

    # For inference (beam-search)
    # NOTE(review): `bos_token_id` and `eos_token_id` are not defined in
    # this function; presumably module-level constants -- confirm.
    start_tokens = tf.fill([batch_size], bos_token_id)

    def _embedding_fn(x, y):
        """Return scaled word embeddings of ``x`` plus position embeddings
        of ``y``, as used for the training inputs."""
        x_w_embed = tgt_embedder(x)
        y_p_embed = pos_embedder(y)
        return x_w_embed * config_model.hidden_dim**0.5 + y_p_embed

    predictions = decoder(
        memory=encoder_output,
        memory_sequence_length=encoder_input_length,
        beam_width=beam_width,
        length_penalty=config_model.length_penalty,
        start_tokens=start_tokens,
        end_token=eos_token_id,
        embedding=_embedding_fn,
        max_decoding_length=config_data.max_decoding_length,
        mode=tf.estimator.ModeKeys.PREDICT)
    # Uses the best sample by beam search
    beam_search_ids = predictions['sample_id'][:, :, 0]

    saver = tf.train.Saver(max_to_keep=5)
    # Best dev BLEU seen so far; a checkpoint is saved whenever it improves.
    best_results = {'score': 0, 'epoch': -1}

    def _eval_epoch(sess, epoch, mode):
        """Decode the dev (``'eval'``) or test (``'test'``) set.

        In ``'eval'`` mode a token-id BLEU is computed and the model is
        saved when the score improves. In ``'test'`` mode the decoded text
        is written to ``test.output`` files under ``FLAGS.model_dir``.

        Raises:
            ValueError: If ``mode`` is neither ``'eval'`` nor ``'test'``.
        """
        if mode == 'eval':
            eval_data = dev_data
        elif mode == 'test':
            eval_data = test_data
        else:
            raise ValueError('`mode` should be either "eval" or "test".')

        references, hypotheses = [], []
        bsize = config_data.test_batch_size
        for i in range(0, len(eval_data), bsize):
            sources, targets = zip(*eval_data[i:i + bsize])
            x_block = data_utils.source_pad_concat_convert(sources)
            feed_dict = {
                encoder_input: x_block,
                tx.global_mode(): tf.estimator.ModeKeys.EVAL,
            }
            fetches = {
                'beam_search_ids': beam_search_ids,
            }
            fetches_ = sess.run(fetches, feed_dict=feed_dict)

            hypotheses.extend(h.tolist() for h in fetches_['beam_search_ids'])
            references.extend(r.tolist() for r in targets)

        # EOS stripping is applied once to the full lists rather than
        # re-processing everything accumulated so far on every batch.
        hypotheses = utils.list_strip_eos(hypotheses, eos_token_id)
        references = utils.list_strip_eos(references, eos_token_id)

        if mode == 'eval':
            # Writes results to files to evaluate BLEU
            # For 'eval' mode, the BLEU is based on token ids (rather than
            # text tokens) and serves only as a surrogate metric to monitor
            # the training process
            fname = os.path.join(FLAGS.model_dir, 'tmp.eval')
            hypotheses = tx.utils.str_join(hypotheses)
            references = tx.utils.str_join(references)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hypotheses, references, fname, mode='s')
            eval_bleu = bleu_wrapper(ref_fn, hyp_fn, case_sensitive=True)
            eval_bleu = 100. * eval_bleu
            logger.info('epoch: %d, eval_bleu %.4f', epoch, eval_bleu)
            print('epoch: %d, eval_bleu %.4f' % (epoch, eval_bleu))

            if eval_bleu > best_results['score']:
                logger.info('epoch: %d, best bleu: %.4f', epoch, eval_bleu)
                best_results['score'] = eval_bleu
                best_results['epoch'] = epoch
                model_path = os.path.join(FLAGS.model_dir, 'best-model.ckpt')
                logger.info('saving model to %s', model_path)
                print('saving model to %s' % model_path)
                saver.save(sess, model_path)

        elif mode == 'test':
            # For 'test' mode, together with the cmds in README.md, BLEU
            # is evaluated based on text tokens, which is the standard metric.
            fname = os.path.join(FLAGS.model_dir, 'test.output')
            hwords, rwords = [], []
            for hyp, ref in zip(hypotheses, references):
                hwords.append([id2w[y] for y in hyp])
                rwords.append([id2w[y] for y in ref])
            hwords = tx.utils.str_join(hwords)
            rwords = tx.utils.str_join(rwords)
            hyp_fn, ref_fn = tx.utils.write_paired_text(
                hwords, rwords, fname, mode='s',
                src_fname_suffix='hyp', tgt_fname_suffix='ref')
            logger.info('Test output writtn to file: %s', hyp_fn)
            print('Test output writtn to file: %s' % hyp_fn)

    def _train_epoch(sess, epoch, step, smry_writer):
        """Run one pass over the shuffled training data.

        Args:
            sess: Session used to run the training op.
            epoch: Current epoch index, forwarded to ``_eval_epoch``.
            step: Step used to compute the learning rate for the first batch;
                afterwards it is replaced by the fetched ``global_step``.
            smry_writer: Summary writer that receives the merged summaries.

        Returns:
            The most recent step value.
        """
        random.shuffle(train_data)
        train_iter = data.iterator.pool(
            train_data,
            config_data.batch_size,
            key=lambda x: (len(x[0]), len(x[1])),
            batch_size_fn=utils.batch_size_fn,
            random_shuffler=data.iterator.RandomShuffler())

        for train_batch in train_iter:
            in_arrays = data_utils.seq2seq_pad_concat_convert(train_batch)
            feed_dict = {
                encoder_input: in_arrays[0],
                decoder_input: in_arrays[1],
                labels: in_arrays[2],
                learning_rate: utils.get_lr(step, config_model.lr)
            }
            # Only the MLE part of the loss is fetched for reporting, even
            # though the train op optimizes `total_loss`.
            fetches = {
                'step': global_step,
                'train_op': train_op,
                'smry': summary_merged,
                'loss': mle_loss,
            }

            fetches_ = sess.run(fetches, feed_dict=feed_dict)

            step, loss = fetches_['step'], fetches_['loss']
            if step and step % config_data.display_steps == 0:
                logger.info('step: %d, loss: %.4f', step, loss)
                print('step: %d, loss: %.4f' % (step, loss))
                smry_writer.add_summary(fetches_['smry'], global_step=step)

            if step and step % config_data.eval_steps == 0:
                _eval_epoch(sess, epoch, mode='eval')
        return step

    # Run the graph
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())

        smry_writer = tf.summary.FileWriter(FLAGS.model_dir, graph=sess.graph)

        if FLAGS.run_mode == 'train_and_evaluate':
            logger.info('Begin running with train_and_evaluate mode')

            # Look the checkpoint up once and reuse the result for both the
            # existence check and the restore.
            latest_ckpt = tf.train.latest_checkpoint(FLAGS.model_dir)
            if latest_ckpt is not None:
                logger.info('Restore latest checkpoint in %s',
                            FLAGS.model_dir)
                saver.restore(sess, latest_ckpt)

            step = 0
            for epoch in range(config_data.max_train_epoch):
                step = _train_epoch(sess, epoch, step, smry_writer)

        elif FLAGS.run_mode == 'test':
            logger.info('Begin running with test mode')

            logger.info('Restore latest checkpoint in %s', FLAGS.model_dir)
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.model_dir))

            _eval_epoch(sess, 0, mode='test')

        else:
            raise ValueError('Unknown mode: {}'.format(FLAGS.run_mode))