def create_or_load_hparams(out_dir, default_hparams, flags):
  """Create hparams or load hparams from out_dir.

  Saves the resolved hparams back to out_dir and to every per-metric
  "best_<metric>_dir"; printing/verbosity is suppressed when flags.chat is set.
  """
  verbose = not flags.chat
  hparams = utils.load_hparams(out_dir, verbose=verbose)
  if hparams:
    # A previous run left hparams behind: reconcile them with current flags.
    hparams = ensure_compatible_hparams(hparams, default_hparams, flags)
  else:
    # Parse the ones from the command line.
    hparams = utils.maybe_parse_standard_hparams(default_hparams, flags.hparams_path, verbose=verbose)
    hparams = extend_hparams(hparams)
  # Persist hparams to the run dir and to each best-metric directory.
  utils.save_hparams(out_dir, hparams, verbose=verbose)
  for metric_name in hparams.metrics:
    metric_dir = getattr(hparams, "best_" + metric_name + "_dir")
    utils.save_hparams(metric_dir, hparams, verbose=verbose)
  if verbose:
    utils.print_hparams(hparams)
  return hparams
def create_or_load_hparams(load_dir, default_hparams, hparams_path, save_hparams):
  """Create hparams or load hparams from load_dir.

  When save_hparams is truthy, the result is written to
  default_hparams.out_dir and to every per-metric "best_<metric>_dir".
  """
  hparams = utils.load_hparams(load_dir)
  if hparams:
    hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path)
    hparams = process_input_path(hparams)
  else:
    # Override hparams values with existing standard hparams config.
    hparams = utils.maybe_parse_standard_hparams(default_hparams, hparams_path)
    hparams = process_input_path(hparams)
    hparams = extend_hparams(hparams)
  if save_hparams:
    utils.save_hparams(default_hparams.out_dir, hparams)
    for metric_name in hparams.metrics:
      utils.save_hparams(getattr(hparams, "best_" + metric_name + "_dir"), hparams)
  utils.print_hparams(hparams)
  return hparams
def create_or_load_hparams(out_dir, default_hparams, hparams_path):
  """Create hparams or load hparams from out_dir.

  Args:
    out_dir: run directory holding (or receiving) the serialized hparams.
    default_hparams: fallback hparams parsed from the command line.
    hparams_path: optional path to a standard hparams config file.

  Returns:
    The resolved (and saved) hparams.
  """
  hparams = utils.load_hparams(out_dir)
  if not hparams:
    hparams = default_hparams
    hparams = utils.maybe_parse_standard_hparams(hparams, hparams_path)
    hparams = extend_hparams(hparams)
  else:
    hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path)
  # NOTE(review): this reads the module-level FLAGS global rather than a
  # parameter, and assumes the corpus lives in a "data" directory that is a
  # sibling of out_dir — confirm against the callers before relocating runs.
  if FLAGS.inference_input_file:
    data_dir = os.path.join(out_dir, "../data")
    hparams.src_vocab_file = os.path.join(data_dir, "vocab.cor")
    hparams.tgt_vocab_file = os.path.join(data_dir, "vocab.man")
    hparams.out_dir = out_dir
    hparams.best_bleu_dir = os.path.join(out_dir, "best_bleu")
    hparams.train_prefix = os.path.join(data_dir, "train")
    hparams.dev_prefix = os.path.join(data_dir, "dev_test")
    hparams.vocab_prefix = os.path.join(data_dir, "vocab")
    hparams.rc_vocab_file = os.path.join(data_dir, "vocab.cor")
    hparams.test_prefix = os.path.join(data_dir, "test")
  # Save HParams
  utils.save_hparams(out_dir, hparams)
  for metric in hparams.metrics:
    utils.save_hparams(getattr(hparams, "best_" + metric + "_dir"), hparams)
  # Print HParams
  utils.print_hparams(hparams)
  return hparams
def create_or_load_hparams(out_dir, default_hparams, hparams_path, save_hparams=True):
  """Create hparams from defaults (never loads existing hparams from out_dir).

  Builds hparams from default_hparams, applies the optional standard config
  at hparams_path, extends them, and optionally saves them to out_dir and to
  every per-metric "best_<metric>_dir".

  Args:
    out_dir: directory the hparams are saved into when save_hparams is True.
    default_hparams: command-line defaults used as the starting point.
    hparams_path: optional path to a standard hparams config file.
    save_hparams: whether to persist the resolved hparams to disk.

  Returns:
    The resolved hparams.
  """
  # The load-from-out_dir branch was previously kept here as a dead
  # triple-quoted string; it has been removed (it was a no-op expression).
  print('[new hparams]\n')
  hparams = default_hparams
  hparams = utils.maybe_parse_standard_hparams(hparams, hparams_path)
  hparams = extend_hparams(hparams)
  # Save HParams
  if save_hparams:
    utils.save_hparams(out_dir, hparams)
    for metric in hparams.metrics:
      utils.save_hparams(getattr(hparams, "best_" + metric + "_dir"), hparams)
  # Print HParams
  utils.print_hparams(hparams)
  return hparams
def _external_eval(model, global_step, sess, hparams, iterator, iterator_feed_dict, tgt_file, label, summary_writer, save_on_best, avg_ckpts=False):
  """External evaluation such as BLEU and ROUGE scores."""
  out_dir = hparams.out_dir
  # Step 0 means an untrained model: decode_and_evaluate is told not to decode
  # and no summaries or checkpoints are written.
  decode = global_step > 0
  if avg_ckpts:
    label = "avg_" + label
  if decode:
    utils.print_out("# External evaluation, global step %d" % global_step)
  # Rewind the eval iterator and decode into a per-label output file.
  sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
  output = os.path.join(out_dir, "output_%s" % label)
  scores = nmt_utils.decode_and_evaluate(
      label, model, sess, output,
      ref_file=tgt_file,
      metrics=hparams.metrics,
      subword_option=hparams.subword_option,
      beam_width=hparams.beam_width,
      tgt_eos=hparams.eos,
      decode=decode,
      infer_mode=hparams.infer_mode)
  if not decode:
    return scores
  # Record each metric; checkpoint whenever a metric reaches a new best
  # (all metrics here are larger-is-better).
  for metric in hparams.metrics:
    best_metric_label = ("avg_best_" if avg_ckpts else "best_") + metric
    utils.add_summary(summary_writer, global_step, "%s_%s" % (label, metric), scores[metric])
    if save_on_best and scores[metric] > getattr(hparams, best_metric_label):
      setattr(hparams, best_metric_label, scores[metric])
      ckpt_path = os.path.join(getattr(hparams, best_metric_label + "_dir"), "translate.ckpt")
      model.saver.save(sess, ckpt_path, global_step=model.global_step)
  utils.save_hparams(out_dir, hparams)
  return scores
def create_or_load_hparams(out_dir, default_hparams, save_hparams=True):
  """Return hparams loaded from out_dir, or the extended defaults."""
  hparams = utils.load_hparams(out_dir)
  if not hparams:
    # Nothing on disk: fall back to the extended command-line defaults.
    hparams = extend_hparams(default_hparams)
  if save_hparams:
    utils.save_hparams(out_dir, hparams)
  utils.print_hparams(hparams)
  return hparams
def external_eval(model, global_step, sess, hparams, iterator, iterator_feed_dict, tgt_file, label, summary_writer, save_on_best, avg_ckpts=False):
  """External evaluation such as BLEU and ROUGE scores."""
  out_dir = hparams.out_dir
  if avg_ckpts:
    label = "avg_" + label
  # Rewind the eval iterator and decode into a per-label output file.
  sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
  output = os.path.join(out_dir, "output_%s" % label)
  scores = model.decode_and_evaluate(
      label, sess, output,
      ref_file=tgt_file,
      beam_width=hparams.beam_width,
      tgt_eos=hparams.eos)
  # BLEU is the only tracked metric here; larger is better.
  best_metric_label = "avg_best_bleu" if avg_ckpts else "best_bleu"
  bleu = scores['BLEU']
  utils.add_summary(summary_writer, global_step, "%s_bleu" % (label, ), bleu)
  if save_on_best and bleu > getattr(hparams, best_metric_label):
    # New best: record it, checkpoint the model, and persist hparams.
    setattr(hparams, best_metric_label, bleu)
    ckpt_path = os.path.join(getattr(hparams, best_metric_label + "_dir"), "translate.ckpt")
    model.saver.save(sess, ckpt_path, global_step=model.global_step)
    utils.save_hparams(out_dir, hparams)
  return scores
def _external_eval(model, global_step, sess, hparams, iterator, iterator_feed_dict, tgt_file, label, summary_writer, save_on_best_dev):
  """External evaluation such as BLEU and ROUGE scores.

  Decodes the dataset behind `iterator`, scores it against `tgt_file` for
  every metric in hparams.metrics, writes one summary per metric, and —
  when save_on_best_dev is set — checkpoints the model and updates
  hparams.best_<metric> whenever a metric improves. Returns the dict of
  metric name -> score.
  """
  out_dir = hparams.out_dir
  # Avoids running eval when global step is 0 (untrained model).
  decode = global_step > 0
  if decode:
    utils.print_out("# External evaluation, global step %d" % global_step)
  # Initialize (rewind) the eval iterator before decoding.
  sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
  # Create the output file for the decoded responses.
  output_file = os.path.join(out_dir, "output_%s" % label)
  # Get the scores for the metrics.
  scores = chatbot_utils.decode_and_evaluate(
      name=label,
      model=model,
      sess=sess,
      output_file=output_file,
      reference_file=tgt_file,
      metrics=hparams.metrics,
      bpe_delimiter=hparams.bpe_delimiter,
      beam_width=hparams.beam_width,
      eos=hparams.eos,
      number_token=hparams.number_token,
      name_token=hparams.name_token,
      decode=decode
  )
  # Create the summaries and also save the best.
  if decode:
    for metric in hparams.metrics:
      # Create the summary for this metric at this step.
      utils.add_summary(summary_writer, global_step, "%s_%s" % (label, metric), scores[metric])
      # Is the current metric score better than the last (larger is better)?
      if save_on_best_dev and scores[metric] > getattr(hparams, "best_" + metric):
        # Update the hparams score.
        setattr(hparams, "best_" + metric, scores[metric])
        # Save the model which got the best for this metric to file.
        model.saver.save(sess, os.path.join(getattr(hparams, "best_" + metric + "_dir"), "dialogue.ckpt"), global_step=model.global_step)
  # For safety: save the hparams (including any updated bests) to file.
  # NOTE(review): reconstructed placement — this appears to run on every call,
  # not only when a best improved; confirm against the original layout.
  utils.save_hparams(out_dir, hparams, verbose=True)
  return scores
def create_or_load_hparams(out_dir, default_hparams, hparams_path):
  """Create hparams or load hparams from out_dir.

  Args:
    out_dir: run directory holding (or receiving) the serialized hparams.
    default_hparams: fallback hparams parsed from the command line.
    hparams_path: optional path to a standard hparams config file.

  Returns:
    The resolved (and saved) hparams.
  """
  hparams = utils.load_hparams(out_dir)
  if not hparams:
    hparams = default_hparams
    hparams = utils.maybe_parse_standard_hparams(hparams, hparams_path)
    hparams = extend_hparams(hparams)
  else:
    hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path)
  # Save HParams
  utils.save_hparams(out_dir, hparams)
  for metric in hparams.metrics:
    # Fix: previously every iteration saved to "best_bleu_dir", ignoring the
    # loop variable; save each metric's hparams into its own directory, as
    # the sibling implementations do.
    utils.save_hparams(getattr(hparams, "best_" + metric + "_dir"), hparams)
  # Print HParams
  utils.print_hparams(hparams)
  return hparams
def create_or_load_hparams(out_dir, default_hparams, hparams_path, save_hparams=True):
  """Create hparams or load hparams from out_dir."""
  hparams = utils.load_hparams(out_dir)
  if hparams:
    # Reuse the stored hparams, reconciled with the current defaults.
    hparams = ensure_compatible_hparams(hparams, default_hparams, hparams_path)
  else:
    hparams = utils.maybe_parse_standard_hparams(default_hparams, hparams_path)
    hparams = extend_hparams(hparams)
  if save_hparams:
    # Persist to the run dir and to every best-metric directory.
    utils.save_hparams(out_dir, hparams)
    for metric_name in hparams.metrics:
      utils.save_hparams(getattr(hparams, "best_" + metric_name + "_dir"), hparams)
  utils.print_hparams(hparams)
  return hparams
def create_or_load_hparams(out_dir, default_hparams):
  """Create hparams or load hparams from out_dir.

  On a fresh run (nothing stored in out_dir) this seeds best-BLEU tracking,
  derives num_train_steps from the training corpus size, configures
  encoder/decoder (residual) layer counts, checks/extends the vocab files,
  and wires optional pretrained embeddings. The result is saved to out_dir.

  Args:
    out_dir: run directory holding (or receiving) the serialized hparams.
    default_hparams: command-line defaults used when nothing is stored yet.

  Returns:
    The resolved (and saved) hparams.
  """
  hparams = utils.load_hparams(out_dir)
  if not hparams:
    hparams = default_hparams
    # Best-BLEU bookkeeping (plain and averaged-checkpoint variants).
    hparams.add_hparam("best_bleu", 0)
    best_bleu_dir = os.path.join(out_dir, "best_bleu")
    hparams.add_hparam("best_bleu_dir", best_bleu_dir)
    # exist_ok=True: a rerun with an existing out_dir must not crash here.
    os.makedirs(best_bleu_dir, exist_ok=True)
    hparams.add_hparam("avg_best_bleu", 0)
    avg_best_bleu_dir = os.path.join(hparams.out_dir, "avg_best_bleu")
    hparams.add_hparam("avg_best_bleu_dir", avg_best_bleu_dir)
    os.makedirs(avg_best_bleu_dir, exist_ok=True)
    # Set num_train_steps from the smaller side of the parallel corpus.
    train_src_file = "%s.%s" % (hparams.train_prefix, hparams.src)
    train_tgt_file = "%s.%s" % (hparams.train_prefix, hparams.tgt)
    # Count lines without materializing the whole file in memory.
    with open(train_src_file, 'r', encoding='utf-8') as f:
      train_src_steps = sum(1 for _ in f)
    with open(train_tgt_file, 'r', encoding='utf-8') as f:
      train_tgt_steps = sum(1 for _ in f)
    hparams.add_hparam("num_train_steps", min(train_src_steps, train_tgt_steps) * hparams.epochs)
    # Set encoder/decoder layers.
    hparams.add_hparam("num_encoder_layers", hparams.num_layers)
    hparams.add_hparam("num_decoder_layers", hparams.num_layers)
    # Set residual layers.
    num_encoder_residual_layers = 0
    num_decoder_residual_layers = 0
    if hparams.num_encoder_layers > 1:
      num_encoder_residual_layers = hparams.num_encoder_layers - 1
    if hparams.num_decoder_layers > 1:
      num_decoder_residual_layers = hparams.num_decoder_layers - 1
    # The first unidirectional layer (after the bi-directional layer) in
    # the GNMT encoder can't have residual connection due to the input is
    # the concatenation of fw_cell and bw_cell's outputs.
    # NOTE(review): this overwrite is unconditional here (it shadows the
    # num_encoder_layers - 1 assignment above), which implies this codebase
    # always uses the GNMT encoder — confirm before reusing elsewhere.
    num_encoder_residual_layers = hparams.num_encoder_layers - 2
    # Compatible for GNMT models.
    if hparams.num_encoder_layers == hparams.num_decoder_layers:
      num_decoder_residual_layers = num_encoder_residual_layers
    hparams.add_hparam("num_encoder_residual_layers", num_encoder_residual_layers)
    hparams.add_hparam("num_decoder_residual_layers", num_decoder_residual_layers)
    # Vocab: get vocab file names first.
    if hparams.vocab_prefix:
      src_vocab_file = hparams.vocab_prefix + "." + hparams.src
      tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt
    else:
      raise ValueError("hparams.vocab_prefix must be provided.")
    # Source vocab.
    src_vocab_size, src_vocab_file = vocab_utils.check_vocab(
        src_vocab_file, hparams.out_dir, sos=hparams.sos, eos=hparams.eos, unk=vocab_utils.UNK)
    # Target vocab.
    if hparams.share_vocab:
      utils.log("Using source vocab for target")
      tgt_vocab_file = src_vocab_file
      tgt_vocab_size = src_vocab_size
    else:
      tgt_vocab_size, tgt_vocab_file = vocab_utils.check_vocab(
          tgt_vocab_file, hparams.out_dir, sos=hparams.sos, eos=hparams.eos, unk=vocab_utils.UNK)
    hparams.add_hparam("src_vocab_size", src_vocab_size)
    hparams.add_hparam("tgt_vocab_size", tgt_vocab_size)
    hparams.add_hparam("src_vocab_file", src_vocab_file)
    hparams.add_hparam("tgt_vocab_file", tgt_vocab_file)
    # Pretrained embeddings: empty string means "train from scratch".
    hparams.add_hparam("src_embed_file", "")
    hparams.add_hparam("tgt_embed_file", "")
    if hparams.embed_prefix:
      src_embed_file = hparams.embed_prefix + "." + hparams.src
      tgt_embed_file = hparams.embed_prefix + "." + hparams.tgt
      if os.path.exists(src_embed_file):
        hparams.src_embed_file = src_embed_file
      if os.path.exists(tgt_embed_file):
        hparams.tgt_embed_file = tgt_embed_file
  # Save HParams
  utils.save_hparams(out_dir, hparams)
  return hparams
def _external_eval(model, global_step, sess, hparams, iterator, iterator_feed_dict, tgt_file, label, summary_writer, save_on_best):
  """External evaluation such as BLEU and ROUGE scores.

  Besides writing summaries, this variant maintains a top-k list of scores
  (hparams.top_score, sorted ascending) and their checkpoint names
  (hparams.top_score_name): when a score enters the top list, the model is
  checkpointed and the checkpoint that fell off the list is deleted.
  Returns the dict of metric name -> score.
  """
  out_dir = hparams.out_dir
  # Skip decode/summary work for the untrained step-0 model.
  decode = global_step > 0
  if decode:
    utils.print_out("# External evaluation, global step %d" % global_step)
  # Rewind the eval iterator, then decode into a per-label output file.
  sess.run(iterator.initializer, feed_dict=iterator_feed_dict)
  output = os.path.join(out_dir, "output_%s" % label)
  scores = nmt_utils.decode_and_evaluate(
      label, model, sess, output,
      ref_file=tgt_file,
      metrics=hparams.metrics,
      bpe_delimiter=hparams.bpe_delimiter,
      beam_width=hparams.beam_width,
      tgt_eos=hparams.eos,
      decode=decode)
  # Save on best metrics
  if decode:
    for metric in hparams.metrics:
      utils.add_summary(summary_writer, global_step, "%s_%s" % (label, metric), scores[metric])
      # metric: larger is better
      # if save_on_best and scores[metric] > getattr(hparams, "best_" + metric):
      # NOTE(review): 'w+' truncates on every call, so this file only ever
      # holds the latest step:score pair; the relative path also assumes the
      # process cwd — confirm both are intended.
      with open("./tmp/nmt_model/score", 'w+') as resu:
        resu.write(str(global_step) + ":" + str(scores[metric]) + "\n")
      # Enter the top-score bookkeeping only if we beat the current minimum
      # (top_score[0] is the smallest retained score).
      if save_on_best and scores[metric] > getattr(hparams, "top_score")[0]:
        new_top_score = []
        new_top_score_name = []
        # isTopScore doubles as "candidate not yet inserted" while merging.
        isTopScore = True
        # Merge the new score into the ascending (score, name) lists.
        for score, name in zip(getattr(hparams, "top_score"), getattr(hparams, "top_score_name")):
          if scores[metric] < score and isTopScore:
            new_top_score.append(scores[metric])
            new_top_score_name.append(str(global_step))
            isTopScore = False
          new_top_score.append(score)
          new_top_score_name.append(name)
        # Candidate larger than everything retained: append it at the end.
        if isTopScore:
          new_top_score.append(scores[metric])
          new_top_score_name.append(str(global_step))
        # Drop the smallest entry ([0]) to keep the list length fixed.
        setattr(hparams, "top_score", new_top_score[1:])
        setattr(hparams, "top_score_name", new_top_score_name[1:])
        # best_<metric> tracks the largest retained score (last element).
        setattr(hparams, "best_" + metric, new_top_score[len(new_top_score) - 1])
        model.saver.save(sess, os.path.join(getattr(hparams, "best_" + metric + "_dir"), "translate.ckpt"), global_step=model.global_step)
        # Remove the checkpoint files of the evicted entry (score 0 marks an
        # unused placeholder slot, so nothing to delete then).
        # NOTE(review): shell 'rm' via os.system is unquoted and POSIX-only —
        # consider glob + os.remove if this is ever hardened.
        if new_top_score[0] != 0:
          os.system('rm ' + getattr(hparams, "best_" + metric + "_dir") + '/translate.ckpt-' + new_top_score_name[0] + '*')
    utils.save_hparams(out_dir, hparams)
  return scores