def __sample_decode(self, model, global_step, sess, iterator, eval_data,
                    iterator_src_placeholder, iterator_batch_size_placeholder,
                    summary_writer):
    """Pick a sentence and decode."""
    decode_id = random.randint(0, len(eval_data) - 1)
    log.print_out(" # {}".format(decode_id))

    iterator_feed_dict = {
        iterator_src_placeholder: ["\t".join(eval_data[decode_id])],
        iterator_batch_size_placeholder: 1,
    }
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    ncm_outputs, attention_summary = model.decode(sess)

    if self.config.beam_width > 0:
        # get the top translation.
        ncm_outputs = ncm_outputs[0]

    translation = ncm_utils.get_translation(ncm_outputs, sent_id=0)
    log.print_out(" sources:")
    for t, src in enumerate(eval_data[decode_id][:-1]):
        log.print_out(" @{} {}".format(t + 1, src))
    log.print_out(" resp: {}".format(eval_data[decode_id][-1]))
    log.print_out(b" ncm: " + translation)

    # Summary
    if attention_summary is not None:
        summary_writer.add_summary(attention_summary, global_step)

def save(self):
    hparams_file = os.path.join(
        self.model_dir, "{}_config.yml".format(fs.file_name(self.config)))
    log.print_out(" saving config to %s" % hparams_file)

    to_dump_dict = dict(self.__dict__)
    if to_dump_dict['train_data']:
        to_dump_dict['train_data'] = os.path.abspath(
            to_dump_dict['train_data'])
    if to_dump_dict['test_data']:
        to_dump_dict['test_data'] = os.path.abspath(
            to_dump_dict['test_data'])
    if to_dump_dict['dev_data']:
        to_dump_dict['dev_data'] = os.path.abspath(
            to_dump_dict['dev_data'])
    if to_dump_dict['pretrain_data']:
        to_dump_dict['pretrain_data'] = os.path.abspath(
            to_dump_dict['pretrain_data'])
    else:
        to_dump_dict.pop('pretrain_data')
    if to_dump_dict['vocab_file']:
        to_dump_dict['vocab_file'] = os.path.abspath(
            to_dump_dict['vocab_file'])

    with codecs.getwriter("utf-8")(open(hparams_file, "wb")) as f:
        yaml.dump(to_dump_dict, f, default_flow_style=False)

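# For illustration only (hypothetical paths and a subset of keys): with
# default_flow_style=False, yaml.dump writes one "key: value" mapping entry
# per line, so the saved config looks roughly like
#
#   best_dev_ppl: 120.5
#   dev_data: /abs/path/to/dev.txt
#   train_data: /abs/path/to/train.txt
#   vocab_file: /abs/path/to/vocab.txt
#   ...
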
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    log.print_out(" loaded %s model parameters from %s, time %.2fs" %
                  (name, ckpt, time.time() - start_time))
    return model

def compute_perplexity(model, sess, name):
    """Compute perplexity of the output of the model.

    Args:
      model: model for which perplexity is computed.
      sess: tensorflow session to use.
      name: name of the batch.

    Returns:
      The perplexity of the eval outputs.
    """
    total_loss = 0
    total_predict_count = 0
    start_time = time.time()

    step = 0
    while True:
        try:
            loss, predict_count, batch_size = model.eval(sess)
            total_loss += loss * batch_size
            total_predict_count += predict_count
            step += 1
            if step % 500 == 0:
                ls = total_loss / total_predict_count
                ppl = misc.safe_exp(ls)
                log.print_out(" ## After %d steps, loss %.2f - ppl %.3f" %
                              (step, ls, ppl))
        except tf.errors.OutOfRangeError:
            break

    perplexity = misc.safe_exp(total_loss / total_predict_count)
    log.print_time(" eval %s: perplexity %.2f" % (name, perplexity),
                   start_time)
    return perplexity

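# Minimal usage sketch, mirroring the call site in test() below: the eval
# iterator must be initialized on the eval file before calling this helper,
# which then drains it to OutOfRangeError. Names here (`eval_model`,
# `eval_sess`, `dev_file`) are placeholders for the caller's own objects.
#
#   eval_sess.run(eval_model.iterator.initializer,
#                 feed_dict={eval_model.eval_file_placeholder: dev_file})
#   dev_ppl = compute_perplexity(loaded_eval_model, eval_sess, "dev")
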
def decode_and_evaluate(name, model, sess, out_file, ref_file, metrics,
                        beam_width, num_translations_per_input=1, decode=True):
    """Decode a test set and compute a score according to the evaluation task."""
    # Decode
    if decode:
        log.print_out(" decoding to output %s." % out_file)

        start_time = time.time()
        num_sentences = 0
        with codecs.getwriter("utf-8")(
                tf.gfile.GFile(out_file, mode="wb")) as trans_f:
            trans_f.write("")  # Write empty string to ensure file is created.

            num_translations_per_input = max(
                min(num_translations_per_input, beam_width), 1)

            i = 0
            while True:
                i += 1
                try:
                    if i % 1000 == 0:
                        log.print_out(" decoding step {}, num sentences {}".format(
                            i, num_sentences))
                    ncm_outputs, _ = model.decode(sess)

                    if beam_width == 0:
                        ncm_outputs = np.expand_dims(ncm_outputs, 0)

                    batch_size = ncm_outputs.shape[1]
                    num_sentences += batch_size

                    for sent_id in range(batch_size):
                        translations = [
                            get_translation(ncm_outputs[beam_id], sent_id)
                            for beam_id in range(num_translations_per_input)
                        ]
                        trans_f.write(b"\t".join(translations).decode("utf-8") + "\n")
                except tf.errors.OutOfRangeError:
                    log.print_time(
                        " done, num sentences %d, num translations per input %d" %
                        (num_sentences, num_translations_per_input),
                        start_time)
                    break

    # Evaluation
    evaluation_scores = {}
    # if ref_file and tf.gfile.Exists(out_file):
    #     for metric in metrics:
    #         score = evaluate(ref_file, out_file, metric)
    #         evaluation_scores[metric] = score
    #         log.print_out(" %s %s: %.1f" % (metric, name, score))

    return evaluation_scores

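# Note: with the metric loop above commented out, this helper always returns
# an empty dict. Callers that index the result per metric (e.g. _external_eval
# later in this file reads scores[metric]) would raise a KeyError until the
# evaluation block is restored.
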
def create_or_load_model(model, model_dir, session, name):
    """Create a model and initialize or load parameters in session."""
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt:
        model = load_model(model, latest_ckpt, session, name)
    else:
        start_time = time.time()
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        log.print_out(" created %s model with fresh parameters, time %.2fs" %
                      (name, time.time() - start_time))

    global_step = model.global_step.eval(session=session)
    return model, global_step

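# Typical call site (as in train() below): the variables live in the model's
# own graph, so that graph must be the default when this runs.
#
#   with train_model.graph.as_default():
#       loaded_train_model, global_step = create_or_load_model(
#           train_model.model, model_dir, train_sess, "train")
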
def init_embeddings(self, vocab_file, vocab_pkl, embedding_type,
                    embedding_size, dtype=tf.float32, scope=None):
    vocab_list, vocab_size = vocab.load_vocab(vocab_file)
    embed_dict = fs.load_obj(vocab_pkl)

    with tf.variable_scope(scope or "embeddings", dtype=dtype):
        sqrt3 = math.sqrt(3)
        if embedding_type == 'random':
            log.print_out('# Using random embedding.')
            # [vocab_size, embedding_size]: a flat [vocab_size * embedding_size]
            # vector here would break tf.nn.embedding_lookup downstream.
            self.embeddings = tf.get_variable(
                "emb_random_mat",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(-sqrt3, sqrt3, dtype))
        else:
            log.print_out('# Using pretrained embedding: %s.' % embedding_type)

            # Words missing from the pretrained dictionary get a random vector
            # and remain trainable; words with pretrained vectors stay constant.
            trainable_words, const_words = [], []
            for word in vocab_list:
                if word not in embed_dict:
                    embed_dict[word] = np.random.normal(
                        0, sqrt3, size=(1, embedding_size)).tolist()[0]
                    trainable_words.append(word)
                else:
                    const_words.append(word)

            emb_mat = tf.constant(
                np.array([embed_dict[w] for w in vocab_list],
                         dtype=dtype.as_numpy_dtype()))
            # Note: the slice below assumes the trainable (out-of-embedding)
            # words occupy the first len(trainable_words) rows of vocab_list.
            emb_const_mat = tf.slice(emb_mat,
                                     [len(trainable_words), 0], [-1, -1])
            with tf.variable_scope(scope or "pretrained_embeddings",
                                   dtype=dtype):
                emb_reserved_mat = tf.get_variable(
                    "emb_reserved_mat",
                    shape=[len(trainable_words), embedding_size],
                    dtype=dtype)
                self.embeddings = tf.concat([emb_reserved_mat, emb_const_mat], 0)

def interactive(self):
    from prompt_toolkit import prompt
    from prompt_toolkit.history import FileHistory

    from topic_model.lda import TopicInferer, DEFAULT_SEPARATOR
    from util.nlp import NLPToolkit

    nlp_toolkit = NLPToolkit()
    self._pre_model_creation()

    topic_inferer = TopicInferer(self.config.lda_model_dir)
    infer_model = taware_helper.create_infer_model(
        taware_model.TopicAwareSeq2SeqModel, self.config)
    config_proto = models.model_helper.get_config_proto(self.config.log_device)

    with tf.Session(graph=infer_model.graph, config=config_proto) as sess:
        ckpt = tf.train.latest_checkpoint(self.config.model_dir)
        loaded_infer_model = model_helper.load_model(
            infer_model.model, ckpt, sess, "infer")

        log.print_out("# Start decoding")

        sentence = prompt(
            ">>> ",
            history=FileHistory(os.path.join(self.config.model_dir,
                                             ".chat_history"))).strip()
        while sentence:
            utterance = ' '.join(nlp_toolkit.tokenize(sentence)).lower()
            topic_words = topic_inferer.from_collection(
                [utterance],
                dialogue_as_doc=True,
                words_per_topic=self.config.topic_words_per_utterance)
            iterator_feed_dict = {
                infer_model.src_placeholder:
                    [utterance + DEFAULT_SEPARATOR + " ".join(topic_words)],
                infer_model.batch_size_placeholder: 1,
            }
            sess.run(infer_model.iterator.initializer,
                     feed_dict=iterator_feed_dict)
            output, _ = loaded_infer_model.decode(sess)
            if self.config.beam_width > 0:
                # get the top translation.
                output = output[0]
            resp = ncm_utils.get_translation(output, sent_id=0)
            log.print_out(resp + b"\n")

            sentence = prompt(
                ">>> ",
                history=FileHistory(os.path.join(self.config.model_dir,
                                                 ".chat_history"))).strip()

def interactive(self):
    from prompt_toolkit import prompt
    from prompt_toolkit.history import FileHistory

    from util.nlp import NLPToolkit

    nlp_toolkit = NLPToolkit()
    infer_model = vanilla_helper.create_infer_model(self.config)
    config_proto = model_helper.get_config_proto(self.config.log_device)

    with tf.Session(graph=infer_model.graph, config=config_proto) as sess:
        ckpt = tf.train.latest_checkpoint(self.config.model_dir)
        loaded_infer_model = model_helper.load_model(
            infer_model.model, ckpt, sess, "infer")

        log.print_out("# Start decoding")

        sentence = prompt(
            ">>> ",
            history=FileHistory(os.path.join(self.config.model_dir,
                                             ".chat_history"))).strip()
        while sentence:
            utterance = ' '.join(nlp_toolkit.tokenize(sentence)).lower()
            iterator_feed_dict = {
                infer_model.src_placeholder: [utterance],
                infer_model.batch_size_placeholder: 1,
            }
            sess.run(infer_model.iterator.initializer,
                     feed_dict=iterator_feed_dict)
            output, _ = loaded_infer_model.decode(sess)
            if self.config.beam_width > 0:
                # get the top translation.
                output = output[0]
            resp = ncm_utils.get_translation(output, sent_id=0)
            log.print_out(resp + b"\n")

            sentence = prompt(
                ">>> ",
                history=FileHistory(os.path.join(self.config.model_dir,
                                                 ".chat_history"))).strip()

def check_stats(self, stats, global_step, steps_per_stats, log_f):
    """Print statistics and also check for overflow."""
    # Print statistics for the previous epoch.
    avg_step_time = stats["step_time"] / steps_per_stats
    avg_grad_norm = stats["grad_norm"] / steps_per_stats
    train_ppl = misc.safe_exp(stats["loss"] / stats["predict_count"])
    speed = stats["total_count"] / (1000 * stats["step_time"])
    log.print_out(
        " global step %d lr %g "
        "step-time %.2fs wps %.2fK ppl %.2f gN %.2f" %
        (global_step, stats["learning_rate"],
         avg_step_time, speed, train_ppl, avg_grad_norm),
        log_f)

    # Check for overflow
    is_overflow = False
    if math.isnan(train_ppl) or math.isinf(train_ppl) or train_ppl > 1e20:
        log.print_out(" step %d overflow, stop early" % global_step, log_f)
        is_overflow = True

    return train_ppl, speed, is_overflow

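# Worked example of the two derived quantities above (illustrative numbers):
# with stats["loss"] = 1.3e4 accumulated over stats["predict_count"] = 2e3
# predicted tokens, the per-token loss is 6.5 and train_ppl = exp(6.5) ~ 665.
# With stats["total_count"] = 512e3 tokens processed in stats["step_time"]
# = 256 s, speed = 512e3 / (1000 * 256) = 2.00, printed as "wps 2.00K".
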
def _get_sampling_probability(self, hparams, global_step, sampling_probability):
    if hparams.scheduled_sampling_decay_scheme == "luong10":
        start_decay_step = int(hparams.num_train_steps / 2)
        remain_steps = hparams.num_train_steps - start_decay_step
        decay_steps = int(remain_steps / 10)  # decay 10 times
        decay_factor = 0.5
    elif hparams.scheduled_sampling_decay_scheme == "luong234":
        start_decay_step = int(hparams.num_train_steps * 2 / 3)
        remain_steps = hparams.num_train_steps - start_decay_step
        decay_steps = int(remain_steps / 4)  # decay 4 times
        decay_factor = 0.5
    elif hparams.scheduled_sampling_decay_scheme == "manual":
        start_decay_step = hparams.start_decay_step
        decay_steps = hparams.decay_steps
        decay_factor = hparams.decay_factor
    else:
        start_decay_step = hparams.num_train_steps
        decay_steps = 0
        decay_factor = 1.0

    log.print_out(
        " scheduled sampling decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
        "decay_factor %g" %
        (hparams.scheduled_sampling_decay_scheme, start_decay_step,
         decay_steps, decay_factor))

    eff_global_step = global_step
    if hparams.is_pretrain_enabled():
        eff_global_step -= hparams.num_pretrain_steps

    return tf.cond(
        eff_global_step < start_decay_step,
        lambda: sampling_probability,
        lambda: tf.train.exponential_decay(
            sampling_probability,
            (eff_global_step - start_decay_step),
            decay_steps, decay_factor, staircase=True),
        name="sampling_prob_decay_cond")

def _external_eval(self, model, global_step, sess, iterator,
                   iterator_feed_dict, eval_file, label, summary_writer,
                   save_on_best):
    """External evaluation such as BLEU and ROUGE scores."""
    out_dir = self.config.model_dir
    decode = global_step > 0
    if decode:
        log.print_out("# External evaluation, global step %d" % global_step)

    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    output = os.path.join(out_dir, "output_%s" % label)
    scores = eval_metric.decode_and_evaluate(
        label,
        model,
        sess,
        output,
        ref_file=eval_file,
        metrics=self.config.metrics,
        beam_width=self.config.beam_width,
        decode=decode)

    # Save on best metrics
    if decode:
        for metric in self.config.metrics:
            log.add_summary(summary_writer, global_step,
                            "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(
                    self.config, "best_" + metric):
                setattr(self.config, "best_" + metric, scores[metric])
                model.saver.save(
                    sess,
                    os.path.join(
                        getattr(self.config, "best_" + metric + "_dir"),
                        "vanilla.ckpt"),
                    global_step=model.global_step)
        # self.config.save(out_dir)

    return scores

def _get_learning_rate_decay(self, hparams, global_step, learning_rate):
    """Get learning rate decay."""
    if hparams.learning_rate_decay_scheme == "luong10":
        start_decay_step = int(hparams.num_train_steps / 2)
        remain_steps = hparams.num_train_steps - start_decay_step
        decay_steps = int(remain_steps / 10)  # decay 10 times
        decay_factor = 0.5
    elif hparams.learning_rate_decay_scheme == "luong234":
        start_decay_step = int(hparams.num_train_steps * 2 / 3)
        remain_steps = hparams.num_train_steps - start_decay_step
        decay_steps = int(remain_steps / 4)  # decay 4 times
        decay_factor = 0.5
    elif hparams.learning_rate_decay_scheme == "manual":
        start_decay_step = hparams.start_decay_step
        decay_steps = hparams.decay_steps
        decay_factor = hparams.decay_factor
    else:
        start_decay_step = hparams.num_train_steps
        decay_steps = 0
        decay_factor = 1.0

    log.print_out(
        " learning rate decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
        "decay_factor %g" %
        (hparams.learning_rate_decay_scheme, start_decay_step,
         decay_steps, decay_factor))

    eff_global_step = global_step
    if hparams.is_pretrain_enabled():
        eff_global_step -= hparams.num_pretrain_steps

    return tf.cond(
        eff_global_step < start_decay_step,
        lambda: learning_rate,
        lambda: tf.train.exponential_decay(
            learning_rate,
            (eff_global_step - start_decay_step),
            decay_steps, decay_factor, staircase=True),
        name="learning_rate_decay_cond")

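# Worked example of the "luong10" scheme above, assuming num_train_steps =
# 10000: decay starts at step 5000 and, with staircase=True, the rate is
# halved every int(5000 / 10) = 500 steps thereafter, so after all 10 decays
# the effective rate is learning_rate * 0.5**10 (about 1/1024 of the start).
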
def _sample_decode(self, model, global_step, sess, src_placeholder,
                   batch_size_placeholder, eval_data, summary_writer):
    """Pick a sentence and decode."""
    # np.random.randint's upper bound is exclusive, so use len(eval_data)
    # to keep the last eval example reachable.
    decode_ids = np.random.randint(low=0, high=len(eval_data), size=1)

    sample_data = []
    for decode_id in decode_ids:
        sample_data.append(eval_data[decode_id][0])

    iterator_feed_dict = {
        src_placeholder: sample_data,
        batch_size_placeholder: len(decode_ids),
    }
    sess.run(model.iterator.initializer, feed_dict=iterator_feed_dict)
    ncm_outputs, infer_summary = model.decode(sess)

    for i, decode_id in enumerate(decode_ids):
        log.print_out(" # %d" % decode_id)

        output = ncm_outputs[i]
        if self.config.beam_width > 0 and self._consider_beam():
            # get the top translation.
            output = output[0]

        translation = ncm_utils.get_translation(output, sent_id=0)
        log.print_out(" sources:")
        for t, src in enumerate(eval_data[decode_id][0].split('\t')):
            log.print_out(" @%d %s" % (t + 1, src))
        log.print_out(" resp: %s" % eval_data[decode_id][1])
        log.print_out(b" generated: " + translation)

    # Summary
    if infer_summary is not None:
        summary_writer.add_summary(infer_summary, global_step)

def interactive(self, scope=None):
    import platform

    from prompt_toolkit import prompt
    from prompt_toolkit.history import FileHistory

    from topic_model.lda import TopicInferer, DEFAULT_SEPARATOR
    from util.nlp import NLPToolkit

    nlp_toolkit = NLPToolkit()
    __os = platform.system()

    self.config.infer_batch_size = 1
    self._pre_model_creation()

    if self.config.lda_model_dir is not None:
        topic_inferer = TopicInferer(self.config.lda_model_dir)
    else:
        topic_inferer = None

    infer_model = self._get_model_helper().create_infer_model(self.config,
                                                              scope)

    with tf.Session(
            config=model_helper.get_config_proto(self.config.log_device),
            graph=infer_model.graph) as sess:
        latest_ckpt = tf.train.latest_checkpoint(self.config.model_dir)
        loaded_infer_model = model_helper.load_model(
            infer_model.model, latest_ckpt, sess, "infer")

        log.print_out("# Start decoding")

        if __os == 'Windows':
            sentence = input("> ").strip()
        else:
            sentence = prompt(
                ">>> ",
                history=FileHistory(os.path.join(self.config.model_dir,
                                                 ".chat_history"))).strip()

        conversation = [vocab.EOS] * (self.config.num_turns - 1)

        while sentence:
            current_utterance = ' '.join(nlp_toolkit.tokenize(sentence)).lower()
            conversation.append(current_utterance)
            conversation.pop(0)

            feedable_context = "\t".join(conversation)
            if topic_inferer is None:
                iterator_feed_dict = {
                    infer_model.src_placeholder: [feedable_context],
                    infer_model.batch_size_placeholder: 1,
                }
            else:
                topic_words = topic_inferer.from_collection(
                    [feedable_context],
                    dialogue_as_doc=True,
                    words_per_topic=self.config.topic_words_per_utterance)
                iterator_feed_dict = {
                    infer_model.src_placeholder:
                        [feedable_context + DEFAULT_SEPARATOR + " ".join(topic_words)],
                    infer_model.batch_size_placeholder: 1,
                }

            sess.run(infer_model.iterator.initializer,
                     feed_dict=iterator_feed_dict)
            output, infer_summary = loaded_infer_model.decode(sess)
            if self.config.beam_width > 0 and self._consider_beam():
                # get the top translation.
                output = output[0]
            resp = ncm_utils.get_translation(output, sent_id=0)
            log.print_out(resp + b"\n")

            if __os == 'Windows':
                sentence = input("> ").strip()
            else:
                sentence = prompt(
                    ">>> ",
                    history=FileHistory(os.path.join(self.config.model_dir,
                                                     ".chat_history"))).strip()

    print("Bye!!!")

def train(self, target_session="", scope=None):
    out_dir = self.config.model_dir
    model_dir = out_dir

    num_train_steps = self.config.num_train_steps
    steps_per_stats = self.config.steps_per_stats
    # steps_per_external_eval = self.config.steps_per_external_eval
    steps_per_eval = 20 * steps_per_stats
    # if not steps_per_external_eval:
    #     steps_per_external_eval = 5 * steps_per_eval

    self._pre_model_creation()

    train_model = taware_helper.create_train_model(
        taware_model.TopicAwareSeq2SeqModel, self.config, scope)
    eval_model = taware_helper.create_eval_model(
        taware_model.TopicAwareSeq2SeqModel, self.config, scope)
    infer_model = taware_helper.create_infer_model(
        taware_model.TopicAwareSeq2SeqModel, self.config, scope)

    # Preload data for sample decoding.
    dev_file = self.config.dev_data
    eval_data = self._load_data(dev_file, include_target=True)

    summary_name = "train_log"

    # Log and output files
    log_file = os.path.join(out_dir, "log_%d" % time.time())
    log_f = tf.gfile.GFile(log_file, mode="a")
    log.print_out("# log_file=%s" % log_file, log_f)

    avg_step_time = 0.0

    # TensorFlow model
    config_proto = models.model_helper.get_config_proto(self.config.log_device)

    train_sess = tf.Session(
        target=target_session, config=config_proto, graph=train_model.graph)
    eval_sess = tf.Session(
        target=target_session, config=config_proto, graph=eval_model.graph)
    infer_sess = tf.Session(
        target=target_session, config=config_proto, graph=infer_model.graph)

    with train_model.graph.as_default():
        loaded_train_model, global_step = model_helper.create_or_load_model(
            train_model.model, model_dir, train_sess, "train")

    # Summary writer
    summary_writer = tf.summary.FileWriter(
        os.path.join(out_dir, summary_name), train_model.graph)

    # First evaluation
    # self.run_full_eval(
    #     model_dir, infer_model, infer_sess,
    #     eval_model, eval_sess, summary_writer, eval_data)

    last_stats_step = global_step
    last_eval_step = global_step
    # last_external_eval_step = global_step
    patience = self.config.patience

    # This is the training loop.
    stats = self.init_stats()
    speed, train_ppl = 0.0, 0.0
    start_train_time = time.time()

    log.print_out(
        "# Start step %d, epoch %d, lr %g, %s" %
        (global_step, self.config.epoch,
         loaded_train_model.learning_rate.eval(session=train_sess),
         time.ctime()),
        log_f)

    self.config.save()
    log.print_out("# Configs saved")

    # Initialize all of the iterators
    skip_count = self.config.batch_size * self.config.epoch_step
    log.print_out("# Init train iterator for %d steps, skipping %d elements" %
                  (self.config.num_train_steps, skip_count))
    train_sess.run(
        train_model.iterator.initializer,
        feed_dict={train_model.skip_count_placeholder: skip_count})

    while self.config.epoch < self.config.num_train_epochs and patience > 0:
        ### Run a step ###
        start_time = time.time()
        try:
            step_result = loaded_train_model.train(train_sess)
            self.config.epoch_step += 1
        except tf.errors.OutOfRangeError:
            # Finished going through the training dataset. Go to next epoch.
            sw = Stopwatch()
            log.print_out(
                "# Finished an epoch, step %d. Perform external evaluation" %
                global_step)
            self.run_sample_decode(infer_model, infer_sess,
                                   model_dir, summary_writer, eval_data)
            log.print_out(
                "## Done epoch %d in %d steps. step %d @ eval time: %ds" %
                (self.config.epoch, self.config.epoch_step, global_step,
                 sw.elapsed()))

            self.config.epoch += 1
            self.config.epoch_step = 0
            self.config.save()

            train_sess.run(
                train_model.iterator.initializer,
                feed_dict={train_model.skip_count_placeholder: 0})
            continue

        # Write step summary and accumulate statistics
        global_step = self.update_stats(stats, summary_writer, start_time,
                                        step_result)

        # Once in a while, we print statistics.
        if global_step - last_stats_step >= steps_per_stats:
            last_stats_step = global_step
            train_ppl, speed, is_overflow = self.check_stats(
                stats, global_step, steps_per_stats, log_f)
            if is_overflow:
                break
            # Reset statistics
            stats = self.init_stats()

        if global_step - last_eval_step >= steps_per_eval:
            last_eval_step = global_step

            log.print_out("# Save eval, global step %d" % global_step)
            log.add_summary(summary_writer, global_step, "train_ppl", train_ppl)

            # Save checkpoint
            loaded_train_model.saver.save(
                train_sess, self.config.checkpoint_file,
                global_step=global_step)

            # Evaluate on dev
            self.run_sample_decode(infer_model, infer_sess,
                                   model_dir, summary_writer, eval_data)
            dev_ppl, _ = self.run_internal_eval(eval_model, eval_sess,
                                                model_dir, summary_writer,
                                                use_test_set=False)
            if dev_ppl < self.config.best_dev_ppl:
                self.config.best_dev_ppl = dev_ppl
                patience = self.config.patience
                log.print_out(
                    ' ** Best model thus far, ep {}|{} dev_ppl {:.3f}'.format(
                        self.config.epoch, self.config.epoch_step, dev_ppl))
            elif dev_ppl > self.config.degrade_threshold * self.config.best_dev_ppl:
                patience -= 1
                # Log the decremented counter, not the configured constant.
                log.print_out(
                    ' worsened, ep {}|{} patience {} best_dev_ppl {:.3f}'.format(
                        self.config.epoch, self.config.epoch_step,
                        patience, self.config.best_dev_ppl))

            # Save config parameters
            self.config.save()

        # if global_step - last_external_eval_step >= steps_per_external_eval:
        #     last_external_eval_step = global_step
        #
        #     # Save checkpoint
        #     loaded_train_model.saver.save(
        #         train_sess,
        #         self.config.checkpoint_file,
        #         global_step=global_step)
        #     self.run_sample_decode(infer_model, infer_sess,
        #                            model_dir, summary_writer, eval_data)
        #     dev_scores, test_scores, _ = self.run_external_eval(
        #         infer_model, infer_sess, model_dir, summary_writer)

    # Done training
    loaded_train_model.saver.save(
        train_sess, self.config.checkpoint_file, global_step=global_step)

    # result_summary, _, dev_scores, test_scores, dev_ppl, test_ppl = self.run_full_eval(
    #     model_dir, infer_model, infer_sess,
    #     eval_model, eval_sess,
    #     summary_writer, eval_data)
    dev_scores, test_scores, dev_ppl, test_ppl = None, None, None, None
    result_summary = ""

    log.print_out(
        "# Final, step %d lr %g "
        "step-time %.2f wps %.2fK ppl %.2f, %s, %s" %
        (global_step,
         loaded_train_model.learning_rate.eval(session=train_sess),
         avg_step_time, speed, train_ppl, result_summary, time.ctime()),
        log_f)
    log.print_time("# Done training!", start_train_time)

    summary_writer.close()

    # log.print_out("# Start evaluating saved best models.")
    # for metric in self.config.metrics:
    #     best_model_dir = getattr(self.config, "best_" + metric + "_dir")
    #     summary_writer = tf.summary.FileWriter(
    #         os.path.join(best_model_dir, summary_name), infer_model.graph)
    #     result_summary, best_global_step, _, _, _, _ = self.run_full_eval(
    #         best_model_dir, infer_model, infer_sess, eval_model, eval_sess,
    #         summary_writer, eval_data)
    #     log.print_out("# Best %s, step %d "
    #                   "step-time %.2f wps %.2fK, %s, %s" %
    #                   (metric, best_global_step, avg_step_time, speed,
    #                    result_summary, time.ctime()), log_f)
    #     summary_writer.close()

    return (dev_scores, test_scores, dev_ppl, test_ppl, global_step)

def test(self):
    start_test_time = time.time()

    assert self.config.n_responses >= 1

    if self.config.beam_width > 0:
        assert self.config.n_responses <= self.config.beam_width
    else:
        assert self.config.n_responses == 1

    self._pre_model_creation()

    infer_model = taware_helper.create_infer_model(
        taware_model.TopicAwareSeq2SeqModel, self.config)
    config_proto = models.model_helper.get_config_proto(self.config.log_device)
    ckpt = tf.train.latest_checkpoint(self.config.get_infer_model_dir())

    with tf.Session(graph=infer_model.graph, config=config_proto) as infer_sess:
        loaded_infer_model = model_helper.load_model(
            infer_model.model, ckpt, infer_sess, "infer")

        log.print_out("# Start decoding")
        log.print_out(" beam width: {}".format(self.config.beam_width))
        log.print_out(" length penalty: {}".format(
            self.config.length_penalty_weight))
        log.print_out(" sampling temperature: {}".format(
            self.config.sampling_temperature))
        log.print_out(" num responses per test instance: {}".format(
            self.config.n_responses))

        feed_dict = {
            infer_model.src_placeholder:
                self._load_data(self.config.test_data),
            infer_model.batch_size_placeholder: self.config.infer_batch_size,
        }
        infer_sess.run(infer_model.iterator.initializer, feed_dict=feed_dict)

        if self.config.sampling_temperature > 0:
            label = "%s_t%.1f" % (
                fs.file_name(self.config.test_data),
                self.config.sampling_temperature)
        else:
            label = "%s_bw%d_lp%.1f" % (
                fs.file_name(self.config.test_data),
                self.config.beam_width,
                self.config.length_penalty_weight)

        out_file = os.path.join(self.config.model_dir,
                                "output_{}".format(label))

        eval_metric.decode_and_evaluate(
            "test",
            loaded_infer_model,
            infer_sess,
            out_file,
            ref_file=None,
            metrics=self.config.metrics,
            beam_width=self.config.beam_width,
            num_translations_per_input=self.config.n_responses)

    log.print_time("# Decoding done", start_test_time)

    eval_model = taware_helper.create_eval_model(
        taware_model.TopicAwareSeq2SeqModel, self.config)
    with tf.Session(
            config=models.model_helper.get_config_proto(self.config.log_device),
            graph=eval_model.graph) as eval_sess:
        loaded_eval_model = model_helper.load_model(
            eval_model.model, ckpt, eval_sess, "eval")

        log.print_out("# Compute Perplexity")

        feed_dict = {
            eval_model.eval_file_placeholder: self.config.test_data
        }
        eval_sess.run(eval_model.iterator.initializer, feed_dict=feed_dict)
        model_helper.compute_perplexity(loaded_eval_model, eval_sess, "test")

    log.print_time("# Test finished", start_test_time)

def pretrain(self, pretrain_sess, pretrain_model, log_f):
    pretrain_sess.run(tf.global_variables_initializer())
    pretrain_sess.run(tf.tables_initializer())
    global_step = pretrain_model.model.global_step.eval(session=pretrain_sess)

    num_pretrain_steps = self.config.num_pretrain_steps
    epoch, epoch_step = 0, 0

    summary_name = "pretrain_log"
    summary_writer = tf.summary.FileWriter(
        os.path.join(self.config.model_dir, summary_name),
        pretrain_model.graph)

    last_stats_step = global_step

    # This is the training loop.
    stats = self.init_stats()
    speed, train_ppl = 0.0, 0.0

    log.print_out(
        "%% Pretraining starts for %d steps -> step %d, lr %g, %s" %
        (self.config.num_pretrain_steps, global_step,
         pretrain_model.model.learning_rate.eval(session=pretrain_sess),
         time.ctime()),
        log_f)

    pretrain_sess.run(
        pretrain_model.iterator.initializer,
        feed_dict={pretrain_model.skip_count_placeholder: 0})

    # pretrain_sw = Stopwatch()
    while global_step < num_pretrain_steps:
        ### Run a step ###
        start_time = time.time()
        try:
            step_result = pretrain_model.model.train(pretrain_sess)
            epoch_step += 1
        except tf.errors.OutOfRangeError:
            epoch_step = 0
            epoch += 1
            log.print_out(
                "%% Pretraining: finished epoch %d, step %d." %
                (epoch, global_step))
            pretrain_sess.run(
                pretrain_model.iterator.initializer,
                feed_dict={pretrain_model.skip_count_placeholder: 0})
            continue

        # Write step summary and accumulate statistics
        global_step = self.update_stats(stats, summary_writer, start_time,
                                        step_result)

        # Once in a while, we print statistics.
        if global_step - last_stats_step >= self.config.steps_per_stats:
            last_stats_step = global_step
            train_ppl, speed, is_overflow = self.check_stats(
                stats, global_step, self.config.steps_per_stats, log_f)
            if is_overflow:
                break
            # Reset statistics
            stats = self.init_stats()

    log.print_out("%% Pretraining finished at step %d" % global_step)
    pretrain_model.model.saver.save(
        pretrain_sess, self.config.checkpoint_file, global_step=global_step)

def __build_encoder(self, params, keep_prob, device):
    encoder_cell = {}
    if params.encoder_type == "uni":
        log.print_out(" build unidirectional encoder")
        encoder_cell['uni'] = rnn_factory.create_cell(
            params.cell_type,
            params.hidden_units,
            num_layers=1,
            input_keep_prob=keep_prob,
            devices=[device])
    elif params.encoder_type == "bi":
        log.print_out(" build bidirectional encoder")
        encoder_cell['fw'] = rnn_factory.create_cell(
            params.cell_type,
            params.hidden_units,
            num_layers=1,
            input_keep_prob=keep_prob,
            devices=[device])
        encoder_cell['bw'] = rnn_factory.create_cell(
            params.cell_type,
            params.hidden_units,
            num_layers=1,
            input_keep_prob=keep_prob,
            devices=[device])
    else:
        raise ValueError("Unknown encoder type: '%s'" % params.encoder_type)

    encoding_devices = self.round_robin.assign(self.num_turns)

    encoder_results = []
    for t in range(self.num_turns):
        scope_name = ("encoder%d" % t
                      if params.disable_encoder_var_sharing else "encoder")
        with variable_scope.variable_scope(scope_name) as scope:
            if t > 0 and not params.disable_encoder_var_sharing:
                scope.reuse_variables()
            with tf.device(encoding_devices[t]):
                encoder_embedded_inputs = tf.nn.embedding_lookup(
                    params=self.embeddings, ids=self.iterator.sources[t])

                if params.encoder_type == "bi":
                    encoder_outputs, states = tf.nn.bidirectional_dynamic_rnn(
                        encoder_cell['fw'],
                        encoder_cell['bw'],
                        inputs=encoder_embedded_inputs,
                        dtype=self.dtype,
                        sequence_length=self.iterator.source_sequence_lengths[t],
                        swap_memory=True)
                    fw_state, bw_state = states
                    encoder_state = tf.concat([fw_state, bw_state], axis=1)
                else:
                    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                        encoder_cell['uni'],
                        inputs=encoder_embedded_inputs,
                        sequence_length=self.iterator.source_sequence_lengths[t],
                        dtype=self.dtype,
                        swap_memory=True,
                        scope=scope)

                # msg_attn_mechanism = attention_helper.create_attention_mechanism(
                #     params.attention_type,
                #     params.hidden_units,
                #     encoder_outputs,
                #     self.iterator.source_sequence_lengths[t])

                encoder_results.append((encoder_outputs, encoder_state))

    return encoder_results

def __build_encoder(self, params, keep_prob):
    with variable_scope.variable_scope("encoder"):
        iterator = self.iterator

        encoder_embedded_inputs = tf.nn.embedding_lookup(
            params=self.embeddings, ids=iterator.sources)

        if params.encoder_type == "uni":
            log.print_out(" build unidirectional encoder num_layers = %d" %
                          params.num_layers)
            cell = rnn_factory.create_cell(
                params.cell_type,
                params.hidden_units,
                self.num_layers,
                input_keep_prob=keep_prob,
                devices=self.round_robin.assign(self.num_layers))
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                cell,
                inputs=encoder_embedded_inputs,
                sequence_length=iterator.source_sequence_lengths,
                dtype=self.dtype,
                swap_memory=True)
            return encoder_outputs, encoder_state
        elif params.encoder_type == "bi":
            num_bi_layers = int(params.num_layers / 2)
            log.print_out(" build bidirectional encoder num_layers = %d" %
                          params.num_layers)
            fw_cell = rnn_factory.create_cell(
                params.cell_type,
                params.hidden_units,
                num_bi_layers,
                input_keep_prob=keep_prob,
                devices=self.round_robin.assign(num_bi_layers))
            bw_cell = rnn_factory.create_cell(
                params.cell_type,
                params.hidden_units,
                num_bi_layers,
                input_keep_prob=keep_prob,
                devices=self.round_robin.assign(
                    num_bi_layers,
                    self.device_manager.num_available_gpus() - 1))

            encoder_outputs, bi_state = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                encoder_embedded_inputs,
                dtype=self.dtype,
                sequence_length=iterator.source_sequence_lengths,
                swap_memory=True)

            if num_bi_layers == 1:
                encoder_state = bi_state
            else:
                # alternatively concat forward and backward states
                encoder_state = []
                for layer_id in range(num_bi_layers):
                    encoder_state.append(bi_state[0][layer_id])  # forward
                    encoder_state.append(bi_state[1][layer_id])  # backward
                encoder_state = tuple(encoder_state)

            return encoder_outputs, encoder_state
        else:
            raise ValueError("Unknown encoder type: %s" % params.encoder_type)

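# Note on the bidirectional state layout above: for num_bi_layers > 1 the
# returned state tuple alternates directions, i.e.
# (fw_layer0, bw_layer0, fw_layer1, bw_layer1, ...), so the decoder sees
# 2 * num_bi_layers state entries of hidden_units each rather than
# fw/bw vectors concatenated along the feature axis.
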
def test(self):
    start_test_time = time.time()

    assert self.config.n_responses >= 1

    if self.config.beam_width > 0:
        assert self.config.n_responses <= self.config.beam_width
    else:
        assert self.config.n_responses == 1

    self._pre_model_creation()

    infer_model = self._get_model_helper().create_infer_model(self.config)
    latest_ckpt = tf.train.latest_checkpoint(self.config.get_infer_model_dir())

    with tf.Session(
            config=model_helper.get_config_proto(self.config.log_device),
            graph=infer_model.graph) as infer_sess:
        loaded_infer_model = model_helper.load_model(
            infer_model.model, latest_ckpt, infer_sess, "infer")

        log.print_out("# Start decoding")
        log.print_out(" beam width: {}".format(self.config.beam_width))
        log.print_out(" length penalty: {}".format(
            self.config.length_penalty_weight))
        log.print_out(" sampling temperature: {}".format(
            self.config.sampling_temperature))
        log.print_out(" num responses per test instance: {}".format(
            self.config.n_responses))

        feed_dict = {
            infer_model.src_placeholder:
                self._load_data(self.config.test_data),
            infer_model.batch_size_placeholder: self.config.infer_batch_size,
        }

        if self.config.sampling_temperature > 0:
            label = "%s_t%.1f" % (
                fs.file_name(self.config.test_data),
                self.config.sampling_temperature)
        else:
            label = "%s_bw%d_lp%.1f" % (
                fs.file_name(self.config.test_data),
                self.config.beam_width,
                self.config.length_penalty_weight)

        self._decode_and_evaluate(
            loaded_infer_model, infer_sess, feed_dict,
            label=label,
            num_responses_per_input=self.config.n_responses)

    log.print_time("# Decoding done", start_test_time)

    eval_model = self._get_model_helper().create_eval_model(self.config)
    with tf.Session(
            config=model_helper.get_config_proto(self.config.log_device),
            graph=eval_model.graph) as eval_sess:
        loaded_eval_model = model_helper.load_model(
            eval_model.model, latest_ckpt, eval_sess, "eval")

        log.print_out("# Compute Perplexity")

        dev_eval_iterator_feed_dict = {
            eval_model.eval_file_placeholder: self.config.test_data
        }
        eval_sess.run(eval_model.iterator.initializer,
                      feed_dict=dev_eval_iterator_feed_dict)
        model_helper.compute_perplexity(loaded_eval_model, eval_sess, "test")

    log.print_time("# Test finished", start_test_time)

def __init__(self, mode, num_turns, iterator, params,
             rev_vocab_table=None, scope=None, log_trainables=True):
    log.print_out("# creating %s graph ..." % mode)
    self.dtype = tf.float32

    self.mode = mode
    self.embedding_size = params.embedding_size
    self.num_turns = num_turns - 1

    self.device_manager = DeviceManager()
    self.round_robin = RoundRobin(self.device_manager)
    self.num_gpus = min(params.num_gpus,
                        self.device_manager.num_available_gpus())
    log.print_out("# number of gpus %d" % self.num_gpus)

    self.iterator = iterator

    with tf.variable_scope(scope or 'thred_graph', dtype=self.dtype):
        self.init_embeddings(params.vocab_file, params.vocab_pkl,
                             params.embedding_type, self.embedding_size,
                             scope=scope)

        encoder_keep_prob, decoder_keep_prob = self.get_keep_probs(mode, params)
        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            context_keep_prob = 1.0 - params.context_dropout_rate
        else:
            context_keep_prob = 1.0

        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection") as output_scope:
                if params.boost_topic_gen_prob:
                    self.output_layer = taware_layer.JointDenseLayer(
                        params.vocab_size,
                        params.topic_vocab_size,
                        scope=output_scope,
                        name="output_projection")
                else:
                    self.output_layer = layers_core.Dense(
                        params.vocab_size, use_bias=False,
                        name="output_projection")

        self.batch_size = tf.size(self.iterator.source_sequence_lengths[0])

        devices = self.round_robin.assign(2, base=self.num_gpus - 1)
        encoder_results = self.__build_encoder(params, encoder_keep_prob,
                                               devices[0])
        context_outputs, context_state = self.__build_context(
            params, encoder_results, context_keep_prob, devices[0])

        self.global_step = tf.Variable(0, trainable=False)

        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.sampling_probability = tf.constant(
                params.scheduled_sampling_prob)
            self.sampling_probability = self._get_sampling_probability(
                params, self.global_step, self.sampling_probability)
        elif mode == tf.contrib.learn.ModeKeys.EVAL:
            self.sampling_probability = tf.constant(0.0)

        logits, sample_ids, final_decoder_state = self.__build_decoder(
            params, context_outputs, context_state, decoder_keep_prob,
            devices[1])

        if mode != tf.contrib.learn.ModeKeys.INFER:
            with tf.device(self.device_manager.tail_gpu()):
                loss = self.__compute_loss(logits)
        else:
            loss, losses = None, None

        if mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.train_loss = loss
            self.word_count = sum(
                [tf.reduce_sum(self.iterator.source_sequence_lengths[t])
                 for t in range(self.num_turns)]) + \
                tf.reduce_sum(self.iterator.target_sequence_length)
        elif mode == tf.contrib.learn.ModeKeys.EVAL:
            self.eval_loss = loss
        elif mode == tf.contrib.learn.ModeKeys.INFER:
            self.sample_words = rev_vocab_table.lookup(
                tf.to_int64(sample_ids))

        if mode != tf.contrib.learn.ModeKeys.INFER:
            ## Count the number of predicted words to compute ppl.
            self.predict_count = tf.reduce_sum(
                self.iterator.target_sequence_length)

    trainables = tf.trainable_variables()

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        self.learning_rate = tf.constant(params.learning_rate)
        # decay
        self.learning_rate = self._get_learning_rate_decay(
            params, self.global_step, self.learning_rate)

        # Optimizer
        if params.optimizer.lower() == "sgd":
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            tf.summary.scalar("lr", self.learning_rate)
        elif params.optimizer.lower() == "adam":
            opt = tf.train.AdamOptimizer(self.learning_rate)
            tf.summary.scalar("lr", self.learning_rate)
        else:
            raise ValueError('Unknown optimizer: ' + params.optimizer)

        # Gradients
        gradients = tf.gradients(self.train_loss, trainables,
                                 colocate_gradients_with_ops=True)
        clipped_grads, grad_norm = tf.clip_by_global_norm(
            gradients, params.max_gradient_norm)
        grad_norm_summary = [tf.summary.scalar("grad_norm", grad_norm)]
        grad_norm_summary.append(
            tf.summary.scalar("clipped_gradient",
                              tf.global_norm(clipped_grads)))
        self.grad_norm = grad_norm

        self.update = opt.apply_gradients(zip(clipped_grads, trainables),
                                          global_step=self.global_step)

        # Summary
        self.train_summary = tf.summary.merge([
            tf.summary.scalar("lr", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ] + grad_norm_summary)

    if mode == tf.contrib.learn.ModeKeys.INFER:
        self.infer_logits, self.sample_id = logits, sample_ids
        self.infer_summary = tf.no_op()

    # Saver
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)

    # Print trainable variables
    if log_trainables:
        log.print_out("# Trainable variables")
        for trainable in trainables:
            log.print_out(" %s, %s, %s" %
                          (trainable.name, str(trainable.get_shape()),
                           trainable.op.device))

def train(self, target_session="", scope=None):
    assert self.config.num_turns >= 2

    if self.config.is_pretrain_enabled():
        assert self.config.num_pretrain_turns >= 2
        assert self.config.num_turns >= self.config.num_pretrain_turns

    out_dir = self.config.model_dir

    steps_per_stats = self.config.steps_per_stats
    steps_per_eval = 20 * steps_per_stats

    _helper = self._get_model_helper()

    self._pre_model_creation()

    train_model = _helper.create_train_model(self.config, scope)
    eval_model = _helper.create_eval_model(self.config, scope)
    infer_model = _helper.create_infer_model(self.config, scope)

    self._post_model_creation(train_model, eval_model, infer_model)

    # Preload data for sample decoding.
    dev_file = self.config.dev_data
    eval_data = self._load_data(dev_file, include_target=True)

    summary_name = "train_log"

    # Log and output files
    log_file = os.path.join(out_dir, "log_%d" % time.time())
    log_f = tf.gfile.GFile(log_file, mode="a")
    log.print_out("# log_file=%s" % log_file, log_f)

    self.config.save()
    log.print_out("# Configs saved")

    avg_step_time = 0.0

    # TensorFlow model
    config_proto = model_helper.get_config_proto(self.config.log_device)

    train_sess = tf.Session(
        target=target_session, config=config_proto, graph=train_model.graph)
    eval_sess = tf.Session(
        target=target_session, config=config_proto, graph=eval_model.graph)
    infer_sess = tf.Session(
        target=target_session, config=config_proto, graph=infer_model.graph)

    # Pretraining
    num_pretrain_steps = 0
    if self.config.is_pretrain_enabled():
        num_pretrain_steps = self.config.num_pretrain_steps
        pretrain_model = _helper.create_pretrain_model(self.config, scope)
        with tf.Session(
                target=target_session,
                config=config_proto,
                graph=pretrain_model.graph) as pretrain_sess:
            self.pretrain(pretrain_sess, pretrain_model, log_f)

    with train_model.graph.as_default():
        loaded_train_model, global_step = model_helper.create_or_load_model(
            train_model.model, self.config.model_dir, train_sess, "train")

    # Summary writer
    summary_writer = tf.summary.FileWriter(
        os.path.join(out_dir, summary_name), train_model.graph)

    last_stats_step = global_step
    last_eval_step = global_step
    patience = self.config.patience

    stats = self.init_stats()
    speed, train_ppl = 0.0, 0.0
    start_train_time = time.time()

    log.print_out(
        "# Start step %d, epoch %d, lr %g, %s" %
        (global_step, self.config.epoch,
         loaded_train_model.learning_rate.eval(session=train_sess),
         time.ctime()),
        log_f)

    # Initialize all of the iterators
    skip_count = self.config.batch_size * self.config.epoch_step
    log.print_out("# Init train iterator for %d steps, skipping %d elements" %
                  (self.config.num_train_steps, skip_count))
    train_sess.run(
        train_model.iterator.initializer,
        feed_dict={train_model.skip_count_placeholder: skip_count})

    while self.config.epoch < self.config.num_train_epochs and patience > 0:
        ### Run a step ###
        start_time = time.time()
        try:
            step_result = loaded_train_model.train(train_sess)
            self.config.epoch_step += 1
        except tf.errors.OutOfRangeError:
            # Finished going through the training dataset. Go to next epoch.
            sw = Stopwatch()
            self.run_sample_decode(infer_model, infer_sess,
                                   self.config.model_dir, summary_writer,
                                   eval_data)

            # if self.config.enable_epoch_evals:
            #     dev_ppl, test_ppl = self.run_full_eval(infer_model, eval_model,
            #                                            infer_sess, eval_sess,
            #                                            out_dir,
            #                                            fs.file_name(self.config.test_data) + '_' + global_step,
            #                                            summary_writer)
            #     log.print_out(
            #         "%% done epoch %d #%d step %d - dev_ppl: %.2f test_ppl: %.2f @ eval time: %ds" %
            #         (self.config.epoch, self.config.epoch_step, global_step,
            #          dev_ppl, test_ppl, sw.elapsed()))
            # else:
            log.print_out(
                "## Done epoch %d in %d steps. step %d @ eval time: %ds" %
                (self.config.epoch, self.config.epoch_step, global_step,
                 sw.elapsed()))

            self.config.epoch += 1
            self.config.epoch_step = 0
            self.config.save()

            train_sess.run(
                train_model.iterator.initializer,
                feed_dict={train_model.skip_count_placeholder: 0})
            continue

        # Write step summary and accumulate statistics
        global_step = self.update_stats(stats, summary_writer, start_time,
                                        step_result)

        # Once in a while, we print statistics.
        if global_step - last_stats_step >= steps_per_stats:
            last_stats_step = global_step
            train_ppl, speed, is_overflow = self.check_stats(
                stats, global_step, steps_per_stats, log_f)
            if is_overflow:
                break
            # Reset statistics
            stats = self.init_stats()

        if global_step - last_eval_step >= steps_per_eval:
            last_eval_step = global_step

            log.print_out("# Save eval, global step %d" % global_step)
            log.add_summary(summary_writer, global_step, "train_ppl", train_ppl)

            # Save checkpoint
            loaded_train_model.saver.save(train_sess,
                                          self.config.checkpoint_file,
                                          global_step=global_step)

            # Evaluate on dev
            self.run_sample_decode(infer_model, infer_sess, out_dir,
                                   summary_writer, eval_data)
            dev_ppl, _ = self.run_internal_eval(eval_model, eval_sess, out_dir,
                                                summary_writer,
                                                use_test_set=False)
            if dev_ppl < self.config.best_dev_ppl:
                self.config.best_dev_ppl = dev_ppl
                patience = self.config.patience
                log.print_out(
                    ' ** Best model thus far, ep {}|{} dev_ppl {:.3f}'.format(
                        self.config.epoch, self.config.epoch_step, dev_ppl))
            elif dev_ppl > self.config.degrade_threshold * self.config.best_dev_ppl:
                patience -= 1
                log.print_out(
                    ' worsened, ep {}|{} patience {} best_dev_ppl {:.3f}'.format(
                        self.config.epoch, self.config.epoch_step,
                        patience, self.config.best_dev_ppl))

            # Save config parameters
            self.config.save()

    # Done training
    loaded_train_model.saver.save(
        train_sess, self.config.checkpoint_file, global_step=global_step)

    if self.config.enable_final_eval:
        dev_ppl, test_ppl = self.run_full_eval(
            infer_model, eval_model,
            infer_sess, eval_sess,
            out_dir,
            fs.file_name(self.config.test_data) + '_final',
            summary_writer)
        log.print_out(
            "# Final, step %d ep %d/%d lr %g "
            "step-time %.2f wps %.2fK train_ppl %.2f, dev_ppl %.2f, test_ppl %.2f, %s" %
            (global_step, self.config.epoch, self.config.epoch_step,
             loaded_train_model.learning_rate.eval(session=train_sess),
             avg_step_time, speed, train_ppl, dev_ppl, test_ppl,
             time.ctime()),
            log_f)
    else:
        log.print_out(
            "# Final, step %d ep %d/%d lr %g "
            "step-time %.2f wps %.2fK train_ppl %.2f best_dev_ppl %.2f, %s" %
            (global_step, self.config.epoch, self.config.epoch_step,
             loaded_train_model.learning_rate.eval(session=train_sess),
             avg_step_time, speed, train_ppl, self.config.best_dev_ppl,
             time.ctime()),
            log_f)

    log.print_time("# Done training!", start_train_time)

    summary_writer.close()

    eval_sess.close()
    infer_sess.close()
    train_sess.close()