def train_summary_cl_model(
        model_save_suffix=model_save_suffixes["train_summary_cl_model"]):
    """Train the adversarial summary classification model.

    Restores EMBEDDING/SUMMARY weights from the pretrained summary model and
    T_S weights from the pretrained language model, then fits the adversarial
    classifier, saving checkpoints under ``flags.save_model_dir``.

    Args:
        model_save_suffix: checkpoint filename suffix; defaults to the
            registered suffix for this step (evaluated once at import time).

    Raises:
        NotImplementedError: if ``adv_type`` is "vir_adv" (not implemented).
        ValueError: if ``adv_type`` is neither "adv" nor "vir_adv".
    """
    assert flags.pretrain_model_dir, "pretrain_model_dir is required"
    save_model_path = osp.join(flags.save_model_dir, model_save_suffix)
    pretrained_model_pathes = {
        "EMBEDDING": osp.join(flags.pretrain_model_dir,
                              model_save_suffixes["train_summary_model"]),
        "T_S": osp.join(flags.pretrain_model_dir,
                        model_save_suffixes["train_lm_model"]),
        "SUMMARY": osp.join(flags.pretrain_model_dir,
                            model_save_suffixes["train_summary_model"]),
    }
    if flags["adv_type"] == "adv":
        adv_cl_model = AdversarialSummaryModel()
    elif flags["adv_type"] == "vir_adv":
        # Fixed typo in message ("Unimplement"); NotImplementedError is an
        # Exception subclass, so existing `except Exception` callers still work.
        raise NotImplementedError(
            "vir_adv is not implemented for the summary cl model")
    else:
        # Fixed typo in message ("Unknow"); ValueError is an Exception subclass.
        raise ValueError("Unknown adv_type: %s" % flags["adv_type"])
    adv_cl_model.build(training=True,
                       restorer_tag_notifier=["EMBEDDING", "SUMMARY"])
    adv_cl_model.fit(save_model_path=save_model_path,
                     pretrain_model_pathes=pretrained_model_pathes)
def _finish_process(self, sess, coodinator, threads, model_saver,
                    save_model_path, global_step_val, loss_val,
                    best_loss_val):
    """Stop queue-runner threads and write a final checkpoint.

    Called once after the training loop ends: stops and joins the
    coordinator threads, then performs one last save using the same
    best-path / steps-path split as `_save_model_step`.
    """
    coodinator.request_stop()
    coodinator.join(threads)
    if save_model_path is not None:
        save_best_path, save_steps_path = self._get_save_path(
            save_model_path)
        # "steps" checkpoint: written when save_best is off, or when an
        # extra_save_path mirror is configured.
        if not self.arguments["save_best"] or self.extra_save_path:
            logger.info("save model.")
            model_saver.save(sess, save_steps_path, global_step_val)
            with open(
                    osp.join(osp.dirname(save_steps_path),
                             "best_loss_records.txt"), "a+") as recordf:
                recordf.write("step-loss: %s - %s\n" %
                              (global_step_val, loss_val))
        # "best" checkpoint: only when the final step was NOT on a
        # save_best_check_steps boundary and the loss improved.
        # NOTE(review): the `!= 0` here is the inverse of the `== 0` check in
        # _save_model_step — it looks intentional (avoid re-saving a step that
        # the in-loop save already handled), but worth confirming.
        if self.arguments["save_best"] or self.extra_save_path:
            if global_step_val % self.arguments[
                    "save_best_check_steps"] != 0 and loss_val < best_loss_val:
                logger.info("save model.")
                model_saver.save(sess, save_best_path, global_step_val)
                with open(
                        osp.join(osp.dirname(save_best_path),
                                 "best_loss_records.txt"), "a+") as recordf:
                    recordf.write("step-loss: %s - %s\n" %
                                  (global_step_val, loss_val))
def _save_model_step(self, sess, model_saver, save_model_path, loss_val,
                     best_loss_val, global_step_val):
    """Maybe checkpoint the model at this training step.

    Two independent save channels (paths come from `_get_save_path`):
      * "best"  — saved when loss improved AND the step is on a
                  save_best_check_steps boundary.
      * "steps" — saved every save_steps steps.
    Each save appends a "step-loss" line to best_loss_records.txt next to
    the checkpoint.

    Returns:
        The (possibly updated) best loss value seen so far.
    """
    if save_model_path is not None:
        save_best_path, save_steps_path = self._get_save_path(
            save_model_path)
        # save best: requires save_best mode (or an extra mirror path),
        # an improved loss, and a check-step boundary.
        if (
                self.arguments["save_best"] or self.extra_save_path
        ) and loss_val < best_loss_val and global_step_val % self.arguments[
                "save_best_check_steps"] == 0:
            logger.info("save best to {}".format(save_best_path))
            model_saver.save(sess, save_best_path, global_step_val)
            best_loss_val = loss_val
            with open(
                    osp.join(osp.dirname(save_best_path),
                             "best_loss_records.txt"), "a+") as recordf:
                recordf.write("step-loss: %s - %s\n" %
                              (global_step_val, best_loss_val))
        # save model per save_steps: periodic checkpoint regardless of loss;
        # best_loss_val is still tracked so the record file stays accurate.
        if (not self.arguments["save_best"] or self.extra_save_path
                ) and global_step_val % self.arguments["save_steps"] == 0:
            logger.info("save model to {}".format(save_steps_path))
            model_saver.save(sess, save_steps_path, global_step_val)
            if loss_val < best_loss_val:
                best_loss_val = loss_val
            with open(
                    osp.join(osp.dirname(save_steps_path),
                             "best_loss_records.txt"), "a+") as recordf:
                recordf.write("step-loss: %s - %s\n" %
                              (global_step_val, best_loss_val))
    return best_loss_val
def _summary_step(self, sess, debug_tensors, global_step_val,
                  summary_writer, summary, run_metadata=None,
                  feed_dict=None):
    """Emit debug evaluations, run metadata, Chrome timelines and summaries.

    Args:
        debug_tensors: optional {name: tensor} dict evaluated and logged.
        summary: a merged-summary result (or list of them) to write.
        run_metadata: tf RunMetadata (or list of them) from a traced step.
    """
    if debug_tensors:
        # note that different batch is used when queue is involved in graph
        debug_results = sess.run(list(debug_tensors.values()),
                                 feed_dict=feed_dict)
        debug_results = zip(debug_tensors.keys(), debug_results)
        for key, value in debug_results:
            logger.info("Debug [%s] eval results: %s" % (key, value))
    # Trace output only on eval_steps boundaries and when metadata exists.
    if self.debug_trace and global_step_val % self.arguments[
            "eval_steps"] == 0 and run_metadata:
        if summary_writer is not None:
            # A list of metadata gets one tagged entry per element.
            if isinstance(run_metadata, list):
                for i, metadata in enumerate(run_metadata):
                    summary_writer.add_run_metadata(
                        metadata, "%d-step-%d" % (i, global_step_val))
            else:
                summary_writer.add_run_metadata(
                    run_metadata, "step-%d" % global_step_val)
        if self.timeline_dir:
            # Dump chrome://tracing-compatible JSON timelines, one file per
            # metadata entry.
            if isinstance(run_metadata, list):
                for i, metadata in enumerate(run_metadata):
                    fetched_timeline = timeline.Timeline(
                        metadata.step_stats)
                    chrome_trace = fetched_timeline.generate_chrome_trace_format(
                    )
                    with open(
                            osp.join(
                                self.timeline_dir,
                                '%s_timeline_id_%d_step_%d.json' %
                                (self.model_name, i, global_step_val)),
                            'w') as f:
                        f.write(chrome_trace)
            else:
                fetched_timeline = timeline.Timeline(
                    run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format(
                )
                with open(
                        osp.join(
                            self.timeline_dir,
                            '%s_timeline_step_%d.json' %
                            (self.model_name, global_step_val)),
                        'w') as f:
                    f.write(chrome_trace)
    # Scalar/graph summaries (single or list) written last.
    if summary_writer is not None and summary:
        if isinstance(summary, list):
            for summary_item in summary:
                summary_writer.add_summary(summary_item, global_step_val)
        else:
            summary_writer.add_summary(summary, global_step_val)
def train_ae_model(model_save_suffix=model_save_suffixes["train_ae_model"]):
    """Train the autoencoder, warm-starting from the language-model checkpoint.

    Requires ``flags.pretrain_model_dir``; embedding weights are locked during
    training.
    """
    assert flags.pretrain_model_dir, "pretrain_model_dir is required"
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffix)
    lm_ckpt_path = osp.join(flags.pretrain_model_dir,
                            model_save_suffixes["train_lm_model"])
    autoencoder = AutoEncoderModel(lock_embedding=True)
    autoencoder.build(
        use_sampler=not flags["no_loss_sampler"],
        hard_mode=flags["hard_mode"],
        forget_bias=flags["forget_bias"])
    autoencoder.fit(save_model_path=ckpt_path,
                    pretrained_model_path=lm_ckpt_path)
def train_summary_model(
        model_save_suffix=model_save_suffixes["train_summary_model"]):
    """Train the summary model, optionally warm-starting from the LM checkpoint.

    If ``flags.pretrain_model_dir`` is unset, training starts from scratch.
    """
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffix)
    lm_ckpt_path = (
        osp.join(flags.pretrain_model_dir,
                 model_save_suffixes["train_lm_model"])
        if flags.pretrain_model_dir else None)
    model = SummaryModel()
    model.build()
    model.fit(save_model_path=ckpt_path, pretrained_model_path=lm_ckpt_path)
def train_lm_model(model_save_suffix=model_save_suffixes["train_lm_model"]):
    """Train the language model from scratch and save it under save_model_dir."""
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffix)
    language_model = LanguageModel()
    language_model.build(
        use_sampler=not flags["no_loss_sampler"],
        hard_mode=flags["hard_mode"],
        forget_bias=flags["forget_bias"])
    language_model.fit(save_model_path=ckpt_path)
def _get_save_path(self, save_model_path): original_save_model_path, filepath = save_model_path.rsplit("/", 1) if self.extra_save_path: extra_save_model_path = osp.join(original_save_model_path, self.extra_save_path) extra_save_model_path = osp.join(extra_save_model_path, filepath) else: extra_save_model_path = None original_save_model_path = osp.join(original_save_model_path, filepath) if self.arguments["save_best"]: save_best_path = original_save_model_path save_steps_path = extra_save_model_path else: save_steps_path = original_save_model_path save_best_path = extra_save_model_path return save_best_path, save_steps_path
def eval_cl_model():
    """Evaluate the classifier, from either the pretrained or the final model.

    ``flags.eval_from`` selects which checkpoint suffix is loaded.
    """
    eval_from_vals = ["pretrain_cl", "final_cl"]
    assert flags.eval_from in eval_from_vals, \
        "eval_from must be one of %s" % eval_from_vals
    suffix_key = ("train_cl_model"
                  if flags.eval_from == "final_cl" else "pre_train_cl_model")
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffixes[suffix_key])
    classifier = AdversarialDDGModel(
        init_modules=AdversarialDDGModel.eval_cl_modules)
    classifier.build(eval_cl=True)
    classifier.eval(save_model_path=ckpt_path)
def train_generator(
        model_save_suffix=model_save_suffixes["[no_prefix]train_generator"]):
    """Train the sequence generator (step A of the adversarial DDG pipeline).

    Restores the embedding, free-generator LSTM, sequence-generator LSTMs and
    RNN-to-embedding projection from previously trained checkpoints, then fits
    the generator.

    Raises:
        ValueError: if ``adv_type`` is neither "adv" nor "vir_adv".
    """
    assert flags.pretrain_model_dir, "pretrain_model_dir is required"
    save_model_path = osp.join(flags.save_model_dir, model_save_suffix)
    # NOTE(review): some keys use the "[no_prefix]" suffix variants while
    # SEQ_G_LSTM_1/2 and RNN_TO_EMBEDDING use the plain ones — confirm this
    # mix is intentional.
    pretrained_model_pathes = {
        "EMBEDDING": osp.join(
            flags.pretrain_model_dir,
            model_save_suffixes["[no_prefix]pre_train_cl_model"]),
        "FG_S": osp.join(
            flags.pretrain_model_dir,
            model_save_suffixes["[no_prefix]train_lm_model"]),
        "SEQ_G_LSTM_1": osp.join(flags.pretrain_model_dir,
                                 model_save_suffixes["train_lm_model"]),
        "SEQ_G_LSTM_2": osp.join(flags.pretrain_model_dir,
                                 model_save_suffixes["train_ae_model"]),
        "RNN_TO_EMBEDDING": osp.join(flags.pretrain_model_dir,
                                     model_save_suffixes["train_lm_model"]),
    }
    if flags["adv_type"] == "adv":
        generator_model = AdversarialDDGModel(
            init_modules=AdversarialDDGModel.stepA_modules)
    elif flags["adv_type"] == "vir_adv":
        generator_model = VirtualAdversarialDDGModel(
            init_modules=VirtualAdversarialDDGModel.stepA_modules)
    else:
        # Fixed typo in message ("Unknow"); ValueError is an Exception subclass,
        # so existing `except Exception` callers still work.
        raise ValueError("Unknown adv_type: %s" % flags["adv_type"])
    generator_model.build(stepA=True, restorer_tag_notifier=["EMBEDDING"])
    generator_model.fit(save_model_path=save_model_path,
                        pretrain_model_pathes=pretrained_model_pathes)
def pre_train_cl_model(
        model_save_suffix=model_save_suffixes["pre_train_cl_model"]):
    """Pre-train the classifier (step B of the adversarial DDG pipeline).

    Restores EMBEDDING and T_S weights from the pretrained language model,
    then fits the (virtual-)adversarial classifier.

    Raises:
        ValueError: if ``adv_type`` is neither "adv" nor "vir_adv".
    """
    assert flags.pretrain_model_dir, "pretrain_model_dir is required"
    save_model_path = osp.join(flags.save_model_dir, model_save_suffix)
    pretrained_model_pathes = {
        "EMBEDDING": osp.join(
            flags.pretrain_model_dir,
            model_save_suffixes["[no_prefix]train_lm_model"]),
        "T_S": osp.join(
            flags.pretrain_model_dir,
            model_save_suffixes["[no_prefix]train_lm_model"])
    }
    if flags["adv_type"] == "adv":
        adv_cl_model = AdversarialDDGModel(
            init_modules=AdversarialDDGModel.stepB_modules)
    elif flags["adv_type"] == "vir_adv":
        adv_cl_model = VirtualAdversarialDDGModel(
            init_modules=VirtualAdversarialDDGModel.stepB_modules)
    else:
        # Fixed typo in message ("Unknow"); ValueError is an Exception subclass,
        # so existing `except Exception` callers still work.
        raise ValueError("Unknown adv_type: %s" % flags["adv_type"])
    adv_cl_model.build(stepB=True, restorer_tag_notifier=[])
    adv_cl_model.fit(save_model_path=save_model_path,
                     pretrain_model_pathes=pretrained_model_pathes)
def eval_summary_model(
        model_save_suffix=model_save_suffixes["train_summary_model"]):
    """Evaluate the summary model on documents sampled from inputs_docs_path.

    Reads at most the first 1000 lines of the input file as the sampling
    pool, draws ``flags.inputs_docs_batch_size`` documents at random (with
    replacement), and runs the summary model's eval on them.

    Raises:
        IndexError: if the input file is empty (random.choice on empty pool).
    """
    import random  # hoisted from mid-function to the top (idiom fix)

    save_model_path = osp.join(flags.save_model_dir, model_save_suffix)
    read_docs = []
    with open(flags.inputs_docs_path, "r", encoding="utf-8") as f:
        # Sample pool is capped at the first 1000 lines.
        for _ in range(1000):
            doc = f.readline()
            if not doc:
                break  # file exhausted; original kept polling empty reads
            read_docs.append(doc)
    inputs_docs = [
        random.choice(read_docs)
        for _ in range(flags.inputs_docs_batch_size)
    ]
    summary_model = SummaryModel()
    summary_model.eval(inputs_docs=inputs_docs,
                       save_model_path=save_model_path,
                       apply_filter=flags["apply_filter"])
def eval_generator(eval_batch_size=flags["eval_batch_size"],
                   eval_topic_count=flags["eval_topic_count"],
                   eval_seq_length=flags["eval_seq_length"]):
    """Generate sequences from a trained (topic-)generator checkpoint.

    ``flags.eval_from`` picks between the plain and the topic generator;
    the defaults come from flags and are evaluated once at import time.
    """
    eval_from_vals = ["generator", "topic_generator"]
    assert flags.eval_from in eval_from_vals, \
        "eval_from must be one of %s" % eval_from_vals
    suffix_key = ("train_generator" if flags.eval_from == "generator"
                  else "train_topic_generator")
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffixes[suffix_key])
    generator = AdversarialDDGModel(
        init_modules=AdversarialDDGModel.eval_graph_modules)
    generator.build(
        eval_seq=True,
        batch_size=eval_batch_size,
        topic_count=eval_topic_count,
        seq_length=eval_seq_length)
    generator.eval(save_model_path=ckpt_path)
def eval_ae_model(model_save_suffix=model_save_suffixes["train_ae_model"]):
    """Evaluate the autoencoder from its training checkpoint."""
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffix)
    autoencoder = AutoEncoderModel()
    autoencoder.eval(save_model_path=ckpt_path)
# Stand-alone language-model training script.
import sys
sys.path.insert(0, ".")  # allow running from the repo root without install
from adversarial_net.models import LanguageModel
from adversarial_net import arguments as flags
from adversarial_net.preprocessing import WordCounter
from adversarial_net import osp

# NOTE(review): hard-coded Windows-specific default path — override
# save_model_dir on any other machine.
flags.add_argument(
    name="save_model_dir",
    argtype=str,
    default=
    "E:/kaggle/avito/imdb_testset/adversarial_net/model/lm_model/lm_model.ckpt"
)
if __name__ == "__main__":
    # Load word frequencies and keep the vocab_size most common entries;
    # these feed the LM's sampled-softmax loss via the flags registry.
    vocab_freqs = WordCounter().load(
        osp.join(flags["lm_inputs"]["datapath"],
                 "imdb_word_freqs.pickle")).most_common_freqs(
                     flags["lm_sequence"]["vocab_size"])
    flags.add_variable(name="vocab_freqs", value=vocab_freqs)
    lm_model = LanguageModel()
    lm_model.build()
    lm_model.fit(save_model_path=flags["save_model_dir"])
def eval_summary_cl_model():
    """Evaluate the adversarial summary classification model.

    NOTE(review): an identical ``eval_summary_cl_model`` definition also
    appears later in this file — the later one wins at import time.
    """
    ckpt_path = osp.join(flags.save_model_dir,
                         model_save_suffixes["train_summary_cl_model"])
    classifier = AdversarialSummaryModel()
    classifier.build(eval_cl=True)
    classifier.eval(save_model_path=ckpt_path)
def eval_lm_model(model_save_suffix=model_save_suffixes["train_lm_model"]):
    """Evaluate the language model from its training checkpoint."""
    ckpt_path = osp.join(flags.save_model_dir, model_save_suffix)
    language_model = LanguageModel()
    language_model.eval(save_model_path=ckpt_path)
def run_training(self, train_op, loss, acc=None, feed_dict=None,
                 save_model_path=None, variables_to_restore=None,
                 pretrained_model_path=None, sess=None, debug_mode=False):
    """Main training loop: restore, step, summarize, checkpoint, finish.

    Args:
        train_op: the optimizer op run each step.
        loss: loss tensor fetched each step.
        acc: optional accuracy tensor, forwarded to `_train_step`.
        feed_dict: optional feed for every step.
        save_model_path: checkpoint target; also derives the tfdbg dump root.
        variables_to_restore / pretrained_model_path: pretrained warm-start.
        sess: existing session to use; a new tf.Session() otherwise.
        debug_mode: wrap the session in tfdbg's CLI wrapper with an
            inf/nan tensor filter.
    """
    # Seed both losses from the configured starting "best" value.
    loss_val = best_loss_val = self.arguments["best_loss_val"]
    global_step_val = 0
    if sess is None:
        sess = tf.Session()
    if debug_mode:
        if save_model_path:
            dump_root = osp.join(save_model_path, "dump")
        else:
            dump_root = self.arguments["tfdebug_root"]
        sess = tf_debug.LocalCLIDebugWrapperSession(sess,
                                                    dump_root=dump_root)
        sess.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan)
    with sess:
        model_saver, summary_writer, merged_summary, coodinator, threads, current_steps = self._initialize_process(
            sess, save_model_path)
        # pretained model restore step
        self._restore_pretained_variables(
            sess=sess,
            pretrained_model_path=pretrained_model_path,
            save_model_path=save_model_path,
            variables_to_restore=variables_to_restore)
        # store model step (resume from an existing checkpoint if present)
        self._resotre_training_model(sess=sess,
                                     save_model_path=save_model_path)
        # Budget is relative to the restored step count.
        max_steps = self.arguments["max_steps"] + current_steps
        while global_step_val < max_steps:
            # pre-train phase: decides per-step run options / tracing
            run_options, run_metadata = self._pretrain_step(
                global_step_val)
            start_time = time.time()
            # train step: optionally fetch the exponential-moving-average loss
            if self.arguments["use_exp_mov_avg_loss"]:
                ops = [
                    train_op, loss, self.global_step, merged_summary,
                    self.exp_mov_avg_loss
                ]
            else:
                ops = [train_op, loss, self.global_step, merged_summary]
            return_cache = self._train_step(sess=sess,
                                            ops=ops,
                                            acc=acc,
                                            feed_dict=feed_dict,
                                            run_options=run_options,
                                            run_metadata=run_metadata)
            if self.arguments["use_exp_mov_avg_loss"]:
                loss_val, global_step_val, summary, exp_mov_avg_loss_val, acc_val = return_cache
                # EMA loss replaces the raw loss for logging and best-tracking.
                loss_val = exp_mov_avg_loss_val
            else:
                loss_val, global_step_val, summary, acc_val = return_cache
            duration = time.time() - start_time
            # summary & debug trace phase
            self._summary_step(sess=sess,
                               debug_tensors=self.debug_tensors,
                               global_step_val=global_step_val,
                               summary_writer=summary_writer,
                               summary=summary,
                               run_metadata=run_metadata,
                               feed_dict=feed_dict)
            # Logging
            self._eval_step(global_step_val, max_steps, loss_val, acc_val,
                            duration)
            # save model if could
            best_loss_val = self._save_model_step(sess, model_saver,
                                                  save_model_path, loss_val,
                                                  best_loss_val,
                                                  global_step_val)
        # Stop queue runners and write the final checkpoint.
        self._finish_process(sess, coodinator, threads, model_saver,
                             save_model_path, global_step_val, loss_val,
                             best_loss_val)
def eval_summary_cl_model():
    """Evaluate the adversarial summary classification model.

    NOTE(review): this duplicates an earlier ``eval_summary_cl_model``
    definition in this file; being later, this one wins at import time.
    """
    model_save_suffix = model_save_suffixes["train_summary_cl_model"]
    save_model_path = osp.join(flags.save_model_dir, model_save_suffix)
    generator_model = AdversarialSummaryModel()
    generator_model.build(eval_cl=True)
    generator_model.eval(save_model_path=save_model_path)
# intersection count between classi word_freqs and summary word_freqs: {10000: 9652, 20000: 18673, 30000: 26590, 40000: 33259, 50000: 38737, 60000: 43262, 70000: 46964, 80000: 49788, 86934: 51515}
if __name__ == "__main__":
    if flags.step == "train_summary_model" or flags.step == "eval_summary_model":
        # Summary steps: merge classifier and summary word-frequency tables;
        # load_and_merge fills `inersect_count` (a one-element cache list)
        # with the intersection counts at each max_words cutoff.
        inersect_count = []
        vocab_freqs = WordCounter().load_and_merge(
            osp.join(flags["lm_inputs"]["datapath"],
                     "%s_word_freqs.pickle" % flags["lm_inputs"]["dataset"]),
            osp.join(flags["lm_inputs"]["datapath"],
                     "summary_word_freqs.pickle"),
            # Cutoffs every 10000 words up to vocab_size, plus vocab_size itself.
            max_words=list(range(0, flags["inputs"]["vocab_size"],
                                 10000))[1:] +
            [flags["inputs"]["vocab_size"]],
            return_cache=inersect_count).most_common_freqs(
                flags["lm_sequence"]["vocab_size"])
        inersect_count = inersect_count[0]
        logger.info(
            "intersection count between classi word_freqs and summary word_freqs: %s"
            % inersect_count)
    else:
        # All other steps: plain frequency table for the configured dataset.
        vocab_freqs = WordCounter().load(
            osp.join(flags["lm_inputs"]["datapath"],
                     "%s_word_freqs.pickle" %
                     flags["lm_inputs"]["dataset"])).most_common_freqs(
                         flags["lm_sequence"]["vocab_size"])