def load(self):
  run_config = t2t_trainer.create_run_config(self.hp)
  self.hp.add_hparam("model_dir", run_config.model_dir)
  self.estimator = trainer_lib.create_estimator(
      self.model,
      self.hp,
      run_config,
      decode_hparams=self.decode_hp,
      use_tpu=self.use_tpu)
  self.estimator_predictor = tf.contrib.predictor.from_estimator(
      self.estimator,
      self.input_fn,
      config=tf.ConfigProto(log_device_placement=True,
                            allow_soft_placement=True))

  FLAGS.problem = "translate_enfr_wmt32k_rev"
  self.problem = "translate_enfr_wmt32k_rev"
  self.problem_name = self.problem
  FLAGS.checkpoint_path = os.path.join(
      os.getcwd(), "checkpoints/fren/model.ckpt-500000")
  run_config = t2t_trainer.create_run_config(self.hp)
  self.hp.model_dir = run_config.model_dir  # hparam already exists, so assign directly
  self.estimator = trainer_lib.create_estimator(
      self.model,
      self.hp,
      run_config,
      decode_hparams=self.decode_hp,
      use_tpu=self.use_tpu)
  self.estimator_decoder_predictor = tf.contrib.predictor.from_estimator(
      self.estimator,
      self.input_fn,
      config=tf.ConfigProto(log_device_placement=True,
                            allow_soft_placement=True))
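A hedged usage sketch for the loader above: `tf.contrib.predictor.from_estimator` returns a callable that maps a feed dict to output arrays, so the two predictors could plausibly be driven as below. The wrapper class name `TranslationService` and the "inputs" feed key are assumptions for illustration, not part of the original snippet.

  # Hypothetical driver; assumes a wrapper class that defines load() as above
  # and an input_fn whose receiver exposes an "inputs" tensor.
  service = TranslationService()
  service.load()
  outputs = service.estimator_predictor({"inputs": encoded_ids})  # encoded_ids: int32 batch of token ids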
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.cloud_mlengine:
    cloud_mlengine.launch()
    return

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if cloud_mlengine.job_dir():
    FLAGS.output_dir = cloud_mlengine.job_dir()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  root_output_dir = FLAGS.output_dir

  if FLAGS.teacher_dir:
    teacher_dir = FLAGS.teacher_dir
  else:
    teacher_dir = os.path.join(root_output_dir, "teacher")

  # Train Teacher ============
  if FLAGS.skip_teacher_training:
    tf.logging.info("training teacher skipped")
  else:
    hparams = t2t_trainer.create_hparams()
    hparams.distill_phase = "train"
    FLAGS.output_dir = teacher_dir

    exp_fn = t2t_trainer.create_experiment_fn()
    run_config = t2t_trainer.create_run_config(hparams)
    exp = exp_fn(run_config, hparams)
    if t2t_trainer.is_chief():
      t2t_trainer.save_metadata(hparams)
    t2t_trainer.execute_schedule(exp)
  # ==========================
  # Train Student ============
  hparams = t2t_trainer.create_hparams()
  hparams.add_hparam("teacher_dir", teacher_dir)
  hparams.distill_phase = "distill"

  if FLAGS.student_dir:
    student_dir = FLAGS.student_dir
  else:
    student_dir = os.path.join(root_output_dir, "student")
  FLAGS.output_dir = student_dir
  hparams.add_hparam("student_dir", student_dir)

  exp_fn = t2t_trainer.create_experiment_fn()
  run_config = t2t_trainer.create_run_config(hparams)
  exp = exp_fn(run_config, hparams)
  if t2t_trainer.is_chief():
    t2t_trainer.save_metadata(hparams)
  t2t_trainer.execute_schedule(exp)
  # ==========================
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.cloud_mlengine:
    cloud_mlengine.launch()
    return

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if cloud_mlengine.job_dir():
    FLAGS.output_dir = cloud_mlengine.job_dir()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  with t2t_trainer.maybe_cloud_tpu():
    root_output_dir = FLAGS.output_dir

    # Train Teacher ============
    hparams = t2t_trainer.create_hparams()
    hparams.distill_phase = "train"
    teacher_dir = os.path.join(root_output_dir, "teacher")
    FLAGS.output_dir = teacher_dir

    exp_fn = t2t_trainer.create_experiment_fn()
    run_config = t2t_trainer.create_run_config(hparams)
    exp = exp_fn(run_config, hparams)
    if t2t_trainer.is_chief():
      t2t_trainer.save_metadata(hparams)
    t2t_trainer.execute_schedule(exp)
    # ==========================
    # Train Student ============
    hparams = t2t_trainer.create_hparams()
    hparams.add_hparam("teacher_dir", teacher_dir)
    hparams.distill_phase = "distill"
    student_dir = os.path.join(root_output_dir, "student")
    FLAGS.output_dir = student_dir

    exp_fn = t2t_trainer.create_experiment_fn()
    run_config = t2t_trainer.create_run_config(hparams)
    exp = exp_fn(run_config, hparams)
    if t2t_trainer.is_chief():
      t2t_trainer.save_metadata(hparams)
    t2t_trainer.execute_schedule(exp)
    # ==========================
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  ckpt_dir = os.path.expanduser(FLAGS.output_dir)

  hparams = create_hparams()
  hparams.no_data_parallelism = True  # To clear the devices
  problem = hparams.problem

  if FLAGS.export_as_tfhub:
    export_as_tfhub_module(hparams, problem, ckpt_dir)
    return

  run_config = t2t_trainer.create_run_config(hparams)

  estimator = create_estimator(run_config, hparams)

  exporter = tf.estimator.FinalExporter(
      "exporter", lambda: problem.serving_input_fn(hparams), as_text=True)

  export_dir = os.path.join(ckpt_dir, "export")
  exporter.export(
      estimator,
      export_dir,
      checkpoint_path=tf.train.latest_checkpoint(ckpt_dir),
      eval_result=None,
      is_the_final_export=True)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError("To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  decode_hp = create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
def _init_env(self):
  FLAGS.use_tpu = False
  tf.logging.set_verbosity(tf.logging.DEBUG)
  tf.logging.info("Import usr dir from %s", self._usr_dir)
  if self._usr_dir is not None:
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  tf.logging.info("Start to create hparams for %s of %s", self._problem,
                  self._hparams_set)

  self._hparams = create_hparams()
  self._hparams_decode = create_decode_hparams(
      extra_length=self._extra_length,
      batch_size=self._batch_size,
      beam_size=self._beam_size,
      alpha=self._alpha,
      return_beams=self._return_beams,
      write_beam_scores=self._write_beam_scores)

  self.estimator = trainer_lib.create_estimator(
      FLAGS.model,
      self._hparams,
      t2t_trainer.create_run_config(self._hparams),
      decode_hparams=self._hparams_decode,
      use_tpu=False)

  tf.logging.info("Finish initializing environment")
def t2t_decoder(problem_name, data_dir, decode_from_file, decode_to_file,
                checkpoint_path):
  trainer_lib.set_random_seed(FLAGS.random_seed)

  hp = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(data_dir),
      problem_name=problem_name)

  decode_hp = decoding.decode_hparams(FLAGS.decode_hparams)
  decode_hp.shards = FLAGS.decode_shards
  decode_hp.shard_id = FLAGS.worker_id
  decode_in_memory = FLAGS.decode_in_memory or decode_hp.decode_in_memory
  decode_hp.decode_in_memory = decode_in_memory
  decode_hp.decode_to_file = decode_to_file
  decode_hp.decode_reference = None

  FLAGS.checkpoint_path = checkpoint_path
  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode_from_text_file(
      estimator,
      problem_name,
      decode_from_file,
      hp,
      decode_hp,
      decode_to_file,
      checkpoint_path=checkpoint_path)
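A minimal invocation sketch for the helper above; the problem name and all file paths are hypothetical placeholders, and the usual T2T flags (--model, --hparams_set, --decode_hparams, ...) are assumed to be parsed already:

  t2t_decoder(
      problem_name="translate_ende_wmt32k",
      data_dir="~/t2t_data",
      decode_from_file="~/inputs.en.txt",
      decode_to_file="~/outputs.de.txt",
      checkpoint_path="~/t2t_train/model.ckpt-250000")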
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    # if not FLAGS.decode_to_file:
    #   raise ValueError("To score a file, specify --decode_to_file for results.")
    # write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    # for sentence, score in results:
    #   write_file.write(sentence + "\t" + "SCORE:" + "%.6f\n" % score)
    # write_file.close()
    return

  hp = create_hparams()
  decode_hp = create_decode_hparams()
  run_config = t2t_trainer.create_run_config(hp)
  if FLAGS.disable_grappler_optimizations:
    run_config.session_config.graph_options.rewrite_options.disable_meta_optimizer = True

  # summary-hook in tf.estimator.EstimatorSpec requires
  # hparams.model_dir to be set.
  hp.add_hparam("model_dir", run_config.model_dir)

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      run_config,
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.checkpoint_path:
    checkpoint_path = FLAGS.checkpoint_path
    ckpt_dir = os.path.dirname(checkpoint_path)
  else:
    ckpt_dir = os.path.expanduser(FLAGS.output_dir)
    checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)

  hparams = create_hparams()
  hparams.no_data_parallelism = True  # To clear the devices
  problem = hparams.problem

  export_dir = FLAGS.export_dir or os.path.join(ckpt_dir, "export")

  if FLAGS.export_as_tfhub:
    checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)
    decode_hparams = decoding.decode_hparams(FLAGS.decode_hparams)
    export_as_tfhub_module(FLAGS.model, hparams, decode_hparams, problem,
                           checkpoint_path, export_dir)
    return

  run_config = t2t_trainer.create_run_config(hparams)

  estimator = create_estimator(run_config, hparams)

  estimator.export_savedmodel(
      export_dir,
      lambda: problem.serving_input_fn(hparams),
      as_text=False,
      checkpoint_path=checkpoint_path)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=FLAGS.data_dir,
      problem_name=FLAGS.problem)

  # Set the appropriate dataset split, if FLAGS.eval_use_test_set.
  dataset_split = "test" if FLAGS.eval_use_test_set else None
  dataset_kwargs = {"dataset_split": dataset_split}
  eval_input_fn = hparams.problem.make_estimator_input_fn(
      tf.estimator.ModeKeys.EVAL, hparams, dataset_kwargs=dataset_kwargs)

  config = t2t_trainer.create_run_config(hparams)

  # summary-hook in tf.estimator.EstimatorSpec requires
  # hparams.model_dir to be set.
  hparams.add_hparam("model_dir", config.model_dir)

  estimator = trainer_lib.create_estimator(
      FLAGS.model, hparams, config, use_tpu=FLAGS.use_tpu)

  ckpt_iter = trainer_lib.next_checkpoint(hparams.model_dir,
                                          FLAGS.eval_timeout_mins)
  for ckpt_path in ckpt_iter:
    predictions = estimator.evaluate(
        eval_input_fn, steps=FLAGS.eval_steps, checkpoint_path=ckpt_path)
    tf.logging.info(predictions)
def create_new_estimator(hp, decode_hp):
  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)
  return estimator
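A hedged usage sketch, assuming the surrounding module defines create_hparams() and create_decode_hparams() as in the other snippets in this listing:

  hp = create_hparams()
  decode_hp = create_decode_hparams()
  estimator = create_new_estimator(hp, decode_hp)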
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  hparams = t2t_trainer.create_hparams()
  trainer_lib.add_problem_hparams(hparams, FLAGS.problem)
  pruning_params = create_pruning_params()
  pruning_strategy = create_pruning_strategy(pruning_params.strategy)

  config = t2t_trainer.create_run_config(hparams)
  params = {"batch_size": hparams.batch_size}

  # add "_rev" as a hack to avoid image standardization
  problem = registry.problem(FLAGS.problem)
  input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
                                             hparams)
  dataset = input_fn(params, config).repeat()
  features, labels = dataset.make_one_shot_iterator().get_next()

  sess = tf.Session()

  model_fn = t2t_model.T2TModel.make_estimator_model_fn(
      FLAGS.model, hparams, use_tpu=FLAGS.use_tpu)
  spec = model_fn(
      features, labels, tf.estimator.ModeKeys.EVAL, params=hparams,
      config=config)

  # Restore weights
  saver = tf.train.Saver()
  checkpoint_path = os.path.expanduser(FLAGS.output_dir or
                                       FLAGS.checkpoint_path)
  saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

  def eval_model():
    preds = spec.predictions["predictions"]
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(labels=labels, predictions=preds)
    sess.run(tf.initialize_local_variables())
    for _ in range(FLAGS.eval_steps):
      acc = sess.run(acc_update_op)
    return acc

  pruning_utils.sparsify(sess, eval_model, pruning_strategy, pruning_params)
def main(_):
  FLAGS.decode_interactive = True
  hp = create_hparams()
  decode_hp = create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=False)

  decode(estimator, hp, decode_hp)
def main(_):
  import ipdb
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError("To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  decode_hp = create_decode_hparams()
  # eval_input_fn = hp.problem.make_estimator_input_fn(
  #     tf.estimator.ModeKeys.TRAIN, hp,
  #     dataset_kwargs={"dataset_split": "eval"})
  # print(eval_input_fn)
  # for foo in eval_input_fn(None, None):
  #   print(type(foo[0]['targets']))
  #   print(foo[0]['targets'].numpy())
  # exit()
  run_config = t2t_trainer.create_run_config(hp)
  if FLAGS.disable_grappler_optimizations:
    run_config.session_config.graph_options.rewrite_options.disable_meta_optimizer = True

  # summary-hook in tf.estimator.EstimatorSpec requires
  # hparams.model_dir to be set.
  hp.add_hparam("model_dir", run_config.model_dir)

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      run_config,
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError(
          "To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  decode_hp = create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)

  # Post-process decodings (if necessary).
  if FLAGS.decode_to_file and FLAGS.output_line_prefix_tag:
    decode_filename_original = FLAGS.decode_to_file
    decode_filename_prefixed = "%s-%s" % (decode_filename_original,
                                          FLAGS.output_line_prefix_tag)
    tf.logging.info("Writing prefixed decodes into %s" %
                    decode_filename_prefixed)
    # Read original lines.
    with tf.gfile.Open(decode_filename_original, "r") as original_fp:
      original_lines = original_fp.readlines()
    # Write prefixed lines.
    prefix = "<%s> " % FLAGS.output_line_prefix_tag
    prefixed_fp = tf.gfile.Open(decode_filename_prefixed, "w")
    for line in original_lines:
      prefixed_fp.write(prefix + line)
    prefixed_fp.flush()
    prefixed_fp.close()
    tf.logging.info("Done.")
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  FLAGS.use_tpu = False  # decoding not supported on TPU

  hp = create_hparams()
  decode_hp = create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=False)

  decode(estimator, hp, decode_hp)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  hp = t2t_decoder.create_hparams()
  decode_hp = t2t_decoder.create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  # Fathom start
  checkpoint_path = fathom_t2t_model_setup()
  # Fathom end
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.score_file:
    filename = os.path.expanduser(FLAGS.score_file)
    if not tf.gfile.Exists(filename):
      raise ValueError("The file to score doesn't exist: %s" % filename)
    results = score_file(filename)
    if not FLAGS.decode_to_file:
      raise ValueError(
          "To score a file, specify --decode_to_file for results.")
    write_file = tf.gfile.Open(os.path.expanduser(FLAGS.decode_to_file), "w")
    for score in results:
      write_file.write("%.6f\n" % score)
    write_file.close()
    return

  hp = create_hparams()
  decode_hp = create_decode_hparams()

  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)

  decode(estimator, hp, decode_hp)

  # Fathom
  # This xcom is here so that tasks after decode know the local path to the
  # downloaded model. Train does this same xcom echo.
  # Decode, predict, and evaluate code should
  # converge to use the same fathom_t2t_model_setup.
  # TODO: since the truncation-boundary xcom value should be available in
  # the hparams_set, we should probably have consumers access this via a
  # SavedModel.hparams property rather than XCOM.
  echo_yaml_for_xcom_ingest({
      'output-dir': os.path.dirname(checkpoint_path),
      'output-file': FLAGS.decode_output_file,
      'truncation-boundary': hp.max_input_seq_length
  })
def __init__(self, processor_configuration):
  """Creates the Transformer estimator.

  Args:
    processor_configuration: A ProcessorConfiguration protobuffer with the
      transformer fields populated.
  """
  # Do the pre-setup tensor2tensor requires for flags and configurations.
  transformer_config = processor_configuration["transformer"]
  FLAGS.output_dir = transformer_config["model_dir"]
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  data_dir = os.path.expanduser(transformer_config["data_dir"])

  # Create the basic hyper parameters.
  self.hparams = trainer_lib.create_hparams(
      transformer_config["hparams_set"],
      transformer_config["hparams"],
      data_dir=data_dir,
      problem_name=transformer_config["problem"])

  decode_hp = decoding.decode_hparams()
  decode_hp.add_hparam("shards", 1)
  decode_hp.add_hparam("shard_id", 0)

  # Create the estimator and final hyper parameters.
  self.estimator = trainer_lib.create_estimator(
      transformer_config["model"],
      self.hparams,
      t2t_trainer.create_run_config(self.hparams),
      decode_hparams=decode_hp,
      use_tpu=False)

  # Fetch the vocabulary and other helpful variables for decoding.
  self.source_vocab = self.hparams.problem_hparams.vocabulary["inputs"]
  self.targets_vocab = self.hparams.problem_hparams.vocabulary["targets"]
  self.const_array_size = 10000

  # Prepare the Transformer's debug data directory.
  run_dirs = sorted(glob.glob(os.path.join("/tmp/t2t_server_dump", "run_*")))
  for run_dir in run_dirs:
    shutil.rmtree(run_dir)
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  ckpt_dir = os.path.expanduser(FLAGS.output_dir)

  hparams = create_hparams()
  hparams.no_data_parallelism = True  # To clear the devices

  run_config = t2t_trainer.create_run_config(hparams)

  estimator = create_estimator(run_config, hparams)

  problem = hparams.problem
  strategy = trainer_lib.create_export_strategy(problem, hparams)

  export_dir = os.path.join(ckpt_dir, "export", strategy.name)
  strategy.export(
      estimator,
      export_dir,
      checkpoint_path=tf.train.latest_checkpoint(ckpt_dir))
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

  if FLAGS.checkpoint_path:
    checkpoint_path = FLAGS.checkpoint_path
    ckpt_dir = os.path.dirname(checkpoint_path)
  else:
    ckpt_dir = os.path.expanduser(FLAGS.output_dir)
    checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)

  hparams = create_hparams()
  hparams.no_data_parallelism = True  # To clear the devices
  problem = hparams.problem

  export_dir = FLAGS.export_dir or os.path.join(ckpt_dir, "export")

  if FLAGS.export_as_tfhub:
    checkpoint_path = tf.train.latest_checkpoint(ckpt_dir)
    decode_hparams = decoding.decode_hparams(FLAGS.decode_hparams)
    export_as_tfhub_module(FLAGS.model, hparams, decode_hparams, problem,
                           checkpoint_path, export_dir)
    return

  run_config = t2t_trainer.create_run_config(hparams)

  estimator = create_estimator(run_config, hparams)

  exporter = tf.estimator.FinalExporter(
      "exporter", lambda: problem.serving_input_fn(hparams), as_text=True)

  exporter.export(
      estimator,
      export_dir,
      checkpoint_path=checkpoint_path,
      eval_result=None,
      is_the_final_export=True)
def create_hp_and_estimator(problem_name, data_dir, checkpoint_path):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)

  hp = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=os.path.expanduser(data_dir),
      problem_name=problem_name)

  decode_hp = decoding.decode_hparams(FLAGS.decode_hparams)
  decode_hp.shards = FLAGS.decode_shards
  decode_hp.shard_id = FLAGS.worker_id
  decode_in_memory = FLAGS.decode_in_memory or decode_hp.decode_in_memory
  decode_hp.decode_in_memory = decode_in_memory
  decode_hp.decode_to_file = None
  decode_hp.decode_reference = None

  FLAGS.checkpoint_path = checkpoint_path
  estimator = trainer_lib.create_estimator(
      FLAGS.model,
      hp,
      t2t_trainer.create_run_config(hp),
      decode_hparams=decode_hp,
      use_tpu=FLAGS.use_tpu)
  return hp, decode_hp, estimator
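A sketch of how this helper might be called; the problem name and paths are hypothetical placeholders, and the T2T flags are assumed to be parsed:

  hp, decode_hp, estimator = create_hp_and_estimator(
      problem_name="translate_ende_wmt32k",
      data_dir="~/t2t_data",
      checkpoint_path="~/t2t_train/model.ckpt-250000")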
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.cloud_mlengine:
    cloud_mlengine.launch()
    return

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if cloud_mlengine.job_dir():
    FLAGS.output_dir = cloud_mlengine.job_dir()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  if FLAGS.surrogate_attack:
    tf.logging.warn("Performing surrogate model attack.")
    sur_hparams = create_surrogate_hparams()
    trainer_lib.add_problem_hparams(sur_hparams, FLAGS.problem)

  hparams = t2t_trainer.create_hparams()
  trainer_lib.add_problem_hparams(hparams, FLAGS.problem)

  attack_params = create_attack_params()
  attack_params.add_hparam(attack_params.epsilon_name, 0.0)

  if FLAGS.surrogate_attack:
    sur_config = create_surrogate_run_config(sur_hparams)
  config = t2t_trainer.create_run_config(hparams)
  params = {
      "batch_size": hparams.batch_size,
      "use_tpu": FLAGS.use_tpu,
  }

  # add "_rev" as a hack to avoid image standardization
  problem = registry.problem(FLAGS.problem + "_rev")

  inputs, labels, features = prepare_data(problem, hparams, params, config)

  sess = tf.Session()

  if FLAGS.surrogate_attack:
    sur_model_fn = t2t_model.T2TModel.make_estimator_model_fn(
        FLAGS.surrogate_model, sur_hparams, use_tpu=FLAGS.use_tpu)
    sur_ch_model = adv_attack_utils.T2TAttackModel(
        sur_model_fn, features, params, sur_config, scope="surrogate")
    # Dummy call to construct graph
    sur_ch_model.get_probs(inputs)

    checkpoint_path = os.path.expanduser(FLAGS.surrogate_output_dir)
    tf.contrib.framework.init_from_checkpoint(
        tf.train.latest_checkpoint(checkpoint_path), {"/": "surrogate/"})
    sess.run(tf.global_variables_initializer())

  other_vars = set(tf.global_variables())

  model_fn = t2t_model.T2TModel.make_estimator_model_fn(FLAGS.model, hparams)
  ch_model = adv_attack_utils.T2TAttackModel(model_fn, features, params,
                                             config)

  acc_mask = None
  probs = ch_model.get_probs(inputs)
  if FLAGS.ignore_incorrect:
    preds = tf.argmax(probs, -1, output_type=labels.dtype)
    preds = tf.reshape(preds, labels.shape)
    acc_mask = tf.to_float(tf.equal(labels, preds))
  one_hot_labels = tf.one_hot(labels, probs.shape[-1])

  if FLAGS.surrogate_attack:
    attack = create_attack(attack_params.attack)(sur_ch_model, sess=sess)
  else:
    attack = create_attack(attack_params.attack)(ch_model, sess=sess)

  new_vars = set(tf.global_variables()) - other_vars

  # Restore weights
  saver = tf.train.Saver(new_vars)
  checkpoint_path = os.path.expanduser(FLAGS.output_dir)
  saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

  # reuse variables
  tf.get_variable_scope().reuse_variables()

  def compute_accuracy(x, l, mask):
    """Compute model accuracy."""
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)

    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
      preds = sur_ch_model.get_probs(x)
      preds = tf.squeeze(preds)
      preds = tf.argmax(preds, -1, output_type=l.dtype)
      acc_update_op = tf.tuple((acc_update_op,
                                tf.metrics.accuracy(l, preds,
                                                    weights=mask)[1]))

    sess.run(tf.initialize_local_variables())
    for i in range(FLAGS.eval_steps):
      tf.logging.info("\tEvaluating batch [%d / %d]" %
                      (i + 1, FLAGS.eval_steps))
      acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
      tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
      tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc

  epsilon_acc_pairs = []
  for epsilon in attack_params.attack_epsilons:
    tf.logging.info("Attacking @ eps=%.4f" % epsilon)
    attack_params.set_hparam(attack_params.epsilon_name, epsilon)
    adv_x = attack.generate(inputs, y=one_hot_labels,
                            **attack_params.values())
    acc = compute_accuracy(adv_x, labels, acc_mask)
    epsilon_acc_pairs.append((epsilon, acc))

  for epsilon, acc in epsilon_acc_pairs:
    if FLAGS.surrogate_attack:
      tf.logging.info(
          "Accuracy @ eps=%.4f: (%.4f, %.4f)" % (epsilon, acc[0], acc[1]))
    else:
      tf.logging.info("Accuracy @ eps=%.4f: %.4f" % (epsilon, acc))
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.cloud_mlengine:
    cloud_mlengine.launch()
    return

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if cloud_mlengine.job_dir():
    FLAGS.output_dir = cloud_mlengine.job_dir()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  hparams = t2t_trainer.create_hparams()
  trainer_lib.add_problem_hparams(hparams, FLAGS.problem)

  attack_params = create_attack_params()
  attack_params.add_hparam("eps", 0.0)

  config = t2t_trainer.create_run_config(hparams)
  params = {"batch_size": hparams.batch_size}

  # add "_rev" as a hack to avoid image standardization
  problem = registry.problem(FLAGS.problem + "_rev")
  input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
                                             hparams)
  dataset = input_fn(params, config).repeat()
  features, _ = dataset.make_one_shot_iterator().get_next()
  inputs, labels = features["targets"], features["inputs"]
  inputs = tf.to_float(inputs)
  labels = tf.squeeze(labels)

  sess = tf.Session()

  model_fn = t2t_model.T2TModel.make_estimator_model_fn(
      FLAGS.model, hparams, use_tpu=FLAGS.use_tpu)
  ch_model = adv_attack_utils.T2TAttackModel(model_fn, params, config)

  acc_mask = None
  probs = ch_model.get_probs(inputs)
  if FLAGS.ignore_incorrect:
    preds = tf.argmax(probs, -1)
    preds = tf.squeeze(preds)
    acc_mask = tf.to_float(tf.equal(labels, preds))
  one_hot_labels = tf.one_hot(labels, probs.shape[-1])

  attack = create_attack(attack_params.attack)(ch_model, sess=sess)

  # Restore weights
  saver = tf.train.Saver()
  checkpoint_path = os.path.expanduser(FLAGS.output_dir or
                                       FLAGS.checkpoint_path)
  saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

  # reuse variables
  tf.get_variable_scope().reuse_variables()

  def compute_accuracy(x, labels, mask):
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(
        labels=labels, predictions=preds, weights=mask)

    sess.run(tf.initialize_local_variables())
    for _ in range(FLAGS.eval_steps):
      acc = sess.run(acc_update_op)
    return acc

  acc = compute_accuracy(inputs, labels, acc_mask)
  epsilon_acc_pairs = [(0.0, acc)]
  for epsilon in attack_params.attack_epsilons:
    attack_params.eps = epsilon
    adv_x = attack.generate(inputs, y=one_hot_labels,
                            **attack_params.values())
    acc = compute_accuracy(adv_x, labels, acc_mask)
    epsilon_acc_pairs.append((epsilon, acc))

  for epsilon, acc in epsilon_acc_pairs:
    tf.logging.info("Accuracy @ eps=%f: %f" % (epsilon, acc))
def _init_env(self):
  FLAGS.use_tpu = False
  tf.logging.set_verbosity(tf.logging.DEBUG)
  tf.logging.info("Import usr dir from %s", self._usr_dir)
  if self._usr_dir is not None:
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  tf.logging.info("Start to create hparams for %s of %s", self._problem,
                  self._hparams_set)

  self._hparams = create_hparams()
  self._hparams_decode = create_decode_hparams(
      extra_length=self._extra_length,
      batch_size=self._batch_size,
      beam_size=self._beam_size,
      alpha=self._alpha,
      return_beams=self._return_beams,
      write_beam_scores=self._write_beam_scores,
      force_decode_length=self._force_decode_length)

  self.estimator = trainer_lib.create_estimator(
      FLAGS.model,
      self._hparams,
      t2t_trainer.create_run_config(self._hparams),
      decode_hparams=self._hparams_decode,
      use_tpu=False)

  tf.logging.info("Finish initializing environment")

  ### Make the input placeholders.
  # self._inputs_ph = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
  # x = tf.placeholder(dtype=tf.int32)
  # x.set_shape([None, None])  # ? -> (?, ?)
  # x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
  # x = tf.to_int32(x)
  # self._inputs_ph = x
  # batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
  # batch_inputs = x
  # batch_inputs = tf.reshape(self._inputs_ph, [-1, -1, 1, 1])
  # targets_ph = tf.placeholder(dtype=tf.int32)
  # batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])
  self.inputs_ph = tf.placeholder(
      tf.int32, shape=(None, None, 1, 1), name='inputs')
  self.targets_ph = tf.placeholder(
      tf.int32, shape=(None, None, None, None), name='targets')
  self.input_extra_length_ph = tf.placeholder(dtype=tf.int32, shape=[])

  self._features = {
      "inputs": self.inputs_ph,
      "problem_choice": 0,  # We run on the first problem here.
      "input_space_id": self._hparams.problem_hparams.input_space_id,
      "target_space_id": self._hparams.problem_hparams.target_space_id
  }
  ### Allow a variable decode length.
  self._features['decode_length'] = self.input_extra_length_ph
  ## Targets.
  self._features['targets'] = self.targets_ph
  ## Drop the integer-valued features.
  del self._features["problem_choice"]
  del self._features["input_space_id"]
  del self._features["target_space_id"]
  # del self._features['decode_length']

  mode = tf.estimator.ModeKeys.EVAL

  translate_model = registry.model(self._model_name)(
      hparams=self._hparams, decode_hparams=self._hparams_decode, mode=mode)

  self.predict_dict = {}

  ### Get the logits and attention matrices.
  self.logits, _ = translate_model(self._features)  # [?, ?, ?, 1, vocab_size]
  # translate_model(features)
  from visualization import get_att_mats
  self.att_mats = get_att_mats(translate_model, self._model_name)  # enc, dec, encdec

  ### Get the inference graph.
  translate_model.set_mode(tf.estimator.ModeKeys.PREDICT)
  with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    self.outputs_scores = translate_model.infer(
        features=self._features,
        decode_length=self._extra_length,
        beam_size=self._beam_size,
        top_beams=self._beam_size,
        alpha=self._alpha)  # outputs 4,4,63

  tf.logging.info("Start to init tf session")
  if self._isGpu:
    print('Using GPU in Decoder')
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self._fraction)
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True,
                              log_device_placement=False,
                              gpu_options=gpu_options))
  else:
    print('Using CPU in Decoder')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.allow_soft_placement = True
    config.log_device_placement = False
    self._sess = tf.Session(config=config)
  with self._sess.as_default():
    ckpt = saver_mod.get_checkpoint_state(self._model_dir)
    saver = tf.train.Saver()
    tf.logging.info("Start to restore the parameters from %s",
                    ckpt.model_checkpoint_path)
    saver.restore(self._sess, ckpt.model_checkpoint_path)
  tf.logging.info("Finish initializing environment")
def _init_env(self):
  FLAGS.use_tpu = False
  tf.logging.set_verbosity(tf.logging.DEBUG)
  tf.logging.info("Import usr dir from %s", self._usr_dir)
  if self._usr_dir is not None:
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  tf.logging.info("Start to create hparams for %s of %s", self._problem,
                  self._hparams_set)

  self._hparams = create_hparams()
  self._hparams_decode = create_decode_hparams(
      extra_length=self._extra_length,
      batch_size=self._batch_size,
      beam_size=self._beam_size,
      alpha=self._alpha,
      return_beams=self._return_beams,
      write_beam_scores=self._write_beam_scores)

  self.estimator = trainer_lib.create_estimator(
      FLAGS.model,
      self._hparams,
      t2t_trainer.create_run_config(self._hparams),
      decode_hparams=self._hparams_decode,
      use_tpu=False)

  tf.logging.info("Finish initializing environment")

  ### Problem type: classification, sequence, or language model.
  self.problem_type = self._hparams.problems[0].target_modality[0]  # class? symbol
  self._whether_has_inputs = self._hparams.problem_instances[0].has_inputs
  self._beam_size = 1 if self.problem_type == 'class_label' else self._beam_size

  ### Make the input placeholder.
  self._inputs_ph = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
  x = tf.placeholder(dtype=tf.int32)
  x.set_shape([None, None])  # ? -> (?, ?)
  x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
  # EVAL MODE
  x = tf.expand_dims(x, axis=[3])  # -> (?, ?, 1, 1)
  x = tf.to_int32(x)
  self._inputs_ph = x
  # batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
  batch_inputs = x  # [?, ?, 1, 1]
  # batch_inputs = tf.reshape(self._inputs_ph, [-1, -1, 1, 1])
  # targets_ph = tf.placeholder(dtype=tf.int32)
  # batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])

  self._features = {
      "inputs": batch_inputs,
      "problem_choice": 0,  # We run on the first problem here.
      "input_space_id": self._hparams.problems[0].input_space_id,
      "target_space_id": self._hparams.problems[0].target_space_id
  }
  ### Variable decode length (disabled).
  # self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
  # self._features['decode_length'] = self.input_extra_length_ph

  #### EVAL MODE target.
  self._targets_ph = tf.placeholder(
      tf.int32, shape=(1, None, 1, 1), name='targets')
  self._features['targets'] = self._targets_ph  # batch targets
  del self._features['problem_choice']
  del self._features['input_space_id']
  del self._features['target_space_id']

  mode = tf.estimator.ModeKeys.EVAL
  predictions_dict = self.estimator._call_model_fn(
      self._features, None, mode,
      t2t_trainer.create_run_config(self._hparams))
  self._predictions_dict = predictions_dict.predictions
  # self._predictions = self._predictions_dict["outputs"]
  # self._scores = predictions_dict['scores']  # not returned by greedy search

  tf.logging.info("Start to init tf session")
  if self._isGpu:
    print('Using GPU in Decoder')
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self._fraction)
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True,
                              log_device_placement=False,
                              gpu_options=gpu_options))
  else:
    print('Using CPU in Decoder')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.allow_soft_placement = True
    config.log_device_placement = False
    self._sess = tf.Session(config=config)
  with self._sess.as_default():
    ckpt = saver_mod.get_checkpoint_state(self._model_dir)
    saver = tf.train.Saver()
    tf.logging.info("Start to restore the parameters from %s",
                    ckpt.model_checkpoint_path)
    saver.restore(self._sess, ckpt.model_checkpoint_path)
  tf.logging.info("Finish initializing environment")
def _init_env(self):
  FLAGS.use_tpu = False
  # tf.logging.set_verbosity(tf.logging.DEBUG)
  tf.logging.info("Import usr dir from %s", self._usr_dir)
  if self._usr_dir is not None:
    # usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    usr_dir.import_usr_dir(self._usr_dir)
  tf.logging.info("Start to create hparams for %s of %s", self._problem,
                  self._hparams_set)

  self._hparams = create_hparams()
  self._hparams_decode = create_decode_hparams(
      extra_length=self._extra_length,
      batch_size=self._batch_size,
      beam_size=self._beam_size,
      alpha=self._alpha,
      return_beams=self._return_beams,
      write_beam_scores=self._write_beam_scores,
      force_decode_length=self._force_decode_length)

  self.estimator = trainer_lib.create_estimator(
      FLAGS.model,
      self._hparams,
      t2t_trainer.create_run_config(self._hparams),
      decode_hparams=self._hparams_decode,
      use_tpu=False)

  tf.logging.info("Finish initializing environment")

  ### Problem type: classification, sequence, or language model.
  # self.problem_type = self._hparams.problem_hparams[0].target_modality[0]  # class? symbol
  self.problem_type = self._hparams.problem_hparams.target_modality[0]
  # self._whether_has_inputs = self._hparams.problem[0].has_inputs
  self._whether_has_inputs = self._hparams.problem.has_inputs
  self._beam_size = 1 if self._customer_problem_type == 'classification' else self._beam_size

  ### Make the input placeholders.
  # self._inputs_ph = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
  # x = tf.placeholder(dtype=tf.int32)
  # x.set_shape([None, None])  # ? -> (?, ?)
  # x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
  # x = tf.to_int32(x)
  # self._inputs_ph = x
  # batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
  # batch_inputs = x
  # batch_inputs = tf.reshape(self._inputs_ph, [-1, -1, 1, 1])
  batch_inputs, self._targets_ph, self.input_extra_length_ph = get_ph(
      x_dim_3=True)
  # targets_ph = tf.placeholder(dtype=tf.int32)
  # batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])

  self._features = {
      "inputs": batch_inputs,
      "problem_choice": 0,  # We run on the first problem here.
      "input_space_id": self._hparams.problem_hparams.input_space_id,
      "target_space_id": self._hparams.problem_hparams.target_space_id
  }
  ### Allow a variable decode length:
  # total_decode = input_len + extra_len; extra of chunkProblem = 0
  # real_decode_length = len(input) + extra_length
  self._features['decode_length'] = self.input_extra_length_ph
  # self._features['decode_length_decide_end'] = True

  #### For these hparams sets / models, drop the extra integer features.
  if self._hparams_set == "transformer_relative":
    del self._features['problem_choice']
    del self._features['input_space_id']
    del self._features['target_space_id']

  if self._customer_problem_type == 'languageModel_pp':
    del self._features['problem_choice']
    del self._features['input_space_id']
    del self._features['target_space_id']

  if self._model_name in ['slice_net', 'transformer_encoder']:
    del self._features['problem_choice']
    del self._features['input_space_id']
    del self._features['target_space_id']

  if self._model_name == 'transformer' and self._customer_problem_type == 'classification':
    del self._features['problem_choice']
    del self._features['input_space_id']
    del self._features['target_space_id']

  ###### Targets, if using transformer_scorer.
  if self._customer_problem_type == 'classification':
    self._targets_ph = tf.placeholder(
        tf.int32, shape=(None, None, None, None), name='targets')
    self._features['targets'] = self._targets_ph  # batch targets
  if self._customer_problem_type == 'languageModel_pp':
    self._targets_ph = tf.placeholder(
        tf.int32, shape=(None, None, None, None), name='targets')
    self._features['targets'] = self._targets_ph

  #### Mode.
  mode = tf.estimator.ModeKeys.PREDICT
  if self._customer_problem_type == 'languageModel_pp':
    mode = tf.estimator.ModeKeys.EVAL
  elif self._customer_problem_type == 'classification' and 'score' not in self._model_name:
    mode = tf.estimator.ModeKeys.EVAL

  # estimator_spec = model_builder.model_fn(
  #     self._model_name, features, mode, self._hparams,
  #     problem_names=[self._problem], decode_hparams=self._hparams_dc)
  predictions_dict = self.estimator._call_model_fn(
      self._features, None, mode,
      t2t_trainer.create_run_config(self._hparams))
  self._predictions_dict = predictions_dict.predictions

  # score -> score_yr
  if self._customer_problem_type == 'classification' and 'score' in self._model_name:
    self._score = predictions_dict.predictions.get('scores')
    if self._score is not None:  # [batch, beam] or [batch,]
      self._predictions_dict['scores_class'] = tf.exp(
          common_layers.log_prob_from_logits(self._score))
  elif self._customer_problem_type == 'classification' and 'score' not in self._model_name:
    self._score = predictions_dict.predictions.get('predictions')
    if self._score is not None:  # [batch, beam] or [batch,]
      self._predictions_dict['scores_class'] = tf.exp(
          common_layers.log_prob_from_logits(self._score))
  # self._predictions = self._predictions_dict["outputs"]
  # self._scores = predictions_dict['scores']  # not returned by greedy search

  tf.logging.info("Start to init tf session")
  if self._isGpu:
    print('Using GPU in Decoder')
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self._fraction)
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True,
                              log_device_placement=False,
                              gpu_options=gpu_options))
  else:
    print('Using CPU in Decoder')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.allow_soft_placement = True
    config.log_device_placement = False
    self._sess = tf.Session(config=config)
  with self._sess.as_default():
    ckpt = saver_mod.get_checkpoint_state(self._model_dir)
    saver = tf.train.Saver(allow_empty=True)
    tf.logging.info("Start to restore the parameters from %s",
                    ckpt.model_checkpoint_path)
    saver.restore(self._sess, ckpt.model_checkpoint_path)
  tf.logging.info("Finish initializing environment")
def main(argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  trainer_lib.set_random_seed(FLAGS.random_seed)
  usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  t2t_trainer.maybe_log_registry_and_exit()

  if FLAGS.generate_data:
    t2t_trainer.generate_data()

  if argv:
    t2t_trainer.set_hparams_from_args(argv[1:])

  # hparams = t2t_trainer.create_hparams()
  # hparams.add_hparam("data_dir", FLAGS.data_dir)
  # trainer_lib.add_problem_hparams(hparams, FLAGS.problem)
  hparams_path = os.path.join(FLAGS.output_dir, "hparams.json")
  hparams = trainer_lib.create_hparams(
      FLAGS.hparams_set,
      FLAGS.hparams,
      data_dir=FLAGS.data_dir,
      problem_name=FLAGS.problem,
      hparams_path=hparams_path)
  hparams.add_hparam("model_dir", FLAGS.output_dir)

  config = t2t_trainer.create_run_config(hparams)
  params = {"batch_size": hparams.batch_size}

  # add "_rev" as a hack to avoid image standardization
  problem = registry.problem(FLAGS.problem)
  input_fn = problem.make_estimator_input_fn(tf.estimator.ModeKeys.EVAL,
                                             hparams)

  sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False))

  model_fn = t2t_model.T2TModel.make_estimator_model_fn(
      FLAGS.model, hparams, use_tpu=False)

  dataset = input_fn(params, config).repeat()
  dataset_iterator = dataset.make_one_shot_iterator()
  features, labels = dataset_iterator.get_next()
  # tf.logging.info("### t2t_wei_feat_distrib.py features %s", features)
  spec = model_fn(
      features, labels, tf.estimator.ModeKeys.EVAL, params=hparams,
      config=config)

  # Get the summary of the model structure graph.
  summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

  # Restore weights
  saver = tf.train.Saver()
  checkpoint_path = os.path.expanduser(FLAGS.output_dir or
                                       FLAGS.checkpoint_path)
  tf.logging.info("### t2t_wei_feat_distrib.py checkpoint_path %s",
                  checkpoint_path)
  # saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))
  # Load weights from checkpoint.
  ckpts = tf.train.get_checkpoint_state(checkpoint_path)
  ckpt = ckpts.model_checkpoint_path
  saver.restore(sess, ckpt)
  # saver.restore(sess, checkpoint_path + '/model.ckpt-1421000')
  # initialize_from_ckpt(checkpoint_path)

  # Get pruning parameters.
  pruning_params = create_pruning_params()
  pruning_strategy = create_pruning_strategy(pruning_params.strategy)

  # Build the evaluation graph.
  if 'image' in FLAGS.problem:
    acc, acc_update_op = get_eval_graph_image(spec, labels)
    tf.summary.scalar('accuracy', acc)

    # Define the evaluation function.
    def eval_model():
      sess.run(tf.initialize_local_variables())
      for _ in range(FLAGS.eval_steps):
        acc = sess.run(acc_update_op)
      return acc
  elif 'translate' in FLAGS.problem:
    bleu_op = get_eval_graph_trans(spec, labels)
    # tf.summary.scalar('bleu', bleu_op)

    # Define the evaluation function.
    def eval_model():
      bleu_value = 0
      # sess.run(tf.initialize_local_variables())
      # local_vars = tf.local_variables()
      # tf.logging.info("### t2t_wei_feat_distrib.py local_vars %s", local_vars)
      for _ in range(FLAGS.eval_steps):
        # outputs_tensor, labels_tensor, preds_tensor = sess.run(
        #     [outputs, labels, preds])
        bleu = sess.run(bleu_op)
        # tf.logging.info("### t2t_wei_feat_distrib.py outputs_tensor %s", outputs_tensor[0].shape)
        # tf.logging.info("### t2t_wei_feat_distrib.py labels_tensor %s", labels_tensor[0].shape)
        # tf.logging.info("### t2t_wei_feat_distrib.py preds %s", preds_tensor[0].shape)
        bleu_value += bleu
      bleu_value /= FLAGS.eval_steps
      return bleu_value

  # Get the weight-distribution graph.
  wei_feat_distrib.get_weight_distrib_graph(pruning_params)

  # Do the accuracy-sparsity tradeoff for model weights.
  wei_feat_distrib.wei_sparsity_acc_tradeoff(sess, eval_model,
                                             pruning_strategy,
                                             pruning_params, summary_writer)

  # Save the summary.
  summary_writer.close()

  sess.run(tf.initialize_local_variables())
  preds = spec.predictions["predictions"]
  # features_shape = tf.shape(features)
  pred_shape = tf.shape(preds)
  labels_shape = tf.shape(labels)
# tf.app.run()
import os

# rootpath = os.environ['ROOT_PATH']
rootpath = '../'
FLAGS.data_dir = rootpath + '/data'
FLAGS.problem = 'reason_problem'
FLAGS.model = 'transformer_ae'
FLAGS.hparams_set = 'transformer_ae_small'
FLAGS.t2t_usr_dir = rootpath + '/src'
FLAGS.output_dir = rootpath + '/model'
FLAGS.decode_from_file = './input_string.txt'
FLAGS.decode_to_file = './output_string.txt'

tf.logging.set_verbosity(tf.logging.INFO)
usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
FLAGS.use_tpu = False  # decoding not supported on TPU

hp = create_hparams()
decode_hp = create_decode_hparams()

estimator = trainer_lib.create_estimator(
    FLAGS.model,
    hp,
    t2t_trainer.create_run_config(hp),
    decode_hparams=decode_hp,
    use_tpu=False)

decode(estimator, hp, decode_hp)
def _init_env(self):
  FLAGS.use_tpu = False
  tf.logging.set_verbosity(tf.logging.DEBUG)
  tf.logging.info("Import usr dir from %s", self._usr_dir)
  if self._usr_dir is not None:
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
  tf.logging.info("Start to create hparams for %s of %s", self._problem,
                  self._hparams_set)

  self._hparams = create_hparams()
  self._hparams_decode = create_decode_hparams(
      extra_length=self._extra_length,
      batch_size=self._batch_size,
      beam_size=self._beam_size,
      alpha=self._alpha,
      return_beams=self._return_beams,
      write_beam_scores=self._write_beam_scores)

  # self.estimator_spec = t2t_model.T2TModel.make_estimator_model_fn(
  #     self._model_name, self._hparams,
  #     decode_hparams=self._hparams_decode, use_tpu=False)
  self.estimator = trainer_lib.create_estimator(
      FLAGS.model,
      self._hparams,
      t2t_trainer.create_run_config(self._hparams),
      decode_hparams=self._hparams_decode,
      use_tpu=False)

  tf.logging.info("Finish initializing environment")

  ### Make the input placeholder.
  self._inputs_ph = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
  x = tf.placeholder(dtype=tf.int32)
  x.set_shape([None, None])  # ? -> (?, ?)
  x = tf.expand_dims(x, axis=[2])  # -> (?, ?, 1)
  x = tf.to_int32(x)
  self._inputs_ph = x
  # batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
  batch_inputs = x
  # batch_inputs = tf.reshape(self._inputs_ph, [-1, -1, 1, 1])
  # targets_ph = tf.placeholder(dtype=tf.int32)
  # batch_targets = tf.reshape(targets_ph, [1, -1, 1, 1])

  self._features = {
      "inputs": batch_inputs,
      "problem_choice": 0,  # We run on the first problem here.
      "input_space_id": self._hparams.problem_hparams.input_space_id,
      "target_space_id": self._hparams.problem_hparams.target_space_id
  }
  ### Variable decode length.
  self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
  # self._features['decode_length'] = [self.input_extra_length_ph]

  #### Sample c(s).
  self.cache_ph = tf.placeholder(dtype=tf.int32)
  # self._features['cache_raw'] = tf.reshape(self.cache_ph, [1, 2, 1])

  ## Drop the integer-valued features.
  del self._features["problem_choice"]
  del self._features["input_space_id"]
  del self._features["target_space_id"]

  mode = tf.estimator.ModeKeys.PREDICT
  # estimator_spec = model_builder.model_fn(
  #     self._model_name, features, mode, self._hparams,
  #     problem_names=[self._problem], decode_hparams=self._hparams_dc)

  from tensor2tensor.models import transformer_vae
  model_i = transformer_vae.TransformerAE(
      hparams=self._hparams, mode=mode, decode_hparams=self._hparams_decode)
  # Transformer_(hparams=self._hparams,
  #              mode=mode, decode_hparams=self._hparams_decode)
  # problem_hparams=p_hparams,
  # self._beam_result = model_i._fast_decode(
  #     self._features, decode_length=5, beam_size=10, top_beams=10,
  #     alpha=0.6)  # fail
  # self._beam_result = model_i._beam_decode(
  #     self._features,
  #     decode_length=5,
  #     beam_size=self._beam_size,
  #     top_beams=self._beam_size,
  #     alpha=0.6)
  self.result_dict = model_i.infer(self._features)

  #### Adding targets: some keys get dropped, so model_fn cannot be
  #### called on its own here.
  # from tensor2tensor.layers import common_layers
  # features = self._features
  # batch_size = common_layers.shape_list(features["inputs"])[0]
  # length = common_layers.shape_list(features["inputs"])[1]
  # target_length = tf.to_int32(2.0 * tf.to_float(length))
  # initial_output = tf.zeros((batch_size, target_length, 1, 1),
  #                           dtype=tf.int64)
  # features["targets"] = initial_output
  # ### input
  # if "inputs" in features and len(features["inputs"].shape) < 4:
  #   inputs_old = features["inputs"]
  #   features["inputs"] = tf.expand_dims(features["inputs"], 2)
  # #### model_fn
  # self.result_dict = model_i.model_fn(features)
  """
  ######
  predictions_dict = self.estimator._call_model_fn(
      self._features, None, mode,
      t2t_trainer.create_run_config(self._hparams))
  self._predictions_dict = predictions_dict.predictions
  """
  # self._predictions = self._predictions_dict["outputs"]
  # self._scores = predictions_dict['scores']  # not returned by greedy search

  tf.logging.info("Start to init tf session")
  if self._isGpu:
    print('Using GPU in Decoder')
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=self._fraction)
    self._sess = tf.Session(
        config=tf.ConfigProto(allow_soft_placement=True,
                              log_device_placement=False,
                              gpu_options=gpu_options))
  else:
    print('Using CPU in Decoder')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
    config = tf.ConfigProto(gpu_options=gpu_options)
    config.allow_soft_placement = True
    config.log_device_placement = False
    self._sess = tf.Session(config=config)
  with self._sess.as_default():
    ckpt = saver_mod.get_checkpoint_state(self._model_dir)
    saver = tf.train.Saver()
    tf.logging.info("Start to restore the parameters from %s",
                    ckpt.model_checkpoint_path)
    saver.restore(self._sess, ckpt.model_checkpoint_path)
  tf.logging.info("Finish initializing environment")
def _init_env(self):
    FLAGS.use_tpu = False
    tf.logging.set_verbosity(tf.logging.DEBUG)
    tf.logging.info("Import usr dir from %s", self._usr_dir)
    if self._usr_dir is not None:
        usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    tf.logging.info("Start to create hparams, for %s of %s", self._problem,
                    self._hparams_set)

    self._hparams = create_hparams()
    self._hparams_decode = create_decode_hparams(
        extra_length=self._extra_length,
        batch_size=self._batch_size,
        beam_size=self._beam_size,
        alpha=self._alpha,
        return_beams=self._return_beams,
        write_beam_scores=self._write_beam_scores,
        force_decode_length=self._force_decode_length)

    # self.estimator_spec = t2t_model.T2TModel.make_estimator_model_fn(
    #     self._model_name, self._hparams,
    #     decode_hparams=self._hparams_decode, use_tpu=False)
    self.estimator = trainer_lib.create_estimator(
        FLAGS.model,
        self._hparams,
        t2t_trainer.create_run_config(self._hparams),
        decode_hparams=self._hparams_decode,
        use_tpu=False)

    tf.logging.info("Finish initializing environment")

    # Legacy unshaped input placeholder; superseded by self.inputs_ph below.
    x = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
    x.set_shape([None, None])      # ? -> (?, ?)
    x = tf.expand_dims(x, axis=2)  # -> (?, ?, 1)
    x = tf.to_int32(x)
    self._inputs_ph = x
    # batch_inputs = tf.reshape(self._inputs_ph, [self._batch_size, -1, 1, 1])
    batch_inputs = x

    # Rank-4 (batch, length, 1, 1) int32 placeholders, as T2T expects.
    # self.targets_ph = tf.placeholder(
    #     tf.int32, shape=(None, None, None, None), name='targets')
    self.inputs_ph = tf.placeholder(
        tf.int32, shape=(None, None, 1, 1), name='inputs')
    self.targets_ph = tf.placeholder(
        tf.int32, shape=(None, None, 1, 1), name='targets')

    self._features = {
        "inputs": self.inputs_ph,
        "problem_choice": 0,  # We run on the first problem here.
        "input_space_id": self._hparams.problem_hparams.input_space_id,
        "target_space_id": self._hparams.problem_hparams.target_space_id
    }
    # Placeholder for a variable decode length.
    self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
    self._features['decode_length'] = self.input_extra_length_ph
    # Target side.
    self._features['targets'] = self.targets_ph
    target_pretend = np.zeros((1, 1, 1, 1))

    # Remove the plain-integer entries; the model expects tensors only.
    del self._features["problem_choice"]
    del self._features["input_space_id"]
    del self._features["target_space_id"]
    del self._features['decode_length']

    # mode = tf.estimator.ModeKeys.PREDICT
    #   affects last_only in t2t_model._top_single: [1,?,1,512] -> [1,1,1,1,64]
    # if self.predict_or_eval == 'EVAL':
    #     mode = tf.estimator.ModeKeys.EVAL
    #   affects last_only in t2t_model._top_single: [1,?,1,512] -> [1,?,1,1,64]
    # if self.predict_or_eval == 'PREDICT':
    #     mode = tf.estimator.ModeKeys.PREDICT
    if self.predict_or_eval == 'and':
        mode = tf.estimator.ModeKeys.TRAIN

    # Build the model from the registry.
    translate_model = registry.model(self._model_name)(
        hparams=self._hparams, decode_hparams=self._hparams_decode, mode=mode)

    self.predict_dict = {}
    # if self.predict_or_eval == 'EVAL':
    #     self.logits, _ = translate_model(self._features)
    #     self.predict_dict['scores'] = self.logits
    # if self.predict_or_eval == 'PREDICT':
    #     self.predict_dict = translate_model.infer(
    #         features=self._features, decode_length=50,
    #         beam_size=1, top_beams=1)

    if self.predict_or_eval == 'and':
        # Get the logits EVAL-style by calling the model body directly.
        # self._features['targets'] = [[self._targets_ph]]
        self.logits, self.ret2 = translate_model(self._features)

    # Fetching logits through model_fn fails ("key not found"):
    # logits, _ = translate_model.model_fn(self._features)
    #
    # Beam decoding attempts, kept for reference (both fail on this graph):
    # self._beam_result = model_i._fast_decode(
    #     self._features, decode_length=5, beam_size=10, top_beams=10,
    #     alpha=0.6)
    # self._beam_result = model_i._beam_decode(
    #     self._features, decode_length=5, beam_size=self._beam_size,
    #     top_beams=self._beam_size, alpha=0.6)
    #
    # Log-probability computation, for reference:
    # logits, _ = model_i.model_fn(self._features)
    # assert len(logits.shape) == 5
    # logits = tf.squeeze(logits, [2, 3])
    # from tensor2tensor.layers import common_layers
    # self.log_probs = common_layers.log_prob_from_logits(logits)

    # self._predictions = self._predictions_dict["outputs"]
    # self._scores = predictions_dict['scores']  # not returned by greedy search

    tf.logging.info("Start to init tf session")
    if self._isGpu:
        print('Using GPU in Decoder')
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self._fraction)
        self._sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  gpu_options=gpu_options))
    else:
        print('Using CPU in Decoder')
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
        config = tf.ConfigProto(gpu_options=gpu_options)
        config.allow_soft_placement = True
        config.log_device_placement = False
        self._sess = tf.Session(config=config)

    with self._sess.as_default():
        ckpt = saver_mod.get_checkpoint_state(self._model_dir)
        saver = tf.train.Saver()
        # tf.logging.info("Start to restore the parameters from %s",
        #                 ckpt.model_checkpoint_path)
        # saver.restore(self._sess, ckpt.model_checkpoint_path)
        # Re-initialize all parameters instead of restoring the checkpoint.
        self._sess.run(tf.global_variables_initializer())

    tf.logging.info("Finish initializing environment")
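# Hypothetical usage sketch (not in the original source): fetch logits for a
# forced target from the graph above (note the weights are randomly
# initialized here, not restored). Feeds are rank-4 (batch, length, 1, 1) to
# match inputs_ph / targets_ph; the method name is illustrative.
def _score_once(self, input_ids, target_ids):
    feed = {
        self.inputs_ph:
            np.asarray(input_ids, dtype=np.int32).reshape(1, -1, 1, 1),
        self.targets_ph:
            np.asarray(target_ids, dtype=np.int32).reshape(1, -1, 1, 1),
    }
    with self._sess.as_default():
        # self.logits is rank-5: [batch, length, 1, 1, vocab_size].
        return self._sess.run(self.logits, feed_dict=feed)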
def _init_env(self):
    FLAGS.use_tpu = False
    tf.logging.set_verbosity(tf.logging.DEBUG)
    tf.logging.info("Import usr dir from %s", self._usr_dir)
    if self._usr_dir is not None:
        usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    tf.logging.info("Start to create hparams, for %s of %s", self._problem,
                    self._hparams_set)

    self._hparams = create_hparams()
    self._hparams_decode = create_decode_hparams(
        extra_length=self._extra_length,
        batch_size=self._batch_size,
        beam_size=self._beam_size,
        alpha=self._alpha,
        return_beams=self._return_beams,
        write_beam_scores=self._write_beam_scores,
        force_decode_length=self._force_decode_length)

    self.estimator = trainer_lib.create_estimator(
        FLAGS.model,
        self._hparams,
        t2t_trainer.create_run_config(self._hparams),
        decode_hparams=self._hparams_decode,
        use_tpu=False)

    tf.logging.info("Finish initializing environment")

    # Legacy unshaped input placeholder; superseded by self.inputs_ph below.
    x = tf.placeholder(dtype=tf.int32)  # shape not specified, any shape
    x.set_shape([None, None])      # ? -> (?, ?)
    x = tf.expand_dims(x, axis=2)  # -> (?, ?, 1)
    x = tf.to_int32(x)
    self._inputs_ph = x
    batch_inputs = x

    # Rank-4 (batch, length, 1, 1) int32 placeholders, as T2T expects. A
    # second target placeholder feeds the extra "targets2" stream.
    self.inputs_ph = tf.placeholder(
        tf.int32, shape=(None, None, 1, 1), name='inputs')
    self.targets_ph = tf.placeholder(
        tf.int32, shape=(None, None, 1, 1), name='targets')
    self.targets_ph_2 = tf.placeholder(
        tf.int32, shape=(None, None, 1, 1), name='targets')

    self._features = {
        "inputs": self.inputs_ph,
        "problem_choice": 0,  # We run on the first problem here.
        "input_space_id": self._hparams.problem_hparams.input_space_id,
        "target_space_id": self._hparams.problem_hparams.target_space_id
    }
    # Placeholder for a variable decode length.
    self.input_extra_length_ph = tf.placeholder(dtype=tf.int32)
    self._features['decode_length'] = self.input_extra_length_ph
    # Target side.
    self._features['targets'] = self.targets_ph
    self._features['targets2'] = self.targets_ph_2
    target_pretend = np.zeros((1, 1, 1, 1))

    # Remove the plain-integer entries; the model expects tensors only.
    del self._features["problem_choice"]
    del self._features["input_space_id"]
    del self._features["target_space_id"]
    del self._features['decode_length']

    # Mode selection: the commented-out PREDICT/EVAL experiments are
    # identical to the previous _init_env variant.
    if self.predict_or_eval == 'and':
        mode = tf.estimator.ModeKeys.TRAIN

    # Build the model from the registry.
    translate_model = registry.model(self._model_name)(
        hparams=self._hparams, decode_hparams=self._hparams_decode, mode=mode)

    self.predict_dict = {}
    if self.predict_or_eval == 'and':
        # Get the logits EVAL-style by calling the model body directly.
        self.logits, self.ret2 = translate_model(self._features)

    tf.logging.info("Start to init tf session")
    if self._isGpu:
        print('Using GPU in Decoder')
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=self._fraction)
        self._sess = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False,
                                  gpu_options=gpu_options))
    else:
        print('Using CPU in Decoder')
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0)
        config = tf.ConfigProto(gpu_options=gpu_options)
        config.allow_soft_placement = True
        config.log_device_placement = False
        self._sess = tf.Session(config=config)

    with self._sess.as_default():
        # ckpt = saver_mod.get_checkpoint_state(self._model_dir)
        self._sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # train_handle = sess.run(train_iterator.string_handle())
        # dev_handle = sess.run(dev_iterator.string_handle())
        if os.path.exists(os.path.join(self._model_dir, "checkpoint")):
            saver.restore(self._sess,
                          tf.train.latest_checkpoint(self._model_dir))

        # The loop below was lifted from an external (QANet-style) training
        # script: `tqdm`, `model`, `config`, `handle`, `train_handle`,
        # `dev_handle`, `writer`, `evaluate_batch`, `train_eval_file`,
        # `dev_eval_file`, `dev_total`, `best_f1`, `best_em` and `patience`
        # are all assumed to be provided by that surrounding script, and
        # `translate_model.global_step` assumes the model exposes a global
        # step -- none of them are defined here.
        global_step = max(self._sess.run(translate_model.global_step), 1)
        for _ in tqdm(range(global_step, 1000 + 1)):
            global_step = self._sess.run(translate_model.global_step) + 1
            loss, _ = self._sess.run(
                [self.ret2, model.train_op],
                feed_dict={handle: train_handle,
                           model.dropout: config.dropout})
            if global_step % config.period == 0:
                loss_sum = tf.Summary(value=[
                    tf.Summary.Value(tag="model/loss", simple_value=loss),
                ])
                writer.add_summary(loss_sum, global_step)
            if global_step % config.checkpoint == 0:
                _, summ = evaluate_batch(model, config.val_num_batches,
                                         train_eval_file, self._sess, "train",
                                         handle, train_handle)
                for s in summ:
                    writer.add_summary(s, global_step)
                metrics, summ = evaluate_batch(
                    model, dev_total // config.batch_size + 1, dev_eval_file,
                    self._sess, "dev", handle, dev_handle)
                dev_f1 = metrics["f1"]
                dev_em = metrics["exact_match"]
                if dev_f1 < best_f1 and dev_em < best_em:
                    patience += 1
                    if patience > config.early_stop:
                        break
                else:
                    patience = 0
                    best_em = max(best_em, dev_em)
                    best_f1 = max(best_f1, dev_f1)
                for s in summ:
                    writer.add_summary(s, global_step)
                writer.flush()
                filename = os.path.join(
                    config.save_dir, "model_{}.ckpt".format(global_step))
                saver.save(self._sess, filename)
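# Hypothetical usage sketch (not in the original source): evaluate the loss
# returned by the model call above. self.ret2 is the losses dict returned by
# translate_model(self._features); both target placeholders are fed the same
# ids here, since the role of the extra "targets2" stream is model-specific.
# The method name is illustrative.
def _loss_once(self, input_ids, target_ids):
    targets = np.asarray(target_ids, dtype=np.int32).reshape(1, -1, 1, 1)
    feed = {
        self.inputs_ph:
            np.asarray(input_ids, dtype=np.int32).reshape(1, -1, 1, 1),
        self.targets_ph: targets,
        self.targets_ph_2: targets,
    }
    return self._sess.run(self.ret2, feed_dict=feed)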
def main(argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)
    t2t_trainer.maybe_log_registry_and_exit()

    if FLAGS.cloud_mlengine:
        cloud_mlengine.launch()
        return

    if FLAGS.generate_data:
        t2t_trainer.generate_data()

    if cloud_mlengine.job_dir():
        FLAGS.output_dir = cloud_mlengine.job_dir()

    if argv:
        t2t_trainer.set_hparams_from_args(argv[1:])

    if FLAGS.surrogate_attack:
        tf.logging.warn("Performing surrogate model attack.")
        sur_hparams = create_surrogate_hparams()
        trainer_lib.add_problem_hparams(sur_hparams, FLAGS.problem)

    hparams = t2t_trainer.create_hparams()
    trainer_lib.add_problem_hparams(hparams, FLAGS.problem)

    attack_params = create_attack_params()
    attack_params.add_hparam(attack_params.epsilon_name, 0.0)

    if FLAGS.surrogate_attack:
        sur_config = create_surrogate_run_config(sur_hparams)
    config = t2t_trainer.create_run_config(hparams)
    params = {
        "batch_size": hparams.batch_size,
        "use_tpu": FLAGS.use_tpu,
    }

    # Add "_rev" as a hack to avoid image standardization.
    problem = registry.problem(FLAGS.problem + "_rev")

    inputs, labels, features = prepare_data(problem, hparams, params, config)

    sess = tf.Session()

    if FLAGS.surrogate_attack:
        sur_model_fn = t2t_model.T2TModel.make_estimator_model_fn(
            FLAGS.surrogate_model, sur_hparams, use_tpu=FLAGS.use_tpu)
        sur_ch_model = adv_attack_utils.T2TAttackModel(
            sur_model_fn, features, params, sur_config, scope="surrogate")
        # Dummy call to construct the graph.
        sur_ch_model.get_probs(inputs)

        checkpoint_path = os.path.expanduser(FLAGS.surrogate_output_dir)
        tf.train.init_from_checkpoint(
            tf.train.latest_checkpoint(checkpoint_path), {"/": "surrogate/"})
        sess.run(tf.global_variables_initializer())

    other_vars = set(tf.global_variables())

    model_fn = t2t_model.T2TModel.make_estimator_model_fn(FLAGS.model, hparams)
    ch_model = adv_attack_utils.T2TAttackModel(model_fn, features, params,
                                               config)

    acc_mask = None
    probs = ch_model.get_probs(inputs)
    if FLAGS.ignore_incorrect:
        preds = tf.argmax(probs, -1, output_type=labels.dtype)
        preds = tf.reshape(preds, labels.shape)
        acc_mask = tf.to_float(tf.equal(labels, preds))
    one_hot_labels = tf.one_hot(labels, probs.shape[-1])

    if FLAGS.surrogate_attack:
        attack = create_attack(attack_params.attack)(sur_ch_model, sess=sess)
    else:
        attack = create_attack(attack_params.attack)(ch_model, sess=sess)

    new_vars = set(tf.global_variables()) - other_vars

    # Restore weights.
    saver = tf.train.Saver(new_vars)
    checkpoint_path = os.path.expanduser(FLAGS.output_dir)
    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_path))

    # Reuse variables.
    tf.get_variable_scope().reuse_variables()

    def compute_accuracy(x, l, mask):
        """Compute model accuracy."""
        preds = ch_model.get_probs(x)
        preds = tf.squeeze(preds)
        preds = tf.argmax(preds, -1, output_type=l.dtype)
        _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

        if FLAGS.surrogate_attack:
            preds = sur_ch_model.get_probs(x)
            preds = tf.squeeze(preds)
            preds = tf.argmax(preds, -1, output_type=l.dtype)
            acc_update_op = tf.tuple(
                (acc_update_op,
                 tf.metrics.accuracy(l, preds, weights=mask)[1]))

        sess.run(tf.initialize_local_variables())
        for i in range(FLAGS.eval_steps):
            tf.logging.info("\tEvaluating batch [%d / %d]" %
                            (i + 1, FLAGS.eval_steps))
            acc = sess.run(acc_update_op)
        if FLAGS.surrogate_attack:
            tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
        else:
            tf.logging.info("\tFinal acc: %.4f" % acc)
        return acc

    epsilon_acc_pairs = []
    for epsilon in attack_params.attack_epsilons:
        tf.logging.info("Attacking @ eps=%.4f" % epsilon)
        attack_params.set_hparam(attack_params.epsilon_name, epsilon)
        adv_x = attack.generate(inputs, y=one_hot_labels,
                                **attack_params.values())
        acc = compute_accuracy(adv_x, labels, acc_mask)
        epsilon_acc_pairs.append((epsilon, acc))

    for epsilon, acc in epsilon_acc_pairs:
        if FLAGS.surrogate_attack:
            tf.logging.info("Accuracy @ eps=%.4f: (%.4f, %.4f)" %
                            (epsilon, acc[0], acc[1]))
        else:
            tf.logging.info("Accuracy @ eps=%.4f: %.4f" % (epsilon, acc))
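# Example invocation (illustrative only -- flag values are placeholders and
# the script name is assumed; only flags referenced in main() above are used):
#
#   python t2t_attack.py \
#     --problem=image_cifar10 \
#     --model=resnet \
#     --output_dir=/tmp/cifar10_model \
#     --surrogate_attack \
#     --surrogate_model=resnet \
#     --surrogate_output_dir=/tmp/cifar10_surrogate \
#     --ignore_incorrect \
#     --eval_steps=20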