def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  agent_class = dqn_agent.DdqnAgent if FLAGS.use_ddqn else dqn_agent.DqnAgent
  train_eval(FLAGS.root_dir, agent_class=agent_class,
             num_iterations=FLAGS.num_iterations)
def main(_):
  logging.set_verbosity(logging.INFO)
  if common.has_eager_been_enabled():
    return 0
  tf.enable_resource_variables()
  TrainEval(FLAGS.root_dir, suite_atari.game(name=FLAGS.game_name),
            **get_run_args()).run()
def main(_):
  parser = argparse.ArgumentParser(description='Classification model training')
  parser.add_argument('--config_file', type=str, default=None,
                      help='Optional config file for params')
  parser.add_argument('opts', help='see config.py for all options',
                      default=None, nargs=argparse.REMAINDER)
  args = parser.parse_args()
  if args.config_file is not None:
    cfg_from_file(args.config_file)
  if args.opts is not None:
    cfg_from_list(args.opts)
  assert_and_infer_cfg()
  print_cfg()
  os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
  logger = utils.setup_custom_logger('root')
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
  tf_config = tf.ConfigProto(device_count=dict(GPU=1),
                             gpu_options=tf.GPUOptions(allow_growth=True))
  tf.enable_resource_variables()
  train(tf_config, logger)
  test(tf_config, logger)
def tf1_multi_args_model_path() -> Generator[str, None, None]:
  def simple_model_fn():
    x1 = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="x1")
    x2 = tf.placeholder(shape=[None, 5], dtype=tf.float32, name="x2")
    factor = tf.placeholder(shape=(), dtype=tf.float32, name="factor")
    w = tf.constant([[1.0], [1.0], [1.0], [1.0], [1.0]], dtype=tf.float32)
    x = x1 + x2 * factor
    p = tf.matmul(x, w)
    return {"p": p, "x1": x1, "x2": x2, "factor": factor}

  simple_model = simple_model_fn()
  with tempfile.TemporaryDirectory() as temp_dir:
    # The snippet referenced `sess` without ever creating it; create a session
    # for the default graph here.
    sess = tf.Session()
    with sess.as_default():
      tf.enable_resource_variables()
      sess.run(tf.global_variables_initializer())
      inputs = {
          "x1": simple_model["x1"],
          "x2": simple_model["x2"],
          "factor": simple_model["factor"],
      }
      outputs = {"prediction": simple_model["p"]}
      tf.compat.v1.saved_model.simple_save(sess, temp_dir, inputs=inputs,
                                           outputs=outputs)
    yield temp_dir
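# Usage sketch, not part of the fixture above: loading the exported directory
# back in a fresh TF1 session through the "serving_default" signature that
# `simple_save` writes. `model_dir` stands in for the path yielded by the
# fixture, and the input values are illustrative.
import numpy as np
import tensorflow as tf

model_dir = "/tmp/tf1_multi_args_model"  # placeholder: path yielded by the fixture

graph = tf.Graph()
with graph.as_default(), tf.Session() as sess:
  meta = tf.compat.v1.saved_model.load(sess, ["serve"], model_dir)
  sig = meta.signature_def["serving_default"]
  feed = {
      sig.inputs["x1"].name: np.ones((2, 5), np.float32),
      sig.inputs["x2"].name: np.ones((2, 5), np.float32),
      sig.inputs["factor"].name: 2.0,
  }
  pred = sess.run(sig.outputs["prediction"].name, feed_dict=feed)
  # Each row is sum(x1 + 2 * x2) over the 5 features, i.e. 15.0.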
def main(_):
  if common.has_eager_been_enabled():
    return 0
  tf.compat.v1.enable_resource_variables()
  logging.set_verbosity(logging.INFO)
  train_eval(FLAGS.root_dir, num_iterations=FLAGS.num_iterations)
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  environment_name = FLAGS.environment_name
  if environment_name is None:
    environment_name = suite_atari.game(name=FLAGS.game_name)
  TrainEval(FLAGS.root_dir, environment_name, **get_run_args()).run()
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  train_eval(
      FLAGS.root_dir,
      num_iterations=FLAGS.num_iterations,
      eval_interval=FLAGS.eval_interval)
def _init_parameters(self):
  tf.enable_resource_variables()
  # self.model = tf.keras.Sequential([
  #     tf.keras.layers.Dense(512, activation=tf.nn.selu,
  #                           input_shape=(self.FLAGS.num_mov,), name='w1'),
  #     tf.keras.layers.Dense(512, activation=tf.nn.selu, name='w2'),
  #     tf.keras.layers.Dense(1024, activation=tf.nn.selu),
  #     tf.keras.layers.Dropout(rate=0.8),
  #     tf.keras.layers.Dense(512, activation=tf.nn.selu),
  #     tf.keras.layers.Dense(512, activation=tf.nn.selu),
  #     tf.keras.layers.Dense(self.FLAGS.num_mov, activation=tf.nn.selu)
  # ])
  with tf.name_scope('weights'):
    # self.W_1 = tfe.Variable(tf.random_normal([self.FLAGS.num_mov, 512], mean=0.0, stddev=0.2), name='weight_1')
    # self.W_2 = tfe.Variable(tf.random_normal([512, 512], mean=0.0, stddev=0.2), name='weight_2')
    # self.W_3 = tfe.Variable(tf.random_normal([512, 1024], mean=0.0, stddev=0.2), name='weight_3')
    initializer = tf.contrib.layers.xavier_initializer()
    self.W_1 = tf.get_variable(shape=[self.FLAGS.num_mov, 512],
                               initializer=initializer, name='weight_1')
    self.W_2 = tf.get_variable(shape=[512, 512], name='weight_2',
                               initializer=initializer)
    self.W_3 = tf.get_variable(shape=[512, 1024], name='weight_3',
                               initializer=initializer)
    # self.W_4 = tfe.Variable(tf.random_normal([1024, 512], mean=0.0, stddev=0.2), name='weight_4')
    # self.W_5 = tfe.Variable(tf.random_normal([512, 512], mean=0.0, stddev=0.2), name='weight_5')
    # self.W_6 = tfe.Variable(tf.random_normal([512, self.FLAGS.num_mov], mean=0.0, stddev=0.2), name='weight_6')
  with tf.name_scope('biases'):
    self.b1 = tf.get_variable(shape=[512], name='bias_1', initializer=self.bias_initializer)
    self.b2 = tf.get_variable(shape=[512], name='bias_2', initializer=self.bias_initializer)
    self.b3 = tf.get_variable(shape=[1024], name='bias_3', initializer=self.bias_initializer)
    self.b4 = tf.get_variable(shape=[512], name='bias_4', initializer=self.bias_initializer)
    self.b5 = tf.get_variable(shape=[512], name='bias_5', initializer=self.bias_initializer)
    self.b6 = tf.get_variable(shape=[17770], name='bias_6', initializer=self.bias_initializer)
def main(results):
  tf.enable_resource_variables()
  # restore config and model
  dir_output = results
  weights_dir = os.path.join(dir_output, 'model.weights/')
  t = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M-%S')
  # saved_path = 'saved_' + t
  saved_path = 'saved_word'
  saved_path = os.path.join(dir_output, saved_path)
  config_data = Config(dir_output + "data.json")
  config_vocab = Config(dir_output + "vocab.json")
  config_model = Config(dir_output + "model.json")
  vocab = Vocab(config_vocab)
  if not os.path.isdir(saved_path):
    model = Img2SeqModel(config_model, dir_output, vocab)
    model.build_pred()
    model.restore_session(weights_dir)
    model.save_savedmodel(saved_path)
  # chkp.print_tensors_in_checkpoint_file(weights_dir, tensor_name='', all_tensors=True)
  SAMPLE_DIR = 'tools/data/hand/raw_word'

  def representative_dataset_gen():
    num_calibration_steps = 10
    if not os.path.isdir(SAMPLE_DIR):
      print('Failed to read representative_dataset')
      return
    for f in os.listdir(SAMPLE_DIR):
      img_path = os.path.join(SAMPLE_DIR, f)
      img = Image.open(img_path)
      img = img.resize((80, 100), Image.BILINEAR)
      img.show()
      img = np.array(img)
      yield [img]
      num_calibration_steps -= 1
      if num_calibration_steps == 0:
        break

  converter = tf.lite.TFLiteConverter.from_saved_model(saved_path)
  converter.target_ops = [
      # tf.lite.OpsSet.TFLITE_BUILTINS,
      tf.lite.OpsSet.SELECT_TF_OPS
  ]
  # Following has "Segmentation fault"
  # converter.optimizations = [tf.lite.Optimize.DEFAULT]
  # converter.representative_dataset = representative_dataset_gen
  tflite_model = converter.convert()
  open("converted_model_word.tflite", "wb").write(tflite_model)
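# Usage sketch, not part of the conversion script above: running the converted
# model with the TFLite Python interpreter. Because the converter selects
# SELECT_TF_OPS, this assumes the TensorFlow build used here bundles the Flex
# kernels. Input shape and dtype are read from the model rather than assumed.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="converted_model_word.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed a zero-filled tensor of whatever shape/dtype the model declares.
dummy = np.zeros(input_details[0]["shape"], dtype=input_details[0]["dtype"])
interpreter.set_tensor(input_details[0]["index"], dummy)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]["index"]))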
def main(_):
  # Log messages at INFO verbosity and above.
  logging.set_verbosity(logging.INFO)
  # Enable resource variables.
  tf.enable_resource_variables()
  agent_class = dqn_agent.DdqnAgent if FLAGS.use_ddqn else dqn_agent.DqnAgent
  # Train the agent & evaluate it!
  train_eval(FLAGS.root_dir, agent_class=agent_class,
             num_iterations=FLAGS.num_iterations)
def main():
  """Runs all unit tests with select TF 2.0 features enabled.

  This function should only be used if TensorFlow code is being tested.
  Eventually, all TF 2.0 features will be enabled.
  """
  tf2.enable()  # Switches TensorArrayV2 and control flow V2
  tf.enable_v2_tensorshape()
  tf.enable_resource_variables()  # Required since we use defuns.
  tf.test.main()
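# Illustrative companion to the runner above, not part of the original file:
# a minimal test module that could use it as its entry point. The test class
# and its assertion are hypothetical.
import tensorflow as tf


class ResourceVariableSmokeTest(tf.test.TestCase):

  def test_assign_add(self):
    v = tf.Variable(1.0)  # a resource variable once enable_resource_variables() has run
    self.evaluate(v.initializer)
    self.evaluate(v.assign_add(2.0))
    self.assertAllClose(3.0, self.evaluate(v))


if __name__ == "__main__":
  main()  # the runner defined above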
def main(unused_argv):
  tf.enable_resource_variables()
  with tf.device('/CPU:0'):  # due to b/128333994
    env = wheel_py_environment.WheelPyEnvironment(DELTA, MU_BASE, STD_BASE,
                                                  MU_HIGH, STD_HIGH, BATCH_SIZE)
    environment = tf_py_environment.TFPyEnvironment(env)
    optimal_reward_fn = functools.partial(
        environment_utilities.tf_wheel_bandit_compute_optimal_reward,
        delta=DELTA, mu_inside=MU_BASE[0], mu_high=MU_HIGH)
    optimal_action_fn = functools.partial(
        environment_utilities.tf_wheel_bandit_compute_optimal_action,
        delta=DELTA)
    if FLAGS.agent == 'LinUCB':
      agent = lin_ucb_agent.LinearUCBAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          dtype=tf.float32)
    elif FLAGS.agent == 'LinTS':
      agent = lin_ts_agent.LinearThompsonSamplingAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          dtype=tf.float32)
    elif FLAGS.agent == 'epsGreedy':
      network = q_network.QNetwork(
          input_tensor_spec=environment.time_step_spec().observation,
          action_spec=environment.action_spec(),
          fc_layer_params=LAYERS)
      agent = eps_greedy_agent.NeuralEpsilonGreedyAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          reward_network=network,
          optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=LR),
          epsilon=EPSILON)
    regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)
    suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
        optimal_action_fn)
    trainer.train(
        root_dir=FLAGS.root_dir,
        agent=agent,
        environment=environment,
        training_loops=TRAINING_LOOPS,
        steps_per_loop=STEPS_PER_LOOP,
        additional_metrics=[regret_metric, suboptimal_arms_metric])
def export_saved_model(self, **kwargs):
  """Export a saved model for inference."""
  tf.enable_resource_variables()
  driver = inference.ServingDriver(
      self.model_name,
      self.ckpt_path,
      batch_size=self.batch_size,
      use_xla=self.use_xla,
      model_params=self.model_config.as_dict(),
      **kwargs)
  driver.build()
  driver.export(self.saved_model_dir, self.tflite_path, self.tensorrt)
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_v2_behavior()
  tf.enable_resource_variables()
  tf.enable_control_flow_v2()
  logging.info('Executing eagerly: %s', tf.executing_eagerly())
  logging.info('parsing config files: %s', FLAGS.gin_file)
  gin.parse_config_files_and_bindings(FLAGS.gin_file, FLAGS.gin_bindings,
                                      skip_unknown=True)
  trainer.train(root_dir, eval_metrics_callback=metrics_callback)
def setUp(self):
  super(CategoricalDqnAgentTest, self).setUp()
  tf.enable_resource_variables()
  self._obs_spec = tensor_spec.TensorSpec([2], tf.float32)
  self._time_step_spec = ts.time_step_spec(self._obs_spec)
  self._action_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 1)
  self._categorical_net = categorical_q_network.CategoricalQNetwork(
      self._obs_spec, self._action_spec, fc_layer_params=[4])
  self._dummy_categorical_net = DummyCategoricalNet(self._obs_spec)
  self._optimizer = tf.train.GradientDescentOptimizer(0.01)
def main(unused_argv):
  tf.enable_resource_variables()
  with tf.device('/CPU:0'):  # due to b/128333994
    observation_shape = [CONTEXT_DIM]
    overall_shape = [BATCH_SIZE] + observation_shape
    observation_distribution = tfd.Normal(loc=tf.zeros(overall_shape),
                                          scale=tf.ones(overall_shape))
    action_shape = [NUM_ACTIONS]
    observation_to_reward_shape = observation_shape + action_shape
    observation_to_reward_distribution = tfd.Normal(
        loc=tf.zeros(observation_to_reward_shape),
        scale=tf.ones(observation_to_reward_shape))
    drift_distribution = tfd.Normal(loc=DRIFT_MEAN, scale=DRIFT_VARIANCE)
    additive_reward_distribution = tfd.Normal(
        loc=tf.zeros(action_shape),
        scale=(REWARD_NOISE_VARIANCE * tf.ones(action_shape)))
    environment_dynamics = dle.DriftingLinearDynamics(
        observation_distribution,
        observation_to_reward_distribution,
        drift_distribution,
        additive_reward_distribution)
    environment = nse.NonStationaryStochasticEnvironment(environment_dynamics)
    if FLAGS.agent == 'LinUCB':
      agent = lin_ucb_agent.LinearUCBAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          gamma=0.95,
          emit_log_probability=False,
          dtype=tf.float32)
    elif FLAGS.agent == 'LinTS':
      agent = lin_ts_agent.LinearThompsonSamplingAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          gamma=0.95,
          dtype=tf.float32)
    regret_metric = tf_bandit_metrics.RegretMetric(
        environment.environment_dynamics.compute_optimal_reward)
    suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
        environment.environment_dynamics.compute_optimal_action)
    trainer.train(
        root_dir=FLAGS.root_dir,
        agent=agent,
        environment=environment,
        training_loops=TRAINING_LOOPS,
        steps_per_loop=STEPS_PER_LOOP,
        additional_metrics=[regret_metric, suboptimal_arms_metric])
def main(unused_argv):
  tf.enable_resource_variables()
  with tf.device('/CPU:0'):  # due to b/128333994
    action_reward_fns = (
        environment_utilities.sliding_linear_reward_fn_generator(
            CONTEXT_DIM, NUM_ACTIONS, REWARD_NOISE_VARIANCE))
    env = sspe.StationaryStochasticPyEnvironment(
        functools.partial(
            environment_utilities.context_sampling_fn,
            batch_size=BATCH_SIZE,
            context_dim=CONTEXT_DIM),
        action_reward_fns,
        batch_size=BATCH_SIZE)
    environment = tf_py_environment.TFPyEnvironment(env)
    optimal_reward_fn = functools.partial(
        environment_utilities.tf_compute_optimal_reward,
        per_action_reward_fns=action_reward_fns)
    optimal_action_fn = functools.partial(
        environment_utilities.tf_compute_optimal_action,
        per_action_reward_fns=action_reward_fns)
    if FLAGS.agent == 'LinUCB':
      agent = lin_ucb_agent.LinearUCBAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          dtype=tf.float32)
    elif FLAGS.agent == 'LinTS':
      agent = lin_ts_agent.LinearThompsonSamplingAgent(
          time_step_spec=environment.time_step_spec(),
          action_spec=environment.action_spec(),
          alpha=AGENT_ALPHA,
          dtype=tf.float32)
    regret_metric = tf_bandit_metrics.RegretMetric(optimal_reward_fn)
    suboptimal_arms_metric = tf_bandit_metrics.SuboptimalArmsMetric(
        optimal_action_fn)
    trainer.train(
        root_dir=FLAGS.root_dir,
        agent=agent,
        environment=environment,
        training_loops=TRAINING_LOOPS,
        steps_per_loop=STEPS_PER_LOOP,
        additional_metrics=[regret_metric, suboptimal_arms_metric])
def __init__(self, ncf_dataset, params):
  with tf.Graph().as_default() as self._graph:
    if params["use_xla_for_gpu"]:
      # The XLA functions we use require resource variables.
      tf.enable_resource_variables()
    self._ncf_dataset = ncf_dataset
    self._global_step = tf.train.create_global_step()
    self._train_model_properties = self._build_model(params, is_training=True)
    self._eval_model_properties = self._build_model(params, is_training=False)
    initializer = tf.global_variables_initializer()
  self._graph.finalize()
  self._session = tf.Session(graph=self._graph)
  self._session.run(initializer)
def benchmark_graph(self):
  """Benchmark Graph performance."""
  hparams = get_default_hparams()
  tf.enable_resource_variables()
  for sample_size in [10, 25, 50, 100, 200]:
    hparams.n_samples = sample_size
    tf.reset_default_graph()
    with tf.Graph().as_default():
      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
      x = tf.random_normal([hparams.n_samples, hparams.x_dim],
                           dtype=tf.float32)
      dynamics = l2hmc.Dynamics(
          x_dim=hparams.x_dim,
          minus_loglikelihood_fn=energy_fn,
          n_steps=hparams.n_steps,
          eps=hparams.eps)
      loss, _, _ = l2hmc.compute_loss(dynamics, x)
      optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
      train_op, loss, _ = graph_step(dynamics, optimizer, x)
      # Single thread; fairer comparison against eager
      session_conf = tf.ConfigProto(inter_op_parallelism_threads=1)
      with tf.Session(config=session_conf) as sess:
        sess.run(tf.global_variables_initializer())
        # Warmup to reduce initialization effect when timing
        for _ in range(hparams.n_warmup_iters):
          _, _ = sess.run([train_op, loss])
        # Training
        start_time = time.time()
        for i in range(hparams.n_iters):
          _, loss_np = sess.run([train_op, loss])
          print("Iteration %d: loss %.4f" % (i, loss_np))
        wall_time = (time.time() - start_time) / hparams.n_iters
        examples_per_sec = hparams.n_samples / wall_time
        self.report_benchmark(
            name="graph_train_%s_%d" % ("gpu" if tf.test.is_gpu_available()
                                        else "cpu", sample_size),
            iters=hparams.n_iters,
            extras={"examples_per_sec": examples_per_sec},
            wall_time=wall_time)
def __init__(self, ncf_dataset, params, num_train_steps, num_eval_steps,
             use_while_loop):
  self._num_train_steps = num_train_steps
  self._num_eval_steps = num_eval_steps
  self._use_while_loop = use_while_loop
  with tf.Graph().as_default() as self._graph:
    if params["use_xla_for_gpu"]:
      # The XLA functions we use require resource variables.
      tf.enable_resource_variables()
    self._ncf_dataset = ncf_dataset
    self._global_step = tf.train.create_global_step()
    self._train_model_properties = self._build_model(params, num_train_steps,
                                                     is_training=True)
    self._eval_model_properties = self._build_model(params, num_eval_steps,
                                                    is_training=False)
    initializer = tf.global_variables_initializer()
  self._graph.finalize()
  self._session = tf.Session(graph=self._graph)
  self._session.run(initializer)
def main(argv):
  del argv  # Unused.
  # If using update_damping_immediately resource variables must be enabled.
  # (Although they probably will be by default on TPUs.)
  if FLAGS.update_damping_immediately:
    tf.enable_resource_variables()
  tf.set_random_seed(FLAGS.seed)
  # Invert using cholesky decomposition + triangular solve. This is the only
  # code path for matrix inversion supported on TPU right now.
  kfac.utils.set_global_constants(posdef_inv_method='cholesky')
  kfac.fisher_factors.set_global_constants(
      eigenvalue_decomposition_threshold=10000)
  if not FLAGS.use_sua_approx:
    if FLAGS.use_custom_patches_op:
      kfac.fisher_factors.set_global_constants(
          use_patches_second_moment_op=True)
    else:
      # Temporary measure to save memory with giant batches:
      kfac.fisher_factors.set_global_constants(
          sub_sample_inputs=True,
          inputs_to_extract_patches_factor=0.1)
  config = make_tpu_run_config(FLAGS.master, FLAGS.seed, FLAGS.model_dir,
                               FLAGS.iterations_per_loop,
                               FLAGS.save_checkpoints_steps)
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=True,
      model_fn=_model_fn,
      config=config,
      train_batch_size=FLAGS.batch_size,
      eval_batch_size=1024)
  estimator.train(input_fn=mnist_input_fn, max_steps=FLAGS.train_steps,
                  hooks=[])
def main(_): # causes memory fragmentation for bert leading to OOM if os.environ.get("TF_XLA_FLAGS", None) is not None: os.environ["TF_XLA_FLAGS"] += " --tf_xla_enable_lazy_compilation false" else: os.environ["TF_XLA_FLAGS"] = " --tf_xla_enable_lazy_compilation false" # Enable async_io to speed up multi-gpu training with XLA and Horovod. os.environ["TF_XLA_FLAGS"] += " --tf_xla_async_io_level 1" tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path) if FLAGS.horovod: hvd.init() bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) validate_flags_or_throw(bert_config) tf.io.gfile.makedirs(FLAGS.output_dir) tokenizer = tokenization.FullTokenizer( vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) master_process = True training_hooks = [] global_batch_size = FLAGS.train_batch_size * FLAGS.num_accumulation_steps hvd_rank = 0 config = tf.compat.v1.ConfigProto() learning_rate = FLAGS.learning_rate if FLAGS.horovod: tf.compat.v1.logging.info("Multi-GPU training with TF Horovod") tf.compat.v1.logging.info("hvd.size() = %d hvd.rank() = %d", hvd.size(), hvd.rank()) global_batch_size = FLAGS.train_batch_size * hvd.size() * FLAGS.num_accumulation_steps learning_rate = learning_rate * hvd.size() master_process = (hvd.rank() == 0) hvd_rank = hvd.rank() config.gpu_options.visible_device_list = str(hvd.local_rank()) if hvd.size() > 1: training_hooks.append(hvd.BroadcastGlobalVariablesHook(0)) if FLAGS.use_xla: config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 if FLAGS.amp: tf.enable_resource_variables() run_config = tf.estimator.RunConfig( model_dir=FLAGS.output_dir if master_process else None, session_config=config, save_checkpoints_steps=FLAGS.save_checkpoints_steps if master_process else None, save_summary_steps=FLAGS.save_checkpoints_steps if master_process else None, log_step_count_steps=FLAGS.display_loss_steps, keep_checkpoint_max=1) if master_process: tf.compat.v1.logging.info("***** Configuaration *****") for key in FLAGS.__flags.keys(): tf.compat.v1.logging.info(' {}: {}'.format(key, getattr(FLAGS, key))) tf.compat.v1.logging.info("**************************") train_examples = None num_train_steps = None num_warmup_steps = None training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank, FLAGS.save_checkpoints_steps)) # Prepare Training Data if FLAGS.do_train: train_examples = read_squad_examples( input_file=FLAGS.train_file, is_training=True, version_2_with_negative=FLAGS.version_2_with_negative) num_train_steps = int( len(train_examples) / global_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) # Pre-shuffle the input to avoid having to make a very large shuffle # buffer in in the `input_fn`. 
rng = random.Random(12345) rng.shuffle(train_examples) start_index = 0 end_index = len(train_examples) tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")] if FLAGS.horovod: tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i)) for i in range(hvd.size())] num_examples_per_rank = len(train_examples) // hvd.size() remainder = len(train_examples) % hvd.size() if hvd.rank() < remainder: start_index = hvd.rank() * (num_examples_per_rank+1) end_index = start_index + num_examples_per_rank + 1 else: start_index = hvd.rank() * num_examples_per_rank + remainder end_index = start_index + (num_examples_per_rank) model_fn = model_fn_builder( bert_config=bert_config, init_checkpoint=FLAGS.init_checkpoint, learning_rate=learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, hvd=None if not FLAGS.horovod else hvd, amp=FLAGS.amp) estimator = tf.estimator.Estimator( model_fn=model_fn, config=run_config) if FLAGS.do_train: # We write to a temporary file to avoid storing very large constant tensors # in memory. train_writer = FeatureWriter( filename=tmp_filenames[hvd_rank], is_training=True) convert_examples_to_features( examples=train_examples[start_index:end_index], tokenizer=tokenizer, max_seq_length=FLAGS.max_seq_length, doc_stride=FLAGS.doc_stride, max_query_length=FLAGS.max_query_length, is_training=True, output_fn=train_writer.process_feature, verbose_logging=FLAGS.verbose_logging) train_writer.close() tf.compat.v1.logging.info("***** Running training *****") tf.compat.v1.logging.info(" Num orig examples = %d", end_index - start_index) tf.compat.v1.logging.info(" Num split examples = %d", train_writer.num_features) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.compat.v1.logging.info(" Num steps = %d", num_train_steps) tf.compat.v1.logging.info(" LR = %f", learning_rate) del train_examples train_input_fn = input_fn_builder( input_file=tmp_filenames, batch_size=FLAGS.train_batch_size, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True, hvd=None if not FLAGS.horovod else hvd) train_start_time = time.time() estimator.train(input_fn=train_input_fn, hooks=training_hooks, max_steps=num_train_steps) train_time_elapsed = time.time() - train_start_time train_time_wo_overhead = training_hooks[-1].total_time avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed ss_sentences_per_second = (num_train_steps - training_hooks[-1].skipped) * global_batch_size * 1.0 / train_time_wo_overhead if master_process: tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info("Total Training Time = %0.2f for Sentences = %d", train_time_elapsed, num_train_steps * global_batch_size) tf.compat.v1.logging.info("Total Training Time W/O Overhead = %0.2f for Sentences = %d", train_time_wo_overhead, (num_train_steps - training_hooks[-1].skipped) * global_batch_size) tf.compat.v1.logging.info("Throughput Average (sentences/sec) with overhead = %0.2f", avg_sentences_per_second) tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log(step=(), data={"throughput_train": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------") if FLAGS.export_triton and master_process: export_model(estimator, FLAGS.output_dir, FLAGS.init_checkpoint) if FLAGS.do_predict and master_process: eval_examples = read_squad_examples( input_file=FLAGS.predict_file, 
is_training=False, version_2_with_negative=FLAGS.version_2_with_negative) # Perform evaluation on subset, useful for profiling if FLAGS.num_eval_iterations is not None: eval_examples = eval_examples[:FLAGS.num_eval_iterations*FLAGS.predict_batch_size] eval_writer = FeatureWriter( filename=os.path.join(FLAGS.output_dir, "eval.tf_record"), is_training=False) eval_features = [] def append_feature(feature): eval_features.append(feature) eval_writer.process_feature(feature) convert_examples_to_features( examples=eval_examples, tokenizer=tokenizer, max_seq_length=FLAGS.max_seq_length, doc_stride=FLAGS.doc_stride, max_query_length=FLAGS.max_query_length, is_training=False, output_fn=append_feature, verbose_logging=FLAGS.verbose_logging) eval_writer.close() tf.compat.v1.logging.info("***** Running predictions *****") tf.compat.v1.logging.info(" Num orig examples = %d", len(eval_examples)) tf.compat.v1.logging.info(" Num split examples = %d", len(eval_features)) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_input_fn = input_fn_builder( input_file=eval_writer.filename, batch_size=FLAGS.predict_batch_size, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False) all_results = [] eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)] eval_start_time = time.time() for result in estimator.predict( predict_input_fn, yield_single_examples=True, hooks=eval_hooks): if len(all_results) % 1000 == 0: tf.compat.v1.logging.info("Processing example: %d" % (len(all_results))) unique_id = int(result["unique_ids"]) start_logits = [float(x) for x in result["start_logits"].flat] end_logits = [float(x) for x in result["end_logits"].flat] all_results.append( RawResult( unique_id=unique_id, start_logits=start_logits, end_logits=end_logits)) eval_time_elapsed = time.time() - eval_start_time time_list = eval_hooks[-1].time_list time_list.sort() # Removing outliers (init/warmup) in throughput computation. 
eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)]) num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size avg = np.mean(time_list) cf_50 = max(time_list[:int(len(time_list) * 0.50)]) cf_90 = max(time_list[:int(len(time_list) * 0.90)]) cf_95 = max(time_list[:int(len(time_list) * 0.95)]) cf_99 = max(time_list[:int(len(time_list) * 0.99)]) cf_100 = max(time_list[:int(len(time_list) * 1)]) ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info("Total Inference Time = %0.2f for Sentences = %d", eval_time_elapsed, eval_hooks[-1].count * FLAGS.predict_batch_size) tf.compat.v1.logging.info("Total Inference Time W/O Overhead = %0.2f for Sentences = %d", eval_time_wo_overhead, num_sentences) tf.compat.v1.logging.info("Summary Inference Statistics") tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size) tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length) tf.compat.v1.logging.info("Precision = %s", "fp16" if FLAGS.amp else "fp32") tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f", cf_50 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f", cf_90 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f", cf_95 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f", cf_99 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f", cf_100 * 1000) tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000) tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log(step=(), data={"throughput_val": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------") output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json") output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json") output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json") write_predictions(eval_examples, eval_features, all_results, FLAGS.n_best_size, FLAGS.max_answer_length, FLAGS.do_lower_case, output_prediction_file, output_nbest_file, output_null_log_odds_file, FLAGS.version_2_with_negative, FLAGS.verbose_logging) if FLAGS.eval_script: import sys import subprocess eval_out = subprocess.check_output([sys.executable, FLAGS.eval_script, FLAGS.predict_file, output_prediction_file]) scores = str(eval_out).strip() exact_match = float(scores.split(":")[1].split(",")[0]) f1 = float(scores.split(":")[2].split("}")[0]) dllogging.logger.log(step=(), data={"f1": f1}, verbosity=Verbosity.DEFAULT) dllogging.logger.log(step=(), data={"exact_match": exact_match}, verbosity=Verbosity.DEFAULT) print(str(eval_out))
def main(_): os.environ[ "TF_XLA_FLAGS"] = "--tf_xla_enable_lazy_compilation=false" #causes memory fragmentation for bert leading to OOM tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path) if FLAGS.horovod: hvd.init() processors = { "bc5cdr": BC5CDRProcessor, "clefe": CLEFEProcessor, 'i2b2': I2b22012Processor } if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train` or `do_eval` must be True.") bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) tf.io.gfile.makedirs(FLAGS.output_dir) processor = processors[task_name]() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 master_process = True training_hooks = [] global_batch_size = FLAGS.train_batch_size hvd_rank = 0 config = tf.compat.v1.ConfigProto() if FLAGS.horovod: global_batch_size = FLAGS.train_batch_size * hvd.size() master_process = (hvd.rank() == 0) hvd_rank = hvd.rank() config.gpu_options.visible_device_list = str(hvd.local_rank()) if hvd.size() > 1: training_hooks.append(hvd.BroadcastGlobalVariablesHook(0)) if FLAGS.use_xla: config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 tf.enable_resource_variables() run_config = tf.estimator.RunConfig( model_dir=FLAGS.output_dir if master_process else None, session_config=config, save_checkpoints_steps=FLAGS.save_checkpoints_steps if master_process else None, keep_checkpoint_max=1) if master_process: tf.compat.v1.logging.info("***** Configuaration *****") for key in FLAGS.__flags.keys(): tf.compat.v1.logging.info(' {}: {}'.format( key, getattr(FLAGS, key))) tf.compat.v1.logging.info("**************************") train_examples = None num_train_steps = None num_warmup_steps = None training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank)) if FLAGS.do_train: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / global_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) start_index = 0 end_index = len(train_examples) tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")] if FLAGS.horovod: tmp_filenames = [ os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i)) for i in range(hvd.size()) ] num_examples_per_rank = len(train_examples) // hvd.size() remainder = len(train_examples) % hvd.size() if hvd.rank() < remainder: start_index = hvd.rank() * (num_examples_per_rank + 1) end_index = start_index + num_examples_per_rank + 1 else: start_index = hvd.rank() * num_examples_per_rank + remainder end_index = start_index + (num_examples_per_rank) model_fn = model_fn_builder(bert_config=bert_config, num_labels=len(label_list) + 1, init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate if not FLAGS.horovod else FLAGS.learning_rate * hvd.size(), num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_one_hot_embeddings=False, hvd=None if not 
FLAGS.horovod else hvd, use_fp16=FLAGS.use_fp16) estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) if FLAGS.do_train: #train_file = os.path.join(FLAGS.output_dir, "train.tf_record") #filed_based_convert_examples_to_features( # train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) filed_based_convert_examples_to_features( train_examples[start_index:end_index], label_list, FLAGS.max_seq_length, tokenizer, tmp_filenames[hvd_rank]) tf.compat.v1.logging.info("***** Running training *****") tf.compat.v1.logging.info(" Num examples = %d", len(train_examples)) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.compat.v1.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_file=tmp_filenames, #train_file, batch_size=FLAGS.train_batch_size, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True, hvd=None if not FLAGS.horovod else hvd) #estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) train_start_time = time.time() estimator.train(input_fn=train_input_fn, max_steps=num_train_steps, hooks=training_hooks) train_time_elapsed = time.time() - train_start_time train_time_wo_overhead = training_hooks[-1].total_time avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed ss_sentences_per_second = ( num_train_steps - training_hooks[-1].skipped ) * global_batch_size * 1.0 / train_time_wo_overhead if master_process: tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info( "Total Training Time = %0.2f for Sentences = %d", train_time_elapsed, num_train_steps * global_batch_size) tf.compat.v1.logging.info( "Total Training Time W/O Overhead = %0.2f for Sentences = %d", train_time_wo_overhead, (num_train_steps - training_hooks[-1].skipped) * global_batch_size) tf.compat.v1.logging.info( "Throughput Average (sentences/sec) with overhead = %0.2f", avg_sentences_per_second) tf.compat.v1.logging.info( "Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log( step=(), data={"throughput_train": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------") if FLAGS.do_eval and master_process: eval_examples = processor.get_dev_examples(FLAGS.data_dir) eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") filed_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) tf.compat.v1.logging.info("***** Running evaluation *****") tf.compat.v1.logging.info(" Num examples = %d", len(eval_examples)) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.eval_batch_size) eval_steps = None eval_drop_remainder = False eval_input_fn = file_based_input_fn_builder( input_file=eval_file, batch_size=FLAGS.eval_batch_size, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder) result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") with tf.io.gfile.Open(output_eval_file, "w") as writer: tf.compat.v1.logging.info("***** Eval results *****") for key in sorted(result.keys()): tf.compat.v1.logging.info(" %s = %s", key, str(result[key])) dllogging.logger.log(step=(), data={key: float(strresult[key])}, verbosity=Verbosity.DEFAULT) writer.write("%s = %s\n" % (key, str(result[key]))) if FLAGS.do_predict and master_process: predict_examples = processor.get_test_examples(FLAGS.data_dir) 
predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") filed_based_convert_examples_to_features(predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file, mode="test") with tf.io.gfile.Open(os.path.join(FLAGS.output_dir, 'label2id.pkl'), 'rb') as rf: label2id = pickle.load(rf) id2label = {value: key for key, value in label2id.items()} token_path = os.path.join(FLAGS.output_dir, "token_test.txt") if tf.io.gfile.Exists(token_path): tf.io.gfile.Remove(token_path) tf.compat.v1.logging.info("***** Running prediction*****") tf.compat.v1.logging.info(" Num examples = %d", len(predict_examples)) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = False predict_input_fn = file_based_input_fn_builder( input_file=predict_file, batch_size=FLAGS.predict_batch_size, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)] eval_start_time = time.time() output_predict_file = os.path.join(FLAGS.output_dir, "label_test.txt") test_labels_file = os.path.join(FLAGS.output_dir, "test_labels.txt") test_labels_err_file = os.path.join(FLAGS.output_dir, "test_labels_errs.txt") with tf.io.gfile.Open(output_predict_file, 'w') as writer, \ tf.io.gfile.Open(test_labels_file, 'w') as tl, \ tf.io.gfile.Open(test_labels_err_file, 'w') as tle: print(id2label) i = 0 for prediction in estimator.predict(input_fn=predict_input_fn, hooks=eval_hooks, yield_single_examples=True): output_line = "\n".join(id2label[id] for id in prediction if id != 0) + "\n" writer.write(output_line) result_to_pair(predict_examples[i], prediction, id2label, tl, tle) i = i + 1 eval_time_elapsed = time.time() - eval_start_time time_list = eval_hooks[-1].time_list time_list.sort() # Removing outliers (init/warmup) in throughput computation. 
eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)]) num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size avg = np.mean(time_list) cf_50 = max(time_list[:int(len(time_list) * 0.50)]) cf_90 = max(time_list[:int(len(time_list) * 0.90)]) cf_95 = max(time_list[:int(len(time_list) * 0.95)]) cf_99 = max(time_list[:int(len(time_list) * 0.99)]) cf_100 = max(time_list[:int(len(time_list) * 1)]) ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info( "Total Inference Time = %0.2f for Sentences = %d", eval_time_elapsed, eval_hooks[-1].count * FLAGS.predict_batch_size) tf.compat.v1.logging.info( "Total Inference Time W/O Overhead = %0.2f for Sentences = %d", eval_time_wo_overhead, num_sentences) tf.compat.v1.logging.info("Summary Inference Statistics") tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size) tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length) tf.compat.v1.logging.info("Precision = %s", "fp16" if FLAGS.use_fp16 else "fp32") tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f", cf_50 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f", cf_90 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f", cf_95 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f", cf_99 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f", cf_100 * 1000) tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000) tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log(step=(), data={"throughput_val": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info('Reading: %s', test_labels_file) with tf.io.gfile.Open(test_labels_file, "r") as f: counts = evaluate(f) eval_result = report_notprint(counts) print(''.join(eval_result)) with tf.io.gfile.Open( os.path.join(FLAGS.output_dir, 'test_results_conlleval.txt'), 'w') as fd: fd.write(''.join(eval_result))
def main(_):
  logging.set_verbosity(logging.INFO)
  tf.enable_resource_variables()
  TrainEval(FLAGS.root_dir, suite_atari.game(name=FLAGS.game_name),
            **get_run_args()).run()
def main(_): # causes memory fragmentation for bert leading to OOM if os.environ.get("TF_XLA_FLAGS", None) is not None: os.environ["TF_XLA_FLAGS"] += "--tf_xla_enable_lazy_compilation=false" else: os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_lazy_compilation=false" tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path) if FLAGS.horovod: hvd.init() processors = { "chemprot": BioBERTChemprotProcessor, 'mednli': MedNLIProcessor, } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval` or `do_predict' must be True." ) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.io.gfile.makedirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 master_process = True training_hooks = [] global_batch_size = FLAGS.train_batch_size hvd_rank = 0 config = tf.compat.v1.ConfigProto() if FLAGS.horovod: global_batch_size = FLAGS.train_batch_size * hvd.size() master_process = (hvd.rank() == 0) hvd_rank = hvd.rank() config.gpu_options.visible_device_list = str(hvd.local_rank()) if hvd.size() > 1: training_hooks.append(hvd.BroadcastGlobalVariablesHook(0)) if FLAGS.use_xla: config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1 tf.enable_resource_variables() run_config = tf.estimator.RunConfig( model_dir=FLAGS.output_dir if master_process else None, session_config=config, save_checkpoints_steps=FLAGS.save_checkpoints_steps if master_process else None, keep_checkpoint_max=1) if master_process: tf.compat.v1.logging.info("***** Configuaration *****") for key in FLAGS.__flags.keys(): tf.compat.v1.logging.info(' {}: {}'.format( key, getattr(FLAGS, key))) tf.compat.v1.logging.info("**************************") train_examples = None num_train_steps = None num_warmup_steps = None training_hooks.append(LogTrainRunHook(global_batch_size, hvd_rank)) if FLAGS.do_train: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / global_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) start_index = 0 end_index = len(train_examples) tmp_filenames = [os.path.join(FLAGS.output_dir, "train.tf_record")] if FLAGS.horovod: tmp_filenames = [ os.path.join(FLAGS.output_dir, "train.tf_record{}".format(i)) for i in range(hvd.size()) ] num_examples_per_rank = len(train_examples) // hvd.size() remainder = len(train_examples) % hvd.size() if hvd.rank() < remainder: start_index = hvd.rank() * (num_examples_per_rank + 1) end_index = start_index + num_examples_per_rank + 1 else: start_index = hvd.rank() * num_examples_per_rank + remainder end_index = start_index + (num_examples_per_rank) model_fn = model_fn_builder(bert_config=bert_config, num_labels=len(label_list), 
init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate if not FLAGS.horovod else FLAGS.learning_rate * hvd.size(), num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_one_hot_embeddings=False, hvd=None if not FLAGS.horovod else hvd, amp=FLAGS.amp) estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) if FLAGS.do_train: file_based_convert_examples_to_features( train_examples[start_index:end_index], label_list, FLAGS.max_seq_length, tokenizer, tmp_filenames[hvd_rank]) tf.compat.v1.logging.info("***** Running training *****") tf.compat.v1.logging.info(" Num examples = %d", len(train_examples)) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.compat.v1.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_file=tmp_filenames, batch_size=FLAGS.train_batch_size, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True, hvd=None if not FLAGS.horovod else hvd) train_start_time = time.time() estimator.train(input_fn=train_input_fn, max_steps=num_train_steps, hooks=training_hooks) train_time_elapsed = time.time() - train_start_time train_time_wo_overhead = training_hooks[-1].total_time avg_sentences_per_second = num_train_steps * global_batch_size * 1.0 / train_time_elapsed ss_sentences_per_second = ( num_train_steps - training_hooks[-1].skipped ) * global_batch_size * 1.0 / train_time_wo_overhead if master_process: tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info( "Total Training Time = %0.2f for Sentences = %d", train_time_elapsed, num_train_steps * global_batch_size) tf.compat.v1.logging.info( "Total Training Time W/O Overhead = %0.2f for Sentences = %d", train_time_wo_overhead, (num_train_steps - training_hooks[-1].skipped) * global_batch_size) tf.compat.v1.logging.info( "Throughput Average (sentences/sec) with overhead = %0.2f", avg_sentences_per_second) tf.compat.v1.logging.info( "Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log( step=(), data={"throughput_train": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------") if FLAGS.do_eval and master_process: eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_eval_examples = len(eval_examples) eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) tf.compat.v1.logging.info("***** Running evaluation *****") tf.compat.v1.logging.info( " Num examples = %d (%d actual, %d padding)", len(eval_examples), num_actual_eval_examples, len(eval_examples) - num_actual_eval_examples) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.eval_batch_size) # This tells the estimator to run through the entire set. 
eval_steps = None eval_drop_remainder = False eval_input_fn = file_based_input_fn_builder( input_file=eval_file, batch_size=FLAGS.eval_batch_size, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder) result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") with tf.io.gfile.GFile(output_eval_file, "w") as writer: tf.compat.v1.logging.info("***** Eval results *****") for key in sorted(result.keys()): tf.compat.v1.logging.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) if FLAGS.do_predict and master_process: predict_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_predict_examples = len(predict_examples) predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") file_based_convert_examples_to_features(predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file) tf.compat.v1.logging.info("***** Running prediction*****") tf.compat.v1.logging.info( " Num examples = %d (%d actual, %d padding)", len(predict_examples), num_actual_predict_examples, len(predict_examples) - num_actual_predict_examples) tf.compat.v1.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = False predict_input_fn = file_based_input_fn_builder( input_file=predict_file, batch_size=FLAGS.predict_batch_size, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) eval_hooks = [LogEvalRunHook(FLAGS.predict_batch_size)] eval_start_time = time.time() output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") with tf.io.gfile.GFile(output_predict_file, "w") as writer: num_written_lines = 0 tf.compat.v1.logging.info("***** Predict results *****") for prediction in estimator.predict(input_fn=predict_input_fn, hooks=eval_hooks, yield_single_examples=True): probabilities = prediction["probabilities"] output_line = "\t".join( str(class_probability) for class_probability in probabilities) + "\n" writer.write(output_line) num_written_lines += 1 assert num_written_lines == num_actual_predict_examples eval_time_elapsed = time.time() - eval_start_time time_list = eval_hooks[-1].time_list time_list.sort() # Removing outliers (init/warmup) in throughput computation. 
eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)]) num_sentences = (int(len(time_list) * 0.99)) * FLAGS.predict_batch_size avg = np.mean(time_list) cf_50 = max(time_list[:int(len(time_list) * 0.50)]) cf_90 = max(time_list[:int(len(time_list) * 0.90)]) cf_95 = max(time_list[:int(len(time_list) * 0.95)]) cf_99 = max(time_list[:int(len(time_list) * 0.99)]) cf_100 = max(time_list[:int(len(time_list) * 1)]) ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead tf.compat.v1.logging.info("-----------------------------") tf.compat.v1.logging.info( "Total Inference Time = %0.2f for Sentences = %d", eval_time_elapsed, eval_hooks[-1].count * FLAGS.predict_batch_size) tf.compat.v1.logging.info( "Total Inference Time W/O Overhead = %0.2f for Sentences = %d", eval_time_wo_overhead, num_sentences) tf.compat.v1.logging.info("Summary Inference Statistics") tf.compat.v1.logging.info("Batch size = %d", FLAGS.predict_batch_size) tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length) tf.compat.v1.logging.info("Precision = %s", "fp16" if FLAGS.amp else "fp32") tf.compat.v1.logging.info("Latency Confidence Level 50 (ms) = %0.2f", cf_50 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 90 (ms) = %0.2f", cf_90 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 95 (ms) = %0.2f", cf_95 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 99 (ms) = %0.2f", cf_99 * 1000) tf.compat.v1.logging.info("Latency Confidence Level 100 (ms) = %0.2f", cf_100 * 1000) tf.compat.v1.logging.info("Latency Average (ms) = %0.2f", avg * 1000) tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second) dllogging.logger.log(step=(), data={"throughput_val": ss_sentences_per_second}, verbosity=Verbosity.DEFAULT) tf.compat.v1.logging.info("-----------------------------")
def main(_):
  tf.compat.v1.enable_resource_variables()
  logging.set_verbosity(logging.INFO)
  train_eval(FLAGS.root_dir, num_iterations=FLAGS.num_iterations)
import time
import os
import logging
import argparse
import shutil

import numpy as np
import tensorflow as tf
import gym

from replay_buffer.segment_tree import ReplayBuffer
from simulator.env_ops import MultiThreadEnv
from teflon.policy.SAC import SAC as SAC
from teflon.multi_step import MultistepAggregator

tf.enable_resource_variables()


def explorer(env, policy, start_transitions=30000, explorer_noise=0.1,
             initial_random=True):
    """
    explorer makes transitions from environment (called Actor in Ape-X paper).

    :param MultiThreadEnv env:
    :param SAC policy:
    :param int start_transitions: transition numbers collected from random actions.
    :param bool initial_random: whether it uses random actions at first or not.
        During finetuning this is set to False.
def main(_):
  # Lazy compilation causes memory fragmentation for BERT, leading to OOM.
  os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_lazy_compilation=false"
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

  dllogging = utils.dllogger_class.dllogger_class(FLAGS.dllog_path)

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError("At least one of `do_train` or `do_eval` must be True.")

  if FLAGS.horovod:
    import horovod.tensorflow as hvd
    hvd.init()

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  tf.io.gfile.makedirs(FLAGS.output_dir)

  input_files = []
  for input_file_dir in FLAGS.input_files_dir.split(","):
    input_files.extend(tf.io.gfile.glob(os.path.join(input_file_dir, "*")))

  if FLAGS.horovod and len(input_files) < hvd.size():
    raise ValueError("Input Files must be sharded")
  if FLAGS.use_fp16 and FLAGS.manual_fp16:
    raise ValueError(
        "AMP and Manual Mixed Precision Training are both activated! Error")

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

  config = tf.compat.v1.ConfigProto()
  if FLAGS.horovod:
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    if hvd.rank() == 0:
      tf.compat.v1.logging.info("***** Configuration *****")
      for key in FLAGS.__flags.keys():
        tf.compat.v1.logging.info('  {}: {}'.format(key, getattr(FLAGS, key)))
      tf.compat.v1.logging.info("**************************")
    # config.gpu_options.per_process_gpu_memory_fraction = 0.7
  if FLAGS.use_xla:
    config.graph_options.optimizer_options.global_jit_level = (
        tf.compat.v1.OptimizerOptions.ON_1)
    config.graph_options.rewrite_options.memory_optimization = (
        rewriter_config_pb2.RewriterConfig.NO_MEM_OPT)

  tf.enable_resource_variables()

  run_config = tf.estimator.RunConfig(
      model_dir=FLAGS.output_dir,
      session_config=config,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps
      if not FLAGS.horovod or hvd.rank() == 0 else None,
      save_summary_steps=FLAGS.save_checkpoints_steps
      if not FLAGS.horovod or hvd.rank() == 0 else None,
      # This variable controls how often estimator reports examples/sec.
      # Default value is every 100 steps.
      # When --report_loss is True, we set it to a very large value to prevent
      # default info reporting from estimator.
      # Ideally we should set it to None, but that does not work.
      log_step_count_steps=10000 if FLAGS.report_loss else 100)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate
      if not FLAGS.horovod else FLAGS.learning_rate * hvd.size(),
      num_train_steps=FLAGS.num_train_steps,
      num_warmup_steps=FLAGS.num_warmup_steps,
      use_one_hot_embeddings=False,
      hvd=None if not FLAGS.horovod else hvd)

  estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  if FLAGS.do_train:
    training_hooks = []
    if FLAGS.horovod and hvd.size() > 1:
      training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
    if not FLAGS.horovod or hvd.rank() == 0:
      global_batch_size = (
          FLAGS.train_batch_size * FLAGS.num_accumulation_steps
          if not FLAGS.horovod else
          FLAGS.train_batch_size * FLAGS.num_accumulation_steps * hvd.size())
      training_hooks.append(
          _LogSessionRunHook(global_batch_size, FLAGS.num_accumulation_steps,
                             dllogging, FLAGS.display_loss_steps,
                             FLAGS.save_checkpoints_steps, FLAGS.report_loss))

    tf.compat.v1.logging.info("***** Running training *****")
    tf.compat.v1.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    train_input_fn = input_fn_builder(
        input_files=input_files,
        batch_size=FLAGS.train_batch_size,
        max_seq_length=FLAGS.max_seq_length,
        max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        is_training=True,
        hvd=None if not FLAGS.horovod else hvd)

    train_start_time = time.time()
    estimator.train(input_fn=train_input_fn, hooks=training_hooks,
                    max_steps=FLAGS.num_train_steps)
    train_time_elapsed = time.time() - train_start_time

    if not FLAGS.horovod or hvd.rank() == 0:
      train_time_wo_overhead = training_hooks[-1].total_time
      avg_sentences_per_second = (
          FLAGS.num_train_steps * global_batch_size * 1.0 / train_time_elapsed)
      ss_sentences_per_second = (
          (FLAGS.num_train_steps - training_hooks[-1].skipped)
          * global_batch_size * 1.0 / train_time_wo_overhead)

      tf.compat.v1.logging.info("-----------------------------")
      tf.compat.v1.logging.info(
          "Total Training Time = %0.2f for Sentences = %d",
          train_time_elapsed, FLAGS.num_train_steps * global_batch_size)
      tf.compat.v1.logging.info(
          "Total Training Time W/O Overhead = %0.2f for Sentences = %d",
          train_time_wo_overhead,
          (FLAGS.num_train_steps - training_hooks[-1].skipped) * global_batch_size)
      tf.compat.v1.logging.info(
          "Throughput Average (sentences/sec) with overhead = %0.2f",
          avg_sentences_per_second)
      tf.compat.v1.logging.info(
          "Throughput Average (sentences/sec) = %0.2f", ss_sentences_per_second)
      dllogging.logger.log(step=(),
                           data={"throughput_train": ss_sentences_per_second},
                           verbosity=Verbosity.DEFAULT)
      tf.compat.v1.logging.info("-----------------------------")

  if FLAGS.do_eval and (not FLAGS.horovod or hvd.rank() == 0):
    tf.compat.v1.logging.info("***** Running evaluation *****")
    tf.compat.v1.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    eval_files = []
    for eval_file_dir in FLAGS.eval_files_dir.split(","):
      eval_files.extend(tf.io.gfile.glob(os.path.join(eval_file_dir, "*")))

    eval_input_fn = input_fn_builder(
        input_files=eval_files,
        batch_size=FLAGS.eval_batch_size,
        max_seq_length=FLAGS.max_seq_length,
        max_predictions_per_seq=FLAGS.max_predictions_per_seq,
        is_training=False,
        hvd=None if not FLAGS.horovod else hvd)

    eval_hooks = [LogEvalRunHook(FLAGS.eval_batch_size)]
    eval_start_time = time.time()
    result = estimator.evaluate(input_fn=eval_input_fn,
                                steps=FLAGS.max_eval_steps, hooks=eval_hooks)
    eval_time_elapsed = time.time() - eval_start_time

    time_list = eval_hooks[-1].time_list
    time_list.sort()
    # Removing outliers (init/warmup) in throughput computation.
    eval_time_wo_overhead = sum(time_list[:int(len(time_list) * 0.99)])
    num_sentences = int(len(time_list) * 0.99) * FLAGS.eval_batch_size
    ss_sentences_per_second = num_sentences * 1.0 / eval_time_wo_overhead

    tf.compat.v1.logging.info("-----------------------------")
    tf.compat.v1.logging.info(
        "Total Inference Time = %0.2f for Sentences = %d",
        eval_time_elapsed, eval_hooks[-1].count * FLAGS.eval_batch_size)
    tf.compat.v1.logging.info(
        "Total Inference Time W/O Overhead = %0.2f for Sentences = %d",
        eval_time_wo_overhead, num_sentences)
    tf.compat.v1.logging.info("Summary Inference Statistics on EVAL set")
    tf.compat.v1.logging.info("Batch size = %d", FLAGS.eval_batch_size)
    tf.compat.v1.logging.info("Sequence Length = %d", FLAGS.max_seq_length)
    tf.compat.v1.logging.info("Precision = %s",
                              "fp16" if FLAGS.use_fp16 else "fp32")
    tf.compat.v1.logging.info("Throughput Average (sentences/sec) = %0.2f",
                              ss_sentences_per_second)
    dllogging.logger.log(step=(),
                         data={"throughput_val": ss_sentences_per_second},
                         verbosity=Verbosity.DEFAULT)
    tf.compat.v1.logging.info("-----------------------------")

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.io.gfile.GFile(output_eval_file, "w") as writer:
      tf.compat.v1.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.compat.v1.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))
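# Hedged illustration (not part of the original script): the eval throughput
# above is computed by sorting per-step timings, keeping the fastest 99%
# (dropping init/warmup outliers), and dividing sentences by the kept wall
# time. A minimal, self-contained sketch of that calculation, assuming a list
# of per-step durations in seconds and a fixed batch size:
def trimmed_throughput(step_times, batch_size, keep_fraction=0.99):
  """Sentences/sec over the fastest `keep_fraction` of sorted step times."""
  times = sorted(step_times)
  kept = times[:int(len(times) * keep_fraction)]
  num_sentences = len(kept) * batch_size
  return num_sentences / sum(kept)

# Example: trimmed_throughput([0.05] * 99 + [2.0], batch_size=8) == 160.0,
# because the single 2.0 s warmup step is excluded from the denominator.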
def main(_):
  print(FLAGS)
  print(tf.__version__, "==tensorflow version==")

  # Avoid collective-reduce hangs.
  os.environ['NCCL_LL_THRESHOLD'] = '0'
  # os.environ['TF_ENABLE_WHILE_V2'] = '1'
  # os.environ['TF_ENABLE_COND_V2'] = '1'

  tf.enable_resource_variables()

  init_checkpoint = os.path.join(FLAGS.buckets, FLAGS.init_checkpoint)

  train_file = []
  for file in FLAGS.train_file.split(","):
    train_file_path = os.path.join(FLAGS.buckets, file)
    train_file.append(train_file_path)
  # train_file = os.path.join(FLAGS.buckets, FLAGS.train_file)
  # dev_file = os.path.join(FLAGS.buckets, FLAGS.dev_file)

  dev_file = []
  for file in FLAGS.dev_file.split(","):
    dev_file_path = os.path.join(FLAGS.buckets, file)
    dev_file.append(dev_file_path)

  checkpoint_dir = os.path.join(FLAGS.buckets, FLAGS.model_output)
  print(init_checkpoint, train_file, dev_file, checkpoint_dir)

  if FLAGS.distribution_strategy == "MirroredStrategy":
    cross_tower_ops = cross_tower_ops_lib.AllReduceCrossTowerOps("nccl", 10, 0, 0)
    distribution = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.num_gpus, cross_tower_ops=cross_tower_ops)
    worker_count = FLAGS.num_gpus
  else:
    # Currently identical to the MirroredStrategy branch above.
    cross_tower_ops = cross_tower_ops_lib.AllReduceCrossTowerOps("nccl", 10, 0, 0)
    distribution = tf.contrib.distribute.MirroredStrategy(
        num_gpus=FLAGS.num_gpus, cross_tower_ops=cross_tower_ops)
    worker_count = FLAGS.num_gpus

  sess_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=True)

  run_config = tf.estimator.RunConfig(
      keep_checkpoint_max=10,
      # model_dir=checkpoint_dir,
      train_distribute=distribution,  # tf 1.8
      # distribute=distribution,  # tf 1.4
      session_config=sess_config,
      save_checkpoints_secs=None,
      save_checkpoints_steps=None,
      log_step_count_steps=100)

  task_index = run_config.task_id
  is_chief = run_config.is_chief

  print("==worker_count==", worker_count,
        "==local_rank==", task_index,
        "==is_chief==", is_chief)

  cluster = ""
  target = ""

  print(FLAGS)

  if FLAGS.mode == "single_task":
    train_eval_api = train_eval
  elif FLAGS.mode == "multi_task":
    train_eval_api = multitask_train_eval
  elif FLAGS.mode == 'distillation':
    train_eval_api = distillation_train_eval
  elif FLAGS.mode == "electra":
    train_eval_api = pretrain_train_eval

  if FLAGS.mode == "electra":
    train_eval_api.monitored_estimator(
        FLAGS=FLAGS,
        worker_count=worker_count,
        task_index=task_index,
        cluster=cluster,
        is_chief=is_chief,
        init_checkpoint=init_checkpoint,
        train_file=train_file,
        dev_file=dev_file,
        checkpoint_dir=checkpoint_dir,
        run_config=run_config,
        distribution_strategy=FLAGS.distribution_strategy,
        profiler=FLAGS.profiler,
        parse_type=FLAGS.parse_type,
        rule_model=FLAGS.rule_model,
        train_op=FLAGS.train_op,
        running_type=FLAGS.running_type,
        decay=FLAGS.decay,
        warmup=FLAGS.warmup,
        input_target=FLAGS.input_target,
        distillation=FLAGS.distillation,
        temperature=FLAGS.temperature,
        distillation_ratio=FLAGS.distillation_ratio,
        electra_mode=FLAGS.electra_mode,
        sharing_mode=FLAGS.sharing_mode,
        attention_type=FLAGS.attention_type,
        ues_token_type=FLAGS.ues_token_type,
        gumbel_anneal=FLAGS.gumbel_anneal,
        annealed_mask_prob=FLAGS.annealed_mask_prob,
        joint_train=FLAGS.joint_train,
        optimization_type=FLAGS.optimization_type,
        gen_disc_type=FLAGS.gen_disc_type,
        train_op_type=FLAGS.train_op_type,
        mask_method=FLAGS.mask_method,
        minmax_mode=FLAGS.minmax_mode,
        seq_type=FLAGS.seq_type,
        mask_type=FLAGS.mask_type)
    # use_tpu=FLAGS.use_tpu)
  else:
    train_eval_api.monitored_estimator(
        FLAGS=FLAGS,
        worker_count=worker_count,
        task_index=task_index,
        cluster=cluster,
        is_chief=is_chief,
        target=target,
        init_checkpoint=init_checkpoint,
        train_file=train_file,
        dev_file=dev_file,
        checkpoint_dir=checkpoint_dir,
        run_config=run_config,
        distribution_strategy=FLAGS.distribution_strategy,
        profiler=FLAGS.profiler,
        parse_type=FLAGS.parse_type,
        rule_model=FLAGS.rule_model,
        train_op=FLAGS.train_op,
        running_type=FLAGS.running_type,
        decay=FLAGS.decay,
        warmup=FLAGS.warmup,
        input_target=FLAGS.input_target,
        distillation=FLAGS.distillation,
        temperature=FLAGS.temperature,
        distillation_ratio=FLAGS.distillation_ratio,
        attention_type=FLAGS.attention_type,
        ues_token_type=FLAGS.ues_token_type,
        seq_type=FLAGS.seq_type,
        mask_type=FLAGS.mask_type)
def main(_):
  # If using update_damping_immediately, resource variables must be enabled.
  if FLAGS.update_damping_immediately:
    tf.enable_resource_variables()

  if not FLAGS.use_sua_approx:
    if FLAGS.use_custom_patches_op:
      kfac.fisher_factors.set_global_constants(
          use_patches_second_moment_op=True)
    else:
      # Temporary measure to save memory with giant batches:
      kfac.fisher_factors.set_global_constants(
          sub_sample_inputs=True,
          inputs_to_extract_patches_factor=0.2)

  tf.set_random_seed(FLAGS.seed)

  (train_op, opt, batch_loss, batch_error, batch_size_schedule, batch_size,
   eval_loss, eval_error, eval_loss_avg, eval_error_avg) = construct_train_quants()

  global_step = tf.train.get_or_create_global_step()

  if FLAGS.optimizer == 'kfac':
    # We need to put the control dependency on train_op here so that we are
    # guaranteed to get the up-to-date values of these various quantities.
    # Otherwise there is a race condition and we might get the old values,
    # nondeterministically. Another solution would be to get these values in
    # a separate sess.run call, but this can sometimes cause problems with
    # training frameworks that use hooks (see the comments below).
    with tf.control_dependencies([train_op]):
      learning_rate = opt.learning_rate
      momentum = opt.momentum
      damping = opt.damping
      rho = opt.rho
      qmodel_change = opt.qmodel_change

  # Without setting allow_soft_placement=True there will be problems when
  # the optimizer tries to place certain ops like "mod" on the GPU (which
  # isn't supported).
  config = tf.ConfigProto(allow_soft_placement=True)

  # Train model.

  # It's good practice to put everything into a single sess.run call. The
  # reason is that certain "training frameworks" like to run hooks at each
  # sess.run call, and there is an implicit expectation there will only
  # be one sess.run call every "iteration" of the "optimizer". For example,
  # a framework might try to print the loss at each sess.run call, causing
  # the mini-batch to be advanced, thus completely breaking the "cached
  # batch" mechanism that the damping adaptation method may rely on. (Plus
  # there will also be the extra cost of having to reevaluate the loss
  # twice.) That being said, we don't completely do that here because it's
  # inconvenient.
  with tf.train.MonitoredTrainingSession(save_checkpoint_secs=30,
                                         config=config) as sess:
    for _ in range(FLAGS.train_steps):
      i = sess.run(global_step)

      if FLAGS.use_batch_size_schedule:
        batch_size_ = batch_size_schedule[min(i, len(batch_size_schedule) - 1)]
      else:
        batch_size_ = FLAGS.batch_size

      if FLAGS.optimizer == 'kfac':
        (_, batch_loss_, batch_error_, learning_rate_, momentum_, damping_,
         rho_, qmodel_change_) = sess.run(
             [train_op, batch_loss, batch_error, learning_rate, momentum,
              damping, rho, qmodel_change],
             feed_dict={batch_size: batch_size_})
      else:
        _, batch_loss_, batch_error_ = sess.run(
            [train_op, batch_loss, batch_error],
            feed_dict={batch_size: batch_size_})

      # Print training stats.
      tf.logging.info('iteration: %d', i)
      tf.logging.info(
          'mini-batch size: %d | mini-batch loss = %f | mini-batch error = %f',
          batch_size_, batch_loss_, batch_error_)

      if FLAGS.optimizer == 'kfac':
        tf.logging.info('learning_rate = %f | momentum = %f',
                        learning_rate_, momentum_)
        tf.logging.info('damping = %f | rho = %f | qmodel_change = %f',
                        damping_, rho_, qmodel_change_)

      # "Eval" here means just compute stuff on the full training set.
      if (i + 1) % FLAGS.eval_every == 0:
        eval_loss_, eval_error_, eval_loss_avg_, eval_error_avg_ = sess.run(
            [eval_loss, eval_error, eval_loss_avg, eval_error_avg])
        tf.logging.info('-----------------------------------------------------')
        tf.logging.info('eval_loss = %f | eval_error = %f',
                        eval_loss_, eval_error_)
        tf.logging.info('eval_loss_avg = %f | eval_error_avg = %f',
                        eval_loss_avg_, eval_error_avg_)
        tf.logging.info('-----------------------------------------------------')
      else:
        tf.logging.info('----')
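# Hedged sketch (an illustrative toy, not from the snippet above): the
# control-dependency trick used there forces the reads of optimizer-tracked
# quantities to happen after train_op in the same sess.run call, so they
# reflect the just-completed update rather than stale values. Shown here on a
# simple resource-variable counter using the tf.compat.v1 graph-mode API:
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

counter = tf.Variable(0, dtype=tf.int32, use_resource=True)
update = tf.assign_add(counter, 1)
with tf.control_dependencies([update]):
  # This read is ordered after the increment, so it sees the fresh value.
  fresh_value = tf.identity(counter)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  print(sess.run(fresh_value))  # Prints 1: the read observes the update.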
def main(argv):
  del argv  # Unused.

  tf.enable_resource_variables()
  tf.set_random_seed(FLAGS.seed)
  set_lr_schedule()
  set_custom_sparsity_map()
  folder_stub = os.path.join(FLAGS.training_method, str(FLAGS.end_sparsity),
                             str(FLAGS.maskupdate_begin_step),
                             str(FLAGS.maskupdate_end_step),
                             str(FLAGS.maskupdate_frequency),
                             str(FLAGS.drop_fraction),
                             str(FLAGS.label_smoothing),
                             str(FLAGS.weight_decay))

  output_dir = FLAGS.output_dir
  if FLAGS.use_folder_stub:
    output_dir = os.path.join(output_dir, folder_stub)

  export_dir = os.path.join(output_dir, 'export_dir')

  # We pass the updated eval and train string to the params dictionary.
  params = {}
  params['output_dir'] = output_dir
  params['training_method'] = FLAGS.training_method
  params['use_tpu'] = FLAGS.use_tpu

  dataset_func = functools.partial(
      imagenet_input.ImageNetInput,
      data_dir=FLAGS.data_directory,
      transpose_input=False,
      num_parallel_calls=FLAGS.num_parallel_calls,
      use_bfloat16=False)
  imagenet_train, imagenet_eval = [
      dataset_func(is_training=is_training) for is_training in [True, False]
  ]

  run_config = tpu_config.RunConfig(
      master=FLAGS.master,
      model_dir=output_dir,
      save_checkpoints_steps=FLAGS.steps_per_checkpoint,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      session_config=tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False),
      tpu_config=tpu_config.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_cores,
          tpu_job_name=FLAGS.tpu_job_name))

  classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)

  cpu_classifier = tpu_estimator.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=resnet_model_fn_w_pruning,
      params=params,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      export_to_tpu=False,
      eval_batch_size=FLAGS.eval_batch_size)

  if FLAGS.num_eval_images % FLAGS.eval_batch_size != 0:
    raise ValueError(
        'eval_batch_size (%d) must evenly divide num_eval_images (%d)!' %
        (FLAGS.eval_batch_size, FLAGS.num_eval_images))

  eval_steps = FLAGS.num_eval_images // FLAGS.eval_batch_size

  if FLAGS.mode == 'eval_once':
    ckpt_path = os.path.join(output_dir, FLAGS.eval_once_ckpt_prefix)
    dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
    classifier.evaluate(
        input_fn=dataset.input_fn,
        steps=eval_steps,
        checkpoint_path=ckpt_path,
        name='{0}'.format(FLAGS.eval_once_ckpt_prefix))
  elif FLAGS.mode == 'eval':
    # Run evaluation whenever there's a new checkpoint.
    for ckpt in evaluation.checkpoints_iterator(output_dir):
      tf.logging.info('Starting to evaluate.')
      try:
        dataset = imagenet_train if FLAGS.eval_on_train else imagenet_eval
        classifier.evaluate(
            input_fn=dataset.input_fn,
            steps=eval_steps,
            checkpoint_path=ckpt,
            name='eval')
        # Terminate the eval job when the final checkpoint is reached.
        global_step = int(os.path.basename(ckpt).split('-')[1])
        if global_step >= FLAGS.train_steps:
          tf.logging.info(
              'Evaluation finished after training step %d' % global_step)
          break
      except tf.errors.NotFoundError:
        tf.logging.info('Checkpoint no longer exists, skipping checkpoint.')
  else:
    global_step = estimator._load_global_step_from_checkpoint_dir(output_dir)

    # Session run hooks to export the model for prediction.
    export_hook = ExportModelHook(cpu_classifier, export_dir)
    hooks = [export_hook]

    if FLAGS.mode == 'train':
      tf.logging.info('start training...')
      classifier.train(
          input_fn=imagenet_train.input_fn,
          hooks=hooks,
          max_steps=FLAGS.train_steps)
    else:
      assert FLAGS.mode == 'train_and_eval'
      tf.logging.info('start training and eval...')
      while global_step < FLAGS.train_steps:
        next_checkpoint = min(global_step + FLAGS.steps_per_eval,
                              FLAGS.train_steps)
        classifier.train(
            input_fn=imagenet_train.input_fn, max_steps=next_checkpoint)
        global_step = next_checkpoint
        tf.logging.info('Completed training up to step: %d', global_step)
        classifier.evaluate(input_fn=imagenet_eval.input_fn, steps=eval_steps)