def test_pprint_hparams(self):
  hparams = hparam.HParams(
      int_=1,
      str_="str",
      bool_=True,
      float_=1.1,
      list_int=[1, 2],
      none=None)

  # pylint: disable=g-inconsistent-quotes
  expected_string = r"""
{'bool_': True,
 'float_': 1.1,
 'int_': 1,
 'list_int': [1, 2],
 'none': None,
 'str_': 'str'}"""
  # pylint: enable=g-inconsistent-quotes

  self.assertEqual(expected_string, misc_utils.pprint_hparams(hparams))
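# The test above pins down the exact output format. A minimal sketch of an
# implementation consistent with that expected string, assuming the HParams
# class exposes its fields as a plain dict via `.values()` (pprint.pformat
# then renders the dict with sorted keys, one per line once it exceeds the
# default 80-column width):
import pprint

def pprint_hparams(hparams):
  """Returns a newline followed by the pretty-printed hparams dict (sketch)."""
  return "\n{}".format(pprint.pformat(hparams.values()))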
def train(hparams, output_dir, env_problem_name, report_fn=None):
  """Train."""
  env_fn = initialize_env_specs(hparams, env_problem_name)

  tf.logging.vlog(1, "HParams in trainer_model_free.train : %s",
                  misc_utils.pprint_hparams(hparams))
  tf.logging.vlog(1, "Using hparams.base_algo: %s", hparams.base_algo)

  learner = rl_utils.LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, output_dir, output_dir, total_num_epochs=1)

  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  rl_utils.update_hparams_from_hparams(policy_hparams, hparams,
                                       hparams.base_algo + "_")
  tf.logging.vlog(1, "Policy HParams : %s",
                  misc_utils.pprint_hparams(policy_hparams))

  total_steps = policy_hparams.epochs_num
  tf.logging.vlog(2, "total_steps: %d", total_steps)

  eval_every_epochs = policy_hparams.eval_every_epochs
  tf.logging.vlog(2, "eval_every_epochs: %d", eval_every_epochs)
  if eval_every_epochs == 0:
    eval_every_epochs = total_steps

  # Evaluation is driven by the `model_save_fn` callback below, so the
  # learner's own periodic evaluation is disabled.
  policy_hparams.eval_every_epochs = 0

  metric_name = rl_utils.get_metric_name(
      sampling_temp=hparams.eval_sampling_temps[0],
      max_num_noops=hparams.eval_max_num_noops,
      clipped=False)
  tf.logging.vlog(1, "metric_name: %s", metric_name)

  eval_metrics_dir = os.path.join(output_dir, "eval_metrics")
  eval_metrics_dir = os.path.expanduser(eval_metrics_dir)
  tf.gfile.MakeDirs(eval_metrics_dir)
  eval_metrics_writer = tf.summary.FileWriter(eval_metrics_dir)

  def evaluate_on_new_model(model_dir_path):
    # NOTE: relies on a module-level `step` counter, which must be
    # initialized (e.g. `step = 0`) at file scope before the first call;
    # Python 2 closures cannot rebind enclosing-scope variables, hence
    # `global` rather than `nonlocal`.
    global step
    eval_metrics = rl_utils.evaluate_all_configs(hparams, model_dir_path)
    tf.logging.info("Agent eval metrics:\n{}".format(
        pprint.pformat(eval_metrics)))
    rl_utils.summarize_metrics(eval_metrics_writer, eval_metrics, step)
    if report_fn:
      report_fn(eval_metrics[metric_name], step)
    step += 1

  policy_hparams.epochs_num = total_steps
  policy_hparams.save_models_every_epochs = eval_every_epochs
  learner.train(env_fn,
                policy_hparams,
                simulated=False,
                save_continuously=True,
                epoch=0,
                model_save_fn=evaluate_on_new_model)
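# On Python 3 the module-level `step` counter above could be avoided with
# `nonlocal`. A hypothetical rewrite of the same callback as a closure
# (a sketch, not the code this file uses):
def _make_eval_callback(hparams, eval_metrics_writer, metric_name, report_fn):
  step = 0  # Counter local to the closure, advanced once per checkpoint.

  def evaluate_on_new_model(model_dir_path):
    nonlocal step
    eval_metrics = rl_utils.evaluate_all_configs(hparams, model_dir_path)
    rl_utils.summarize_metrics(eval_metrics_writer, eval_metrics, step)
    if report_fn:
      report_fn(eval_metrics[metric_name], step)
    step += 1

  return evaluate_on_new_model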
def train(hparams, output_dir, env_problem_name, report_fn=None):
  """Train."""
  env_fn = initialize_env_specs(hparams, env_problem_name)

  tf.logging.vlog(1, "HParams in trainer_model_free.train : %s",
                  misc_utils.pprint_hparams(hparams))
  tf.logging.vlog(1, "Using hparams.base_algo: %s", hparams.base_algo)

  learner = rl_utils.LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, output_dir, output_dir, total_num_epochs=1,
      distributional_size=hparams.get("distributional_size", 1),
      distributional_subscale=hparams.get("distributional_subscale", 0.04),
      distributional_threshold=hparams.get("distributional_threshold", 0.0),
  )

  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  rl_utils.update_hparams_from_hparams(
      policy_hparams, hparams, hparams.base_algo + "_"
  )
  tf.logging.vlog(1, "Policy HParams : %s",
                  misc_utils.pprint_hparams(policy_hparams))

  # TODO(konradczechowski): remove the base_algo dependence once the
  # evaluation method is decided.
  if hparams.base_algo == "ppo":
    total_steps = policy_hparams.epochs_num
    tf.logging.vlog(2, "total_steps: %d", total_steps)

    eval_every_epochs = policy_hparams.eval_every_epochs
    tf.logging.vlog(2, "eval_every_epochs: %d", eval_every_epochs)
    if eval_every_epochs == 0:
      eval_every_epochs = total_steps

    # Evaluation is driven by `model_save_fn`, so disable the learner's own
    # periodic evaluation.
    policy_hparams.eval_every_epochs = 0

    metric_name = rl_utils.get_metric_name(
        sampling_temp=hparams.eval_sampling_temps[0],
        max_num_noops=hparams.eval_max_num_noops,
        clipped=False)
    tf.logging.vlog(1, "metric_name: %s", metric_name)

    eval_metrics_dir = os.path.join(output_dir, "eval_metrics")
    eval_metrics_dir = os.path.expanduser(eval_metrics_dir)
    tf.gfile.MakeDirs(eval_metrics_dir)
    eval_metrics_writer = tf.summary.FileWriter(eval_metrics_dir)

    def evaluate_on_new_model(model_dir_path):
      # Assumes the same module-level `step` counter as the variant above.
      global step
      eval_metrics = rl_utils.evaluate_all_configs(hparams, model_dir_path)
      tf.logging.info(
          "Agent eval metrics:\n{}".format(pprint.pformat(eval_metrics)))
      rl_utils.summarize_metrics(eval_metrics_writer, eval_metrics, step)
      if report_fn:
        report_fn(eval_metrics[metric_name], step)
      step += 1

    policy_hparams.epochs_num = total_steps
    policy_hparams.save_models_every_epochs = eval_every_epochs
  else:
    def evaluate_on_new_model(model_dir_path):
      del model_dir_path
      raise NotImplementedError(
          "This function is currently implemented only for ppo")

  learner.train(env_fn,
                policy_hparams,
                simulated=False,
                save_continuously=True,
                epoch=0,
                model_save_fn=evaluate_on_new_model)
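# Hypothetical invocation sketch. The hparams-set and environment names below
# are placeholders, not values taken from this file; the point is that
# `hparams.get(key, default)` lets the distributional_* knobs fall back to
# defaults when an older hparams set does not define them.
def _example_train_run():
  hparams = trainer_lib.create_hparams("rlmf_base")  # placeholder set name
  train(hparams,
        output_dir="/tmp/rl_out",
        env_problem_name="some_env_problem",  # placeholder
        report_fn=None)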
def train(hparams, output_dir, report_fn=None):
  """Train."""
  hparams = initialize_env_specs(hparams)

  tf.logging.vlog(1, "HParams in trainer_model_free.train : %s",
                  misc_utils.pprint_hparams(hparams))
  tf.logging.vlog(1, "Using hparams.base_algo: %s", hparams.base_algo)

  learner = rl_utils.LEARNERS[hparams.base_algo](
      hparams.frame_stack_size, output_dir, output_dir, total_num_epochs=1)

  policy_hparams = trainer_lib.create_hparams(hparams.base_algo_params)
  rl_utils.update_hparams_from_hparams(policy_hparams, hparams,
                                       hparams.base_algo + "_")
  tf.logging.vlog(1, "Policy HParams : %s",
                  misc_utils.pprint_hparams(policy_hparams))

  total_steps = policy_hparams.epochs_num
  tf.logging.vlog(2, "total_steps: %d", total_steps)

  eval_every_epochs = policy_hparams.eval_every_epochs
  tf.logging.vlog(2, "eval_every_epochs: %d", eval_every_epochs)
  if eval_every_epochs == 0:
    eval_every_epochs = total_steps

  # Build the evaluation schedule: one entry per point at which training
  # pauses for evaluation. Ensure the schedule ends exactly at total_steps
  # even when total_steps is not a multiple of eval_every_epochs.
  steps = list(range(eval_every_epochs, total_steps + 1, eval_every_epochs))
  if not steps or steps[-1] < total_steps:
    steps.append(total_steps)
  tf.logging.vlog(1, "steps: [%s]", ",".join([str(s) for s in steps]))

  metric_name = rl_utils.get_metric_name(
      sampling_temp=hparams.eval_sampling_temps[0],
      max_num_noops=hparams.eval_max_num_noops,
      clipped=False)
  tf.logging.vlog(1, "metric_name: %s", metric_name)

  for i, step in enumerate(steps):
    tf.logging.info("Starting training iteration [%d] for [%d] steps.",
                    i, step)
    # epochs_num is cumulative: each call trains up to `step` epochs,
    # resuming from the checkpoints saved in output_dir.
    policy_hparams.epochs_num = step
    learner.train(hparams.env_fn,
                  policy_hparams,
                  simulated=False,
                  save_continuously=True,
                  epoch=0)
    tf.logging.info("Ended training iteration [%d] for [%d] steps.", i, step)

    eval_metrics = rl_utils.evaluate_all_configs(hparams, output_dir)
    tf.logging.info("Agent eval metrics:\n{}".format(
        pprint.pformat(eval_metrics)))

    if report_fn:
      report_fn(eval_metrics[metric_name], step)
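# Worked example of the schedule arithmetic above, as a standalone sketch
# (the numbers are illustrative, not defaults from this file): with
# total_steps = 100 and eval_every_epochs = 30, range() yields [30, 60, 90],
# and the final append extends it to [30, 60, 90, 100] so training still
# ends exactly at total_steps.
def _eval_schedule(total_steps, eval_every_epochs):
  if eval_every_epochs == 0:
    eval_every_epochs = total_steps
  steps = list(range(eval_every_epochs, total_steps + 1, eval_every_epochs))
  if not steps or steps[-1] < total_steps:
    steps.append(total_steps)
  return steps

assert _eval_schedule(100, 30) == [30, 60, 90, 100]
assert _eval_schedule(100, 25) == [25, 50, 75, 100]
assert _eval_schedule(100, 0) == [100]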