def main(_):
    """Build the configured algorithm and hand it to ``policy_trainer.play``.

    Steps, in order: seed the random generators, parse the gin file and
    command-line bindings, create one non-parallel environment, set up the
    data transformer on a fresh ``TrainerConfig``, construct the algorithm
    and finally call ``policy_trainer.play``. The environment is closed once
    playing ends, whether it returned normally or raised.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    random_seed = common.set_random_seed(FLAGS.random_seed)
    gin.parse_config_files_and_bindings(common.get_gin_file(),
                                        FLAGS.gin_param)
    ctor_param = gin.query_parameter('TrainerConfig.algorithm_ctor')
    make_algorithm = ctor_param.scoped_configurable_fn

    env = create_environment(nonparallel=True, seed=random_seed)
    env.reset()
    common.set_global_env(env)

    trainer_config = policy_trainer.TrainerConfig(root_dir="")
    transformer = create_data_transformer(
        trainer_config.data_transformer_ctor, env.observation_spec())
    trainer_config.data_transformer = transformer
    transformed_spec = transformer.transformed_observation_spec
    common.set_transformed_observation_spec(transformed_spec)

    algorithm = make_algorithm(
        observation_spec=transformed_spec,
        action_spec=env.action_spec(),
        config=trainer_config)

    try:
        # An empty/unset flag means "ignore nothing".
        if FLAGS.ignored_parameter_prefixes:
            ignored_prefixes = FLAGS.ignored_parameter_prefixes.split(",")
        else:
            ignored_prefixes = []
        policy_trainer.play(
            FLAGS.root_dir,
            env,
            algorithm,
            checkpoint_step=FLAGS.checkpoint_step or "latest",
            epsilon_greedy=FLAGS.epsilon_greedy,
            num_episodes=FLAGS.num_episodes,
            max_episode_length=FLAGS.max_episode_length,
            sleep_time_per_step=FLAGS.sleep_time_per_step,
            record_file=FLAGS.record_file,
            ignored_parameter_prefixes=ignored_prefixes)
    finally:
        env.close()
def run(self):
    """Launch one training job per parameter combination and repeat.

    The cartesian product of ``self._conf.param_values`` is enumerated; each
    combination is submitted ``self._conf.repeats`` times to a process pool
    of ``self._conf.max_worker_num`` workers. The call blocks until every
    submitted job has finished. Errors raised by a job are logged, not
    re-raised.

    NOTE(review): each job invokes ``self._worker`` with three positional
    arguments (root_dir, parameters, device_queue); confirm the worker's
    signature accepts all of them.
    """
    # Parse the gin configuration here so that all jobs share the same copy
    # of the base configuration (the gin file may be changed occasionally).
    gin.parse_config_files_and_bindings(common.get_gin_file(),
                                        FLAGS.gin_param)

    keys = self._conf.param_keys
    value_lists = self._conf.param_values
    num_workers = self._conf.max_worker_num

    # maxtasksperchild=1: every job gets a fresh worker process.
    pool = multiprocessing.Pool(processes=num_workers, maxtasksperchild=1)
    devices = self._init_device_queue(num_workers)

    def _log_failure(error):
        logging.error(error)

    submitted = 0
    for combination in itertools.product(*value_lists):
        job_parameters = dict(zip(keys, combination))
        for repeat_index in range(self._conf.repeats):
            run_name = self._generate_run_name(job_parameters, submitted,
                                               repeat_index)
            job_root_dir = "%s/%s" % (FLAGS.root_dir, run_name)
            pool.apply_async(
                func=self._worker,
                args=[job_root_dir, job_parameters, devices],
                error_callback=_log_failure)
            submitted += 1

    pool.close()
    pool.join()
def _worker(self, root_dir, parameters):
    """Run one training job with ``parameters`` applied on top of the gin file.

    Args:
        root_dir: directory passed to ``train_eval``; the gin configs are
            also copied there when ``FLAGS.gin_file`` is set.
        parameters: mapping of gin parameter name to value; each item is
            rendered as ``name=value`` and parsed as an extra gin binding.
    """
    logging.set_verbosity(logging.INFO)

    config_file = common.get_gin_file()
    if FLAGS.gin_file:
        common.copy_gin_configs(root_dir, config_file)
    gin.parse_config_files_and_bindings(config_file, FLAGS.gin_param)

    # The config is locked after the parse above; unlock it to apply the
    # per-job overrides.
    with gin.unlock_config():
        overrides = []
        for name, value in parameters.items():
            overrides.append('%s=%s' % (name, value))
        gin.parse_config(overrides)

    train_eval(root_dir)
def main(_):
    """Parse the gin configuration and play the configured algorithm.

    The algorithm constructor is read from the gin parameter
    ``TrainerConfig.algorithm_ctor`` and called with a freshly created
    environment; the result is passed to ``policy_trainer.play`` together
    with the play-related flags.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    gin.parse_config_files_and_bindings(common.get_gin_file(),
                                        FLAGS.gin_param)
    make_algorithm = gin.query_parameter('TrainerConfig.algorithm_ctor')

    play_env = create_environment(num_parallel_environments=1)
    agent = make_algorithm(play_env)

    policy_trainer.play(
        FLAGS.root_dir,
        play_env,
        agent,
        checkpoint_name=FLAGS.checkpoint_name,
        greedy_predict=FLAGS.greedy_predict,
        random_seed=FLAGS.random_seed,
        num_episodes=FLAGS.num_episodes,
        sleep_time_per_step=FLAGS.sleep_time_per_step,
        record_file=FLAGS.record_file)
def main(_):
    """Entry point: train-and-evaluate, or play when ``FLAGS.play`` is set.

    The gin file and bindings are parsed in both modes. The gin configs are
    copied into ``FLAGS.root_dir`` only when a gin file flag was given and
    we are not playing.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    logging.set_verbosity(logging.INFO)

    config_file = common.get_gin_file()
    if FLAGS.gin_file and not FLAGS.play:
        common.copy_gin_configs(FLAGS.root_dir, config_file)
    gin.parse_config_files_and_bindings(config_file, FLAGS.gin_param)

    if not FLAGS.play:
        train_eval(FLAGS.root_dir)
        return

    # Reuse the trainer's algorithm constructor for ``play``; the config is
    # locked after parsing, so it has to be unlocked for the extra binding.
    with gin.unlock_config():
        gin.bind_parameter(
            '__main__.play.algorithm_ctor',
            gin.query_parameter('TrainerConfig.algorithm_ctor'))
    play(FLAGS.root_dir)
def main(_):
    """Build the configured algorithm and play it in a single environment.

    Steps, in order: seed the random generators, parse the gin file and
    command-line bindings, create one non-parallel environment, construct
    the algorithm from the gin parameter ``TrainerConfig.algorithm_ctor``
    and call ``policy_trainer.play`` with the play-related flags.

    The underlying environment (``env.pyenv``) is always closed once it has
    been created, including when resetting the environment, constructing
    the algorithm or playing raises.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    seed = common.set_random_seed(FLAGS.random_seed,
                                  not FLAGS.use_tf_functions)
    gin_file = common.get_gin_file()
    gin.parse_config_files_and_bindings(gin_file, FLAGS.gin_param)
    algorithm_ctor = gin.query_parameter(
        'TrainerConfig.algorithm_ctor').scoped_configurable_fn

    env = create_environment(nonparallel=True, seed=seed)
    try:
        env.reset()
        common.set_global_env(env)
        algorithm = algorithm_ctor(
            observation_spec=env.observation_spec(),
            action_spec=env.action_spec())
        policy_trainer.play(
            FLAGS.root_dir,
            env,
            algorithm,
            checkpoint_name=FLAGS.checkpoint_name,
            epsilon_greedy=FLAGS.epsilon_greedy,
            num_episodes=FLAGS.num_episodes,
            sleep_time_per_step=FLAGS.sleep_time_per_step,
            record_file=FLAGS.record_file,
            use_tf_functions=FLAGS.use_tf_functions)
    finally:
        # Release the environment even on failure; previously it was only
        # closed when everything above succeeded.
        env.pyenv.close()
def main(_):
    """Parse the gin configuration and start ``train_eval``.

    Logging to stderr is switched on via ``FLAGS.alsologtostderr`` before
    the configuration is parsed.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    config_file = common.get_gin_file()
    FLAGS.alsologtostderr = True
    gin.parse_config_files_and_bindings(config_file, FLAGS.gin_param)

    ml_type = FLAGS.ml_type
    root_dir = FLAGS.root_dir
    train_eval(ml_type, root_dir)
def main(_):
    """Parse the gin file and bindings, then run ``train_eval``.

    Args:
        _: unused positional argument supplied by the application runner.
    """
    gin.parse_config_files_and_bindings(common.get_gin_file(),
                                        FLAGS.gin_param)
    train_eval(FLAGS.root_dir)
def _train(self):
    """Run the training loop until the iteration or env-step budget is used.

    All environments in ``self._envs`` (and ``self._eval_env`` when set) are
    reset first. The loop then repeatedly calls
    ``self._algorithm.train_iter()``, logs timing, optionally evaluates,
    records progress in ``self._trainer_progress`` and saves checkpoints.
    It stops once ``iter_num`` reaches ``self._num_iterations`` or the
    env-step metric reaches ``self._num_env_steps`` (whichever limits are
    set), evaluating one last time before exiting if evaluation is enabled.
    """
    for env in self._envs:
        env.reset()
    if self._eval_env:
        self._eval_env.reset()

    # Start counting from the iteration number stored in the progress
    # object rather than from zero.
    begin_iter_num = int(self._trainer_progress._iter_num)
    iter_num = begin_iter_num

    # Spread ``self._num_checkpoints`` checkpoints evenly over the budget.
    # The budget, and therefore the interval, is counted in iterations when
    # ``self._num_iterations`` is set and in env steps otherwise.
    checkpoint_interval = math.ceil(
        (self._num_iterations or self._num_env_steps) /
        self._num_checkpoints)

    # Threshold for the next periodic checkpoint, in the same unit as the
    # interval above.
    if self._num_iterations:
        time_to_checkpoint = self._trainer_progress._iter_num + checkpoint_interval
    else:
        time_to_checkpoint = self._trainer_progress._env_steps + checkpoint_interval

    while True:
        t0 = time.time()
        with record_time("time/train_iter"):
            train_steps = self._algorithm.train_iter()
        t = time.time() - t0
        # NOTE(review): throughput divides by the measured wall time ``t``;
        # a zero reading would raise ZeroDivisionError -- confirm this
        # cannot happen in practice.
        logging.log_every_n_seconds(
            logging.INFO,
            '%s -> %s: %s time=%.3f throughput=%0.2f' %
            (common.get_gin_file(), [
                os.path.basename(self._root_dir.strip('/'))
            ], iter_num, t, int(train_steps) / t),
            n_seconds=1)

        # Periodic evaluation; ``iter_num`` has not yet been incremented
        # for the iteration that just ran, hence the ``+ 1``.
        if self._evaluate and (iter_num + 1) % self._eval_interval == 0:
            self._eval()
        # Only after the first iteration executed by this call.
        if iter_num == begin_iter_num:
            self._summarize_training_setting()

        # check termination
        env_steps_metric = self._algorithm.get_step_metrics()[1]
        total_time_steps = env_steps_metric.result()
        iter_num += 1

        self._trainer_progress.update(iter_num, total_time_steps)

        if ((self._num_iterations and iter_num >= self._num_iterations)
                or (self._num_env_steps
                    and total_time_steps >= self._num_env_steps)):
            # Evaluate before exiting so that the eval curve shown in TB
            # will align with the final iter/env_step.
            if self._evaluate:
                self._eval()
            break

        # Periodic checkpoint when the threshold is crossed; otherwise
        # honor an explicit checkpoint request.
        # NOTE(review): if both ``_num_iterations`` and ``_num_env_steps``
        # are set, ``time_to_checkpoint`` is in iterations but the second
        # clause compares env steps against it -- confirm that both limits
        # are never set together.
        if ((self._num_iterations and iter_num >= time_to_checkpoint)
                or (self._num_env_steps
                    and total_time_steps >= time_to_checkpoint)):
            self._save_checkpoint()
            time_to_checkpoint += checkpoint_interval
        elif self._checkpoint_requested:
            logging.info("Saving checkpoint upon request...")
            self._save_checkpoint()
            self._checkpoint_requested = False

        # Drop into the debugger once per request; the flag is cleared
        # before entering pdb so the next iteration does not stop again.
        if self._debug_requested:
            self._debug_requested = False
            import pdb
            pdb.set_trace()