def run(args, env_fn, policy_fn, get_weights_fn, set_weights_fn):
    """Launch distributed (Ape-X style) off-policy training.

    Spawns one or more explorer processes that fill a shared replay
    buffer, a learner process that trains the policy, and an evaluator
    process, then blocks until every process has finished.

    Args:
        args: Parsed command-line arguments (reads ``n_env``,
            ``n_explorer``, ``n_thread``, ``local_buffer_size``,
            ``episode_max_steps``, GPU ids, training frequencies, ...).
        env_fn: Zero-argument factory returning a fresh environment.
        policy_fn: Factory building a policy for a given environment.
        get_weights_fn: Callable extracting weights from the learner's policy.
        set_weights_fn: Callable installing weights into an explorer's policy.

    Raises:
        ValueError: If the resolved number of explorers is not positive.
    """
    initialize_logger(
        logging_level=logging.getLevelName(args.logging_level))

    # Decide how many explorer processes to launch.  With a vectorized
    # multi-environment, a single explorer drives all sub-environments.
    if args.n_env > 1:
        args.n_explorer = 1
    elif args.n_explorer is None:
        args.n_explorer = multiprocessing.cpu_count() - 1
    # Explicit exception instead of `assert`: asserts are stripped when
    # Python runs with -O, which would silently skip this validation.
    if args.n_explorer <= 0:
        raise ValueError(
            "[error] number of explorers must be positive integer")

    env = env_fn()
    global_rb, queues, is_training_done, lock, trained_steps = \
        prepare_experiment(env, args)

    noise = 0.3  # exploration noise scale shared by all explorers
    tasks = []

    # Add explorers.  Both branches previously duplicated the same long
    # argument list; only the queue selection differs.
    if args.n_env > 1:
        explorer_queues = [queues[0]]
    else:
        explorer_queues = [queues[i] for i in range(args.n_explorer)]
    for queue in explorer_queues:
        tasks.append(Process(
            target=explorer,
            args=[global_rb, queue, trained_steps, is_training_done,
                  lock, env_fn, policy_fn, set_weights_fn, noise,
                  args.n_env, args.n_thread, args.local_buffer_size,
                  args.episode_max_steps, args.gpu_explorer]))

    # Add learner.
    tasks.append(Process(
        target=learner,
        args=[global_rb, trained_steps, is_training_done, lock,
              env_fn(), policy_fn, get_weights_fn, args.n_training,
              args.param_update_freq, args.test_freq, args.gpu_learner,
              queues]))

    # Add evaluator.  It listens on the last queue for weight updates.
    tasks.append(Process(
        target=evaluator,
        args=[is_training_done, env_fn(), policy_fn, set_weights_fn,
              queues[-1], args.gpu_evaluator, args.save_model_interval]))

    for task in tasks:
        task.start()
    for task in tasks:
        task.join()
def __init__(self, policy, env, args, test_env=None):
    """Set up the trainer: config, environments, logging, TensorBoard.

    Args:
        policy: Policy object to be trained.
        env: Training environment.
        args: Parsed command-line arguments.
        test_env: Optional evaluation environment; defaults to ``env``.
    """
    self._set_from_args(args)
    self._policy = policy
    self._env = env
    # Fall back to the training environment when no test env is given.
    self._test_env = test_env if test_env is not None else self._env

    if self._normalize_obs:
        assert isinstance(env.observation_space, Box)
        self._obs_normalizer = EmpiricalNormalizer(
            shape=env.observation_space.shape)

    # prepare log directory
    dir_suffix = "{}_{}".format(self._policy.policy_name, args.dir_suffix)
    self._output_dir = prepare_output_dir(
        args=args,
        user_specified_dir=self._logdir,
        suffix=dir_suffix)
    self.logger = initialize_logger(
        logging_level=logging.getLevelName(args.logging_level),
        output_dir=self._output_dir)

    # In evaluation mode a trained model directory is mandatory.
    if args.evaluate:
        assert args.model_dir is not None
        self._set_check_point(args.model_dir)

    # prepare TensorBoard output
    self.writer = tf.summary.create_file_writer(self._output_dir)
    self.writer.set_as_default()
def __init__(self, policy, env, args, test_env=None):
    """Initialize the trainer: envs, logging, checkpointing, TensorBoard.

    Args:
        policy: Policy object to be trained.
        env: Training environment.
        args: Parsed command-line arguments.
        test_env: Optional evaluation environment; defaults to ``env``.
    """
    self._policy = policy
    self._env = env
    # Reuse the training environment when no dedicated test env exists.
    self._test_env = test_env if test_env is not None else self._env
    self._set_from_args(args)

    # prepare log directory
    self._output_dir = prepare_output_dir(
        args=args,
        user_specified_dir="./results",
        suffix="{}_{}".format(self._policy.policy_name, args.dir_suffix))
    self.logger = initialize_logger(
        logging_level=logging.getLevelName(args.logging_level),
        output_dir=self._output_dir)

    # Save and restore model
    ckpt = tf.train.Checkpoint(policy=self._policy)
    self.checkpoint_manager = tf.train.CheckpointManager(
        ckpt, directory=self._output_dir, max_to_keep=5)
    if args.model_dir is not None:
        assert os.path.isdir(args.model_dir)
        latest_ckpt = tf.train.latest_checkpoint(args.model_dir)
        ckpt.restore(latest_ckpt)
        self.logger.info("Restored {}".format(latest_ckpt))

    # prepare TensorBoard output
    self.writer = tf.summary.create_file_writer(self._output_dir)
    self.writer.set_as_default()
def __init__(self, policy, env, params, test_env=None):
    """Initializing the training instance."""
    self._params = params
    self._set_from_params()
    self._policy = policy
    self._env = env
    # Use the training environment for testing when none is provided.
    self._test_env = test_env if test_env is not None else self._env
    args = self._get_args_from_params()

    # Convolutional Autoencoder:
    cae_cfg = self._params["cae"]
    ws_shape = self._env.workspace.shape
    self._CAE = CAE(pooling=cae_cfg["pooling"],
                    latent_dim=cae_cfg["latent_dim"],
                    input_shape=ws_shape,
                    conv_filters=cae_cfg["conv_filters"])
    self._CAE.build(input_shape=(1, ws_shape[0], ws_shape[1], 1))
    self._CAE.load_weights(filepath=cae_cfg["weights_path"])
    # Freeze every CAE layer: it serves as a fixed feature extractor.
    # NOTE(review): relies on the private Keras API
    # `_get_trainable_state()` — iterating its keys yields the layers.
    for layer in self._CAE._get_trainable_state():
        layer.trainable = False

    # Initialize array for trajectory storage
    self.trajectory = []

    # Initialize workspace relabeler:
    trainer_cfg = params["trainer"]
    self._relabeler = PointrobotRelabeler(
        ws_shape=(self._env.grid_size, self._env.grid_size),
        mode=trainer_cfg["relabeling_mode"],
        remove_zigzaging=trainer_cfg["remove_zigzaging"])

    # prepare log directory
    self._output_dir = prepare_output_dir(
        args=args,
        user_specified_dir=self._logdir,
        suffix="{}_{}".format(self._policy.policy_name,
                              trainer_cfg["dir_suffix"]))
    self.logger = initialize_logger(
        logging_level=logging.getLevelName(trainer_cfg["logging_level"]),
        output_dir=self._output_dir)

    # Create separate directories for per-outcome test trajectories.
    if self._save_test_path_sep:
        for sub in ('successful_trajs', 'unsuccessful_trajs',
                    'unfinished_trajs'):
            sub_path = os.path.join(self._logdir, sub)
            if not os.path.exists(sub_path):
                os.makedirs(sub_path)

    if trainer_cfg["mode"] == "evaluate":
        # A non-empty model directory is required for evaluation.
        assert glob.glob(os.path.join(trainer_cfg["model_dir"], '*'))
        self._set_check_point(trainer_cfg["model_dir"])

    # prepare TensorBoard output
    self.writer = tf.summary.create_file_writer(self._output_dir)
    self.writer.set_as_default()

    # relabeling visualization:
    self._relabel_fig = plt.figure(2)
def __init__(self, policy, env, args, test_env=None):
    """
    Initialize Trainer class

    Args:
        policy: Policy to be trained
        env (gym.Env): Environment for train
        args (Namespace or dict): config parameters specified with command line
        test_env (gym.Env): Environment for test.
    """
    # Accept a plain dict by converting it into the default Namespace
    # and overriding only the keys the caller supplied.
    if isinstance(args, dict):
        overrides = args
        args = policy.__class__.get_argument(Trainer.get_argument())
        args = args.parse_args([])
        for key, value in overrides.items():
            if not hasattr(args, key):
                raise ValueError(f"{key} is invalid parameter.")
            setattr(args, key, value)

    self._set_from_args(args)
    self._policy = policy
    self._env = env
    self._test_env = test_env if test_env is not None else self._env

    if self._normalize_obs:
        assert isinstance(env.observation_space, Box)
        self._obs_normalizer = EmpiricalNormalizer(
            shape=env.observation_space.shape)

    # prepare log directory
    self._output_dir = prepare_output_dir(
        args=args,
        user_specified_dir=self._logdir,
        suffix="{}_{}".format(self._policy.policy_name, args.dir_suffix))
    self.logger = initialize_logger(
        logging_level=logging.getLevelName(args.logging_level),
        output_dir=self._output_dir)

    # Evaluation mode requires a pre-trained model directory.
    if args.evaluate:
        assert args.model_dir is not None
        self._set_check_point(args.model_dir)

    # prepare TensorBoard output
    self.writer = tf.summary.create_file_writer(self._output_dir)
    self.writer.set_as_default()
def __init__(self, policy, env, args, test_env=None):
    """Set up training: config, normalization, logging, checkpoints.

    Args:
        policy: Policy object to be trained.
        env: Training environment.
        args: Parsed command-line arguments.
        test_env: Optional evaluation environment; defaults to ``env``.
    """
    self._set_from_args(args)
    self._policy = policy
    self._env = env
    self._test_env = test_env if test_env is not None else self._env

    if self._normalize_obs:
        assert isinstance(env.observation_space, Box)
        self._obs_normalizer = EmpiricalNormalizer(
            shape=env.observation_space.shape)

    # prepare log directory
    dir_suffix = "{}_{}".format(self._policy.policy_name, args.dir_suffix)
    self._output_dir = prepare_output_dir(
        args=args,
        user_specified_dir=self._logdir,
        suffix=dir_suffix)
    self.logger = initialize_logger(
        logging_level=logging.getLevelName(args.logging_level),
        output_dir=self._output_dir)

    # Save and restore model
    self._checkpoint = tf.train.Checkpoint(policy=self._policy)
    self.checkpoint_manager = tf.train.CheckpointManager(
        self._checkpoint, directory=self._output_dir, max_to_keep=5)
    # Evaluation mode requires a model directory to restore from.
    if args.evaluate:
        assert args.model_dir is not None
    if args.model_dir is not None:
        assert os.path.isdir(args.model_dir)
        self._latest_path_ckpt = tf.train.latest_checkpoint(args.model_dir)
        self._checkpoint.restore(self._latest_path_ckpt)
        self.logger.info("Restored {}".format(self._latest_path_ckpt))

    # prepare TensorBoard output
    self.writer = tf.summary.create_file_writer(self._output_dir)
    self.writer.set_as_default()