class AC_Trainer(object):
    """Builds the ACAgent configuration and drives an RL_Trainer with it."""

    def __init__(self, params):
        """Assemble agent parameters from ``params`` and create the RL_Trainer.

        ``params`` is stored by reference and extended in place with the
        'agent_class', 'agent_params' and 'batch_size_initial' entries.
        """
        # Network / optimisation settings are forwarded under the same names.
        agent_params = {
            key: params[key]
            for key in (
                'n_layers',
                'size',
                'learning_rate',
                'num_target_updates',
                'num_grad_steps_per_target_update',
            )
        }

        # Advantage-estimation settings: the discount is exposed to the agent
        # as 'gamma', and the CLI-style negative flag is inverted.
        agent_params['gamma'] = params['discount']
        agent_params['standardize_advantages'] = not params['dont_standardize_advantages']

        # Per-iteration training schedule, forwarded under the same names.
        agent_params.update(
            (key, params[key])
            for key in (
                'num_agent_train_steps_per_iter',
                'num_critic_updates_per_agent_update',
                'num_actor_updates_per_agent_update',
            )
        )

        self.params = params
        self.params['agent_class'] = ACAgent
        self.params['agent_params'] = agent_params
        self.params['batch_size_initial'] = self.params['batch_size']

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        """Train for params['n_iter'] iterations, then call eval_render()."""
        trainer = self.rl_trainer
        n_iter = self.params['n_iter']
        # The agent's actor is used both to collect data and to evaluate.
        actor = trainer.agent.actor
        trainer.run_training_loop(
            n_iter,
            collect_policy=actor,
            eval_policy=actor,
        )
        # NOTE(review): called without a policy argument and unconditionally,
        # unlike PG_Trainer/BC_Trainer -- confirm against RL_Trainer.eval_render.
        trainer.eval_render()
class Q_Trainer(object):
    """Builds the DQNAgent configuration and drives an RL_Trainer with it."""

    def __init__(self, params):
        """Merge training, environment and user settings, then create the trainer.

        ``params`` is stored by reference and extended in place with the
        'agent_class', 'agent_params', 'train_batch_size' and 'env_wrappers'
        entries.
        """
        self.params = params

        # Start from the training-schedule settings derived from params ...
        merged = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
            'num_critic_updates_per_agent_update': params['num_critic_updates_per_agent_update'],
            'train_batch_size': params['batch_size'],
            'double_q': params['double_q'],
        }
        # ... layer the per-environment kwargs on top ...
        merged.update(get_env_kwargs(params['env_name']))
        # ... and let every entry in params win over both of the above.
        merged.update(params)
        self.agent_params = merged

        self.params['agent_class'] = DQNAgent
        self.params['agent_params'] = self.agent_params
        self.params['train_batch_size'] = params['batch_size']
        self.params['env_wrappers'] = self.agent_params['env_wrappers']

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        """Train for agent_params['num_timesteps'] steps, then call eval_render()."""
        trainer = self.rl_trainer
        num_timesteps = self.agent_params['num_timesteps']
        # The agent's actor is used both to collect data and to evaluate.
        actor = trainer.agent.actor
        trainer.run_training_loop(
            num_timesteps,
            collect_policy=actor,
            eval_policy=actor,
        )
        # NOTE(review): called without a policy argument and unconditionally,
        # unlike PG_Trainer/BC_Trainer -- confirm against RL_Trainer.eval_render.
        trainer.eval_render()
class PG_Trainer(object):
    """Builds the PGAgent configuration and drives an RL_Trainer with it."""

    # Checkpoint restored by load_trained_agent_render() when the caller does
    # not supply a path. Kept only for backward compatibility: it is an
    # absolute, machine-specific location.
    DEFAULT_POLICY_PATH = '/home/kim/cs285_ws/homework_fall2019/hw2/cs285/data/pg_todo_CartPole-v0_15-01-2020_15-42-29/policy_itr_99'

    def __init__(self, params):
        """Assemble agent parameters from ``params`` and create the RL_Trainer.

        ``params`` is stored by reference and extended in place with the
        'agent_class', 'agent_params' and 'batch_size_initial' entries.
        """
        #####################
        ## SET AGENT PARAMS
        #####################

        computation_graph_args = {
            'n_layers': params['n_layers'],
            'size': params['size'],
            'learning_rate': params['learning_rate'],
        }

        # The discount is exposed to the agent under the name 'gamma'.
        estimate_advantage_args = {
            'gamma': params['discount'],
            'standardize_advantages': params['standardize_advantages'],
            'reward_to_go': params['reward_to_go'],
            'nn_baseline': params['nn_baseline'],
            'gae': params['gae'],
            'gae_gamma': params['gae_gamma'],
            'gae_lambda': params['gae_lambda'],
        }

        train_args = {
            'num_agent_train_steps_per_iter': params['num_agent_train_steps_per_iter'],
        }

        agent_params = {
            **computation_graph_args,
            **estimate_advantage_args,
            **train_args,
        }

        self.params = params
        self.params['agent_class'] = PGAgent
        self.params['agent_params'] = agent_params
        self.params['batch_size_initial'] = self.params['batch_size']

        ################
        ## RL TRAINER
        ################

        self.rl_trainer = RL_Trainer(self.params)

    def run_training_loop(self):
        """Train for params['n_iter'] iterations and optionally render.

        Rendering with the agent's actor happens only when
        params['render_after_training'] equals 1.
        """
        self.rl_trainer.run_training_loop(
            self.params['n_iter'],
            collect_policy=self.rl_trainer.agent.actor,
            eval_policy=self.rl_trainer.agent.actor,
        )

        if self.params['render_after_training'] == 1:
            self.rl_trainer.eval_render(self.rl_trainer.agent.actor)

    def load_trained_agent_render(self, policy_path=None):
        """Restore a saved policy into the agent's actor and render it.

        Args:
            policy_path: Location passed to the actor's ``restore``. When
                omitted (``None``), ``DEFAULT_POLICY_PATH`` is used, which
                reproduces the previously hard-coded behaviour.
        """
        if policy_path is None:
            policy_path = self.DEFAULT_POLICY_PATH

        actor = self.rl_trainer.agent.actor
        actor.restore(policy_path)
        self.rl_trainer.eval_render(actor)
class BC_Trainer(object):
    """Builds the BCAgent configuration, an RL_Trainer and the expert policy."""

    def __init__(self, params):
        """Create the trainer and load the expert policy named in ``params``.

        ``params`` is stored by reference and extended in place with the
        'agent_class' and 'agent_params' entries.
        """
        # Agent settings are forwarded from params under the same names.
        agent_params = {
            key: params[key]
            for key in ('n_layers', 'size', 'learning_rate', 'max_replay_buffer_size')
        }

        self.params = params
        self.params['agent_class'] = BCAgent
        self.params['agent_params'] = agent_params

        # The trainer must exist first: its session is handed to the
        # expert-policy loader below.
        self.rl_trainer = RL_Trainer(self.params)

        expert_file = self.params['expert_policy_file']
        print('Loading expert policy from...', expert_file)
        self.loaded_expert_policy = Loaded_Gaussian_Policy(self.rl_trainer.sess, expert_file)
        print('Done restoring expert policy...')

    def run_training_loop(self):
        """Run training from params and optionally render the result.

        Rendering with the agent's actor happens only when
        params['render_after_training'] equals 1.
        """
        cfg = self.params
        trainer = self.rl_trainer

        n_iter = cfg['n_iter']
        expert_data = cfg['expert_data']
        # The agent's actor is used both to collect data and to evaluate.
        actor = trainer.agent.actor
        use_dagger = cfg['do_dagger']

        trainer.run_training_loop(
            n_iter=n_iter,
            initial_expertdata=expert_data,
            collect_policy=actor,
            eval_policy=actor,
            relabel_with_expert=use_dagger,
            expert_policy=self.loaded_expert_policy,
        )

        if cfg['render_after_training'] == 1:
            trainer.eval_render(actor)