def __init__(self, config, policy, global_episode, n_agent=0,
             agent_type='exploration', log_dir=''):
    print(f"Initializing agent {n_agent}...")
    self.config = config
    self.n_agent = n_agent
    self.agent_type = agent_type
    self.max_steps = config['max_ep_length']
    self.num_episode_save = config['num_episode_save']
    self.global_episode = global_episode
    self.local_episode = 0
    self.log_dir = log_dir

    # Create environment and exploration noise process
    self.env_wrapper = create_env_wrapper(config)
    self.ou_noise = OUNoise(dim=config["action_dim"],
                            low=config["action_low"],
                            high=config["action_high"])
    self.ou_noise.reset()

    self.actor = policy
    print("Agent ", n_agent, self.actor.device)

    # Logger
    log_path = f"{log_dir}/agent-{n_agent}"
    self.logger = Logger(log_path)
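
# A minimal sketch (assumption, not part of the original module) of the config
# keys that __init__ above reads directly; the example values are placeholders,
# and the env wrapper created from the same dict will typically need further
# settings defined elsewhere in the repo.
#
#   config = {
#       "max_ep_length": 1000,      # -> self.max_steps
#       "num_episode_save": 100,    # -> self.num_episode_save
#       "action_dim": 2,            # OUNoise dimensionality
#       "action_low": -1.0,         # OUNoise lower bound
#       "action_high": 1.0,         # OUNoise upper bound
#   }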
def eval_policy(self, eval_episodes=10):
    env_wrapper = create_env_wrapper(self.config)
    avg_reward = 0
    for _ in range(eval_episodes):
        state = env_wrapper.reset()
        done = False
        while not done:
            # Act greedily with the agent's policy network (no exploration
            # noise); __init__ stores the policy as self.actor.
            action = self.actor.get_action(state).detach().cpu().numpy().flatten()
            next_state, reward, done = env_wrapper.step(action)
            avg_reward += reward
            state = next_state
    avg_reward /= eval_episodes

    print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
    print("---------------------------------------")
    return avg_reward
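
# Usage sketch (hypothetical, not part of the original class): inside a training
# loop, evaluation could be triggered every `num_episode_save` local episodes,
# assuming `self.local_episode` is incremented once per finished episode.
#
#   if self.local_episode % self.num_episode_save == 0:
#       avg_reward = self.eval_policy(eval_episodes=10)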