Example #1
    def __init__(self,
                 config,
                 policy,
                 global_episode,
                 n_agent=0,
                 agent_type='exploration',
                 log_dir=''):
        print(f"Initializing agent {n_agent}...")
        self.config = config
        self.n_agent = n_agent
        self.agent_type = agent_type
        self.max_steps = config['max_ep_length']
        self.num_episode_save = config['num_episode_save']
        self.global_episode = global_episode
        self.local_episode = 0
        self.log_dir = log_dir

        # Create environment
        self.env_wrapper = create_env_wrapper(config)
        self.ou_noise = OUNoise(dim=config["action_dim"],
                                low=config["action_low"],
                                high=config["action_high"])
        self.ou_noise.reset()

        # Policy network used for action selection.
        self.actor = policy
        print(f"Agent {n_agent} on device: {self.actor.device}")

        # Logger
        log_path = f"{log_dir}/agent-{n_agent}"
        self.logger = Logger(log_path)
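
Example #1 is the constructor of an exploration agent in a distributed actor-learner setup: it stores the run configuration, builds an environment wrapper, prepares Ornstein-Uhlenbeck (OU) exploration noise, and attaches a per-agent logger. The OUNoise class itself is not part of the snippet; a minimal sketch of a compatible implementation, assuming the conventional OU parameters mu, theta, and sigma (the defaults below are illustrative, not taken from the source), might look like this:

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process for temporally correlated exploration noise."""

    def __init__(self, dim, low, high, mu=0.0, theta=0.15, sigma=0.2):
        self.dim = dim
        self.low = low      # lower action bound, used for clipping
        self.high = high    # upper action bound, used for clipping
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Start each episode's noise process at the mean.
        self.state = np.ones(self.dim) * self.mu

    def evolve_state(self):
        # Discrete OU update: x += theta * (mu - x) + sigma * N(0, 1)
        self.state += (self.theta * (self.mu - self.state)
                       + self.sigma * np.random.randn(self.dim))
        return self.state

    def get_action(self, action):
        # Perturb the actor's output and clip it back into the valid range.
        return np.clip(action + self.evolve_state(), self.low, self.high)

During exploration rollouts such an agent would typically call ou_noise.get_action(raw_action) on each step and ou_noise.reset() at episode boundaries, consistent with the reset() call in the constructor above.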
Example #2
    def eval_policy(self, eval_episodes=10):
        # Roll out the target policy (no exploration noise) for
        # eval_episodes full episodes and return the mean episode reward.
        env_wrapper = create_env_wrapper(self.config)
        avg_reward = 0
        for _ in range(eval_episodes):
            state = env_wrapper.reset()
            done = False
            while not done:
                action = self.target_policy_net.get_action(state)
                action = action.detach().cpu().numpy().flatten()
                next_state, reward, done = env_wrapper.step(action)
                avg_reward += reward
                state = next_state

        avg_reward /= eval_episodes
        print("---------------------------------------")
        print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
        print("---------------------------------------")
        return avg_reward
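
Example #2 evaluates the target policy without exploration noise over eval_episodes episodes and reports the mean episode return. It relies on create_env_wrapper returning an object whose step() yields a (next_state, reward, done) triple. A minimal sketch of such a wrapper, assuming the classic (pre-0.26) gym API and a hypothetical config["env"] key holding the environment id:

import gym

class EnvWrapper:
    """Adapter exposing the reset()/step() interface used in eval_policy."""

    def __init__(self, env_name):
        self.env = gym.make(env_name)

    def reset(self):
        # Classic gym: reset() returns just the initial observation.
        return self.env.reset()

    def step(self, action):
        # Collapse gym's (obs, reward, done, info) 4-tuple into the
        # 3-tuple the evaluation loop expects.
        next_state, reward, done, _info = self.env.step(action)
        return next_state, reward, done

def create_env_wrapper(config):
    # Hypothetical factory; assumes the environment id is stored under
    # config["env"] (the real project may use a different key).
    return EnvWrapper(config["env"])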