Example no. 1
0
def main(checkpoint, debug=False):
    """Replay episodes forever with an agent loaded from *checkpoint*.

    The environment id is derived from the checkpoint filename: the text
    before the first ``'-'`` is looked up in a table of short prefixes
    (e.g. ``"pole"`` -> ``"CartPole-v1"``); an unknown prefix is used
    verbatim as the environment id.  Runs until interrupted with Ctrl-C.

    Args:
        checkpoint (str): Path to the saved agent checkpoint file.
        debug (bool): Forwarded to ``play_episode``; presumably enables
            verbose per-step output — confirm against ``play_episode``.
    """
    filename = os.path.basename(checkpoint)
    file_prefix = filename.split('-')[0]

    # Map short filename prefixes to OpenAI Gym environment ids;
    # fall back to treating the prefix itself as a full env id.
    openai_env_ids = {
        "pole": "CartPole-v1",
        "lunarcont": "LunarLanderContinuous-v2",
        "lunar": "LunarLander-v2",
        "carcont": "MountainCarContinuous-v0",
        "pendulum": "Pendulum-v0",
    }
    env_id = openai_env_ids.get(file_prefix, file_prefix)

    env = create_env(env_id)

    # Build the action-sampling function from the checkpointed agent.
    sample_action = sample_action_fn(checkpoint, env.action_space)

    stats = Statistics()

    # try/finally guarantees env.close() even if play_episode raises an
    # unexpected exception (the original only closed on KeyboardInterrupt
    # and had an unreachable close() after the infinite loop).
    try:
        while True:
            episode_stats = play_episode(env, sample_action, debug=debug)
            stats.set_all(episode_stats)
            print(
                ("Episode #{}: {:.2f}; Average Reward: {:.2f}; " +
                 "Episode length: {}; Average episode length: {:.1f}").format(
                     stats.sum("episodes"), episode_stats.avg("rewards"),
                     stats.avg("rewards"), int(episode_stats.avg("steps")),
                     stats.avg("steps")))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the replay loop.
        return
    finally:
        env.close()
Example no. 2
0
    def _run_one_phase(self, is_training):
        """Run one train or eval phase until enough env steps are taken.

        Args:
            is_training (bool): True for a training phase (agent learns
                from transitions), False for evaluation.

        Returns:
            tuple: ``(phase_stats, learner_stats)`` — environment/timing
            statistics and agent-training statistics for the phase.
        """
        phase_stats = Statistics()
        learner_stats = Statistics()

        # Agent runs in eval mode exactly when we are not training.
        self._agent.eval = not is_training

        # Step budget for the phase, scaled by the number of agents
        # acting in parallel in the environment.
        steps_per_agent = (self._training_steps if is_training
                           else self._evaluation_steps)
        min_steps = steps_per_agent * self._env.n_agents

        self._env.reset()
        while phase_stats.sum("steps") < min_steps:
            step_start = time.time()

            # Snapshot states before acting; the env mutates them in-place
            # on step(), so copy defensively.
            observations = np.copy(self._env.states)
            chosen_actions = self._agent.step(observations)

            rewards, next_observations, dones, env_stats = \
                self._env.step(chosen_actions)
            phase_stats.set_all(env_stats)

            # Record the transition for replay, when a buffer is attached.
            if self._traj_buffer is not None:
                self._traj_buffer.push(observations, chosen_actions, rewards,
                                       next_observations, dones)

            if is_training:
                learn_start = time.time()
                learner_stats.set_all(
                    self._agent.transitions(observations, chosen_actions,
                                            rewards, next_observations,
                                            dones))
                # Track how long learning vs. the whole step took.
                phase_stats.set("agent_time", time.time() - learn_start)
                phase_stats.set("step_time", time.time() - step_start)

            # Single-line progress indicator, overwritten in place via \r.
            progress = (
                "Iteration {} ({}). ".format(
                    self._iteration, "train" if is_training else "eval") +
                "Steps executed: {} ".format(phase_stats.sum("steps")) +
                "Episode length: {} ".format(int(phase_stats.avg("steps"))) +
                "Return: {:.4f}      \r".format(phase_stats.avg("rewards")))
            sys.stdout.write(progress)
            sys.stdout.flush()
        print()
        self._agent.episodes_end()
        return phase_stats, learner_stats