Example #1
    def setUp(self) -> None:
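        # Write all test logs into a temporary directory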
        self.temp_dir = tempfile.TemporaryDirectory()

        episodes = 80
        seeds = [0, 1, 3, 4, 5]
        experiment_name = "test_env"
        logger = Logger(
            output_path=Path(self.temp_dir.name),
            experiment_name=experiment_name,
            step_write_frequency=None,
            episode_write_frequency=None,
        )

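        # Create the Sigmoid benchmark environment and a random agent, then attach the env to the logger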
        benchmark = SigmoidBenchmark()
        env = benchmark.get_benchmark()
        agent = RandomAgent(env)
        logger.set_env(env)

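        # Register the env as a logging module and run the episodes once per seed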
        env_logger = logger.add_module(env)
        for seed in seeds:
            env.seed(seed)
            logger.set_additional_info(seed=seed)
            logger.reset_episode()

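            # Within each episode, log step index, episode index, reward and done flag at every step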
            for episode in range(episodes):
                state = env.reset()
                done = False
                reward = 0
                step = 0
                while not done:
                    action = agent.act(state, reward)
                    env_logger.log(
                        "logged_step",
                        step,
                    )
                    env_logger.log(
                        "logged_episode",
                        episode,
                    )
                    next_state, reward, done, _ = env.step(action)
                    env_logger.log(
                        "reward",
                        reward,
                    )
                    env_logger.log(
                        "done",
                        done,
                    )
                    agent.train(next_state, reward)
                    state = next_state
                    logger.next_step()
                    step += 1
                agent.end_episode(state, reward)
                logger.next_episode()

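        # Close the environment and the logger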
        env.close()
        logger.close()

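        # Remember the log file name so the tests can read it back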
        self.log_file = env_logger.log_file.name
Example #2
import numpy as np

from dacbench.logger import Logger


def train_chainer(
    agent, env, num_episodes=10, flatten_state=False, logger: Logger = None
):
    """Train a chainer agent on ``env`` for ``num_episodes`` episodes.

    ``flatten`` is assumed to be a helper that flattens the per-key state
    components into a single flat list when ``flatten_state`` is set.
    """
    for i in range(num_episodes):
        state = env.reset()
        if flatten_state:
            state = np.array(flatten([state[k] for k in state.keys()]))
            state = state.astype(np.float32)
        done = False
        r = 0  # cumulative reward over the episode
        reward = 0  # last step reward, fed to act_and_train on the first call
        while not done:
            action = agent.act_and_train(state, reward)
            next_state, reward, done, _ = env.step(action)
            r += reward
            if flatten_state:
                state = np.array(flatten([next_state[k] for k in next_state.keys()]))
                state = state.astype(np.float32)
            else:
                state = next_state
            if logger is not None:
                logger.next_step()
        agent.stop_episode_and_train(state, reward, done=done)
        if logger is not None:
            logger.next_episode()
        print(
            f"Episode {i}/{num_episodes}...........................................Reward: {r}"
        )
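# A minimal, hypothetical usage sketch for train_chainer: the environment is
# assumed to expose Box observation and action spaces, and make_chainer_a3c
# (shown in the next example) serves as the agent factory.
#
#   obs_size = env.observation_space.low.size
#   action_size = env.action_space.low.size
#   agent = make_chainer_a3c(obs_size, action_size)
#   train_chainer(agent, env, num_episodes=5, logger=logger)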
Example #3
# Make chainer A3C agent sized to the environment's observation and action spaces
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
    # Reset environment to begin episode
    state = env.reset()

    # Initialize episode
    done = False
    r = 0
    reward = 0
    while not done:
        # Select action
        action = agent.act_and_train(state, reward)
        # Execute action
        next_state, reward, done, _ = env.step(action)
        r += reward
        logger.next_step()
        state = next_state
    logger.next_episode()
    # Train agent after episode has ended
    agent.stop_episode_and_train(state, reward, done=done)
    # Log episode
    print(
        f"Episode {i+1}/{num_episodes}...........................................Reward: {r}"
    )