# Exemplo n.º 1
# 0
        # ---- Inner step loop of a DQN training episode (fragment) ----
        # NOTE(review): the enclosing episode/step `for` headers are not
        # visible in this chunk; `env`, `mario`, `state`, and `logger` are
        # assumed to be bound in that outer scope -- confirm against the
        # full script.

        # env.render()

        # 4. Run agent on the state: pick an action for the current
        # observation (presumably epsilon-greedy -- TODO confirm).
        action = mario.act(state)

        # 5. Agent performs action. The environment returns a 4-tuple:
        # next observation, scalar reward, terminal flag, and an info dict.
        next_state, reward, done, info = env.step(action)

        # 6. Remember: store the transition for later learning.
        mario.cache(state, next_state, action, reward, done)

        # 7. Learn: update the agent and get back the step's Q estimate
        # and training loss for logging.
        q, loss = mario.learn()

        # 8. Logging: accumulate per-step metrics for this episode.
        logger.log_step(reward, loss, q)

        # 9. Update state: the next observation becomes the current one.
        state = next_state

        # 10. Check if end of game: stop on a terminal state or when the
        # info dict reports the end-of-level flag was reached.
        if done or info['flag_get']:
            break

    # Close out the episode's accumulated metrics.
    logger.log_episode()

    if e % print_e == 0:
        logger.record(episode=e,
                      epsilon=mario.exploration_rate,
                      step=mario.curr_step)
        episode_rewards.append(reward)
        q, loss = doomguy.learn()

        logger.log_step(reward, loss, q)

        # If the game is finished
        if done:
            # the episode ends so no next state
            next_state = np.zeros((84, 84), dtype=np.int)
            next_state, stacked_frames = stack_frames(stacked_frames,
                                                      next_state, False)

            # Get the total reward of the episode
            total_reward = np.sum(episode_rewards)
            break
        else:
            next_state, stacked_frames = get_next_state(False)
            #memory.add((state, action, reward, next_state, done))
            doomguy.cache(state, next_state, action, reward, done)
            state = next_state

        if step > max_steps:
            total_reward = np.sum(episode_rewards)
            break

    logger.log_episode()
    if episode % 20 == 0:
        logger.record(episode=episode,
                      epsilon=doomguy.exploration_rate,
                      step=doomguy.curr_step)