# env.render() # 4. Run agent on the state action = mario.act(state) # 5. Agent performs action next_state, reward, done, info = env.step(action) # 6. Remember mario.cache(state, next_state, action, reward, done) # 7. Learn q, loss = mario.learn() # 8. Logging logger.log_step(reward, loss, q) # 9. Update state state = next_state # 10. Check if end of game if done or info['flag_get']: break logger.log_episode() if e % print_e == 0: logger.record(episode=e, epsilon=mario.exploration_rate, step=mario.curr_step)
# Episode loop: drive the agent through `episodes` runs of the environment.
# No learn step is invoked in this loop, so the per-step loss and Q-value
# are logged as None.
for e in range(episodes):
    state = env.reset()

    # Step the environment until the episode terminates or the flag is reached.
    episode_over = False
    while not episode_over:
        env.render()

        # Ask the agent for an action and apply it to the environment.
        action = mario.act(state)
        next_state, reward, done, info = env.step(action)

        # Hand the transition to the agent and log the step reward.
        mario.cache(state, next_state, action, reward, done)
        logger.log_step(reward, None, None)

        state = next_state

        # `info['flag_get']` is only consulted when `done` is falsy.
        episode_over = done or info['flag_get']

    logger.log_episode()

    # Emit a summary record on every 20th episode (including episode 0).
    if e % 20 == 0:
        logger.record(
            episode=e,
            epsilon=mario.exploration_rate,
            step=mario.curr_step,
        )