# RL take action and get next state and reward _, next_state_index, reward, done = env.step(action) # RL choose action based on next state next_action = RL.choose_action(str(next_state_index)) # RL learn from this transition (s, a, r, s, a) ==> Sarsa RL.learn(str(state), action, reward, str(next_state_index), next_action) # swap state and action state = next_state_index action = next_action # break while loop when end of this episode if done: break # end of game print('game over') env.destroy() if __name__ == "__main__": env = GridWorld() RL = Sarsa(actions=list(range(env.n_actions))) env.after(10000, update) env.mainloop() print(RL.q_table)