if __name__ == '__main__':
    # Script entry point: train a tabular Q-learning agent on a grid world
    # and print the resulting Q-table.
    #
    # Each episode starts in cell (0, 0) and runs until the grid reports a
    # final state or `max_steps` actions have been taken. The grid is rebuilt
    # from the same seed after every episode, so every episode is played on a
    # grid constructed with identical arguments.
    max_steps = 100    # upper bound on actions per episode
    max_iters = 1000   # number of training episodes
    grid_size = 8
    report_every = 100  # print progress once per this many episodes
    seed = random.randint(0, 100)

    agent = qAgent()
    grid = GridWorld(size=grid_size, force_fast=True, seed=seed)
    grid.show()
    print()

    # `episode` rather than `iter`, to avoid shadowing the builtin iter().
    for episode in range(max_iters):
        agent.set_grid(grid)
        i, j = 0, 0  # initial state
        cum_reward = 0
        steps_taken = 0  # stays 0 if max_steps is 0, so the report never fails

        # Count steps from 1 so the progress message reports how many actions
        # were actually performed, not the 0-based index of the last one.
        for steps_taken in range(1, max_steps + 1):
            action = agent.get_action(i, j)
            new_i, new_j = grid.move(i, j, action)
            # NOTE(review): the reward/terminal flag is queried for the cell
            # the agent is leaving, (i, j), not the cell it moved into --
            # confirm this matches GridWorld.get_reward's contract.
            reward, is_final = grid.get_reward(i, j)
            cum_reward += reward
            agent.update_q(i, j, new_i, new_j, action, reward)
            if is_final:
                break
            i, j = new_i, new_j

        if episode % report_every == 0:
            print(
                'Episode {} finished after {} steps with cumulative reward of {}'
                .format(episode, steps_taken, cum_reward))

        # Fresh grid for the next episode, built with the same seed.
        grid = GridWorld(size=grid_size, force_fast=True, seed=seed)

    print()
    show_qtable(agent, grid.size)