Пример #1
0

if __name__ == '__main__':
    # Script entry point: repeatedly runs `qAgent` through a `GridWorld`
    # (size=8) for a fixed number of episodes, updating the agent after every
    # move, printing a progress line every 100th episode, and finally showing
    # the agent's Q-table via `show_qtable`.
    max_steps = 100   # upper bound on moves within a single episode
    max_iters = 1000  # number of episodes to run
    seed = random.randint(0, 100)  # one seed reused for every grid built below
    agent = qAgent()
    grid = GridWorld(size=8, force_fast=True, seed=seed)
    grid.show()
    print()
    # `episode` rather than `iter`, so the builtin `iter` is not shadowed.
    for episode in range(max_iters):
        agent.set_grid(grid)
        i, j = 0, 0  # initial state
        cum_reward = 0
        # Number of moves actually performed; stays 0 if the loop body never
        # runs, so the report below never hits an unbound loop variable.
        steps_taken = 0
        for step in range(max_steps):
            action = agent.get_action(i, j)
            new_i, new_j = grid.move(i, j, action)
            # NOTE(review): the reward / terminal flag are queried for the
            # cell the agent is leaving (i, j), not the one it moved to
            # (new_i, new_j). Left unchanged because GridWorld's contract is
            # not visible here -- confirm this is intended.
            reward, is_final = grid.get_reward(i, j)
            cum_reward += reward
            agent.update_q(i, j, new_i, new_j, action, reward)
            steps_taken = step + 1  # `step` is zero-based; report a count
            if is_final:
                break
            i, j = new_i, new_j
        if episode % 100 == 0:
            print(
                'Episode {} finished after {} steps with cumulative reward of {}'
                .format(episode, steps_taken, cum_reward))
        # Build a fresh grid from the same seed for the next episode
        # (presumably to reset any per-episode grid state -- verify against
        # GridWorld).
        grid = GridWorld(size=8, force_fast=True, seed=seed)
    print()
    show_qtable(agent, grid.size)