Example #1
def test_run_experiment():
    mdp = ResGridworld((1, 1))
    print_grid(mdp)
    agent = PrioritizedSweepingAgent(mdp=mdp,
                                     n_planning_steps=5,
                                     theta=1e-5,
                                     alpha=0.6,
                                     epsilon=0.15,
                                     discount=0.95)

    avg_num_updates = run_experiment(mdp, agent, 5)
    print(avg_num_updates)
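# Note (editorial sketch): run_experiment itself is not shown in these snippets.
# The sketch below is a hypothetical version consistent with the call above,
# assuming the agent exposes reset()/run_episode() (as in Example #3) and a
# num_updates counter; the repository's real implementation may differ.
def run_experiment_sketch(mdp, agent, n_episodes):
    # mdp is accepted only to mirror the call above; the agent already holds it
    updates_per_episode = []
    agent.reset()
    for _ in range(n_episodes):
        before = agent.num_updates          # assumed counter of q-value updates
        agent.run_episode()                 # agent API seen in Example #3
        updates_per_episode.append(agent.num_updates - before)
    return sum(updates_per_episode) / n_episodes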
Example #2
def print_policy_delta(mdp, agent, agent_state, f=None):
    # display on a grid
    grid = print_grid(mdp)

    # the mdp's states use (x, y) with y increasing upward, while the grid is a
    # list of rows (higher y is lower in the list), so flip it before indexing
    # with grid[y][x]
    grid = grid[::-1]
    for state in mdp.get_states():
        x, y = state
        # show the best action for this state
        actions = mdp.get_possible_actions(state)
        q_values = [agent.get_q_value(state, a) for a in actions]
        if np.allclose(q_values, 1e-11):
            # all q-values are still (near) zero, so leave the cell unchanged
            marker = grid[y][x]
        else:
            # otherwise mark the greedy action
            marker = action_to_nwse(actions[np.argmax(q_values)])
        grid[y][x] = marker

    grid = grid[::-1]  # flip back to display orientation

    x, y = agent_state
    grid[y][x] = 'X'

    print(tabulate(grid, tablefmt='grid'), file=f)
    return grid
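# Note (editorial sketch): action_to_nwse is not shown in these snippets. A
# plausible stand-in, assuming actions are compass-direction names, is below;
# the real helper and its action encoding may differ.
def action_to_nwse_sketch(action):
    return {'north': 'N', 'west': 'W', 'south': 'S', 'east': 'E'}.get(action, '?')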
Example #3
def fig_8_3():
    mdp = Gridworld()
    print_grid(mdp)

    n_runs = 30
    n_episodes = 50
    planning_steps = [0, 5, 50]

    agents = [
        DynaQAgent(mdp=mdp,
                   n_planning_steps=n,
                   alpha=0.1,
                   epsilon=0.1,
                   discount=0.95) for n in planning_steps
    ]

    steps_per_episode = np.zeros((len(agents), n_runs, n_episodes))

    for i, a in enumerate(agents):
        for j in tqdm(range(n_runs)):
            np.random.seed(29)
            a.reset()
            for k in range(n_episodes):
                states, actions, rewards = a.run_episode()
                steps_per_episode[i, j, k] = len(states)

    steps_per_episode = np.mean(steps_per_episode, axis=1)

    for i, a in enumerate(agents):
        # drop the first episode so the x-axis starts at episode 2
        plt.plot(np.arange(2, n_episodes + 1),
                 steps_per_episode[i, 1:],
                 label='{} planning steps'.format(a.n_planning_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Steps per episode')
    plt.legend(loc='upper right')

    plt.savefig('figures/ch08_fig_8_3.png')
    plt.close()
Example #4
def test_dijkstra():
    mdp = ResGridworld((2, 4))
    print_grid(mdp)
    optimal_path = dijkstra(mdp)
    print_path(mdp, optimal_path)
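# Note (editorial sketch): dijkstra(mdp) is not included in these snippets. As a
# self-contained illustration of the technique (independent of the repo's MDP
# API), here is Dijkstra's algorithm on a plain 2-D character grid with unit
# step costs, where '#' marks a wall; all names below are illustrative.
import heapq

def dijkstra_grid(grid, start, goal):
    dist = {start: 0}
    prev = {}
    frontier = [(0, start)]                        # (distance, (x, y)) min-heap
    while frontier:
        d, (x, y) = heapq.heappop(frontier)
        if (x, y) == goal:
            break
        if d > dist[(x, y)]:
            continue                               # stale heap entry
        for dx, dy in ((0, 1), (0, -1), (1, 0), (-1, 0)):
            nx, ny = x + dx, y + dy
            if 0 <= ny < len(grid) and 0 <= nx < len(grid[0]) and grid[ny][nx] != '#':
                nd = d + 1                         # unit step cost
                if nd < dist.get((nx, ny), float('inf')):
                    dist[(nx, ny)] = nd
                    prev[(nx, ny)] = (x, y)
                    heapq.heappush(frontier, (nd, (nx, ny)))
    # walk predecessors back from the goal to recover the path
    path, node = [goal], goal
    while node != start:
        node = prev[node]
        path.append(node)
    return path[::-1]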
Example #5
from gridworld import create_grid_world, print_grid

print_grid(create_grid_world())
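# Note (editorial sketch): the body of print_grid is not reproduced here.
# Judging from Example #2 it renders the world as a character grid (via
# tabulate) and returns that grid. A rough stand-in under assumed attributes
# (mdp.width, mdp.height, mdp.goal) might look like this; the real
# gridworld.print_grid may differ in both API and markers.
from tabulate import tabulate

def print_grid_sketch(mdp):
    grid = [['.'] * mdp.width for _ in range(mdp.height)]
    gx, gy = mdp.goal                            # assumed goal coordinates
    grid[mdp.height - 1 - gy][gx] = 'G'          # draw higher y nearer the top
    print(tabulate(grid, tablefmt='grid'))
    return grid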
Example #6
        # Check if game is over #
        if done:
            break
    print(f"Episode {e + 1}: total reward -> {total_reward}")


#####################
# Solve the problem #
#####################
done = False
my_state = start_state
counter = 0
while not done:
    print_grid(my_state.grid)

    # act greedily with respect to the learned q-values
    sa = utils.q(q_table, ACTIONS, my_state)
    my_action = np.argmax(sa)
    my_state, reward, done = utils.act(my_state, my_action)

    counter += 1
    if counter > 15:
        break

print_grid(my_state.grid)
if counter > 15:
    print('Your agent has probably entered a loop and will not be able to '
          'complete the game. Aborting mission.')

##################
# What I learned #
##################
print()
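# Note (editorial sketch): the utils module is not shown. The greedy rollout
# above only needs a per-state q-value lookup; a hypothetical version of that
# lookup, assuming q_table is a dict keyed by (state, action) pairs, is below.
# The real utils.q may organize its table differently.
def q_sketch(q_table, actions, state):
    return np.array([q_table.get((state, a), 0.0) for a in actions])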