def test_run_experiment():
    mdp = ResGridworld((1, 1))
    print_grid(mdp)
    agent = PrioritizedSweepingAgent(mdp=mdp,
                                     n_planning_steps=5,
                                     theta=1e-5,
                                     alpha=0.6,
                                     epsilon=0.15,
                                     discount=0.95)
    avg_num_updates = run_experiment(mdp, agent, 5)
    print(avg_num_updates)
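# For context, theta above is the priority threshold used by prioritized
# sweeping: after each real step the magnitude of the one-step Bellman error
# for (state, action) is computed, and the pair is queued for planning only if
# that magnitude exceeds theta, highest priority first. Below is a minimal
# sketch of that priority test, assuming a dict-style Q-table; the actual
# PrioritizedSweepingAgent internals may differ.
import heapq

def push_if_significant(pqueue, q, s, a, r, s_next, actions, theta, gamma=0.95):
    # priority is the absolute one-step Bellman error for (s, a)
    target = r + gamma * max(q.get((s_next, a2), 0.0) for a2 in actions)
    priority = abs(target - q.get((s, a), 0.0))
    if priority > theta:
        # heapq is a min-heap, so store the negated priority
        heapq.heappush(pqueue, (-priority, (s, a)))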
def print_policy_delta(mdp, agent, agent_state, f=None):
    # display the current greedy policy on a grid
    grid = print_grid(mdp)
    # the mdp keeps numpy indexing, so flip the grid to match
    grid = grid[::-1]
    for state in mdp.get_states():
        # note: higher y is lower in the list, so invert to match the grid coordinates
        x, y = state
        actions = mdp.get_possible_actions(state)
        q_values = [agent.get_q_value(state, a) for a in actions]
        if np.allclose(q_values, 1e-11):
            # all q-values are still (effectively) zero, so leave the cell unchanged
            marker = grid[y][x]
        else:
            # show the best action for this state
            marker = action_to_nwse(actions[np.argmax(q_values)])
        # update the grid with the marker
        grid[y][x] = marker
    # flip back to display orientation and mark the agent's current position
    grid = grid[::-1]
    x, y = agent_state
    grid[y][x] = 'X'
    print(tabulate(grid, tablefmt='grid'), file=f)
    return grid
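# action_to_nwse is used above but not defined in this snippet. A plausible
# minimal version is sketched below, assuming actions are the strings
# 'north'/'west'/'south'/'east'; the real gridworld code may encode actions
# differently (e.g. as integers or (dx, dy) tuples).
def action_to_nwse(action):
    # map an action name to a single-character compass marker for the grid
    return {'north': 'N', 'west': 'W', 'south': 'S', 'east': 'E'}.get(action, '?')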
def fig_8_3():
    mdp = Gridworld()
    print_grid(mdp)

    n_runs = 30
    n_episodes = 50
    planning_steps = [0, 5, 50]
    agents = [
        DynaQAgent(mdp=mdp,
                   n_planning_steps=n,
                   alpha=0.1,
                   epsilon=0.1,
                   discount=0.95) for n in planning_steps
    ]

    steps_per_episode = np.zeros((len(agents), n_runs, n_episodes))
    for i, a in enumerate(agents):
        for j in tqdm(range(n_runs)):
            # note: re-seeding with a fixed value makes every run deterministic and identical
            np.random.seed(29)
            a.reset()
            for k in range(n_episodes):
                states, actions, rewards = a.run_episode()
                steps_per_episode[i, j, k] = len(states)
    # average over runs
    steps_per_episode = np.mean(steps_per_episode, axis=1)

    # skip the first episode when plotting
    for i, a in enumerate(agents):
        plt.plot(np.arange(1, n_episodes),
                 steps_per_episode[i, 1:],
                 label='{} planning steps'.format(a.n_planning_steps))
    plt.xlabel('Episodes')
    plt.ylabel('Steps per episode')
    plt.legend(loc='upper right')
    plt.savefig('figures/ch08_fig_8_3.png')
    plt.close()
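# For context, n_planning_steps above controls how many simulated updates
# Dyna-Q performs after each real environment step, replaying transitions from
# its learned model. Below is a minimal sketch of that planning loop, assuming
# a deterministic model stored as a dict (state, action) -> (reward, next_state)
# and a dict Q-table; the actual DynaQAgent internals may differ.
import random

def dyna_q_planning_sketch(q, model, actions, n_planning_steps,
                           alpha=0.1, gamma=0.95):
    for _ in range(n_planning_steps):
        # sample a previously observed (state, action) pair from the model
        (s, a), (r, s_next) = random.choice(list(model.items()))
        # one-step Q-learning update on the simulated transition
        next_best = max(q.get((s_next, a2), 0.0) for a2 in actions)
        old = q.get((s, a), 0.0)
        q[(s, a)] = old + alpha * (r + gamma * next_best - old)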
def test_dijkstra():
    mdp = ResGridworld((2, 4))
    print_grid(mdp)
    optimal_path = dijkstra(mdp)
    print_path(mdp, optimal_path)
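# For reference, a self-contained sketch of Dijkstra's shortest-path search on
# a grid with blocked cells. The real dijkstra(mdp) above presumably walks the
# gridworld's own transition model; the grid size, wall set, start, and goal
# below are made-up values for illustration only.
import heapq

def dijkstra_sketch(width, height, walls, start, goal):
    # frontier holds (cost-so-far, cell); best maps cell -> (cost, predecessor)
    frontier = [(0, start)]
    best = {start: (0, None)}
    while frontier:
        cost, (x, y) = heapq.heappop(frontier)
        if (x, y) == goal:
            break
        for nxt in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
            nx, ny = nxt
            if 0 <= nx < width and 0 <= ny < height and nxt not in walls:
                new_cost = cost + 1  # unit cost per move
                if nxt not in best or new_cost < best[nxt][0]:
                    best[nxt] = (new_cost, (x, y))
                    heapq.heappush(frontier, (new_cost, nxt))
    if goal not in best:
        return []  # goal unreachable
    # reconstruct the path by following predecessors back from the goal
    path, node = [], goal
    while node is not None:
        path.append(node)
        node = best[node][1]
    return path[::-1]

# example: 5x4 grid with two walls, shortest path from (0, 0) to (4, 3)
# print(dijkstra_sketch(5, 4, {(1, 1), (1, 2)}, (0, 0), (4, 3)))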
from gridworld import create_grid_world, print_grid

print_grid(create_grid_world())
        # Check if game is over
        # if done: break

    print(f"Episode {e + 1}: total reward -> {total_reward}")

#####################
# Solve the problem #
#####################
done = False
my_state = start_state
counter = 0
while not done:
    print_grid(my_state.grid)
    # greedily follow the learned Q-table
    sa = utils.q(q_table, ACTIONS, my_state)
    my_action = np.argmax(sa)
    my_state, reward, done = utils.act(my_state, my_action)
    counter += 1
    if counter > 15:
        break
print_grid(my_state.grid)

if counter > 15:
    print('Your agent has probably entered a loop and will not be able to '
          'complete the game. Aborting mission.')

################
# What I learn #
################
print()
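# For reference, q_table above is presumably filled during training with the
# standard tabular Q-learning update,
#     Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)).
# The helper below is only a sketch: the q_table layout (a dict keyed by
# (state, action)) and the alpha/gamma defaults are assumptions and may not
# match utils.py.
def q_learning_update_sketch(q_table, state, action, reward, next_state,
                             actions, alpha=0.1, gamma=0.95):
    # current estimate and the best value attainable from the next state
    old = q_table.get((state, action), 0.0)
    next_best = max(q_table.get((next_state, a), 0.0) for a in actions)
    # move the estimate toward the one-step bootstrapped target
    q_table[(state, action)] = old + alpha * (reward + gamma * next_best - old)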