obstructions = np.array([[0, 7], [1, 1], [1, 2], [1, 3], [1, 7], [2, 1], [2, 3], [2, 7], [3, 1], [3, 3], [3, 5], [4, 3], [4, 5], [4, 7], [5, 3], [5, 7], [5, 9], [6, 3], [6, 9], [7, 1], [7, 6], [7, 7], [7, 8], [7, 9], [8, 1], [8, 5], [8, 6], [9, 1]]) bad_states = np.array([[1, 9], [4, 2], [4, 4], [7, 5], [9, 9]]) restart_states = np.array([[3, 7], [8, 2]]) start_state = np.array([[0, 4]]) goal_states = np.array([[0, 9], [2, 2], [8, 7]]) # create model gw = GridWorld(num_rows=num_rows, num_cols=num_cols, start_state=start_state, goal_states=goal_states) gw.add_obstructions(obstructed_states=obstructions, bad_states=bad_states, restart_states=restart_states) gw.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6, restart_state_reward=-10) gw.add_transition_probability(p_good_transition=0.7, bias=0.5) gw.add_discount(discount=0.9) model = gw.create_gridworld() # plot world path = "../doc/imgs/unsolved_gridworld.png" plot_gridworld(model, title="Test world", path=path)
def test_value_iteration():
    """Check ``value_iteration`` against stored reference results.

    A 9x12 grid world with one wall of obstructions, one bad state, one start
    state and one goal state is built, solved with ``value_iteration``
    (``maxiter=100``), and compared with the reference array stored in
    ``test_value_iteration.npy``. Column 0 of that array is used as the
    expected value function and column 1 as the expected policy.
    """
    # load the test data
    vp = np.load("../data/test_data/test_value_iteration.npy")
    test_value = vp[:, 0].reshape(-1, 1)
    test_pi = vp[:, 1].reshape(-1, 1)

    # specify world parameters
    num_cols = 12
    num_rows = 9
    obstructions = np.array([[8, 6], [7, 6], [6, 6], [5, 6], [4, 6],
                             [3, 6], [3, 7], [3, 8], [3, 9]])
    bad_states = np.array([[2, 1]])
    start_state = np.array([[0, 1]])
    goal_state = np.array([[7, 8]])

    # create model
    gw = GridWorld(num_rows=num_rows,
                   num_cols=num_cols,
                   start_state=start_state,
                   goal_states=goal_state)
    gw.add_obstructions(obstructed_states=obstructions,
                        bad_states=bad_states)
    gw.add_rewards(step_reward=-1,
                   goal_reward=10,
                   bad_state_reward=-6)
    gw.add_transition_probability(p_good_transition=0.7,
                                  bias=0.5)
    gw.add_discount(discount=0.9)
    model = gw.create_gridworld()

    # solve with value iteration
    value_function, pi = value_iteration(model, maxiter=100)

    # test value iteration outputs
    # The value function is floating point: compare within numpy's default
    # tolerance so harmless rounding differences between platforms or library
    # versions do not fail the test. Anything that was bit-for-bit equal
    # still passes.
    assert np.allclose(value_function, test_value)
    # The policy is still required to match the reference exactly.
    assert np.all(pi == test_pi)
def test_gridworld():
    """Compare a freshly built 9x12 grid world with the stored reference.

    The reference model is read from ``gridworld.mat``. The reward vector
    ``R`` and each of the four slices along the last axis of ``P`` must match
    the reference element for element.
    """
    # reference model stored in a .mat file
    reference = loadmat('../data/test_data/gridworld.mat')['model']

    # world layout: a wall of obstructions, one bad state, one start, one goal
    walls = np.array([
        [8, 6], [7, 6], [6, 6], [5, 6], [4, 6],
        [3, 6], [3, 7], [3, 8], [3, 9],
    ])
    hazards = np.array([[2, 1]])
    start = np.array([[0, 1]])
    goal = np.array([[7, 8]])

    # assemble the model step by step
    world = GridWorld(num_rows=9, num_cols=12,
                      start_state=start, goal_states=goal)
    world.add_obstructions(obstructed_states=walls, bad_states=hazards)
    world.add_rewards(step_reward=-1, goal_reward=10, bad_state_reward=-6)
    world.add_transition_probability(p_good_transition=0.7, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference R, reshaped to a column vector
    expected_r = reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(model.R == expected_r)

    # transition tensor: check the four slices of the last axis one by one
    # (presumably one slice per action -- confirm against GridWorld)
    expected_p = reference['P'][0][0]
    for k in range(4):
        assert np.all(model.P[:, :, k] == expected_p[:, :, k])
def test_qlearning():
    """Check ``qlearning`` against stored reference results.

    numpy's global random generator is seeded with 1 first, so the stored
    reference presumably relies on that seed for reproducibility. A 4x4 grid
    world with obstacles, one bad state and one restart state is built and
    solved with ``qlearning``. The first two returned values are compared
    with columns 0 and 1 of ``test_qlearning.npy``; the third returned value
    is ignored.
    """
    # fix the seed before anything else runs
    np.random.seed(1)

    # load the test data
    vp = np.load("../data/test_data/test_qlearning.npy")
    test_q = vp[:, 0].reshape(-1, 1)
    test_pi = vp[:, 1].reshape(-1, 1)

    # specify world parameters
    num_rows = 4
    num_cols = 4
    obstacles = np.array([[1, 1], [2, 1], [1, 2]])
    bad_states = np.array([[3, 0]])
    restart_state = np.array([[2, 2]])
    start_state = np.array([[0, 0]])
    goal_state = np.array([[3, 3]])

    # create world
    gw = GridWorld(num_rows=num_rows,
                   num_cols=num_cols,
                   start_state=start_state,
                   goal_states=goal_state)
    gw.add_obstructions(obstructed_states=obstacles,
                        bad_states=bad_states,
                        restart_states=restart_state)
    gw.add_rewards(step_reward=-1,
                   goal_reward=10,
                   bad_state_reward=-6,
                   restart_state_reward=-10)
    gw.add_transition_probability(p_good_transition=0.8,
                                  bias=0.5)
    gw.add_discount(discount=0.9)
    model = gw.create_gridworld()

    # solve with Q-learning
    q, pi, _ = qlearning(model, alpha=0.8, epsilon=0.1,
                         maxiter=100, maxeps=1000)

    # test Q-learning outputs
    # q is floating point: compare within numpy's default tolerance so
    # harmless rounding differences between platforms or library versions do
    # not fail the test. Anything that was bit-for-bit equal still passes.
    assert np.allclose(q, test_q)
    # The policy is still required to match the reference exactly.
    assert np.all(pi == test_pi)
def test_cliffworld():
    """Compare a freshly built 5x10 cliff world with the stored reference.

    The reference model is read from ``cliffworld.mat``. Row 4, columns 1-7
    are restart states, column 9 is fully obstructed, the start is at (4, 0)
    and the goal at (4, 8). The reward vector ``R`` and each of the four
    slices along the last axis of ``P`` must match the reference element for
    element.
    """
    # reference model stored in a .mat file
    reference = loadmat('../data/test_data/cliffworld.mat')['model']

    # world layout
    cliff = np.array([[4, col] for col in range(1, 8)])   # restart states
    wall = np.array([[row, 9] for row in range(5)])       # obstructed states
    start = np.array([[4, 0]])
    goal = np.array([[4, 8]])

    # assemble the model step by step
    world = GridWorld(num_rows=5, num_cols=10,
                      start_state=start, goal_states=goal)
    world.add_obstructions(obstructed_states=wall, restart_states=cliff)
    world.add_rewards(step_reward=-1, goal_reward=10,
                      restart_state_reward=-100)
    world.add_transition_probability(p_good_transition=1, bias=0)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference R, reshaped to a column vector
    expected_r = reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(model.R == expected_r)

    # transition tensor: check the four slices of the last axis one by one
    # (presumably one slice per action -- confirm against GridWorld)
    expected_p = reference['P'][0][0]
    for k in range(4):
        assert np.all(model.P[:, :, k] == expected_p[:, :, k])
def test_smallworld():
    """Compare a freshly built 4x4 world with the stored reference.

    The reference model is read from ``smallworld.mat``. The world has three
    obstructed cells, a start at (0, 0) and a goal at (3, 3). The reward
    vector ``R`` and each of the four slices along the last axis of ``P``
    must match the reference element for element.
    """
    # reference model stored in a .mat file
    reference = loadmat('../data/test_data/smallworld.mat')['model']

    # world layout
    blocked = np.array([[1, 1], [2, 1], [1, 2]])
    start = np.array([[0, 0]])
    goal = np.array([[3, 3]])

    # assemble the model step by step
    world = GridWorld(num_rows=4, num_cols=4,
                      start_state=start, goal_states=goal)
    world.add_obstructions(obstructed_states=blocked)
    world.add_rewards(step_reward=-1, goal_reward=10)
    world.add_transition_probability(p_good_transition=0.8, bias=0.5)
    world.add_discount(discount=0.9)
    model = world.create_gridworld()

    # rewards: first column of the reference R, reshaped to a column vector
    expected_r = reference['R'][0][0][:, 0].reshape(-1, 1)
    assert np.all(model.R == expected_r)

    # transition tensor: check the four slices of the last axis one by one
    # (presumably one slice per action -- confirm against GridWorld)
    expected_p = reference['P'][0][0]
    for k in range(4):
        assert np.all(model.P[:, :, k] == expected_p[:, :, k])