Пример #1
0
    def test_lava(self):
        """
        Run the agent into lava and check that the episode ends with a negative reward.

        The environment is created with state 1 marked as lava and the agent takes a
        single 'RIGHT' action; that step is expected to return reward -10 and done=True.
        """
        env = GridUniverseEnv(lava_states=[1])
        env.render()

        move_right = env.action_descriptor_to_int['RIGHT']
        _, reward, done, _ = env.step(move_right)

        # Two separate assertions so a failure reports which expectation was violated
        # (and, for the reward, the value that was actually returned).
        self.assertEqual(reward, -10)
        self.assertTrue(done)
def run_monte_carlo_evaluation():
    """
    Evaluate a uniform random policy with Monte Carlo sampling, then build a greedy
    policy from the estimated value function and follow it on the environment.
    """
    banner_edge = '*' * 20
    print('\n' + banner_edge +
          'Starting Monte Carlo evaluation and greedy policy' + banner_edge +
          '\n')

    # A randomly generated maze is used here; GridUniverseEnv(grid_shape=world_shape)
    # would give the default GridUniverse instead.
    world_shape = (8, 8)
    env = GridUniverseEnv(world_shape, random_maze=True)

    # Initial policy: every action has the same probability in every state.
    num_actions = env.action_space.n
    random_policy = np.ones([env.world.size, num_actions]) / num_actions

    print('Running an episode with a random agent (with initial policy)')
    _states, _rewards, _finished = run_episode(random_policy, env)

    print('Starting Monte-Carlo evaluation of random policy')
    estimated_value = monte_carlo_evaluation(random_policy,
                                             env,
                                             every_visit=True,
                                             num_episodes=30)
    print(estimated_value)

    # Create greedy policy from value function and run it on environment
    greedy_policy = utils.greedy_policy_from_value_function(
        random_policy, env, estimated_value)
    print(greedy_policy)

    policy_map = utils.get_policy_map(greedy_policy, world_shape)
    print('Policy: (up, right, down, left)\n', policy_map)
    np.set_printoptions(linewidth=75, precision=8)

    print('Starting greedy policy episode')
    state = env.reset()
    env.render_policy_arrows(greedy_policy)

    for step in range(500):
        env.render(mode='graphic')

        action = np.argmax(greedy_policy[state])
        print('go ' + env.action_descriptors[action])
        state, _, done, _ = env.step(action)

        if not done:
            continue

        print('Terminal state found in {} steps'.format(step + 1))
        # Render once more so the agent is shown in its final state before pausing.
        env.render(mode='graphic')
        time.sleep(5)
        break
Пример #3
0
    def test_custom_griduniverse_from_text_file(self):
        """
        Test whether the GridUniverse created from the text file is completed by the
        scripted action sequence, with the episode ending exactly on the last action.
        """
        env = GridUniverseEnv(custom_world_fp='../core/envs/maze_text_files/test_env.txt')
        actions_to_take = [2, 2, 2, 2, 2, 2, 2, 1]

        done = False
        steps_taken = 0
        for steps_taken, action in enumerate(actions_to_take, start=1):
            env.render()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(steps_taken))
                # Stop as soon as the episode ends: without the break the step count
                # checked below would always equal len(actions_to_take), and the
                # environment would be stepped after it had already finished.
                break

        self.assertTrue(done)
        self.assertEqual(steps_taken, len(actions_to_take))
Пример #4
0
    def test_lava_works_from_text_file(self):
        """
        Test whether we can end the episode by making the agent travel into lava in an
        environment created from a text file.

        The agent follows a scripted DOWN, DOWN, DOWN, RIGHT, RIGHT sequence; the step
        that ends the episode is expected to return reward -10.
        """
        env = GridUniverseEnv(custom_world_fp='../core/envs/maze_text_files/test_env.txt')
        action_names = ['DOWN', 'DOWN', 'DOWN', 'RIGHT', 'RIGHT']
        actions_to_take = [env.action_descriptor_to_int[name] for name in action_names]

        reward, done = None, False
        for step_no, action in enumerate(actions_to_take):
            env.render()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(step_no + 1))
                # Do not keep stepping a finished episode: the reward/done checked
                # below must come from the step that actually ended it.
                break

        self.assertEqual(reward, -10)
        self.assertTrue(done)
Пример #5
0
    def test_terminal_out_of_bounds_error(self):
        """
        The default environment has 16 states (0-15), so requesting goal state 16
        is expected to raise IndexError on construction.
        """
        out_of_range_goal = [16]
        self.assertRaises(IndexError, GridUniverseEnv, goal_states=out_of_range_goal)
Пример #6
0
    def test_large_griduniverse_completion_in_53_steps(self):
        """
        Test whether the agent completes a large rectangular GridUniverse in the expected 53 steps
        """
        env = GridUniverseEnv(grid_shape=(25, 30))

        actions_to_take = [1] * 24 + [2] * 29  # 24 steps right + 29 steps down
        num_actions = len(actions_to_take)
        print('Num actions to take to get to terminal state: {}'.format(num_actions))

        done = False
        steps_taken = 0
        # Iterate over the scripted actions themselves rather than a fixed range(100):
        # if the episode never ends, the test then fails on the assertions below
        # instead of crashing with an IndexError past the end of the action list.
        for steps_taken, action in enumerate(actions_to_take, start=1):
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(steps_taken))
                break

        self.assertTrue(done)
        self.assertEqual(steps_taken, num_actions)
Пример #7
0
    def test_incorrect_parameter_types(self):
        """
        Check that constructing the environment with badly typed arguments raises TypeError.

        goal_states, lava_states and walls are each given a non-list value; grid_shape is
        given a 3-element tuple, a set, a list containing a float, and a bare int.
        """

        # todo don't show errors/red writing (env.close() is always called) for testing aesthetics
        expect_type_error = self.assertRaises

        # goal_states / lava_states / walls that are not lists
        expect_type_error(TypeError, GridUniverseEnv, goal_states=5.0)
        expect_type_error(TypeError, GridUniverseEnv, lava_states='a')
        expect_type_error(TypeError, GridUniverseEnv, walls='aaaa')

        # grid_shape with over 2 dimensions
        expect_type_error(TypeError, GridUniverseEnv, grid_shape=(2, 2, 2))

        # grid_shape that is not a tuple/list
        expect_type_error(TypeError, GridUniverseEnv, grid_shape={2, 3})

        # grid_shape containing a non-int
        expect_type_error(TypeError, GridUniverseEnv, grid_shape=[2, 2.0])

        # grid_shape given as a single int
        expect_type_error(TypeError, GridUniverseEnv, grid_shape=2)
Пример #8
0
    def test_default_griduniverse_completion_in_six_steps(self):
        """
        Test whether the agent reaches a terminal state within the
        default square GridUniverse within six steps by going right 3 times
        and then going down 3 times.
        """
        env = GridUniverseEnv()

        actions_to_take = [1, 1, 1, 2, 2, 2]  # 3 rights and 3 downs

        done = False
        steps_taken = 0
        # Walk the scripted actions directly (not range(100)) so that an episode which
        # never terminates produces an assertion failure rather than an IndexError.
        for steps_taken, action in enumerate(actions_to_take, start=1):
            env.render()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(steps_taken))
                break

        # Asserted outside the loop so the test cannot pass without checking anything.
        self.assertTrue(done)
        self.assertEqual(steps_taken, 6)
Пример #9
0
    def test_each_boundary_within_default_env(self):
        """
        The agent follows a sequence of steps to check each boundary acts as expected (the current
        observation should be the same as the previous if you move into a boundary).
        The agent tries the top-left, top-right and bottom-right corners while avoiding the Terminal state.
        The step numbers where the agent ends up in the same state as previously
        are collected and must match the expected step numbers exactly for the test to pass.
        """
        env = GridUniverseEnv()

        # Action indices as noted by the original author for env.action_descriptors:
        # ['up', 'right', 'down', 'left']
        actions_to_take = [3, 0, 1, 1, 1, 1, 2, 2, 3, 2, 2, 3, 3, 3]
        boundary_test_step_numbers = [0, 1, 5, 10, 13]
        collected_boundary_step_numbers = []
        prev_observation = env.reset()
        for step_no, action in enumerate(actions_to_take):
            env.render()
            print('go ' + env.action_descriptors[action])
            observation, reward, done, info = env.step(action)

            # An unchanged observation means the move was blocked by a boundary.
            if observation == prev_observation:
                collected_boundary_step_numbers.append(step_no)

            prev_observation = observation

        print('collected_boundary_steps:', collected_boundary_step_numbers)
        print('boundary_test_steps', boundary_test_step_numbers)

        # Compare the whole lists: an element-wise zip() comparison stops at the shorter
        # list and would pass vacuously if fewer (or no) boundary hits were collected.
        self.assertEqual(collected_boundary_step_numbers, boundary_test_step_numbers)
Пример #10
0
    def test_griduniverse_wall_not_trespassed(self):
        """
        Test whether agent is still in the same place after moving into a wall.

        State 1 is configured as a wall and the agent takes action 1 (go right);
        the observation returned by that step is expected to still be state 0.
        """
        env = GridUniverseEnv(walls=[1])
        env.render()
        action = 1  # go right

        observation, reward, done, info = env.step(action)
        print('go ' + env.action_descriptors[action])

        env.render()
        # assertEqual reports the state the agent actually ended up in on failure.
        self.assertEqual(observation, 0)
def run_griduniverse_with_lava():
    """
    Run a random agent for five episodes on a 10x10 GridUniverse that contains lava states.

    Each episode is capped at 100 steps. When an episode ends, the number of steps taken
    and the reward returned by the final step are printed.
    """

    # The banner names the lava environment; it previously said "default GridUniverse",
    # copied from the default-environment demo.
    print('\n' + '*' * 20 + 'Starting to run random agent on GridUniverse with lava' + '*' * 20 + '\n')
    env = GridUniverseEnv(grid_shape=(10, 10), lava_states=[4, 14, 24, 34, 44, 54, 64, 74])
    for i_episode in range(5):
        env.reset()
        for t in range(100):
            env.render(mode='graphic')
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                print('Final states reward: ', reward)
                break
def run_random_maze():
    """
    Let a random agent wander through a randomly generated 11x11 maze.

    The environment is created with random_maze=True, which asks it to generate the maze
    for the requested grid shape. A single episode of at most 1000 steps is run.
    """
    stars = '*' * 20
    print('\n' + stars + 'Creating a random GridUniverse and running random agent on it' + stars + '\n')

    # Other grid shapes previously tried here: (101, 101), (49, 51), (51, 49).
    env = GridUniverseEnv(grid_shape=(11, 11), random_maze=True)

    for _episode in range(1):
        env.reset()
        for step in range(1000):
            env.render(mode='graphic')
            # The current step index is stored on the environment before acting.
            env.step_num = step
            chosen_action = env.action_space.sample()
            _, _, done, _ = env.step(chosen_action)
            if not done:
                continue
            print("Episode finished after {} timesteps".format(step + 1))
            break
def run_default_griduniverse():
    """
    Run a random agent for one episode on the default GridUniverse.

    This demonstrates the basic interaction loop with the environment (reset, render,
    sample an action, step). render() is called without a mode here; passing
    mode='graphic' selects the pyglet renderer instead.
    """
    stars = '*' * 20
    print('\n' + stars + 'Starting to run random agent on default GridUniverse' + stars + '\n')

    env = GridUniverseEnv()
    for _episode in range(1):
        env.reset()
        for step in range(100):
            env.render()
            chosen_action = env.action_space.sample()
            print('go ' + env.action_descriptors[chosen_action])
            _, _, done, _ = env.step(chosen_action)

            if not done:
                continue
            print("Episode finished after {} timesteps".format(step + 1))
            break
def run_griduniverse_from_text_file():
    """
    Run a random agent on an environment that was saved as an ASCII text file.

    See core/envs/maze_text_files for example files, or the
    _create_custom_world_from_text() function within the environment.
    A single episode of at most 1000 steps is run with graphic rendering.
    """
    stars = '*' * 20
    print('\n' + stars + 'Creating a pre-made GridUniverse from text file and running random agent on it' + stars + '\n')

    # maze_21x21.txt and maze_101x101.txt in the same directory were the other
    # files tried here.
    world_file = '../core/envs/maze_text_files/test_env.txt'
    env = GridUniverseEnv(custom_world_fp=world_file)

    for _episode in range(1):
        env.reset()
        for step in range(1000):
            env.render(mode='graphic')
            chosen_action = env.action_space.sample()
            # Add a time.sleep(0.1) here to watch the agent more slowly.
            _, _, done, _ = env.step(chosen_action)
            if not done:
                continue
            print("Episode finished after {} timesteps".format(step + 1))
            break
def _uniform_random_policy(env):
    """Return a new policy array giving every action equal probability in every state."""
    num_actions = len(env.action_state_to_next_state)
    return np.ones([env.world.size, num_actions]) / num_actions


def _print_policy_maps(policy, world_shape):
    """
    Print the policy map of `policy` twice: first with the current numpy print options,
    then with a wider line width and lower precision. Print options are set to
    linewidth=75, precision=8 afterwards.
    """
    print('Policy: (0=up, 1=right, 2=down, 3=left)\n',
          utils.get_policy_map(policy, world_shape))
    np.set_printoptions(linewidth=75 * 2, precision=4)
    print('Policy: (up, right, down, left)\n',
          utils.get_policy_map(policy, world_shape))
    np.set_printoptions(linewidth=75, precision=8)


def _print_solution(value_function, policy, world_shape):
    """Print a value function reshaped to the grid, followed by the policy maps."""
    print('Value:\n', utils.reshape_as_griduniverse(value_function,
                                                    world_shape))
    _print_policy_maps(policy, world_shape)


def run_policy_and_value_iteration():
    """
    Majority of code is within utils.py and dynamic_programming.py for this function
    This function does 5 things:

    1. Evaluate the value function of a random policy a number of times
    2. Create a greedy policy from this value function
    3. Run Policy Iteration
    4. Run Value Iteration
    5. Run agent on environment on policy found from Value Iteration
    """
    print('\n' + '*' * 20 + 'Starting value and policy iteration' + '*' * 20 +
          '\n')

    # 1. Evaluate the value function of a random policy a number of times
    # Alternative 4x4 environments that were tried here (with world_shape = (4, 4)):
    #   GridUniverseEnv(grid_shape=world_shape, goal_states=[3, 12])  # Sutton and Barto/David Silver example
    #   a lava world with a path through it:
    #   GridUniverseEnv(grid_shape=world_shape,
    #                   lava_states=[i for i in range(15) if i not in [0, 4, 8, 12, 13, 14, 15]])
    world_shape = (11, 11)
    env = GridUniverseEnv(grid_shape=world_shape, random_maze=True)
    policy0 = _uniform_random_policy(env)
    v0 = np.zeros(env.world.size)
    val_fun = v0
    for _ in range(500):
        val_fun = utils.single_step_policy_evaluation(policy0,
                                                      env,
                                                      value_function=val_fun)
    print(utils.reshape_as_griduniverse(val_fun, world_shape))

    # 2. Create a greedy policy from this value function
    policy1 = utils.greedy_policy_from_value_function(policy0, env, val_fun)
    _print_policy_maps(policy1, world_shape)

    # 3. Run Policy Iteration
    print('Policy iteration:')
    # A fresh uniform policy is built for each algorithm, as the original code did.
    policy0 = _uniform_random_policy(env)
    optimal_value, optimal_policy = dp.policy_iteration(policy0,
                                                        env,
                                                        v0,
                                                        threshold=0.001,
                                                        max_steps=1000)
    _print_solution(optimal_value, optimal_policy, world_shape)

    # 4. Run Value Iteration
    print('Value iteration:')
    policy0 = _uniform_random_policy(env)
    optimal_value, optimal_policy = dp.value_iteration(policy0,
                                                       env,
                                                       v0,
                                                       threshold=0.001,
                                                       max_steps=100)
    _print_solution(optimal_value, optimal_policy, world_shape)

    # 5. Run agent on environment on policy found from Value Iteration
    print('Starting to run agent on environment with optimal policy')
    curr_state = env.reset()
    env.render_policy_arrows(optimal_policy)

    # Dynamic programming doesn't necessarily have the concept of an agent.
    # But you can create an agent to run on the environment using the found policy
    for t in range(100):
        env.render(mode='graphic')

        action = np.argmax(optimal_policy[curr_state])
        print('go ' + env.action_descriptors[action])
        curr_state, reward, done, info = env.step(action)

        if done:
            print('Terminal state reached in {} steps'.format(t + 1))
            env.render(
                mode='graphic')  # must render here to see agent in final state
            time.sleep(6)
            env.render(close=True)
            break
Пример #16
0
# from queue import *
from queue import Queue
# import queue

from core.envs.griduniverse_env import GridUniverseEnv

# todo: could rename file to path_finding.py

# Script entry point. NOTE(review): this snippet appears to be cut off after the
# commented-out helper below, so only the visible setup is described here.
if __name__ == '__main__':
    print('\n' + '*' * 20 +
          'Creating a random GridUniverse and running random agent on it' +
          '*' * 20 + '\n')

    # NOTE(review): not read anywhere in the visible part of the script.
    first_time = True
    # Build ten independent 15x15 random mazes, one per iteration.
    for i in range(10):
        env = GridUniverseEnv(grid_shape=(15, 15), random_maze=True)
        # Remember the start state separately from the state being explored.
        curr_state = initial_state = env.reset()

        # Action indices 0-3 (presumably up/right/down/left - confirm against the env).
        actions = range(4)

        # Empty containers set up for the search; nothing in the visible code fills
        # them (their intended use is presumably in the truncated remainder).
        all_valid_states = []

        visited = {}
        nodes_and_edges = {}

        # Reference: https://en.wikipedia.org/wiki/A*_search_algorithm

        # def find_all_neighbouring_states(curr_state):
        #     nodes_and_edges[curr_state] = []
        #     for action in actions:
        #         # next_states = []
Пример #17
0
 def test_wrong_terminal_type_error(self):
     """
     Constructing the environment with a non-integer goal state ('a') is expected
     to raise IndexError.
     """
     non_integer_goal = ['a']
     self.assertRaises(IndexError, GridUniverseEnv, goal_states=non_integer_goal)