Example #1
    def test_reward_single_agent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])

        start_locations = ((0, 0), )
        goal_locations = ((4, 0), )

        determinstic_env = MapfEnv(grid, 1, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)
        total_reward = 0
        down_action = vector_action_to_integer((DOWN, ))
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        s, r, done, _ = determinstic_env.step(down_action)
        total_reward += r

        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_GOAL)

        self.assertEqual(total_reward, REWARD_OF_GOAL + 4 * REWARD_OF_LIVING)
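A minimal variant of the same idea, assuming a freshly constructed environment and the same names as in the test above, is to loop until the episode terminates instead of hard-coding four steps:

total_reward = 0
done = False
down_action = vector_action_to_integer((DOWN, ))
while not done:
    s, r, done, _ = determinstic_env.step(down_action)
    total_reward += r
# With fail probability 0, exactly four DOWN steps reach (4, 0), so total_reward
# should again equal REWARD_OF_GOAL + 4 * REWARD_OF_LIVING.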
Example #2
def test_corridor_switch_no_clash_possible(solver_describer: SolverDescriber):
    grid = MapfGrid(['...', '@.@'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((0, 2), (0, 0))

    # These parameters make sure the solver avoids collisions regardless of reward efficiency
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.1, -0.001, 0,
                  -1)

    info = {}
    policy = solver_describer.func(env, info)

    # Assert no conflict is possible
    interesting_state = env.locations_to_state(((1, 1), (0, 1)))
    expected_possible_actions = [
        vector_action_to_integer((STAY, UP)),
        vector_action_to_integer((DOWN, UP))
    ]

    assert policy.act(interesting_state) in expected_possible_actions

    # Check the policy performance
    reward, clashed, _ = evaluate_policy(policy, 100, 100)

    # Make sure no clash happened
    assert not clashed

    # Assert the reward is reasonable
    assert reward >= 100.0 * env.reward_of_living
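The evaluate_policy(policy, 100, 100) call above presumably runs episodes and reports the accumulated reward and whether a clash occurred. A rough, purely illustrative single-episode equivalent, assuming the usual Gym-style reset() (not shown in this snippet), could look like:

def rollout(policy, env, max_steps=100):
    # Illustrative sketch only; env.reset() is assumed to return the initial state.
    state = env.reset()
    total_reward = 0
    for _ in range(max_steps):
        state, reward, done, _ = env.step(policy.act(state))
        total_reward += reward
        if done:
            break
    return total_reward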
Example #3
    def test_similar_transitions_probability_summed(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0.1, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        a = vector_action_to_integer((STAY, STAY))
        self.assertEqual(env.P[env.s][a],
                         [((1, False), env.s, REWARD_OF_LIVING, False)])
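The test above shows that env.P[state][action] is a list of ((probability, collision), next_state, reward, done) tuples with identical outcomes merged. A small sanity-check sketch, assuming the standard discrete-environment attributes env.nS and env.nA (not shown in these snippets):

for state in range(env.nS):
    for action in range(env.nA):
        # Probabilities over all listed outcomes should sum to 1.
        total_prob = sum(prob for (prob, _), _, _, _ in env.P[state][action])
        assert abs(total_prob - 1.0) < 1e-9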
Example #4
    def test_against_the_wall(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        s = ((0, 0), (7, 7))

        new_state = execute_action(grid, s, (LEFT, RIGHT))
        self.assertEqual(new_state, ((0, 0), (7, 7)))
Example #5
    def test_against_obstacle_stays_in_place(self):
        grid = MapfGrid(['..@..', '..@..', '.....', '..@..', '..@..'])

        s = ((0, 1), )  # start near an obstacle.

        new_state = execute_action(grid, s, (RIGHT, ))
        self.assertEqual(
            new_state,
            ((0, 1), ))  # The agent hits an obstacle and should stay in place.
Example #6
    def test_stay_action(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        s = ((0, 0), (7, 7))

        new_state = execute_action(grid, s, (STAY, STAY))
        self.assertEqual(new_state, ((0, 0), (7, 7)))
Example #7
    def test_copy_mapf_env(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((4, 0), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        env.step(vector_action_to_integer((RIGHT, )))

        env_copy = copy(env)
        env_copy.step(vector_action_to_integer((RIGHT, )))
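        # Presumably the point here is that the copy is independent: stepping env_copy
        # should leave the original env's state untouched (not asserted explicitly).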
Example #8
    def test_moving_on_empty_grid(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        s = ((0, 0), (7, 7))

        new_state = execute_action(grid, s, (RIGHT, UP))
        self.assertEqual(new_state, ((0, 1), (6, 7)))

        new_state = execute_action(grid, s, (DOWN, LEFT))
        self.assertEqual(new_state, ((1, 0), (7, 6)))
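Taken together with the earlier execute_action tests, the assertions above imply the following (row, col) deltas for the movement actions; the dictionary below is only an illustrative summary, not the library's actual representation:

MOVE_DELTAS = {
    'UP': (-1, 0),     # (7, 7) -> (6, 7)
    'DOWN': (1, 0),    # (0, 0) -> (1, 0)
    'LEFT': (0, -1),   # (7, 7) -> (7, 6)
    'RIGHT': (0, 1),   # (0, 0) -> (0, 1)
    'STAY': (0, 0),    # position unchanged
}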
Example #9
    def test_berlin_1_256(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'Berlin_1_256/Berlin_1_256.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        self.assertTrue(grid[0, 0] is EmptyCell)
        self.assertTrue(grid[0, 104] is EmptyCell)
        self.assertTrue(grid[0, 105] is ObstacleCell)
        self.assertTrue(grid[0, 106] is ObstacleCell)
        self.assertTrue(grid[0, 107] is ObstacleCell)
        self.assertTrue(grid[0, 108] is ObstacleCell)
        self.assertTrue(grid[0, 109] is EmptyCell)
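Since indexing the grid yields either EmptyCell or ObstacleCell, the same identity checks can be re-expressed as a single dictionary comprehension; this sketch only restates the assertions above:

# Columns 104-109 of row 0, classified the same way the assertions do.
row0_is_obstacle = {col: (grid[0, col] is ObstacleCell) for col in range(104, 110)}
# Expected: columns 105-108 map to True, 104 and 109 map to False.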
Example #10
def create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                           reward_of_clash, reward_of_goal, reward_of_living,
                           optimization_criteria):
    single_room = ['.' * room_size] * room_size
    grid_lines = single_room[:]
    n_agents_per_room = int(n_agents / n_rooms)
    n_agents_last_room = n_agents - (n_agents_per_room * (n_rooms - 1))
    agents_starts = tuple()
    agents_goals = tuple()

    if n_agents_last_room == 0 or n_agents_per_room == 0:
        raise ValueError(
            f"asked for a sanity env with {n_rooms} rooms  and {n_agents} agents, There are redundant rooms"
        )

    # Concatenate the remaining n-1 rooms onto the first one
    for i in range(n_rooms - 1):
        # Add the extra room to the map
        for line_idx, line in enumerate(grid_lines[:-1]):
            grid_lines[line_idx] = line + '@@' + single_room[line_idx]

        grid_lines[-1] = grid_lines[-1] + '..' + single_room[-1]

    for i in range(n_rooms):
        # Set the new start and goal locations according to the current offset
        map_file, scen_file = map_name_to_files(
            f'empty-{room_size}-{room_size}', i % 25 + 1)
        if i != n_rooms - 1:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_per_room)
        else:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_last_room)

        new_agents_starts = tuple()
        for start in orig_agents_starts:
            new_start = (start[0], start[1] + (i) * (len(single_room[0]) + 2))
            new_agents_starts += (new_start, )

        new_agents_goals = tuple()
        for goal in orig_agents_goals:
            new_goal = (goal[0], goal[1] + (i) * (len(single_room[0]) + 2))
            new_agents_goals += (new_goal, )

        agents_starts += new_agents_starts
        agents_goals += new_agents_goals

    grid = MapfGrid(grid_lines)

    return MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                   reward_of_clash, reward_of_goal, reward_of_living,
                   optimization_criteria)
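A hypothetical call to create_sanity_mapf_env, with reward and probability values chosen purely for illustration (they are not mandated by the function itself):

env = create_sanity_mapf_env(n_rooms=2, room_size=8, n_agents=4, fail_prob=0,
                             reward_of_clash=-1000, reward_of_goal=100,
                             reward_of_living=-1,
                             optimization_criteria=OptimizationCriteria.Makespan)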
Example #11
    def test_empty_8_8(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        self.assertTrue(grid[0, 0] is EmptyCell)
        self.assertTrue(grid[1, 1] is EmptyCell)
        self.assertTrue(grid[0, 1] is EmptyCell)
        self.assertTrue(grid[2, 1] is EmptyCell)
        self.assertTrue(grid[7, 7] is EmptyCell)

        with self.assertRaises(IndexError):
            grid[8, 1]
Example #12
    def test_reward_multiagent_soc_stay_actions(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING, OptimizationCriteria.SoC)

        right_stay_stay = vector_action_to_integer((RIGHT, STAY, STAY))
        s, r, done, _ = determinstic_env.step(right_stay_stay)
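        # Under the SoC criterion each of the 3 agents contributes its living reward;
        # assuming REWARD_OF_LIVING is -1, the expected step reward is -3.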
        self.assertEqual(r, -3)
Example #13
def create_mapf_env(map_name, scen_id, n_agents, fail_prob, reward_of_clash,
                    reward_of_goal, reward_of_living, optimization_criteria):
    if map_name.startswith('sanity'):
        [n_rooms, room_size] = [int(n) for n in map_name.split('-')[1:]]
        return create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                                      reward_of_clash, reward_of_goal,
                                      reward_of_living, optimization_criteria)

    map_file, scen_file = map_name_to_files(map_name, scen_id)
    grid = MapfGrid(parse_map_file(map_file))
    agents_starts, agents_goals = parse_scen_file(scen_file, n_agents)
    n_agents = len(agents_goals)

    env = MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                  reward_of_clash, reward_of_goal, reward_of_living,
                  optimization_criteria)

    return env
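A hypothetical call to create_mapf_env, reusing a map name that appears elsewhere in these tests; the scenario id and reward values are illustrative assumptions:

env = create_mapf_env(map_name='empty-8-8', scen_id=1, n_agents=2, fail_prob=0,
                      reward_of_clash=-1, reward_of_goal=100, reward_of_living=-1,
                      optimization_criteria=OptimizationCriteria.Makespan)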
Example #14
    def test_switch_spots_is_a_collision(self):
        grid = MapfGrid(['..'])

        agents_starts = (
            (0, 0),
            (0, 1),
        )
        agents_goals = ((0, 1), (0, 0))

        determinstic_env = MapfEnv(grid, 2, agents_starts, agents_goals, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        s, r, done, _ = determinstic_env.step(
            vector_action_to_integer((RIGHT, LEFT)))

        # Assert the game terminated in a collision
        self.assertEqual(done, True)
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_CLASH)
Example #15
    def test_colliding_agents_state_is_terminal_and_negative_reward(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))

        grid = MapfGrid(parse_map_file(map_file_path))

        # agents start at (0, 0) and (0, 2)
        agent_starts = ((0, 0), (0, 2))
        agents_goals = ((7, 7), (5, 5))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)
        transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, LEFT))]
        ]

        self.assertIn(((0.64, True), env.locations_to_state(
            ((0, 1), (0, 1))), REWARD_OF_LIVING + REWARD_OF_CLASH, True),
                      set(transitions))
Example #16
    def test_action_from_terminal_state_has_no_effect(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        state, reward, done, _ = env.step(vector_action_to_integer((RIGHT, )))
        self.assertEqual(reward, REWARD_OF_LIVING)
        self.assertEqual(done, False)
        state, reward, done, _ = env.step(vector_action_to_integer((DOWN, )))
        self.assertEqual(reward, REWARD_OF_LIVING + REWARD_OF_GOAL)
        self.assertEqual(done, True)
        # Now, after the game is finished, do another step and make sure it has no effect.
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((UP, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
        # Step once more, as if still trying to reach the goal.
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((DOWN, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
Example #17
    def test_reward_multiagent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        total_reward = 0
        right_up_right = vector_action_to_integer((RIGHT, UP, RIGHT))
        s, r, done, _ = determinstic_env.step(right_up_right)
        total_reward += r
        self.assertFalse(done)

        stay_up_stay = vector_action_to_integer((STAY, UP, STAY))
        s, r, done, _ = determinstic_env.step(stay_up_stay)
        total_reward += r
        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertTrue(done)
        self.assertEqual(total_reward, 2 * REWARD_OF_LIVING + REWARD_OF_GOAL)
Example #18
    long_ma_rtdp_min_dijkstra_describer,
    id_rtdp_describer,
]

strong_tested_solvers = [
    long_rtdp_stop_no_improvement_sum_heuristic_describer,
    long_ma_rtdp_sum_pvi_describer, long_id_rtdp_sum_pvi_describer,
    long_rtdp_stop_no_improvement_min_dijkstra_heuristic_describer,
    long_rtdp_stop_no_improvement_sum_dijkstra_heuristic_describer,
    long_ma_rtdp_sum_dijkstra_describer
]

all_tested_solvers = weak_tested_solvers + mid_tested_solvers + strong_tested_solvers

easy_envs = [
    (MapfEnv(MapfGrid(['.' * 8] * 8), 1, ((7, 0), ), ((0, 7), ), 0.1, 0.1,
             -1000, 0, -1), 'empty_grid_single_agent'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0,
             -0.001, 0, -1), 'symmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0,
             -0.001, 100,
             -1), 'symmetrical bottle-neck deterministic large goal reward'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0.1,
             0.1, -0.001, 0, -1), 'symmetrical bottle-neck stochastic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..',
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
             -0.001, 0, -1), 'Asymmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..'
Example #19
    def test_transition_function_empty_grid(self):
        """Assert the basic steps are done right.

        * Define an empty 8x8 environment with two agents starting at (0,0),(7,7), whose goals are (0,2),(5,7).
        * Perform one (RIGHT, UP) step and assert that the transitions are correct.
        * Perform another (RIGHT, UP) step from the most probable next state from before ((0,1), (6,7)) and assert
            that the transitions are correct again, including the terminal one.
        """
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        # agents start at (0, 0) and (7, 7)
        agent_starts = ((0, 0), (7, 7))
        agents_goals = ((0, 2), (5, 7))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        first_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, UP))]
        ]

        self.assertEqual(
            set(first_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 0), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 0), (6, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })

        wish_state = env.locations_to_state(((0, 1), (6, 7)))
        second_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[wish_state][vector_action_to_integer((RIGHT,
                                                                      UP))]
        ]

        # Transitions from the wish state ((0, 1), (6, 7))
        self.assertEqual(
            set(second_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 2),
                     (5, 7))), REWARD_OF_LIVING + REWARD_OF_GOAL, True),
                # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 1), (5, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1), (5, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })
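The probabilities asserted above are consistent with each agent acting independently, executing its intended move with probability 0.8 and slipping to each of the two perpendicular directions with probability 0.1; how exactly these numbers derive from FAIL_PROB is not visible in this snippet, so the values below are an assumption:

success, slip = 0.8, 0.1
assert round(success * success, 2) == 0.64   # both agents move as intended
assert round(success * slip, 2) == 0.08      # exactly one agent slips to one side
assert round(slip * slip, 2) == 0.01         # both agents slip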
Example #20
    def test_predecessors(self):
        """Assert the predecessors function works correctly.

        Create an environment which looks like this:
        ....
        ..0.
        .1..

        A 3x4 grid.
        agent 0 is at (1,2)
        agent 1 is at (2,1)

        The predecessors for agent 0 are:
        1. (0,2)
        2. (1,1)
        3. (1,3)
        4. (2,2)
        5. (1,2) (staying in place)

        The predecessors for agent 1 are:
        1. (2,2)
        2. (2,0)
        3. (1,1)
        4. (2,1) (staying in place)

        Therefore, the predecessor states of the initial state correspond to these locations:
        1.  ((0,2), (2,2))
        2.  ((0,2), (2,0))
        3.  ((0,2), (1,1))
        4.  ((0,2), (2,1))
        5.  ((1,1), (2,2))
        6.  ((1,1), (2,0))
        7.  ((1,1), (1,1))
        8.  ((1,1), (2,1))
        9.  ((1,3), (2,2))
        10. ((1,3), (2,0))
        11. ((1,3), (1,1))
        12. ((1,3), (2,1))
        13. ((2,2), (2,2))
        14. ((2,2), (2,0))
        15. ((2,2), (1,1))
        16. ((2,2), (2,1))
        17. ((1,2), (2,2))
        18. ((1,2), (2,0))
        19. ((1,2), (1,1))
        20. ((1,2), (2,1))
        """
        grid = MapfGrid(['....', '....', '....'])

        agents_starts = ((1, 2), (2, 1))
        # don't care
        agents_goals = ((0, 0), (2, 3))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        expected_locations = [((0, 2), (2, 2)), ((0, 2), (2, 0)),
                              ((0, 2), (1, 1)), ((0, 2), (2, 1)),
                              ((1, 1), (2, 2)), ((1, 1), (2, 0)),
                              ((1, 1), (1, 1)), ((1, 1), (2, 1)),
                              ((1, 3), (2, 2)), ((1, 3), (2, 0)),
                              ((1, 3), (1, 1)), ((1, 3), (2, 1)),
                              ((2, 2), (2, 2)), ((2, 2), (2, 0)),
                              ((2, 2), (1, 1)), ((2, 2), (2, 1)),
                              ((1, 2), (2, 2)), ((1, 2), (2, 0)),
                              ((1, 2), (1, 1)), ((1, 2), (2, 1))]

        expected_states = [
            env.locations_to_state(loc) for loc in expected_locations
        ]

        self.assertSetEqual(set(expected_states), set(env.predecessors(env.s)))
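The expected set enumerated in the docstring is simply the Cartesian product of each agent's candidate previous locations (its in-grid neighbours plus its own location, reachable via STAY); a small sketch that reproduces the 20 location pairs:

from itertools import product

agent0_prev = [(0, 2), (1, 1), (1, 3), (2, 2), (1, 2)]   # neighbours of (1, 2) plus STAY
agent1_prev = [(2, 2), (2, 0), (1, 1), (2, 1)]           # neighbours of (2, 1) plus STAY; (3, 1) is off-grid
expected_locations = list(product(agent0_prev, agent1_prev))
assert len(expected_locations) == 20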