def check_single_motion_plan(self, motion_planner, start_pos_and_or, goal_pos_and_or, expected_length=None):
    """Verify a single-agent motion plan: it must end at the goal, have a
    cost consistent with the motion graph, and actually reach the goal
    when executed in the environment."""
    second_player = P((3, 2), n)
    start_state = OvercookedState(
        [P(*start_pos_and_or), second_player], {}, order_list=['any', 'any'])
    action_plan, pos_and_or_plan, plan_cost = motion_planner.get_plan(
        start_pos_and_or, goal_pos_and_or)

    # The final position/orientation in the plan must be the goal.
    self.assertEqual(pos_and_or_plan[-1], goal_pos_and_or)

    # Single-agent plans should never contain STAY, so the summed graph
    # action costs must equal the reported plan cost (= plan length).
    total_graph_cost = sum(
        motion_planner._graph_action_cost(action) for action in action_plan)
    self.assertEqual(plan_cost, total_graph_cost)

    # Execute the plan with the partner agent staying put, then confirm
    # the first player really ends up at the goal.
    joint_action_plan = [(action, stay) for action in action_plan]
    env = OvercookedEnv(motion_planner.mdp, horizon=1000)
    final_state, _ = env.execute_plan(start_state, joint_action_plan)
    self.assertEqual(final_state.players_pos_and_or[0], goal_pos_and_or)

    if expected_length is not None:
        self.assertEqual(len(action_plan), expected_length)
def check_joint_plan(self, joint_motion_planner, start, goal, times=None, min_t=None, display=False):
    """Runs the plan in the environment and checks that the intended goals are achieved."""
    debug = False
    action_plan, end_pos_and_orients, plan_lengths = \
        joint_motion_planner.get_low_level_action_plan(start, goal)
    if debug:
        print("Start state: {}, Goal state: {}, Action plan: {}".format(
            start, goal, action_plan))

    initial_state = OvercookedState(
        [P(*start[0]), P(*start[1])], {}, order_list=['any', 'any'])
    env = OvercookedEnv(joint_motion_planner.mdp, horizon=1000)
    final_state, _ = env.execute_plan(
        initial_state, action_plan, display=display)

    # At least one of the agents must have reached its intended goal.
    self.assertTrue(any(
        agent_goal in final_state.players_pos_and_or for agent_goal in goal))
    # The planner's predicted end configuration must match reality.
    self.assertEqual(final_state.players_pos_and_or, end_pos_and_orients)
    # The joint plan is only as long as the shortest individual plan.
    self.assertEqual(len(action_plan), min(plan_lengths))
    if min_t is not None:
        self.assertEqual(len(action_plan), min_t)
    if times is not None:
        self.assertEqual(plan_lengths, times)
def check_full_plan(self, start_state, planner, debug=False):
    """Execute a full high-level plan end-to-end and verify that every
    order in the start state gets fulfilled."""
    heuristic = Heuristic(planner.mp)
    joint_action_plan = planner.get_low_level_action_plan(
        start_state, heuristic.simple_heuristic, debug=debug, goal_info=debug)
    env = OvercookedEnv(planner.mdp, horizon=1000)
    final_state, _ = env.execute_plan(
        start_state, joint_action_plan, display=False)
    # An empty order list means the plan completed all orders.
    self.assertEqual(len(final_state.order_list), 0)
class TestOvercookedEnvironment(unittest.TestCase):
    """Tests for the OvercookedEnv wrapper: construction, stepping,
    plan execution, multi-player layouts, and layout/state randomization."""

    def setUp(self):
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
        self.rnd_agent_pair = AgentPair(
            FixedPlanAgent([stay, w, w]), FixedPlanAgent([stay, e, e]))
        # Fixed seed so randomized tests are reproducible.
        np.random.seed(0)

    def test_constructor(self):
        try:
            OvercookedEnv(self.base_mdp, horizon=10)
        except Exception as e:
            self.fail("Failed to instantiate OvercookedEnv:\n{}".format(e))

        # Unknown environment parameters should be rejected loudly.
        with self.assertRaises(TypeError):
            OvercookedEnv(self.base_mdp, **{"invalid_env_param": None})

    def test_step_fn(self):
        for _ in range(10):
            self.env.step(random_joint_action())

    def test_execute_plan(self):
        plan = [random_joint_action() for _ in range(10)]
        self.env.execute_plan(self.base_mdp.get_standard_start_state(), plan)

    def test_run_agents(self):
        initial_state = self.env.state
        self.env.run_agents(self.rnd_agent_pair)
        # Running the agents must advance the environment state.
        self.assertNotEqual(self.env.state, initial_state)

    def test_rollouts(self):
        try:
            self.env.get_rollouts(self.rnd_agent_pair, 3)
        except Exception as e:
            self.fail("Failed to get rollouts from environment:\n{}".format(e))

    def test_one_player_env(self):
        mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
        env = OvercookedEnv(mdp, horizon=12)
        solo_agent = FixedPlanAgent(
            [stay, w, w, e, e, n, e, interact, w, n, interact])
        env.run_agents(AgentGroup(solo_agent), display=False)
        self.assertEqual(env.state.players_pos_and_or, (((2, 1), (0, -1)), ))

    def test_four_player_env_fixed(self):
        mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
        assert mdp.num_players == 4
        env = OvercookedEnv(mdp, horizon=16)
        # Scripted action sequences, one per player.
        scripted_plans = [
            [stay, w, w],
            [stay, stay, e, e, n, n, n, e, interact, n, n, w, w, w, n,
             interact, e],
            [stay, w, interact, n, n, e, e, e, n, e, n, interact, w],
            [e, interact, n, n, w, w, w, n, interact, e, s],
        ]
        group = AgentGroup(*[FixedPlanAgent(plan) for plan in scripted_plans])
        env.run_agents(group, display=False)
        self.assertEqual(
            env.state.players_pos_and_or,
            (((1, 1), (-1, 0)), ((3, 1), (0, -1)), ((2, 1), (-1, 0)),
             ((4, 2), (0, 1))))

    def test_multiple_mdp_env(self):
        mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
        mdp1 = OvercookedGridworld.from_layout_name("counter_circuit")
        # A fresh layout is sampled from the pair on each env reset.
        mdp_fn = lambda: np.random.choice([mdp0, mdp1])

        # Default env
        env = OvercookedEnv(mdp_fn, horizon=100)
        env.get_rollouts(self.rnd_agent_pair, 5)

    def test_starting_position_randomization(self):
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        start_state_fn = self.base_mdp.get_random_start_state_fn(
            random_start_pos=True, rnd_obj_prob_thresh=0.0)
        env = OvercookedEnv(self.base_mdp, start_state_fn)
        initial_positions = env.state.players_pos_and_or
        for _ in range(3):
            env.reset()
            print(env)
            # Player positions should differ from the initial draw.
            self.assertFalse(np.array_equal(
                initial_positions, env.state.players_pos_and_or))

    def test_starting_obj_randomization(self):
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        start_state_fn = self.base_mdp.get_random_start_state_fn(
            random_start_pos=False, rnd_obj_prob_thresh=0.8)
        env = OvercookedEnv(self.base_mdp, start_state_fn)
        initial_objects = env.state.all_objects_list
        for _ in range(3):
            env.reset()
            print(env)
            # Object placement should differ from the initial draw.
            self.assertFalse(np.array_equal(
                initial_objects, env.state.all_objects_list))

    def test_failing_rnd_layout(self):
        # Invalid generator parameters must raise rather than silently
        # produce a broken layout.
        with self.assertRaises(TypeError):
            bad_gen_params = {"None": None}
            mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**bad_gen_params)
            OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)

    def test_random_layout(self):
        mdp_gen_params = {"prop_feats": (1, 1)}
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)
        initial_terrain = env.mdp.terrain_mtx
        for _ in range(3):
            env.reset()
            print(env)
            # Each reset should generate a different terrain.
            self.assertFalse(np.array_equal(
                initial_terrain, env.mdp.terrain_mtx))

        mdp_gen_params = {
            "mdp_choices": ['cramped_room', 'asymmetric_advantages']
        }
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)
        layouts_seen = []
        for _ in range(10):
            layouts_seen.append(env.mdp.terrain_mtx)
            env.reset()
        # Over ten resets at least one layout should differ from the last.
        all_same_layout = all(
            np.array_equal(env.mdp.terrain_mtx, terrain)
            for terrain in layouts_seen)
        self.assertFalse(all_same_layout)