Example #1
    def test_trajectory_visualization(self):
        # We don't have a good way to check the slider automatically, so this mostly
        # tests basic things: the number of output images and whether calling the
        # method raises an error.
        traj_path = os.path.join(TESTING_DATA_DIR, 'test_state_visualizer',
                                 'test_trajectory.json')
        test_trajectory = AgentEvaluator.load_traj_from_json(traj_path)
        expected_images_num = len(test_trajectory["ep_states"][0])
        assert expected_images_num == 10
        action_probs = [
            [RandomAgent(all_actions=True).action(state)[1]["action_probs"]] *
            2 for state in test_trajectory["ep_states"][0]
        ]

        result_img_directory_path = StateVisualizer(
        ).display_rendered_trajectory(test_trajectory,
                                      action_probs=action_probs,
                                      ipython_display=False)
        self.assertEqual(get_file_count(result_img_directory_path),
                         expected_images_num)

        custom_img_directory_path = generate_temporary_file_path(
            prefix="overcooked_visualized_trajectory", extension="")
        self.assertNotEqual(custom_img_directory_path,
                            result_img_directory_path)
        result_img_directory_path = StateVisualizer(
        ).display_rendered_trajectory(
            test_trajectory,
            img_directory_path=custom_img_directory_path,
            ipython_display=False)
        self.assertEqual(custom_img_directory_path, result_img_directory_path)
        self.assertEqual(get_file_count(result_img_directory_path),
                         expected_images_num)
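The test keeps ipython_display=False because the slider widget cannot be checked automatically. As a hedged sketch (not part of the test file), the same call in a Jupyter notebook would presumably enable the interactive display instead, reusing the trajectory and action_probs prepared above:

# Sketch only: assumes a notebook context and the same test_trajectory /
# action_probs variables as in the test above.
visualizer = StateVisualizer()
visualizer.display_rendered_trajectory(test_trajectory,
                                       action_probs=action_probs,
                                       ipython_display=True)  # shows the slider instead of only writing images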
Example #2
    def custom_init(self,
                    base_env,
                    featurize_fn,
                    baselines_reproducible=False,
                    display=False):
        """
        base_env: OvercookedEnv
        featurize_fn(state): function used to featurize states returned in the 'both_agent_obs' field (called with a single state argument below)
        """
        if baselines_reproducible:
            # NOTE:
            # This causes all agent indices to be chosen in sync across simulation
            # envs (for each update, all envs will have index 0 or index 1).
            # It prevents the randomness of choosing agent indices from leaking
            # when using subprocess-vec-env in baselines (which seeding does not
            # reach), i.e. getting different results for different runs with the
            # same seed.
            # The effect of this should be negligible, as all other randomness is
            # controlled by the actual run seeds.
            np.random.seed(0)

        self.base_env = base_env
        self.featurize_fn = featurize_fn
        self.observation_space = self._setup_observation_space()
        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))

        if display:
            self.visualizer = StateVisualizer()
            print('Importing pygame')
            self.window = None

        self.reset()
Example #3
    def test_setting_up_configs(self):
        default_values = copy.deepcopy(StateVisualizer.DEFAULT_VALUES)

        init_config = {"tile_size": 123}
        configure_config = {"tile_size": 234}
        configure_defaults_config = {"tile_size": 345}
        assert default_values["tile_size"] != init_config[
            "tile_size"] != configure_config[
                "tile_size"] != configure_defaults_config["tile_size"]

        visualizer = StateVisualizer(**init_config)
        self.assertEqual(init_config["tile_size"], visualizer.tile_size)

        visualizer.configure(**configure_config)
        self.assertEqual(configure_config["tile_size"], visualizer.tile_size)

        StateVisualizer.configure_defaults(**configure_defaults_config)
        self.assertEqual(configure_defaults_config["tile_size"],
                         StateVisualizer.DEFAULT_VALUES["tile_size"])
        self.assertEqual(configure_defaults_config["tile_size"],
                         StateVisualizer().tile_size)

        invalid_kwargs = {"invalid_argument": 123}
        self.assertRaises(AssertionError, StateVisualizer, **invalid_kwargs)
        self.assertRaises(AssertionError, StateVisualizer.configure_defaults,
                          **invalid_kwargs)
        self.assertRaises(AssertionError, visualizer.configure,
                          **invalid_kwargs)
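Read as a whole, the test encodes a precedence order: constructor kwargs and later configure() calls set a single instance's values, while configure_defaults() changes what future instances start from. A minimal sketch of that order (my summary of the assertions above, not code from the test file):

# Sketch only: summarizes the precedence demonstrated by test_setting_up_configs.
StateVisualizer.configure_defaults(tile_size=345)  # new default for future instances
vis = StateVisualizer(tile_size=123)               # constructor kwarg wins for this instance
vis.configure(tile_size=234)                       # configure() overrides the instance afterwards
assert vis.tile_size == 234
assert StateVisualizer().tile_size == 345          # fresh instances pick up the new default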
Example #4
def test_render_state_from_dict(test_dict):
    input_dict = copy.deepcopy(test_dict)
    test_dict = copy.deepcopy(test_dict)
    test_dict["kwargs"]["state"] = OvercookedState.from_dict(
        test_dict["kwargs"]["state"])
    # Only check whether this raises an error; image fidelity is verified via the render_state comparison below.
    StateVisualizer(**test_dict["config"]).display_rendered_state(
        img_path=example_img_path, **test_dict["kwargs"])

    actual_result = pygame.surfarray.array3d(
        StateVisualizer(**test_dict["config"]).render_state(
            **test_dict["kwargs"]))
    expected_result = np.load(
        os.path.join(state_visualizer_dir, test_dict["result_array_filename"]))
    if actual_result.shape != expected_result.shape:
        print("test with:", input_dict["result_array_filename"], "failed")
        print("test not passed, wrong output shape", actual_result.shape, "!=",
              expected_result.shape)
        print(json.dumps(input_dict, indent=4, sort_keys=True))
        return False

    wrong_rows, wrong_columns, wrong_color_channels = np.where(
        actual_result != expected_result)
    wrong_coordinates = set([(row, col)
                             for row, col in zip(wrong_rows, wrong_columns)])
    incorrect_pixels_num = len(wrong_coordinates)
    all_pixels_num = int(expected_result.size / 3)
    if incorrect_pixels_num:
        wrong_coordinate_list = sorted(list(wrong_coordinates))
        print("test with: ", input_dict["result_array_filename"], "is failed")
        print("test not passed, wrong color of", incorrect_pixels_num,
              "pixels out of", all_pixels_num)
        print("first 100 wrong pixels coordinates",
              wrong_coordinate_list[:100])
        print("coordinate\texpected\tactual")
        for i in range(10):
            (wrong_x, wrong_y) = wrong_coord = wrong_coordinate_list[i]
            print("%s\t%s\t%s" %
                  (str(wrong_coord), str(expected_result[wrong_x, wrong_y]),
                   str(actual_result[wrong_x, wrong_y])))
        print("test_dict", json.dumps(input_dict))
        return False
    print("test with: ", input_dict["result_array_filename"], "is ok")
    return True
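The expected_result arrays are plain .npy dumps of the rendered surface. When a rendering change is intentional, the reference file could presumably be regenerated with a sketch like the one below; this is an assumption about the workflow, not code from the repo, and it expects test_dict to be prepared as in the function above (state already converted via OvercookedState.from_dict):

import os
import numpy as np
import pygame

# Sketch only: re-renders the state and overwrites the stored reference array
# that test_render_state_from_dict compares against.
surface = StateVisualizer(**test_dict["config"]).render_state(**test_dict["kwargs"])
np.save(
    os.path.join(state_visualizer_dir, test_dict["result_array_filename"]),
    pygame.surfarray.array3d(surface))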
Example #5
    def test_default_hud_data_from_trajectories(self):
        traj_path = os.path.join(
            TESTING_DATA_DIR, 'test_state_visualizer', 'test_trajectory.json'
        )  # NOTE: for test purposes a reward is added here even though there was no soup delivery in the trajectory
        test_trajectory = AgentEvaluator.load_traj_from_json(traj_path)
        hud_data_path = os.path.join(
            TESTING_DATA_DIR, 'test_state_visualizer',
            'expected_default_hud_data_from_trajectories.json')
        expected_hud_data = load_from_json(hud_data_path)
        result_hud_data = StateVisualizer().default_hud_data_from_trajectories(
            test_trajectory)
        self.assertEqual(json.dumps(result_hud_data, sort_keys=True),
                         json.dumps(expected_hud_data, sort_keys=True))
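If the expected fixture ever needs to be regenerated, a sketch like the following (my assumption, not part of the test suite) would write the output of default_hud_data_from_trajectories back to disk with the standard json module:

import json

# Sketch only: overwrites the fixture compared against above, so it should be
# run deliberately, not as part of a normal test run.
hud_data = StateVisualizer().default_hud_data_from_trajectories(test_trajectory)
with open(hud_data_path, "w") as f:
    json.dump(hud_data, f, sort_keys=True, indent=4)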
Example #6
    def test_properties(self):
        visualizer = StateVisualizer(tile_size=30,
                                     hud_interline_size=7,
                                     hud_font_size=26)
        self.assertEqual(visualizer.scale_by_factor, 2)
        self.assertEqual(visualizer.hud_line_height, 26 + 7)
class Overcooked(gym.Env):
    """
    Wrapper for the Env class above that is SOMEWHAT compatible with the standard gym API.

    NOTE: Observations returned are in a dictionary format with various information that is
    necessary to be able to handle the multi-agent nature of the environment. There are probably
    better ways to handle this, but we found this to work with minor modifications to OpenAI Baselines.
    
    NOTE: The index of the main agent in the mdp is randomized at each reset of the environment and
    tracked by the self.agent_idx attribute. This information therefore has to be passed along in the
    output, so that consumers know which agent index other agents' featurizations should be made for
    (see the usage sketch after this class).
    
    For example, say one is training A0 paired with A1, and A1 takes a custom state featurization.
    Then in the runner.py loop in OpenAI Baselines, we will get the lossless encodings of the state,
    and the true Overcooked state. When we encode the true state to feed to A1, we also need to know
    what agent index it has in the environment (as encodings will be index dependent).
    """
    env_name = "Overcooked-v0"

    def custom_init(self,
                    base_env,
                    featurize_fn,
                    baselines_reproducible=False,
                    display=False):
        """
        base_env: OvercookedEnv
        featurize_fn(state): function used to featurize states returned in the 'both_agent_obs' field (called with a single state argument below)
        """
        if baselines_reproducible:
            # NOTE:
            # This causes all agent indices to be chosen in sync across simulation
            # envs (for each update, all envs will have index 0 or index 1).
            # It prevents the randomness of choosing agent indices from leaking
            # when using subprocess-vec-env in baselines (which seeding does not
            # reach), i.e. getting different results for different runs with the
            # same seed.
            # The effect of this should be negligible, as all other randomness is
            # controlled by the actual run seeds.
            np.random.seed(0)

        self.base_env = base_env
        self.featurize_fn = featurize_fn
        self.observation_space = self._setup_observation_space()
        self.action_space = gym.spaces.Discrete(len(Action.ALL_ACTIONS))

        if display:
            self.visualizer = StateVisualizer()
            print('Importing pygame')
            self.window = None

        self.reset()

    def _setup_observation_space(self):
        dummy_mdp = self.base_env.mdp
        dummy_state = dummy_mdp.get_standard_start_state()
        obs_shape = self.featurize_fn(dummy_state)[0].shape
        high = np.ones(obs_shape) * float('inf')
        return gym.spaces.Box(high * 0, high, dtype=np.float32)

    def step(self, action, action_as_ind=False):
        """
        action: 
            (agent with index self.agent_idx action, other agent action)
            is a tuple with the joint action of the primary and secondary agents in index format
        
        returns:
            observation: formatted to be standard input for self.agent_idx's policy
        """
        if action_as_ind:
            assert all(
                self.action_space.contains(a)
                for a in action), "%r (%s) invalid" % (action, type(action))
            agent_action, other_agent_action = [
                Action.INDEX_TO_ACTION[a] for a in action
            ]
        else:
            agent_action, other_agent_action = action

        if self.agent_idx == 0:
            joint_action = (agent_action, other_agent_action)
        else:
            joint_action = (other_agent_action, agent_action)

        next_state, reward, done, env_info = self.base_env.step(joint_action)
        ob_p0, ob_p1 = self.featurize_fn(next_state)
        if self.agent_idx == 0:
            both_agents_ob = (ob_p0, ob_p1)
        else:
            both_agents_ob = (ob_p1, ob_p0)

        env_info["policy_agent_idx"] = self.agent_idx

        if "episode" in env_info.keys():
            env_info["episode"]["policy_agent_idx"] = self.agent_idx

        obs = {
            "both_agent_obs": both_agents_ob,
            "overcooked_state": next_state,
            "other_agent_env_idx": 1 - self.agent_idx
        }
        return obs, reward, done, env_info

    def reset(self, regen_mdp=True, return_only_state=False):
        """
        When training on individual maps, we want to randomize which agent is assigned to which
        starting location, in order to make sure that the agents are trained to be able to 
        complete the task starting at either of the hardcoded positions.

        NOTE: a nicer way to do this would be to just randomize starting positions, and not
        have to deal with randomizing indices.
        """
        self.base_env.reset(regen_mdp)
        self.mdp = self.base_env.mdp
        self.agent_idx = 0  # NOTE: fixed to 0 here; the class docstring describes randomizing this index
        ob_p0, ob_p1 = self.featurize_fn(self.base_env.state)

        if self.agent_idx == 0:
            both_agents_ob = (ob_p0, ob_p1)
        else:
            both_agents_ob = (ob_p1, ob_p0)

        if return_only_state:
            return self.featurize_fn(self.base_env.state)
        else:
            return {
                "both_agent_obs": both_agents_ob,
                "overcooked_state": self.base_env.state,
                "other_agent_env_idx": 1 - self.agent_idx
            }

    def render(self, mode="human", close=False):
        grid = self.base_env.mdp.terrain_mtx
        state = self.base_env.state
        surface = self.visualizer.render_state(state, grid, action_probs=None)
        self.window = run_dynamic_window(self.window, surface)
        return image_to_array(surface)

    def return_state_img(self):
        grid = self.base_env.mdp.terrain_mtx
        state = self.base_env.state
        surface = self.visualizer.render_state(state, grid, action_probs=None)
        return image_to_array(surface)
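Putting the pieces together, a rollout against this wrapper might look like the sketch below. The layout name, horizon, and the lossless-encoding featurizer are illustrative assumptions (the wrapper only requires a base_env and a featurize_fn taking a single state), and the module paths and from_mdp constructor follow recent overcooked_ai versions, which may differ from the version this snippet was taken from:

from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv

# Sketch under assumptions: "cramped_room" and horizon=400 are illustrative,
# and lossless_state_encoding(state) returns one observation per player.
mdp = OvercookedGridworld.from_layout_name("cramped_room")
base_env = OvercookedEnv.from_mdp(mdp, horizon=400)

env = Overcooked()
env.custom_init(base_env=base_env,
                featurize_fn=lambda state: mdp.lossless_state_encoding(state))

obs = env.reset()
done = False
while not done:
    # Random joint action in index format, matching step(..., action_as_ind=True);
    # obs["both_agent_obs"][0] is the observation for the agent at env.agent_idx.
    joint_action = [env.action_space.sample(), env.action_space.sample()]
    obs, reward, done, info = env.step(joint_action, action_as_ind=True)
    assert info["policy_agent_idx"] == env.agent_idx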