Пример #1
0
    def test_wrapper_env_reset(self, env_spec: EnvSpec,
                               helpers: Helpers) -> None:
        """Check that resetting a wrapped env produces a valid first timestep.

        The reset result must be non-None, carry ``StepType.FIRST``, contain
        one observation entry per agent, and leave ``_reset_next_step`` as
        ``False``. The per-agent observation check is skipped for the
        sequential OpenSpiel ``tic_tac_toe`` env.

        Args:
            env_spec: description of the environment under test.
            helpers: test helper collection used to build and verify the env.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)
        num_agents = len(wrapped_env.agents)

        timestep = wrapped_env.reset()
        # reset() returns either a bare timestep or a plain
        # ``(timestep, extras)`` tuple. The exact-type check is deliberate: an
        # isinstance() check would also match tuple subclasses.
        # NOTE(review): the timestep type is presumably a namedtuple - confirm.
        if type(timestep) is tuple:
            dm_env_timestep, _ = timestep
        else:
            dm_env_timestep = timestep
        props_which_should_not_be_none = [
            dm_env_timestep, dm_env_timestep.observation
        ]

        assert helpers.verify_all_props_not_none(
            props_which_should_not_be_none), "Failed to ini dm_env_timestep."
        assert (dm_env_timestep.step_type == dm_env.StepType.FIRST
                ), "Failed to have correct StepType."
        if (env_spec.env_name == "tic_tac_toe"
                and env_spec.env_source == EnvSource.OpenSpiel
                and env_spec.env_type == EnvType.Sequential):
            # Every fragment but the last ends with a space so the implicitly
            # concatenated message reads as one sentence.
            pytest.skip(
                "This test is only applicable to parallel wrappers and only "
                "works for the provided PZ sequential envs because they have "
                "3 agents, and an OLT has length of 3 (a bug, i'd say)")
        assert (len(dm_env_timestep.observation) == num_agents
                ), "Failed to generate observation for all agents."
        assert wrapped_env._reset_next_step is False, "_reset_next_step not set."

        helpers.assert_env_reset(wrapped_env, dm_env_timestep, env_spec)
Пример #2
0
    def test_step_1_valid_when_env_not_done(self, env_spec: EnvSpec,
                                            helpers: Helpers) -> None:
        """Check that ``step`` is invoked exactly once with the sampled action.

        ``wrapped_env.step`` is patched out, so only the call itself is
        verified: with a dict of per-agent actions for parallel envs, and with
        one action per agent for sequential envs.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)

        # Actions are sampled below, so seed both the env and its action space.
        seed = 42
        wrapped_env.seed(seed)
        helpers.seed_action_space(wrapped_env, seed)

        agent_names = wrapped_env.agents

        if env_spec.env_type == EnvType.Parallel:
            # One joint action covering every agent.
            joint_action = {}
            for name in agent_names:
                joint_action[name] = wrapped_env.action_spaces[name].sample()

            with patch.object(wrapped_env, "step") as mocked_step:
                mocked_step.return_value = (None, None, None, None)
                wrapped_env.step(joint_action)
                mocked_step.assert_called_once_with(joint_action)

        elif env_spec.env_type == EnvType.Sequential:
            # A fresh patch per agent so each call count starts from zero.
            for name in agent_names:
                with patch.object(wrapped_env, "step") as mocked_step:
                    mocked_step.return_value = None
                    single_action = wrapped_env.action_spaces[name].sample()
                    wrapped_env.step(single_action)
                    mocked_step.assert_called_once_with(single_action)
Пример #3
0
    def test_covert_env_to_dm_env_1_with_action_mask(self, env_spec: EnvSpec,
                                                     helpers: Helpers) -> None:
        """Check observation conversion when observations carry an action mask.

        Random ``{"observation", "action_mask"}`` dicts are fed through the
        wrapper's ``_convert_observations`` (parallel) or
        ``_convert_observation`` (sequential). The result must expose the same
        observation, the mask as ``legal_actions``, and a falsy ``terminal``.
        Nothing is checked if the wrapper has neither conversion method.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)

        # Nothing to test if the wrapper offers neither conversion function.
        if not (hasattr(wrapped_env, "_convert_observations")
                or hasattr(wrapped_env, "_convert_observation")):
            return

        agent_names = wrapped_env.agents
        # TODO If cont action space masking is implemented - Update
        fake_obs = {
            name: {
                "observation":
                np.random.rand(*wrapped_env.observation_spaces[name].shape),
                "action_mask":
                np.random.randint(
                    2, size=wrapped_env.action_spaces[name].shape),
            }
            for name in agent_names
        }

        if env_spec.env_type == EnvType.Parallel:
            not_done = {name: False for name in agent_names}
            converted = wrapped_env._convert_observations(fake_obs,
                                                          dones=not_done)

            for name in wrapped_env.agents:
                expected = fake_obs[name]
                actual = converted[name]
                np.testing.assert_array_equal(expected.get("observation"),
                                              actual.observation)
                np.testing.assert_array_equal(expected.get("action_mask"),
                                              actual.legal_actions)
                assert not bool(actual.terminal), "Failed to set terminal."

        elif env_spec.env_type == EnvType.Sequential:
            for name in agent_names:
                expected = fake_obs[name]
                actual = wrapped_env._convert_observation(name,
                                                          expected,
                                                          done=False)
                np.testing.assert_array_equal(expected.get("observation"),
                                              actual.observation)
                np.testing.assert_array_equal(expected.get("action_mask"),
                                              actual.legal_actions)
                assert not bool(actual.terminal), "Failed to set terminal."
Пример #4
0
    def test_step_0_valid_when_env_not_done(self, env_spec: EnvSpec,
                                            helpers: Helpers) -> None:
        """Check that stepping a live env changes observations and yields MID.

        After a reset, one joint step (parallel) or one step per agent
        (sequential) is taken. Observations must differ from the initial ones,
        rewards must be set, ``_reset_next_step`` must stay ``False`` and the
        last timestep must have ``StepType.MID``.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)

        # Actions are sampled below, so seed both the env and its action space.
        seed = 42
        wrapped_env.seed(seed)
        helpers.seed_action_space(wrapped_env, seed)

        agent_names = wrapped_env.agents

        # reset() yields either a timestep or a plain (timestep, extras) tuple.
        first_timestep = wrapped_env.reset()
        if type(first_timestep) is tuple:
            first_timestep, _ = first_timestep

        if env_spec.env_type == EnvType.Parallel:
            joint_action = {}
            for name in agent_names:
                joint_action[name] = wrapped_env.action_spaces[name].sample()
            curr_dm_timestep = wrapped_env.step(joint_action)

            for name in wrapped_env.agents:
                before = first_timestep.observation[name].observation
                after = curr_dm_timestep.observation[name].observation
                assert not np.array_equal(
                    before, after), "Failed to update observations."

        elif env_spec.env_type == EnvType.Sequential:
            curr_dm_timestep = first_timestep
            for name in agent_names:
                if env_spec.env_source == EnvSource.OpenSpiel:
                    # Pick uniformly among the indices flagged as legal.
                    legal_indices = np.where(
                        curr_dm_timestep.observation.legal_actions)[0]
                    chosen_action = np.random.choice(legal_indices)
                else:
                    chosen_action = wrapped_env.action_spaces[name].sample()

                curr_dm_timestep = wrapped_env.step(chosen_action)

                before = first_timestep.observation.observation
                after = curr_dm_timestep.observation.observation
                assert not np.array_equal(
                    before, after), "Failed to update observations."

        assert (wrapped_env._reset_next_step is
                False), "Failed to set _reset_next_step correctly."
        assert curr_dm_timestep.reward is not None, "Failed to set rewards."
        assert (curr_dm_timestep.step_type is
                dm_env.StepType.MID), "Failed to update step type."
Пример #5
0
    def test_valid_episode(self, env_spec: EnvSpec, helpers: Helpers) -> None:
        """Run one episode through the env loop and validate its result."""
        wrapped_env, specs = helpers.get_wrapped_env(env_spec)
        make_env_loop = helpers.get_env_loop(env_spec)

        # Drive the loop with a mocked system built from the env specs.
        loop = make_env_loop(wrapped_env, MockedSystem(specs))
        episode_result = loop.run_episode()

        helpers.assert_valid_episode(episode_result)
Пример #6
0
    def test_valid_multiple_episodes(self, env_spec: EnvSpec, helpers: Helpers) -> None:
        """Alternate training and evaluation runs on the same wrapped env.

        Each round runs two training episodes followed by one evaluation
        episode, for ten training episodes in total.
        """
        wrapped_env, specs = helpers.get_wrapped_env(env_spec)
        make_env_loop = helpers.get_env_loop(env_spec)

        trainer = make_env_loop(wrapped_env, MockedSystem(specs), label="train_loop")
        evaluator = make_env_loop(wrapped_env, MockedExecutor(specs), label="eval_loop")

        total_train_episodes = 10
        train_episodes_per_round = 2
        rounds = total_train_episodes // train_episodes_per_round

        for _ in range(rounds):
            trainer.run(num_episodes=train_episodes_per_round)
            evaluator.run(num_episodes=1)
Пример #7
0
    def test_preprocess_wrapper_obs_1_standardize(
        self,
        env_spec: EnvSpec,
        helpers: Helpers,
        monkeypatch: MonkeyPatch,
    ) -> None:
        """Check observations stay standardized over 50 steps.

        The env is wrapped with a float32 dtype cast followed by the
        standardize-observation wrapper that matches the env type.
        """
        # Choose the wrapper implementation for this env type.
        if env_spec.env_type == EnvType.Parallel:
            standardizer = StandardizeObservationParallel
        elif env_spec.env_type == EnvType.Sequential:
            standardizer = StandardizeObservationSequential

        preprocess_wrappers = [
            (dtype_v0, {"dtype": np.float32}),
            (standardizer, None),
        ]
        wrapped_env, _ = helpers.get_wrapped_env(
            env_spec, env_preprocess_wrappers=preprocess_wrappers)
        wrapped_env.reset()

        agent_names = wrapped_env.agents

        if env_spec.env_type == EnvType.Parallel:
            # The joint action is sampled once and reused for every step.
            joint_action = {}
            for name in agent_names:
                joint_action[name] = wrapped_env.action_spaces[name].sample()

            for _step in range(50):
                curr_dm_timestep = wrapped_env.step(joint_action)
                helpers.verify_observations_are_standardized(
                    curr_dm_timestep.observation, agent_names, env_spec)

        elif env_spec.env_type == EnvType.Sequential:
            for _step in range(50):
                for name in agent_names:
                    single_action = wrapped_env.action_spaces[name].sample()
                    curr_dm_timestep = wrapped_env.step(single_action)
                    helpers.verify_observations_are_standardized(
                        curr_dm_timestep.observation, agent_names, env_spec)
Пример #8
0
    def test_preprocess_wrapper_obs_0_normalize(
        self,
        env_spec: EnvSpec,
        helpers: Helpers,
        monkeypatch: MonkeyPatch,
    ) -> None:
        """Check observations are normalized at reset and over 50 steps.

        The env is wrapped with a float32 dtype cast followed by
        ``normalize_obs_v0``.
        """
        preprocess_wrappers = [
            (dtype_v0, {"dtype": np.float32}),
            (normalize_obs_v0, None),
        ]
        wrapped_env, _ = helpers.get_wrapped_env(
            env_spec, env_preprocess_wrappers=preprocess_wrappers)

        # reset() yields either a timestep or a plain (timestep, extras) tuple.
        first_timestep = wrapped_env.reset()
        if type(first_timestep) is tuple:
            first_timestep, _ = first_timestep

        agent_names = wrapped_env.agents

        helpers.verify_observations_are_normalized(
            first_timestep.observation, agent_names, env_spec)

        if env_spec.env_type == EnvType.Parallel:
            for _step in range(50):
                # A new joint action is sampled on every step.
                joint_action = {}
                for name in agent_names:
                    joint_action[name] = (
                        wrapped_env.action_spaces[name].sample())
                curr_dm_timestep = wrapped_env.step(joint_action)
                helpers.verify_observations_are_normalized(
                    curr_dm_timestep.observation, agent_names, env_spec)

        elif env_spec.env_type == EnvType.Sequential:
            for _step in range(50):
                for name in agent_names:
                    single_action = wrapped_env.action_spaces[name].sample()
                    curr_dm_timestep = wrapped_env.step(single_action)
                    helpers.verify_observations_are_normalized(
                        curr_dm_timestep.observation, agent_names, env_spec)
Пример #9
0
    def test_covert_env_to_dm_env_0_no_action_mask(self, env_spec: EnvSpec,
                                                   helpers: Helpers) -> None:
        """Check observation conversion for plain (mask-free) observations.

        Random arrays shaped like each agent's observation space are passed
        through ``_convert_observations`` (parallel) or
        ``_convert_observation`` (sequential). The result must expose the same
        array and a falsy ``terminal``. Nothing is checked if the wrapper has
        neither conversion method.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)

        # Nothing to test if the wrapper offers neither conversion function.
        if not (hasattr(wrapped_env, "_convert_observations")
                or hasattr(wrapped_env, "_convert_observation")):
            return

        agent_names = wrapped_env.agents
        fake_obs = {}
        for name in agent_names:
            obs_shape = wrapped_env.observation_spaces[name].shape
            fake_obs[name] = np.random.rand(*obs_shape)

        if env_spec.env_type == EnvType.Parallel:
            not_done = {name: False for name in agent_names}
            converted = wrapped_env._convert_observations(fake_obs,
                                                          dones=not_done)

            for name in wrapped_env.agents:
                np.testing.assert_array_equal(fake_obs[name],
                                              converted[name].observation)
                assert not bool(
                    converted[name].terminal), "Failed to set terminal."

        elif env_spec.env_type == EnvType.Sequential:
            for name in agent_names:
                converted = wrapped_env._convert_observation(name,
                                                             fake_obs[name],
                                                             done=False)
                np.testing.assert_array_equal(fake_obs[name],
                                              converted.observation)
                assert not bool(converted.terminal), "Failed to set terminal."
Пример #10
0
    def test_initialize_env_loop(self, env_spec: EnvSpec, helpers: Helpers) -> None:
        """Check a newly built env loop has all of its core attributes set."""
        wrapped_env, specs = helpers.get_wrapped_env(env_spec)
        make_env_loop = helpers.get_env_loop(env_spec)
        loop = make_env_loop(wrapped_env, MockedSystem(specs))

        # The loop itself plus each private attribute it should have populated.
        required_attrs = (
            "_environment",
            "_executor",
            "_counter",
            "_logger",
            "_should_update",
        )
        must_be_set = [loop]
        for attr_name in required_attrs:
            must_be_set.append(getattr(loop, attr_name))

        all_set = helpers.verify_all_props_not_none(must_be_set)
        assert all_set, "Failed to initialize env loop."
Пример #11
0
    def test_preprocess_wrapper_reward_1_custom_function(
        self,
        env_spec: EnvSpec,
        helpers: Helpers,
        monkeypatch: MonkeyPatch,
    ) -> None:
        """Check a custom reward function is applied by the reward wrapper.

        The env is wrapped with a float32 dtype cast and ``reward_lambda_v0``
        configured to add 100 to every reward. Each reward seen after a step
        must therefore be at least 100.
        """
        preprocess_wrappers = [
            (dtype_v0, {"dtype": np.float32}),
            (reward_lambda_v0, {"change_reward_fn": lambda r: r + 100}),
        ]
        wrapped_env, _ = helpers.get_wrapped_env(
            env_spec, env_preprocess_wrappers=preprocess_wrappers)

        wrapped_env.reset()
        agent_names = wrapped_env.agents

        if env_spec.env_type == EnvType.Parallel:
            joint_action = {}
            for name in agent_names:
                joint_action[name] = wrapped_env.action_spaces[name].sample()
            curr_dm_timestep = wrapped_env.step(joint_action)

            # Rewards are indexed per agent for parallel envs.
            for name in agent_names:
                shaped_reward = curr_dm_timestep.reward[name]
                assert shaped_reward >= 100, "Failed custom reward shaping. "

        elif env_spec.env_type == EnvType.Sequential:
            for name in agent_names:
                single_action = wrapped_env.action_spaces[name].sample()
                curr_dm_timestep = wrapped_env.step(single_action)
                shaped_reward = curr_dm_timestep.reward
                assert shaped_reward >= 100, "Failed custom reward shaping. "
Пример #12
0
    def test_wrapper_initialization(self, env_spec: EnvSpec,
                                    helpers: Helpers) -> None:
        """Check a wrapped env exposes non-None specs sized to the agent count.

        The wrapper, its underlying ``environment`` and its observation,
        action, reward and discount specs must all be set. Each spec must have
        exactly one entry per agent.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)
        agent_count = len(wrapped_env.agents)

        must_be_set = [
            wrapped_env,
            wrapped_env.environment,
            wrapped_env.observation_spec(),
            wrapped_env.action_spec(),
            wrapped_env.reward_spec(),
            wrapped_env.discount_spec(),
        ]
        all_set = helpers.verify_all_props_not_none(must_be_set)
        assert all_set, "Failed to ini wrapped env."

        # Each spec getter is paired with the message used if its size is off.
        spec_checks = (
            (wrapped_env.observation_spec,
             "Failed to generate observation specs for all agents."),
            (wrapped_env.action_spec,
             "Failed to generate action specs for all agents."),
            (wrapped_env.reward_spec,
             "Failed to generate reward specs for all agents."),
            (wrapped_env.discount_spec,
             "Failed to generate discount specs for all agents."),
        )
        for get_spec, failure_message in spec_checks:
            assert len(get_spec()) == agent_count, failure_message
Пример #13
0
    def test_preprocess_wrapper_reward_0_normalize(
        self,
        env_spec: EnvSpec,
        helpers: Helpers,
        monkeypatch: MonkeyPatch,
    ) -> None:
        """Check rewards stay within the configured bounds over 50 steps.

        The env is wrapped with a float32 dtype cast followed by the
        standardize-reward wrapper that matches the env type, configured with
        a lower bound of 0.2 and an upper bound of 1.

        Args:
            env_spec: description of the environment under test.
            helpers: test helper collection used to build and verify the env.
            monkeypatch: pytest fixture; not used by this test.
        """
        # Named so they do not shadow the ``min``/``max`` builtins.
        lower_bound = 0.2
        upper_bound = 1

        # Parallel env_types
        if env_spec.env_type == EnvType.Parallel:
            StandardizeReward = StandardizeRewardParallel

        # Sequential env_types
        elif env_spec.env_type == EnvType.Sequential:
            StandardizeReward = StandardizeRewardSequential

        wrapped_env, _ = helpers.get_wrapped_env(
            env_spec,
            env_preprocess_wrappers=[
                (dtype_v0, {
                    "dtype": np.float32
                }),
                (StandardizeReward, {
                    "lower_bound": lower_bound,
                    "upper_bound": upper_bound
                }),
            ],
        )

        _ = wrapped_env.reset()
        agents = wrapped_env.agents

        # Parallel env_types
        if env_spec.env_type == EnvType.Parallel:
            for _ in range(50):
                test_agents_actions = {
                    agent: wrapped_env.action_spaces[agent].sample()
                    for agent in agents
                }
                curr_dm_timestep = wrapped_env.step(test_agents_actions)
                # ``min``/``max`` here are the helper's keyword names.
                helpers.verify_reward_is_normalized(curr_dm_timestep.reward,
                                                    agents,
                                                    env_spec,
                                                    min=lower_bound,
                                                    max=upper_bound)

        # Sequential env_types
        elif env_spec.env_type == EnvType.Sequential:
            for _ in range(50):
                for agent in agents:
                    test_agent_actions = wrapped_env.action_spaces[
                        agent].sample()
                    curr_dm_timestep = wrapped_env.step(test_agent_actions)
                    helpers.verify_reward_is_normalized(
                        curr_dm_timestep.reward,
                        agents,
                        env_spec,
                        min=lower_bound,
                        max=upper_bound)
Пример #14
0
    def test_step_2_invalid_when_env_done(self, env_spec: EnvSpec,
                                          helpers: Helpers,
                                          monkeypatch: MonkeyPatch) -> None:
        """Check that stepping a finished env yields LAST and flags a reset.

        The env's done state is faked with ``monkeypatch``. After stepping,
        the returned timestep must have ``StepType.LAST`` and the wrapper's
        ``_reset_next_step`` flag must be ``True``. OpenSpiel envs are skipped.

        Args:
            env_spec: description of the environment under test.
            helpers: test helper collection (env construction, ``mock_done``,
                reset assertions).
            monkeypatch: pytest fixture used to replace ``env_done`` and, for
                sequential envs, ``last`` / ``step`` / ``_has_updated`` on the
                underlying environment.
        """
        wrapped_env, _ = helpers.get_wrapped_env(env_spec)

        if env_spec.env_source == EnvSource.OpenSpiel:
            pytest.skip("Open Spiel does not use the .last() method")

        # Seed environment since we are sampling actions.
        # We need to seed env and action space.
        random_seed = 42
        wrapped_env.seed(random_seed)
        helpers.seed_action_space(wrapped_env, random_seed)

        # Reset first, then read the agent names from the env.
        _ = wrapped_env.reset()
        agents = wrapped_env.agents

        # Parallel env_types
        if env_spec.env_type == EnvType.Parallel:
            test_agents_actions = {
                agent: wrapped_env.action_spaces[agent].sample()
                for agent in agents
            }

            # Replace env_done before stepping so the step sees the mock.
            # NOTE(review): helpers.mock_done presumably reports "done" - confirm.
            monkeypatch.setattr(wrapped_env, "env_done", helpers.mock_done)

            curr_dm_timestep = wrapped_env.step(test_agents_actions)

            helpers.assert_env_reset(wrapped_env, curr_dm_timestep, env_spec)

        # Sequential env_types
        # TODO (Kale-ab): Make this part below less reliant on PZ.
        elif env_spec.env_type == EnvType.Sequential:
            n_agents = wrapped_env.num_agents

            # Mock functions to act like PZ environment is done
            def mock_environment_last() -> Any:
                # ``agent`` is not a parameter: it is looked up at call time
                # from the enclosing scope, where the for-loop below binds it.
                observe = wrapped_env.observation_spaces[agent].sample()
                reward = 0.0
                done = True
                info: Dict = {}
                return observe, reward, done, info

            def mock_step(action: types.Action) -> None:
                # No-op replacement for the underlying environment's step.
                return

            # Mocks certain functions - if functions don't exist, error is not thrown.
            monkeypatch.setattr(wrapped_env._environment,
                                "last",
                                mock_environment_last,
                                raising=False)
            monkeypatch.setattr(wrapped_env._environment,
                                "step",
                                mock_step,
                                raising=False)

            # Step once per agent yielded by agent_iter(n_agents).
            for index, (agent) in enumerate(wrapped_env.agent_iter(n_agents)):
                test_agent_actions = wrapped_env.action_spaces[agent].sample()

                # Mock whole env being done when you reach final agent
                if index == n_agents - 1:
                    monkeypatch.setattr(
                        wrapped_env,
                        "env_done",
                        helpers.mock_done,
                    )

                # Mock update has occurred in step
                monkeypatch.setattr(wrapped_env._environment,
                                    "_has_updated",
                                    True,
                                    raising=False)

                curr_dm_timestep = wrapped_env.step(test_agent_actions)

                # Check each agent is on last step
                assert (curr_dm_timestep.step_type is
                        dm_env.StepType.LAST), "Failed to update step type."

            helpers.assert_env_reset(wrapped_env, curr_dm_timestep, env_spec)

        # Final checks shared by both env types, on the last timestep seen.
        assert (wrapped_env._reset_next_step is
                True), "Failed to set _reset_next_step correctly."
        assert (curr_dm_timestep.step_type is
                dm_env.StepType.LAST), "Failed to update step type."