Example #1
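All of the snippets below are methods of an RLgraph agent test case and are shown without their surrounding class. A plausible import block, assuming RLgraph's usual module layout (paths may differ between versions), would be:

    import numpy as np

    from rlgraph.agents import DQNAgent, PPOAgent, SACAgent
    from rlgraph.environments import GridWorld
    from rlgraph.execution import SingleThreadedWorker
    from rlgraph.spaces import FloatBox, IntBox
    from rlgraph.tests.test_util import config_from_path, recursive_assert_almost_equal
    from rlgraph.utils.numpy import one_hot
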
    def test_sac_2x2_grid_world_with_container_actions(self):
        """
        Creates a SAC agent and runs it via a Runner on a simple 2x2 GridWorld using container actions.
        """
        # ftj = forward + turn + jump
        env_spec = dict(world="2x2", action_type="ftj", state_representation="xy+orientation")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path("configs/sac_agent_for_2x2_gridworld_with_container_actions.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        agent = SACAgent.from_spec(
            agent_config,
            state_space=FloatBox(shape=(4,)),
            action_space=dummy_env.action_space,
        )

        time_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=False,
            render=False
        )
        results = worker.execute_timesteps(time_steps, use_exploration=True)
        print(results)
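
Note that this SAC example is the only one below that passes worker_executes_preprocessing=False: the preprocessing_spec popped from the JSON config is still handed to the worker, but the agent applies it in-graph rather than the worker applying it to raw environment states. The actual spec lives in the config file; a hypothetical RLgraph-style spec (a list of preprocessor-layer dicts, values invented here for illustration) could look like:

    # Hypothetical preprocessing_spec, as it might appear in the JSON config:
    preprocessing_spec = [dict(
        type="reshape",
        flatten=True  # flatten the state into a 1D float vector for the network
    )]
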
Example #2
    def test_double_dqn_on_2x2_grid_world(self):
        """
        Creates a double DQNAgent and runs it via a Runner on a simple 2x2 GridWorld.
        """
        env_spec = dict(world="2x2")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            agent_config,
            dueling_q=False,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
            execution_spec=dict(seed=10),
            update_spec=dict(update_interval=4,
                             batch_size=24,
                             sync_interval=32),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
            store_last_q_table=True)

        time_steps = 1000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print("STATES:\n{}".format(agent.last_q_table["states"]))
        print("\n\nQ(s,a)-VALUES:\n{}".format(
            np.round(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], 350)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"],
                                   agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            assert state in expected_q_values_per_state, \
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)
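
The fixture self.grid_world_2x2_flattened_state_space is defined outside this snippet. Judging from the one-hot state keys in expected_q_values_per_state, it is presumably a length-4 float vector over the four grid cells; an assumed definition:

    # Assumed fixture: one-hot encoding over the four cells of the 2x2 grid.
    grid_world_2x2_flattened_state_space = FloatBox(shape=(4,), add_batch_rank=True)
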
Example #3
    def test_multi_gpu_ppo_agent_learning_test_gridworld_2x2(self):
        """
        Tests whether the multi-GPU strategy can learn successfully on a multi-GPU system, but
        also runs on a CPU-only system using fake-GPU logic for testing purposes.
        """
        env_spec = dict(type="grid-world", world="2x2")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/multi_gpu_ppo_for_2x2_gridworld.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = PPOAgent.from_spec(
            agent_config,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
        )

        time_steps = 10000
        worker = SingleThreadedWorker(env_spec=env_spec,
                                      agent=agent,
                                      worker_executes_preprocessing=True,
                                      preprocessing_spec=preprocessing_spec)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        # Assume we have learned something.
        # TODO: This test needs more tuning. -1.0 is not great for the 2x2 grid world.
        self.assertGreater(results["mean_episode_reward"], -1.0)
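
The fake-GPU logic mentioned in the docstring is configured in the JSON file rather than in the test itself. A hypothetical execution_spec fragment (key names assumed, shown here as a Python dict) might read:

    # Hypothetical fragment of the multi-GPU JSON config:
    execution_spec = dict(gpu_spec=dict(
        gpus_enabled=True,
        max_usable_gpus=2,
        fake_gpus_if_necessary=True  # emulate GPU towers on a CPU-only machine
    ))
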
Example #4
    def test_double_dqn_on_2x2_grid_world_single_action_to_container(self):
        """
        Tests how DQN solves a mapping of a single integer to multiple actions (as opposed to using container
        actions).
        """
        # ftj = forward + turn + jump
        env_spec = dict(world="2x2",
                        action_type="ftj",
                        state_representation="xy+orientation")
        agent_config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld_single_to_container.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        action_space = IntBox(0, 18)
        agent = DQNAgent.from_spec(agent_config,
                                   huber_loss=True,
                                   double_q=True,
                                   dueling_q=True,
                                   state_space=FloatBox(shape=(4, )),
                                   action_space=action_space,
                                   store_last_q_table=True)

        time_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True,
            render=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)
        print(results)
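
The flat IntBox(0, 18) suggests the ftj container space enumerates 18 action combinations, consistent with forward(3) x turn(3) x jump(2) = 18; the 3 forward and 2 jump q-values asserted in Example #8 below support this, while the turn dimension is an assumption. A sketch of the int-to-container decoding, with a purely illustrative sub-action ordering:

    def decode_flat_action(a):
        # Illustrative only: map a flat int in [0, 18) onto the three sub-actions.
        forward, rest = a // 6, a % 6
        turn, jump = rest // 2, rest % 2
        return dict(forward=forward, turn=turn, jump=jump)
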
Example #5
    def test_ppo_on_2x2_grid_world_with_container_actions(self):
        """
        Creates a PPO agent and runs it via a Runner on a simple 2x2 GridWorld using container actions.
        """
        # -----
        # |^|H|
        # -----
        # | |G|  ^=start, looking up
        # -----

        # ftj = forward + turn + jump
        env_spec = dict(world="2x2",
                        action_type="ftj",
                        state_representation="xy+orientation")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/ppo_agent_for_2x2_gridworld_with_container_actions.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        agent = PPOAgent.from_spec(agent_config,
                                   state_space=FloatBox(shape=(4, )),
                                   action_space=dummy_env.action_space)

        time_steps = 5000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True,
            render=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print(results)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertLessEqual(results["episodes_executed"], time_steps)
        # Assume we have learned something.
        self.assertGreaterEqual(results["mean_episode_reward"], -2.0)
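
For action_type="ftj", dummy_env.action_space is a container (Dict) space rather than a single IntBox. Based on the q-value shapes checked in Example #8 below, it presumably looks roughly like the following (the turn sub-space is an assumption):

    from rlgraph.spaces import Dict, IntBox

    # Assumed layout of dummy_env.action_space for action_type="ftj":
    action_space = Dict({
        "forward": IntBox(3),  # matches the 3 forward q-values asserted in Example #8
        "turn": IntBox(3),     # assumed
        "jump": IntBox(2)      # matches the 2 jump q-values asserted in Example #8
    })
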
Example #6
    def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
        """
        Tests whether the multi-GPU strategy can learn successfully on a multi-GPU system, but
        also runs on a CPU-only system using fake-GPU logic for testing purposes.
        """
        env_spec = dict(type="grid-world", world="2x2")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/multi_gpu_dqn_for_2x2_gridworld.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            agent_config,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
        )

        time_steps = 1000
        worker = SingleThreadedWorker(env_spec=env_spec,
                                      agent=agent,
                                      worker_executes_preprocessing=True,
                                      preprocessing_spec=preprocessing_spec)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        # Merge q-tables of all four GPUs:
        agent.last_q_table["q_values"] = agent.last_q_table[
            "q_values"].reshape((48, 4))

        print("STATES:\n{}".format(agent.last_q_table["states"]))
        print("\n\nQ(s,a)-VALUES:\n{}".format(
            np.round(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 2)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"],
                                   agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            assert state in expected_q_values_per_state, \
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)
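
The reshape to (48, 4) merges the q-value blocks that store_last_q_table apparently records separately per (fake) GPU tower; the intermediate shape depends on how many states each tower recorded. Schematically, with an invented per-tower layout:

    # Schematic merge: 4 towers x 12 recorded states x 4 actions -> 48 rows.
    per_gpu_q = np.zeros((4, 12, 4))
    merged_q = per_gpu_q.reshape((48, 4))  # one row of 4 q-values per state
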
Example #7
    def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
        """
        Tests whether the multi-GPU strategy can learn successfully on a multi-GPU system, but
        also runs on a CPU-only system using fake-GPU logic for testing purposes.
        """
        env_spec = dict(type="grid-world", world="2x2")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/multi_gpu_dqn_for_2x2_gridworld.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            agent_config,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
        )

        time_steps = 2000
        worker = SingleThreadedWorker(env_spec=env_spec,
                                      agent=agent,
                                      worker_executes_preprocessing=True,
                                      preprocessing_spec=preprocessing_spec)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 2)

        # Check all learnt Q-values.
        q_values = agent.graph_executor.execute(
            ("get_q_values", one_hot(np.array([0, 1]), depth=4)))[:]
        recursive_assert_almost_equal(q_values[0], (0.8, -5, 0.9, 0.8),
                                      decimals=1)
        recursive_assert_almost_equal(q_values[1], (0.8, 1.0, 0.9, 0.9),
                                      decimals=1)
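
The one_hot call feeds the two non-terminal grid states, 0 and 1, into the agent's get_q_values graph op as depth-4 vectors:

    one_hot(np.array([0, 1]), depth=4)
    # -> array([[1., 0., 0., 0.],
    #           [0., 1., 0., 0.]])
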
Example #8
    def test_double_dqn_on_2x2_grid_world_with_container_actions(self):
        """
        Creates a double DQNAgent and runs it via a Runner on a simple 2x2 GridWorld using container actions.
        """
        # ftj = forward + turn + jump
        env_spec = dict(world="2x2",
                        action_type="ftj",
                        state_representation="xy+orientation")
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld_with_container_actions.json")
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        agent = DQNAgent.from_spec(agent_config,
                                   double_q=True,
                                   dueling_q=False,
                                   state_space=FloatBox(shape=(4, )),
                                   action_space=dummy_env.action_space,
                                   execution_spec=dict(seed=15),
                                   store_last_q_table=True)

        time_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True,
            render=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print("LAST q-table:\n{}".format(agent.last_q_table))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -7)
        self.assertGreaterEqual(results["max_episode_reward"], -1.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 3)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (0., 0., -1., 0.): {
                "forward": (-5.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 0., -1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 0., 1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., -1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 0., -1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 0., 1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
        }
        for state, q_values_forward, q_values_jump in zip(
                agent.last_q_table["states"],
                agent.last_q_table["q_values"]["forward"],
                agent.last_q_table["q_values"]["jump"]):
            state, q_values_forward, q_values_jump = tuple(state), tuple(
                q_values_forward), tuple(q_values_jump)
            assert state in expected_q_values_per_state, \
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
            recursive_assert_almost_equal(
                q_values_forward,
                expected_q_values_per_state[state]["forward"],
                decimals=0)
            recursive_assert_almost_equal(
                q_values_jump,
                expected_q_values_per_state[state]["jump"],
                decimals=0)
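
For reference, the zip above implies the following structure for agent.last_q_table when the action space is a container (shapes inferred from the loop; N is the number of recorded transitions):

    # Inferred structure of agent.last_q_table for the ftj container space:
    # {
    #     "states": np.ndarray of shape (N, 4),      # xy+orientation vectors
    #     "q_values": {
    #         "forward": np.ndarray of shape (N, 3),
    #         "jump": np.ndarray of shape (N, 2),
    #     }
    # }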