Example #1
    def test_dddqn_n_step_memory_insertion_n_step_samples_only(self):
        """
        Tests the n-step post-processing and memory-insertions of DDDQN (with the n_step_only option set to True).
        """
        # Create an Env object.
        env = GridWorld("2x2", actors=1)
        # Create a very standard DDDQN.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_grid_world_2x2_learning.json".format(
                os.path.dirname(__file__)),
            n_step=2,  # fix n-step to 2, just in case.
            gamma=0.5,  # fix gamma for unique-memory-checks purposes
            epsilon=[1.0, 0.5],  # fix epsilon to get lots of random actions.
            preprocessor=Preprocessor(lambda inputs_: tf.one_hot(
                inputs_, depth=env.actors[0].state_space.num_categories)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        algo = DDDQN(config=dqn_config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run for n ticks, then check memory contents for correct n-step tuples.
        for _ in range(5):
            env.run(ticks=100, sync=True, render=False)
            self._check_2x2_grid_world_mem(algo.memory, n_step_only=True)

        env.terminate()
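
With n_step=2 and gamma=0.5 fixed as above, each tuple written into the memory should carry the discounted 2-step return r_t + 0.5 * r_{t+1} (truncated at episode ends) and point two states ahead. A minimal, library-independent sketch of that accumulation, just to illustrate what the memory check can compare against (assumption: the n-step horizon is cut off at terminals):

import numpy as np

def two_step_returns(rewards, terminals, gamma=0.5):
    # Discounted 2-step returns; never look past a terminal (sketch only).
    returns = []
    for t in range(len(rewards) - 1):
        if terminals[t]:
            returns.append(rewards[t])
        else:
            returns.append(rewards[t] + gamma * rewards[t + 1])
    return np.array(returns)

# gamma=0.5 makes mixed GridWorld rewards produce distinct, easy-to-check values.
print(two_step_returns([0.0, -1.0, 1.0, 0.0], [False, False, True, False]))
# -> [-0.5 -0.5  1. ]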
Example #2
    def test_dddqn_learning_on_cart_pole_with_4_actors(self):
        # Create an Env object.
        env = OpenAIGymEnv("CartPole-v0", actors=4)

        # Create a Config.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_cart_pole_learning_n_actors.json".format(os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=2000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 10
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > 160.0)

        env.terminate()
Example #3
    def test_dddqn_learning_on_mountain_car_4_actors(self):
        # Note: MountainCar is tricky because of its sparse reward function, hence we need a quite
        # large episode cutoff to solve it with ease.
        # With a large enough n-step, the algo should be able to learn the env very quickly after having solved
        # it once via randomness.
        env = OpenAIGymEnv("MountainCar-v0", actors=4, max_episode_steps=5000)

        # Create a DDDQNConfig.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_mountain_car_learning_n_actors.json".format(
                os.path.dirname(
                    __file__)),  # TODO: filename wrong (num actors)
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=7000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 10
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(
            last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > -200.0)

        env.terminate()
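
The note about n-step above is what makes this test feasible within 7000 ticks: once an episode happens to reach the goal, an n-step target lets the last n transitions before the terminal bootstrap from the observed (short) reward tail instead of from a still-pessimistic Q estimate, so the success propagates backwards much faster. A rough, library-independent illustration of that effect:

import numpy as np

def n_step_targets(rewards, bootstrap_q, n, gamma=1.0):
    # Sketch: n-step targets for an episode that terminates right after the last reward.
    T = len(rewards)
    targets = np.zeros(T)
    for t in range(T):
        horizon = min(n, T - t)
        ret = sum(gamma ** k * rewards[t + k] for k in range(horizon))
        if t + horizon < T:  # bootstrap only if the horizon did not hit the terminal
            ret += gamma ** horizon * bootstrap_q
        targets[t] = ret
    return targets

# MountainCar-style tail: -1 per step until the goal ends the episode; the untrained
# net still predicts a pessimistic Q of -100 everywhere.
rewards = [-1.0] * 10
print(n_step_targets(rewards, bootstrap_q=-100.0, n=1))  # only the final target reflects the goal
print(n_step_targets(rewards, bootstrap_q=-100.0, n=5))  # the last 5 targets already reflect it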
Example #4
    def test_dddqn_learning_on_car_racing(self):
        # Action-map: Discrete to Continuous, 6 actions (an earlier 9-action variant with
        # separate noop/left/right steering actions is kept as a comment below).
        # 0=brake only
        # 1=brake and left
        # 2=brake and right
        # 3=gas only
        # 4=gas and left
        # 5=gas and right
        def action_map(a):
            b = np.reshape(a, (-1, 1))
            # Earlier 9-action variant (noop/left/right without brake or gas):
            # b == 0, [0.0, 0.0, 0.0]; b == 1, [-1.0, 0.0, 0.0]; b == 2, [1.0, 0.0, 0.0]
            return np.where(
                b == 0, [0.0, 0.0, 1.0], np.where(
                    b == 1, [-1.0, 0.0, 1.0], np.where(
                        b == 2, [1.0, 0.0, 1.0], np.where(
                            b == 3, [0.0, 1.0, 0.0], np.where(
                                b == 4, [-1.0, 1.0, 0.0], [1.0, 1.0, 0.0]
                            )
                        )
                    )
                )
            )

        # Create an Env object.
        env = OpenAIGymEnv("CarRacing-v0", actors=1, action_map=action_map)

        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_car_racing_learning.json".format(os.path.dirname(__file__)),
            preprocessor=Preprocessor(
                #ImageCrop(x=0, y=0, width=150, height=167),
                GrayScale(keepdims=True),
                ImageResize(width=84, height=84, interpolation="bilinear"),
                lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),
                # simple preprocessor: [0,255] to [-1.0,1.0]
                Sequence(sequence_length=4, adddim=False)
            ),
            state_space=env.actors[0].state_space,
            action_space=Int(6)
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=20000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()
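
The action_map above is what lets a discrete-action DDDQN drive CarRacing's 3-dimensional continuous control vector (steer, gas, brake); that is also why the config overrides action_space with Int(6). A quick check of the mapping, with the action_map function from the test in scope (expected output annotated by hand):

import numpy as np

print(action_map(np.array([0, 1, 2, 3, 4, 5])))
# [[ 0.  0.  1.]   0: brake only
#  [-1.  0.  1.]   1: brake and left
#  [ 1.  0.  1.]   2: brake and right
#  [ 0.  1.  0.]   3: gas only
#  [-1.  1.  0.]   4: gas and left
#  [ 1.  1.  0.]]  5: gas and right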
Example #5
    def test_dddqn_learning_on_grid_world_2x2(self):
        # Create an Env object.
        env = GridWorld("2x2", actors=1)

        # Add the preprocessor.
        preprocessor = Preprocessor(
            lambda inputs_: tf.one_hot(inputs_, depth=env.actors[0].state_space.num_categories)
        )
        # Create a Config.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_grid_world_2x2_learning.json".format(os.path.dirname(__file__)),
            preprocessor=preprocessor,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=3000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n >= 0.6)

        # Check learnt Q-function (using our dueling layer).
        a_and_v = algo.Q(one_hot(np.array([0, 0, 0, 0, 1, 1, 1, 1]), depth=4))
        q = dueling(a_and_v, np.array([0, 1, 2, 3, 0, 1, 2, 3]))
        print(q)
        self.assertTrue(q[1] < min(q[2:]) and q[1] < q[0])  # q(s=0,a=right) is the worst
        check(q[5], 1.0, atol=0.4)  # Q(1,->) is close to 1.0.
        #self.assertTrue(q[5] > max(q[:4]) and q[5] > max(q[6:]))  # q(s=1,a=right) is the best
        #check(q, [0.8, -5.0, 0.9, 0.8, 0.8, 1.0, 0.9, 0.9], decimals=1)  # a=up,down,left,right

        env.terminate()
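
The Q-value check at the end relies on the standard dueling aggregation: the network emits a state value V(s) plus per-action advantages A(s,a), and Q-values are formed by subtracting the mean advantage for identifiability, Q(s,a) = V(s) + A(s,a) - mean_a' A(s,a'). The dueling() helper used above presumably applies this to the a_and_v output; here is a small numpy sketch with made-up numbers for a single state:

import numpy as np

def dueling_q(advantages, value):
    # Q(s, a) = V(s) + A(s, a) - mean_a'(A(s, a'))
    advantages = np.asarray(advantages, dtype=np.float32)
    return value + advantages - advantages.mean()

# Hypothetical raw network outputs for one state (4 actions).
print(dueling_q(advantages=[0.1, -0.2, 0.0, 0.5], value=0.8))
# -> [0.8 0.5 0.7 1.2]  (the action with the largest advantage gets the largest Q)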
Example #6
    def test_dddqn_compilation(self):
        """
        Tests the constructor of DDDQN.
        """
        env = OpenAIGymEnv("MsPacman-v0", actors=4)
        # Create a Config (for any Atari game).
        config = DDDQNConfig.make(
            # The Breakout config should work for MsPacman as well.
            "{}/../configs/dddqn_breakout_learning.json".format(
                os.path.dirname(__file__)),
            memory_capacity=1000,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        dddqn = DDDQN(config)
        print("DDDQN built ({}).".format(dddqn))

        env.terminate()
Example #7
    def test_dddqn_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv(
            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
            frame_skip=(2, 5)
        )

        preprocessor = Preprocessor(
            ImageCrop(x=5, y=29, width=150, height=167),
            GrayScale(keepdims=True),
            ImageResize(width=84, height=84, interpolation="bilinear"),
            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
            Sequence(sequence_length=4, adddim=False)
        )
        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
            preprocessor=preprocessor,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()
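
The preprocessor comment "[0,255] to [-1.0,1.0]" is easy to verify: dividing by 128 and subtracting 1 maps uint8 pixel values into roughly the [-1.0, 1.0) range (the Sequence stage then stacks the last four preprocessed frames so the network can infer motion). A quick check of the scaling step on its own:

import numpy as np

pixels = np.array([0, 64, 128, 255], dtype=np.uint8)
print(((pixels / 128) - 1.0).astype(np.float32))
# -> [-1.0, -0.5, 0.0, ~0.992]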