def test_dddqn_n_step_memory_insertion_n_step_samples_only(self):
        """
        Tests the n-step post-processing and memory-insertions of DDDQN (with the n_step_only option set to True).
        """
        # Create an Env object.
        env = GridWorld("2x2", actors=1)
        # Create a fairly standard DDDQN.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_grid_world_2x2_learning.json".format(
                os.path.dirname(__file__)),
            n_step=2,  # fix n-step to 2, just in case.
            gamma=0.5,  # fix gamma for unique-memory-checks purposes
            epsilon=[1.0, 0.5],  # fix epsilon to get lots of random actions.
            preprocessor=Preprocessor(lambda inputs_: tf.one_hot(
                inputs_, depth=env.actors[0].state_space.num_categories)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        algo = DDDQN(config=dqn_config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run for n ticks, then check memory contents for correct n-step tuples.
        for _ in range(5):
            env.run(ticks=100, sync=True, render=False)
            self._check_2x2_grid_world_mem(algo.memory, n_step_only=True)

        env.terminate()
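
    # A minimal sketch (not part of the test) of the 2-step tuple reward that
    # _check_2x2_grid_world_mem expects to find in memory, assuming the standard
    # n-step return r_t + gamma * r_{t+1} with gamma=0.5 as configured above;
    # DDDQN's actual post-processing may differ in detail.
    @staticmethod
    def _two_step_return_sketch(r_t, r_tp1, gamma=0.5):
        return r_t + gamma * r_tp1  # e.g. r_t=0.0, r_tp1=1.0 -> 0.5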
    def test_dddqn_learning_on_cart_pole_with_4_actors(self):
        # Create an Env object.
        env = OpenAIGymEnv("CartPole-v0", actors=4)

        # Create a Config.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_cart_pole_learning_n_actors.json".format(os.path.dirname(__file__)),
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=2000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 10
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > 160.0)

        env.terminate()
    def test_dddqn_learning_on_mountain_car_4_actors(self):
        # Note: MountainCar is tricky due to its sparse reward function, hence we need a fairly
        # large episode-step cutoff to make it solvable at all.
        # With a large enough n-step, the algo should learn the env very quickly once it has been
        # solved a first time through random exploration (see the n-step target sketch after this test).
        env = OpenAIGymEnv("MountainCar-v0", actors=4, max_episode_steps=5000)

        # Create a DDDQNConfig.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_mountain_car_learning_n_actors.json".format(
                os.path.dirname(
                    __file__)),  # TODO: filename wrong (num actors)
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=7000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        last_n = 10
        mean_last_episodes = np.mean(env.historic_episodes_returns[-last_n:])
        print("Avg return over last {} episodes: {}".format(
            last_n, mean_last_episodes))
        self.assertTrue(mean_last_episodes > -200.0)

        env.terminate()
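
    # A small sketch of the n-step bootstrap target referred to in the note above
    # (the textbook formula, not necessarily the library's exact implementation):
    #   R_t^(n) = sum_{k=0..n-1} gamma^k * r_{t+k} + gamma^n * max_a Q(s_{t+n}, a)
    @staticmethod
    def _n_step_target_sketch(rewards, bootstrap_q, gamma=0.99):
        target = bootstrap_q
        for r in reversed(rewards):  # rewards r_t .. r_{t+n-1}
            target = r + gamma * target
        return target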
    def test_dddqn_learning_on_car_racing(self):
        # Action-map: Discrete to continuous ([steering, gas, brake]), 6 actions:
        # 0=brake only
        # 1=brake and left
        # 2=brake and right
        # 3=gas only
        # 4=gas and left
        # 5=gas and right
        # (The original 9-action map also included noop and pure left/right
        # steering; those branches are disabled here to match Int(6) below.)
        def action_map(a):
            b = np.reshape(a, (-1, 1))
            return np.where(
                b == 0, [0.0, 0.0, 1.0], np.where(
                    b == 1, [-1.0, 0.0, 1.0], np.where(
                        b == 2, [1.0, 0.0, 1.0], np.where(
                            b == 3, [0.0, 1.0, 0.0], np.where(
                                b == 4, [-1.0, 1.0, 0.0], [1.0, 1.0, 0.0])))))
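
        # Illustrative spot-check of the mapping (values follow directly from the
        # np.where chain above): 0 -> brake only, 3 -> gas only.
        assert np.allclose(action_map(np.array([0, 3])),
                           [[0.0, 0.0, 1.0], [0.0, 1.0, 0.0]])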

        # Create an Env object.
        env = OpenAIGymEnv("CarRacing-v0", actors=1, action_map=action_map)

        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_car_racing_learning.json".format(os.path.dirname(__file__)),
            preprocessor=Preprocessor(
                #ImageCrop(x=0, y=0, width=150, height=167),
                GrayScale(keepdims=True),
                ImageResize(width=84, height=84, interpolation="bilinear"),
                lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),
                # simple preprocessor: [0,255] to [-1.0,1.0]
                Sequence(sequence_length=4, adddim=False)
            ),
            state_space=env.actors[0].state_space,
            action_space=Int(6)
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=20000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()
    def test_dddqn_compilation(self):
        """
        Tests the constructor of DDDQN.
        """
        env = OpenAIGymEnv("MsPacman-v0", actors=4)
        # Create a Config (for any Atari game).
        config = DDDQNConfig.make(
            # The Breakout config works just as well for MsPacman here.
            "{}/../configs/dddqn_breakout_learning.json".format(
                os.path.dirname(__file__)),
            memory_capacity=1000,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space)
        dddqn = DDDQN(config)
        print("DDDQN built ({}).".format(dddqn))

        env.terminate()
    def test_dddqn_learning_on_grid_world_2x2(self):
        # Create an Env object.
        env = GridWorld("2x2", actors=1)

        # Add the preprocessor.
        preprocessor = Preprocessor(
            lambda inputs_: tf.one_hot(inputs_, depth=env.actors[0].state_space.num_categories)
        )
        # Create a Config.
        dqn_config = DDDQNConfig.make(
            "{}/../configs/dddqn_grid_world_2x2_learning.json".format(os.path.dirname(__file__)),
            preprocessor=preprocessor,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )

        # Create an Algo object.
        algo = DDDQN(config=dqn_config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(ticks=3000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n >= 0.6)

        # Check learnt Q-function (using our dueling layer).
        a_and_v = algo.Q(one_hot(np.array([0, 0, 0, 0, 1, 1, 1, 1]), depth=4))
        q = dueling(a_and_v, np.array([0, 1, 2, 3, 0, 1, 2, 3]))
        print(q)
        self.assertTrue(q[1] < min(q[2:]) and q[1] < q[0])  # q(s=0,a=right) is the worst
        check(q[5], 1.0, atol=0.4)  # Q(1,->) is close to 1.0.
        #self.assertTrue(q[5] > max(q[:4]) and q[5] > max(q[6:]))  # q(s=1,a=right) is the best
        #check(q, [0.8, -5.0, 0.9, 0.8, 0.8, 1.0, 0.9, 0.9], decimals=1)  # a=up,down,left,right

        env.terminate()
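
    # A minimal numpy sketch of the dueling aggregation checked above:
    # Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'). The column layout assumed here
    # (advantage outputs first, the single V output last) is only an assumption;
    # the library's `dueling` helper is the authoritative implementation.
    @staticmethod
    def _dueling_sketch(a_and_v, actions):
        adv, v = a_and_v[:, :-1], a_and_v[:, -1]
        return v + adv[np.arange(len(actions)), actions] - adv.mean(axis=1)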
    def test_dddqn_learning_on_breakout(self):
        # Create an Env object.
        env = OpenAIGymEnv(
            "Breakout-v4", actors=16, fire_after_reset=True, episodic_life=True, max_num_noops_after_reset=8,
            frame_skip=(2, 5)
        )

        preprocessor = Preprocessor(
            ImageCrop(x=5, y=29, width=150, height=167),
            GrayScale(keepdims=True),
            ImageResize(width=84, height=84, interpolation="bilinear"),
            lambda inputs_: ((inputs_ / 128) - 1.0).astype(np.float32),  # simple preprocessor: [0,255] to [-1.0,1.0]
            Sequence(sequence_length=4, adddim=False)
        )
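        # The network thus sees the last 4 preprocessed (84x84, grayscale,
        # [-1.0, 1.0]-scaled) frames stacked together; the exact stacking layout
        # with adddim=False depends on the Sequence component.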
        # Create a DDDQNConfig.
        config = DDDQNConfig.make(
            "{}/../configs/dddqn_breakout_learning.json".format(os.path.dirname(__file__)),
            preprocessor=preprocessor,
            state_space=env.actors[0].state_space,
            action_space=env.actors[0].action_space
        )
        # Create an Algo object.
        algo = DDDQN(config=config, name="my-dddqn")

        # Point actor(s) to the algo.
        env.point_all_actors_to_algo(algo)

        # Run and wait for env to complete.
        env.run(actor_time_steps=10000000, sync=True, render=debug.RenderEnvInLearningTests)

        # Check last n episode returns.
        n = 10
        mean_last_n = np.mean(env.historic_episodes_returns[-n:])
        print("Avg return over last {} episodes: {}".format(n, mean_last_n))
        self.assertTrue(mean_last_n > 150.0)

        env.terminate()