Exemplo n.º 1
0
    def test_sequential_vector_env(self):
        """Drive a 4-env ``SequentialVectorEnv`` over the 2x2 gridworld with fixed actions.

        Every environment receives the same action on each step, and the
        resulting state, reward and terminal flag are checked for *each*
        environment individually.

        Board notation used in the comments below: ``X`` = player position,
        ``H`` = hole, ``G`` = goal.
        """
        # Action codes as used by the step calls in this test.
        up, right, down, left = 0, 1, 2, 3

        num_envs = 4
        env = SequentialVectorEnv(num_environments=num_envs,
                                  env_spec={
                                      "type": "gridworld",
                                      "world": "2x2"
                                  })

        def step_and_check(action, state, reward, terminal):
            """Apply `action` in all envs and verify every env's outcome."""
            states, rewards, terminals, _ = env.step([action] * num_envs)
            # Plain loops on purpose: unittest's assert* methods return None,
            # so wrapping them in all(...) would short-circuit after the first
            # element and silently skip the remaining environments.
            for i, s_ in enumerate(states):
                self.assertEqual(s_, state, "state of env {}".format(i))
            for i, r_ in enumerate(rewards):
                self.assertEqual(r_, reward, "reward of env {}".format(i))
            for i, t_ in enumerate(terminals):
                self.assertEqual(bool(t_), terminal,
                                 "terminal flag of env {}".format(i))

        # Simple test runs with fixed actions.
        s = env.reset(index=0)  # ["XH", " G"]
        self.assertEqual(s, 0)

        for i, s_ in enumerate(env.reset_all()):
            self.assertEqual(s_, 0, "state of env {}".format(i))

        step_and_check(down, state=1, reward=-1.0,
                       terminal=False)  # down: [" H", "XG"]
        step_and_check(right, state=3, reward=1.0,
                       terminal=True)  # right: [" H", " X"] -> goal reached

        # Reset every env one by one, then walk straight into the hole.
        for i in range(num_envs):
            env.reset(index=i)  # ["XH", " G"]
        step_and_check(right, state=2, reward=-5.0,
                       terminal=True)  # right: [" X", " G"] -> in the hole

        # Run against a wall.
        env.reset_all()  # ["XH", " G"]
        step_and_check(left, state=0, reward=-1.0,
                       terminal=False)  # left: ["XH", " G"] (position unchanged)
        step_and_check(down, state=1, reward=-1.0,
                       terminal=False)  # down: [" H", "XG"]
        step_and_check(up, state=0, reward=-1.0,
                       terminal=False)  # up: ["XH", " G"]
Exemplo n.º 2
0
    def test_sequential_vector_env(self):
        """Measure the stepping throughput of an agent on a ``SequentialVectorEnv``.

        Builds a vector env with ``self.num_vector_envs`` environments from
        ``self.env_spec``, creates an agent from ``configs/dqn_vector_env.json``
        and runs ``int(self.samples / self.num_vector_envs)`` batched steps.
        Environments that report a terminal are reset individually. Finally the
        throughput (``self.samples`` divided by the elapsed monotonic time) is
        printed. Nothing is asserted.
        """
        env_count = self.num_vector_envs
        vector_env = SequentialVectorEnv(num_environments=env_count,
                                         env_spec=self.env_spec,
                                         num_background_envs=2)

        # Per the original author, this config follows the 2015 DQN parameters
        # as closely as possible. The env's state and action spaces are passed
        # through unchanged.
        agent = Agent.from_spec(
            config_from_path("configs/dqn_vector_env.json"),
            state_space=vector_env.state_space,
            action_space=vector_env.action_space)

        states = vector_env.reset_all()
        started_at = time.monotonic()
        ep_lengths = [0] * env_count

        num_iterations = int(self.samples / self.num_vector_envs)
        for _ in range_(num_iterations):
            # One batched action query and one batched env step per iteration.
            actions, _preprocessed = agent.get_action(
                states, extra_returns="preprocessed_states")
            states, rewards, terminals, infos = vector_env.step(actions)
            ep_lengths = [length + 1 for length in ep_lengths]

            for env_index, is_terminal in enumerate(terminals):
                if not is_terminal:
                    continue
                print("reset env {} after {} states".format(
                    env_index, ep_lengths[env_index]))
                # NOTE(review): the state returned by reset() is discarded, so
                # `states` keeps the entry produced by step() for this env --
                # confirm that this is intended.
                vector_env.reset(env_index)
                ep_lengths[env_index] = 0

        runtime = time.monotonic() - started_at
        throughput = self.samples / runtime

        print('Testing vector env {} performance:'.format(
            self.env_spec["gym_env"]))
        print('Ran {} steps, throughput: {} states/s, total time: {} s'.format(
            self.samples, throughput, runtime))