Example #1
    # Assumed imports (module paths follow RLgraph's test suite; adjust if they
    # differ): time, OpenAIGymEnv, DQNAgent, config_from_path, get_backend,
    # Component, print_call_chain.
    def test_act(self):
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        agent = DQNAgent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=env.action_space
        )
        state = env.reset()
        # One action on a real reset state before the timed loops below.
        action = agent.get_action(state)
        print("Component call count = {}".format(Component.call_count))

        state_space = env.state_space
        count = 200

        samples = state_space.sample(count)
        start = time.perf_counter()
        for s in samples:
            action = agent.get_action(s)
        elapsed = time.perf_counter() - start

        print("Took {} s for {} separate actions, mean = {} s per action.".format(elapsed, count, elapsed / count))

        # Now time the same number of states in a single batched call.
        samples = state_space.sample(count)
        start = time.perf_counter()
        actions = agent.get_action(samples)
        elapsed = time.perf_counter() - start
        print("Took {} s for {} batched actions.".format(elapsed, count))
        # Dump RLgraph's per-component call-time profile.
        profile = Component.call_times
        print_call_chain(profile, False, 0.03)
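
The pattern above (time N single-state calls, then one call over a batch of the same N states) generalizes beyond RLgraph. Below is a minimal, self-contained sketch of the same measurement that swaps the agent for a toy numpy "policy"; the weight matrix, shapes, and names are illustrative assumptions, not part of RLgraph:

    import time
    import numpy as np

    rng = np.random.default_rng(0)
    # Toy stand-in for the agent: flat 84x84 state -> 3 action logits.
    weights = rng.standard_normal((84 * 84, 3))

    def get_action(state):
        # Single-state "forward pass": one small matmul per call.
        return int(np.argmax(state.reshape(-1) @ weights))

    count = 200
    states = rng.standard_normal((count, 84, 84))

    start = time.perf_counter()
    for s in states:
        get_action(s)
    elapsed = time.perf_counter() - start
    print("Separate: {:.4f} s total, {:.6f} s per action".format(elapsed, elapsed / count))

    start = time.perf_counter()
    # Batched "forward pass": one large matmul for all states at once.
    batch_actions = np.argmax(states.reshape(count, -1) @ weights, axis=1)
    elapsed = time.perf_counter() - start
    print("Batched:  {:.4f} s for all {} actions".format(elapsed, count))

The batched call typically wins by a wide margin because per-call overhead (Python dispatch plus any framework/session overhead) is paid once instead of N times.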
Example #2
    # Assumed imports as in Example #1, plus numpy as np and ApexAgent.
    def test_post_processing(self):
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/ray_apex_for_pong.json")

        # Use prioritized replay and pin PyTorch/OpenMP to one CPU thread so the
        # post-processing timing below is not skewed by intra-op parallelism.
        agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        agent_config["execution_spec"]["torch_num_threads"] = 1
        agent_config["execution_spec"]["OMP_NUM_THREADS"] = 1

        agent = ApexAgent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=env.action_space
        )
        num_samples = 200
        rewards = np.random.random(size=num_samples)
        states = list(agent.preprocessed_state_space.sample(num_samples))
        actions = agent.action_space.sample(num_samples)
        terminals = np.zeros(num_samples, dtype=np.uint8)
        # next_states: shift states by one step and append one fresh sample.
        next_states = states[1:]
        next_states.extend([agent.preprocessed_state_space.sample(1)])
        next_states = np.asarray(next_states)
        states = np.asarray(states)
        # Uniform importance weights (no prioritization correction in this test).
        weights = np.ones_like(rewards)

        # Single timed pass; increase the range to average over multiple runs.
        for _ in range(1):
            start = time.perf_counter()
            _, loss_per_item = agent.post_process(
                dict(
                    states=states,
                    actions=actions,
                    rewards=rewards,
                    terminals=terminals,
                    next_states=next_states,
                    importance_weights=weights
                )
            )
            print("post process time = {}".format(time.perf_counter() - start))
        # Dump RLgraph's per-component call-time profile.
        profile = Component.call_times
        print_call_chain(profile, False, 0.003)
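
The batch built above follows a standard replay layout: next_states is states shifted by one position and padded with one fresh sample, and the importance weights default to ones. A numpy-only sketch of that construction, with an assumed 84x84x4 preprocessed-frame shape and 3 discrete actions (not read from the actual config):

    import numpy as np

    rng = np.random.default_rng(0)
    num_samples = 200
    state_shape = (84, 84, 4)  # assumed Atari-style preprocessed frames

    states = rng.random((num_samples,) + state_shape, dtype=np.float32)
    actions = rng.integers(0, 3, size=num_samples)     # 3 discrete actions assumed
    rewards = rng.random(num_samples)
    terminals = np.zeros(num_samples, dtype=np.uint8)  # no episode ends in this toy batch

    # Shift by one and append one fresh sample, mirroring the test above.
    next_states = np.concatenate(
        [states[1:], rng.random((1,) + state_shape, dtype=np.float32)], axis=0
    )
    importance_weights = np.ones_like(rewards)         # uniform weights

    batch = dict(
        states=states, actions=actions, rewards=rewards, terminals=terminals,
        next_states=next_states, importance_weights=importance_weights
    )
    assert all(len(v) == num_samples for v in batch.values())

Every array shares the same leading batch dimension, which is what the post_process() call above expects of its input dict.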