def test_act(self):
    """
    Times action fetching of a DQN agent on Pong, one state at a time vs. batched.

    The agent is built from "configs/ray_apex_for_pong.json". Timings and the
    recorded component call chain are printed; nothing is asserted.
    """
    pong = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)

    spec = config_from_path("configs/ray_apex_for_pong.json")
    # The pytorch backend gets a different memory type than the one in the config file.
    if get_backend() == "pytorch":
        spec["memory_spec"]["type"] = "mem_prioritized_replay"

    # Uses 2015 DQN parameters as closely as possible.
    # Try with "reduced" action space (actually only 3 actions, up, down, no-op).
    agent = DQNAgent.from_spec(
        spec,
        state_space=pong.state_space,
        action_space=pong.action_space
    )

    # One action on a real initial state before any timing starts.
    agent.get_action(pong.reset())
    print("Component call count = {}".format(Component.call_count))

    num_states = 200
    space = pong.state_space

    # Part 1: `num_states` separate single-state calls.
    single_states = space.sample(num_states)
    t0 = time.perf_counter()
    for single_state in single_states:
        agent.get_action(single_state)
    elapsed = time.perf_counter() - t0
    print("Took {} s for {} separate actions, mean = {}".format(elapsed, num_states, elapsed / num_states))

    # Part 2: one single call on a batch of `num_states` freshly sampled states.
    batch = space.sample(num_states)
    t0 = time.perf_counter()
    agent.get_action(batch)
    elapsed = time.perf_counter() - t0
    print("Took {} s for {} batched actions.".format(elapsed, num_states))

    # Dump the call times collected on the Component class.
    print_call_chain(Component.call_times, False, 0.03)
def test_readme_example(self):
    """
    Runs the README usage example: build a DQN agent for CartPole from a json
    config, fetch one action, step the environment, observe and update.
    """
    from rlgraph.agents import DQNAgent
    from rlgraph.environments import OpenAIGymEnv

    cartpole = OpenAIGymEnv('CartPole-v0')

    # Create from .json file or dict, see agent API for all
    # possible configuration parameters.
    agent_spec = config_from_path("../../examples/configs/dqn_cartpole.json")
    agent = DQNAgent.from_spec(
        agent_spec,
        state_space=cartpole.state_space,
        action_space=cartpole.action_space
    )

    # Get an action (together with the preprocessed state) for the initial state.
    first_state = cartpole.reset()
    action_out = agent.get_action(states=first_state, extra_returns="preprocessed_states")
    preprocessed_state, action = action_out

    # Execute step in environment; the info return is not needed here.
    next_state, reward, terminal, _ = cartpole.step(action)

    # Observe result.
    agent.observe(
        preprocessed_states=preprocessed_state,
        actions=action,
        internals=[],
        next_states=next_state,
        rewards=reward,
        terminals=terminal
    )

    # Call update when desired (the returned loss is not inspected here).
    agent.update()
def test_openai_atari_env(self):
    """
    Steps a Pong environment with randomly sampled actions and checks the
    values returned by `reset` and `step`.

    Checks that the mean of each state lies in [0, 255], that each reward is
    a float32 numpy array and that each terminal flag is a plain bool. The
    accumulated reward is printed at the end.
    """
    env = OpenAIGymEnv("Pong-v0")

    # Simple test run with actions sampled from the env's action space.
    s = env.reset()
    # Assert we have pixels.
    self.assertGreaterEqual(np.mean(s), 0)
    self.assertLessEqual(np.mean(s), 255)

    accum_reward = 0.0
    for _ in range(100):
        s, r, t, _ = env.step(env.action_space.sample())
        # Use unittest assertions (not bare `assert`) so the checks are not
        # stripped under `python -O` and match the other checks in this test.
        self.assertIsInstance(r, np.ndarray)
        self.assertEqual(r.dtype, np.float32)
        self.assertIsInstance(t, bool)
        self.assertGreaterEqual(np.mean(s), 0)
        self.assertLessEqual(np.mean(s), 255)
        accum_reward += r

    # The format string needs a placeholder, otherwise the reward is never shown.
    print("Accumulated Reward: {}".format(accum_reward))