def test_single_impala_agent_functionality(self):
    """
    Creates a SingleIMPALAAgent and runs it for a few steps in a DeepMindLab Env to test
    all steps of the acting and learning process.
    """
    try:
        from rlgraph.environments.deepmind_lab import DeepmindLabEnv
    except ImportError:
        print("Deepmind Lab not installed: Will skip this test.")
        return

    agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
    env_spec = dict(
        level_id="lt_hallway_slope", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4
    )
    dummy_env = DeepmindLabEnv.from_spec(env_spec)
    agent = SingleIMPALAAgent.from_spec(
        default_dict(dict(type="single-impala-agent"), agent_config),
        architecture="large",
        environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
        state_space=dummy_env.state_space,
        action_space=dummy_env.action_space,
        # TODO: automate this (by lookup from NN).
        internal_states_space=IMPALAAgent.default_internal_states_space,
        # Summarize time-steps to get an overview of the env-stepping speed.
        summary_spec=dict(summary_regexp="time-step", directory="/home/rlgraph/"),
        dynamic_batching=False,
        num_workers=4
    )

    # Count items in the queue.
    print("Items in queue: {}".format(agent.call_api_method("get_queue_size")))

    updates = 5
    update_times = list()
    print("Updating from queue ...")
    for _ in range(updates):
        start_time = time.monotonic()
        agent.update()
        update_times.append(time.monotonic() - start_time)

    print("Updates per second (including waiting for enqueued items): {}/s".format(
        updates / np.sum(update_times)
    ))

    time.sleep(5)
    agent.terminate()
def test_isolated_impala_actor_agent_functionality(self):
    """
    Creates a non-distributed IMPALAAgent (actor) and runs it for a few steps in a DeepMindLab Env
    to test all steps of the acting process.
    """
    try:
        from rlgraph.environments.deepmind_lab import DeepmindLabEnv
    except ImportError:
        print("Deepmind Lab not installed: Will skip this test.")
        return

    agent_config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
    env_spec = dict(
        level_id="seekavoid_arena_01", observations=["RGB_INTERLEAVED", "INSTR"], frameskip=4
    )
    dummy_env = DeepmindLabEnv.from_spec(env_spec)
    agent = IMPALAAgent.from_spec(
        agent_config,
        type="actor",
        architecture="large",
        environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
        state_space=dummy_env.state_space,
        action_space=dummy_env.action_space,
        # TODO: automate this (by lookup from NN).
        internal_states_space=IMPALAAgent.default_internal_states_space,
        execution_spec=dict(
            #mode="distributed",
            #distributed_spec=dict(job="actor", task_index=0, cluster_spec=self.cluster_spec_single_actor),
            disable_monitoring=True
        ),
        # Need a large queue to be able to fill it up (there is no learner consuming items).
        fifo_queue_spec=dict(capacity=10000)
    )

    # Start the Specifiable Server with the Env manually (as monitoring is disabled).
    agent.environment_stepper.environment_server.start_server()

    time_start = time.perf_counter()
    steps = 5
    for _ in range(steps):
        agent.call_api_method("perform_n_steps_and_insert_into_fifo")
    time_total = time.perf_counter() - time_start
    print(
        "Done running {}x{} steps in Deepmind Lab env using IMPALA network in {}sec ({} actions/sec).".format(
            steps, agent.worker_sample_size, time_total, agent.worker_sample_size * steps / time_total
        )
    )

    agent.environment_stepper.environment_server.stop_server()
    agent.terminate()
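
# A minimal standalone-runner sketch. The module-level imports these tests depend on are not
# part of this section; the import paths below are assumptions based on a typical RLgraph
# layout, not confirmed here:
#
#   import time
#   import numpy as np
#   from rlgraph.agents.impala_agents import IMPALAAgent, SingleIMPALAAgent
#   from rlgraph.tests.test_util import config_from_path
#   from rlgraph.utils.util import default_dict
#
# Assuming both methods above belong to a unittest.TestCase subclass, the file can close
# with the standard test-runner guard:
if __name__ == "__main__":
    import unittest
    unittest.main()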