示例#1
0
    def test_single_impala_agent_functionality(self):
        """
        Creates a single IMPALAAgent and runs it for a few steps in a DeepMindLab Env to test
        all steps of the actor and learning process.
        """
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path(
            "configs/impala_agent_for_deepmind_lab_env.json")
        env_spec = dict(level_id="lt_hallway_slope",
                        observations=["RGB_INTERLEAVED", "INSTR"],
                        frameskip=4)
        dummy_env = DeepmindLabEnv.from_spec(env_spec)

        agent = SingleIMPALAAgent.from_spec(
            default_dict(dict(type="single-impala-agent"), agent_config),
            architecture="large",
            environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            # TODO: automate this (by lookup from NN).
            internal_states_space=IMPALAAgent.default_internal_states_space,
            # Summarize time-steps to have an overview of the env-stepping speed.
            summary_spec=dict(summary_regexp="time-step",
                              directory="/home/rlgraph/"),
            dynamic_batching=False,
            num_workers=4)
        # Count items in the queue.
        print("Items in queue: {}".format(
            agent.call_api_method("get_queue_size")))

        updates = 5
        update_times = list()
        print("Updating from queue ...")
        for _ in range(updates):
            start_time = time.monotonic()
            agent.update()
            update_times.append(time.monotonic() - start_time)

        print(
            "Updates per second (including waiting for enqueued items): {}/s".
            format(updates / np.sum(update_times)))

        time.sleep(5)

        agent.terminate()
示例#2
0
    def test_isolated_impala_actor_agent_functionality(self):
        """
        Creates a non-distributed IMPALAAgent (actor) and runs it for a few steps in a DeepMindLab Env to test
        all steps of the learning process.
        """
        try:
            from rlgraph.environments.deepmind_lab import DeepmindLabEnv
        except ImportError:
            print("Deepmind Lab not installed: Will skip this test.")
            return

        agent_config = config_from_path(
            "configs/impala_agent_for_deepmind_lab_env.json")
        env_spec = dict(level_id="seekavoid_arena_01",
                        observations=["RGB_INTERLEAVED", "INSTR"],
                        frameskip=4)
        dummy_env = DeepmindLabEnv.from_spec(env_spec)

        agent = IMPALAAgent.from_spec(
            agent_config,
            type="actor",
            architecture="large",
            environment_spec=default_dict(dict(type="deepmind-lab"), env_spec),
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            # TODO: automate this (by lookup from NN).
            internal_states_space=IMPALAAgent.default_internal_states_space,
            execution_spec=dict(
                #mode="distributed",
                #distributed_spec=dict(job="actor", task_index=0, cluster_spec=self.cluster_spec_single_actor),
                disable_monitoring=True),
            # Need large queue to be able to fill it up (don't have a learner).
            fifo_queue_spec=dict(capacity=10000))
        # Start Specifiable Server with Env manually (monitoring is disabled).
        agent.environment_stepper.environment_server.start_server()
        time_start = time.perf_counter()
        steps = 5
        for _ in range(steps):
            agent.call_api_method("perform_n_steps_and_insert_into_fifo")
        time_total = time.perf_counter() - time_start
        print(
            "Done running {}x{} steps in Deepmind Lab env using IMPALA network in {}sec ({} actions/sec)."
            .format(steps, agent.worker_sample_size, time_total,
                    agent.worker_sample_size * steps / time_total))
        agent.environment_stepper.environment_server.stop_server()
        agent.terminate()