Example #1
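These snippets are methods lifted out of their test classes, so imports are not shown. A minimal import header they appear to assume is sketched below; the module paths are educated guesses from RLgraph's layout, not verified against any particular release:

import time
from copy import deepcopy

import numpy as np

from rlgraph import get_backend
from rlgraph.environments import Environment, OpenAIGymEnv
from rlgraph.execution.ray import ApexExecutor
# Assumed locations of the test helpers and the preprocessor stack:
from rlgraph.tests.test_util import config_from_path, recursive_assert_almost_equal
from rlgraph.components.neural_networks import PreprocessorStack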
    def test_learning_cartpole(self):
        """
        Tests if Apex can learn a simple environment using a single worker, thus
        replicating DQN.
        """
        env_spec = dict(type="openai", gym_env="CartPole-v0")
        agent_config = config_from_path("configs/apex_agent_cartpole.json")
        # TODO: remove once backends are unified.
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"

        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=20000,
                          report_interval=1000,
                          report_interval_min_seconds=1))
        print("Finished executing workload:")
        print(result)
Example #2
    def test_learning_cartpole_n_step(self):
        """
        Tests if Apex can learn a simple environment using a single worker with
        n-step adjustments enabled.
        """
        env_spec = dict(type="openai", gym_env="CartPole-v0")
        agent_config = config_from_path("configs/apex_agent_cartpole.json")

        # Use n-step adjustments.
        agent_config["execution_spec"]["ray_spec"]["worker_spec"][
            "n_step_adjustment"] = 3
        agent_config["execution_spec"]["ray_spec"]["apex_replay_spec"][
            "n_step_adjustment"] = 3
        agent_config["n_step"] = 3

        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=20000,
                          report_interval=1000,
                          report_interval_min_seconds=1))
        print("Finished executing workload:")
        print(result)
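Note that the n-step setting must agree in three places: the workers that assemble n-step transitions, the replay buffer that stores them, and the agent whose loss discounts across n steps. A small hypothetical helper (plain dict manipulation, not part of RLgraph) keeps them in sync:

def set_n_step(agent_config, n):
    # Hypothetical helper: write the same n-step value everywhere Apex reads it.
    ray_spec = agent_config["execution_spec"]["ray_spec"]
    ray_spec["worker_spec"]["n_step_adjustment"] = n
    ray_spec["apex_replay_spec"]["n_step_adjustment"] = n
    agent_config["n_step"] = n

With this, the three assignments above collapse to set_n_step(agent_config, 3).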
Example #3
    def test_worker_init(self):
        """
        Tests if workers initialize without problems for the Pong config.
        """
        agent_config = config_from_path("configs/ray_apex_for_pong.json")

        # Long initialization times can lead to Ray crashes.
        start = time.monotonic()
        executor = ApexExecutor(
            environment_spec=self.env_spec,
            agent_config=agent_config,
        )
        duration = time.monotonic() - start
        print("Initialized {} workers in {:.2f} s.".format(
            executor.num_sample_workers, duration))
        executor.test_worker_init()
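Since the comment warns that long initialization can crash Ray, this timing pattern is worth reusing. A plain-Python sketch (nothing RLgraph-specific is assumed) that times any block with a monotonic clock:

import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    # Monotonic clock: immune to wall-clock adjustments during the block.
    start = time.monotonic()
    try:
        yield
    finally:
        print("{} took {:.2f} s.".format(label, time.monotonic() - start))

Wrapping the constructor then reads: with timed("Worker init"): executor = ApexExecutor(...).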
Example #4
    def test_initial_training_pong(self):
        """
        Tests if Apex can start learning Pong effectively on Ray.
        """
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        executor = ApexExecutor(
            environment_spec=self.env_spec,
            agent_config=agent_config,
        )

        # Execute the actual workload.
        result = executor.execute_workload(workload=dict(
            num_timesteps=10000000, report_interval=10000, report_interval_min_seconds=10)
        )
        print("Finished executing workload:")
        print(result)
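At ten million timesteps this is a long-running learning benchmark rather than a quick unit test; the two report_interval settings only throttle how often intermediate results are reported, not how long the workload runs.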
Example #5
    def test_learning_2x2_grid_world(self):
        """
        Tests if Apex can learn a simple 2x2 grid-world using a single worker,
        thus replicating DQN.
        """
        env_spec = dict(type="grid-world", world="2x2", save_mode=False)
        agent_config = config_from_path(
            "configs/apex_agent_for_2x2_gridworld.json")
        # TODO: remove once backends are unified.
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=5000,
                          report_interval=100,
                          report_interval_min_seconds=1))
        full_worker_stats = executor.result_by_worker()
        print("All finished episode rewards")
        print(full_worker_stats["episode_rewards"])

        print("STATES:\n{}".format(
            executor.local_agent.last_q_table["states"]))
        print("\n\nQ(s,a)-VALUES:\n{}".format(
            np.round_(executor.local_agent.last_q_table["q_values"],
                      decimals=2)))

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(
                executor.local_agent.last_q_table["states"],
                executor.local_agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            assert state in expected_q_values_per_state, \
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)
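The loop above also tolerates repeated states in last_q_table. An equivalent, hypothetical reformulation that first collapses the table into a dict (the last occurrence of each state wins) may read more clearly:

q_table = {
    tuple(s): tuple(q)
    for s, q in zip(executor.local_agent.last_q_table["states"],
                    executor.local_agent.last_q_table["q_values"])
}
for state, q_values in q_table.items():
    assert state in expected_q_values_per_state, \
        "ERROR: unexpected (terminal) state '{}' in q-table!".format(state)
    recursive_assert_almost_equal(q_values,
                                  expected_q_values_per_state[state],
                                  decimals=0)

decimals=0 is deliberate: after only 5000 timesteps the Q-estimates are expected to match the optimum only to within rounding.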
Example #6
    def test_from_callable_env_spec(self):
        """
        Tests if the executor can be built from a callable environment spec
        instead of a spec dict.
        """
        env_spec = dict(type="openai", gym_env="CartPole-v0")
        agent_config = config_from_path("configs/apex_agent_cartpole.json")

        def create_env():
            return Environment.from_spec(env_spec)

        # Pass a callable, not a spec dict.
        executor = ApexExecutor(
            environment_spec=create_env,
            agent_config=agent_config,
        )

        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=20000,
                          report_interval=1000,
                          report_interval_min_seconds=1))
        print("Finished executing workload:")
        print(result)
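Any zero-argument callable returning an Environment works here; passing a constructor instead of a live environment presumably lets each Ray worker build its own instance rather than having to serialize one. The same executor could therefore be built with a lambda:

executor = ApexExecutor(
    environment_spec=lambda: Environment.from_spec(env_spec),
    agent_config=agent_config,
)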
Example #7
    def test_learning_2x2_grid_world_container_actions(self):
        """
        Tests Apex container action functionality.
        """
        env_spec = dict(type="grid-world",
                        world="2x2",
                        save_mode=False,
                        action_type="ftj",
                        state_representation="xy+orientation")
        agent_config = config_from_path(
            "configs/apex_agent_for_2x2_gridworld_with_container_actions.json")
        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=10000,
                          report_interval=100,
                          report_interval_min_seconds=1))
        print("Finished executing workload:")
        print(result)
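The "ftj" action type apparently selects the grid-world's container (dict) action space of forward/turn/jump sub-actions, and "xy+orientation" a state encoding that includes the agent's heading; both strings come straight from the environment spec, and the config file pairs them with a container-action-capable agent.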
Example #8
    def test_with_final_eval(self):
        """
        Tests Apex learning on a simple environment with n-step adjustments,
        followed by a final evaluation without exploration.
        """
        env_spec = dict(type="openai", gym_env="CartPole-v0")
        agent_config = config_from_path("configs/apex_agent_cartpole.json")

        # Use n-step adjustments.
        agent_config["execution_spec"]["ray_spec"]["worker_spec"][
            "n_step_adjustment"] = 3
        agent_config["execution_spec"]["ray_spec"]["apex_replay_spec"][
            "n_step_adjustment"] = 3
        agent_config["n_step"] = 3

        executor = ApexExecutor(
            environment_spec=env_spec,
            agent_config=agent_config,
        )
        # Creating the executor also tests assembly.
        print("Successfully created executor.")

        # Execute the actual workload.
        result = executor.execute_workload(
            workload=dict(num_timesteps=20000,
                          report_interval=1000,
                          report_interval_min_seconds=1))
        print("Finished executing workload:")
        print(result)

        # Get agent.
        agent = executor.local_agent
        preprocessing_spec = agent_config["preprocessing_spec"]

        # Create env.
        env = OpenAIGymEnv.from_spec(env_spec)

        if preprocessing_spec is not None:
            preprocessing_spec = deepcopy(preprocessing_spec)
            in_space = env.state_space.with_batch_rank()
            in_space = deepcopy(in_space)
            # Collect the scopes of all sub-preprocessors.
            scopes = [
                preprocessor["scope"] for preprocessor in preprocessing_spec
            ]
            # Set backend to python.
            for spec in preprocessing_spec:
                spec["backend"] = "python"
            processor_stack = PreprocessorStack(*preprocessing_spec,
                                                backend="python")
            build_space = in_space
            # Build each sub-preprocessor against the successively preprocessed space.
            for sub_comp_scope in scopes:
                processor_stack.sub_components[
                    sub_comp_scope].create_variables(
                        input_spaces=dict(preprocessing_inputs=build_space),
                        action_space=None)
                build_space = processor_stack.sub_components[
                    sub_comp_scope].get_preprocessed_space(build_space)
            processor_stack.reset()
        else:
            processor_stack = None

        ep_rewards = []
        print("finished learning, starting eval")
        for _ in range(10):
            state = env.reset()
            terminal = False
            ep_reward = 0
            while not terminal:
                state = agent.state_space.force_batch(state)
                if processor_stack is not None:
                    state = processor_stack.preprocess(state)

                actions = agent.get_action(states=state,
                                           use_exploration=False,
                                           apply_preprocessing=False)
                next_state, step_reward, terminal, info = env.step(
                    actions=actions[0])
                ep_reward += step_reward

                state = next_state
            ep_rewards.append(ep_reward)

        print("Eval episode rewards:")
        print(ep_rewards)
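The evaluation loop is self-contained enough to factor out. A hedged refactor of exactly the logic above (the same calls the test already uses, nothing new assumed):

def evaluate(agent, env, processor_stack=None, episodes=10):
    # Run greedy episodes (no exploration) and collect undiscounted returns.
    ep_rewards = []
    for _ in range(episodes):
        state = env.reset()
        terminal = False
        ep_reward = 0
        while not terminal:
            state = agent.state_space.force_batch(state)
            if processor_stack is not None:
                state = processor_stack.preprocess(state)
            actions = agent.get_action(states=state,
                                       use_exploration=False,
                                       apply_preprocessing=False)
            state, step_reward, terminal, info = env.step(actions=actions[0])
            ep_reward += step_reward
        ep_rewards.append(ep_reward)
    return ep_rewards

ep_rewards = evaluate(agent, env, processor_stack)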