Example #1
    def test_cartpole_with_worker(self):
        env = OpenAIGymEnv("CartPole-v0")
        agent_config = config_from_path("configs/backend_performance_dqn_cartpole.json")

        # Test CPU settings for batching here.
        agent_config["update_spec"] = None

        agent = DQNAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )

        worker = SingleThreadedWorker(
            env_spec=lambda: OpenAIGymEnv("CartPole-v0"),
            agent=agent,
            frameskip=1,
            num_environments=1,
            worker_executes_preprocessing=False
        )

        result = worker.execute_timesteps(1000)
        print(result)
Example #2
    def test_dqn_on_cart_pole(self):
        """
        Creates a DQNAgent and runs it via a Runner on the CartPole Env.
        """
        dummy_env = OpenAIGymEnv("CartPole-v0")
        agent = DQNAgent.from_spec(
            config_from_path("configs/dqn_agent_for_cartpole.json"),
            double_q=False,
            dueling_q=False,
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            execution_spec=dict(seed=15),
            update_spec=dict(update_interval=4,
                             batch_size=24,
                             sync_interval=64),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
            store_last_q_table=True)

        time_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=lambda: OpenAIGymEnv("CartPole-v0", seed=15),
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], 25)
        self.assertGreaterEqual(results["max_episode_reward"], 100.0)
        self.assertLessEqual(results["episodes_executed"], 200)
Example #3
    def test_double_dueling_dqn_on_cart_pole(self):
        """
        Creates a double and dueling DQNAgent and runs it via a Runner on the CartPole Env.
        """
        dummy_env = OpenAIGymEnv("CartPole-v0")
        agent = DQNAgent.from_spec(
            config_from_path("configs/dqn_agent_for_cartpole.json"),
            double_q=True,
            dueling_q=True,
            state_space=dummy_env.state_space,
            action_space=dummy_env.action_space,
            observe_spec=dict(buffer_size=200),
            execution_spec=dict(seed=156),
            update_spec=dict(update_interval=4,
                             batch_size=64,
                             sync_interval=16),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
            store_last_q_table=True)

        time_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=lambda: OpenAIGymEnv("CartPole-v0", seed=10),
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        #print("STATES:\n{}".format(agent.last_q_table["states"]))
        #print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], 15)
        self.assertGreaterEqual(results["max_episode_reward"], 160.0)
        self.assertLessEqual(results["episodes_executed"], 100)
Example #4
    def test_sac_on_cartpole(self):
        """
        Creates an SAC-Agent and runs it on CartPole.
        """
        env = OpenAIGymEnv("CartPole-v0")
        agent = SACAgent.from_spec(
            config_from_path("configs/sac_agent_for_cartpole.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )

        worker = SingleThreadedWorker(
            env_spec=lambda: env,
            agent=agent,
            worker_executes_preprocessing=False,
            render=False,  # self.is_windows,
            episode_finish_callback=lambda episode_return, duration, timesteps, **kwargs:
            print("episode: return={} ts={}".format(episode_return, timesteps))
        )

        time_steps = 5000
        results = worker.execute_timesteps(time_steps)

        print(results)

        self.assertTrue(results["timesteps_executed"] == time_steps)
        self.assertLessEqual(results["episodes_executed"], time_steps / 20)
        self.assertGreater(results["mean_episode_reward"], 40.0)
        self.assertGreater(results["max_episode_reward"], 100.0)
        self.assertGreater(results["mean_episode_reward_last_10_episodes"], 100.0)
Example #5
    def test_sac_on_pendulum(self):
        """
        Creates an SAC-Agent and runs it on Pendulum.
        """
        env = OpenAIGymEnv("Pendulum-v0")
        agent = SACAgent.from_spec(
            config_from_path("configs/sac_agent_for_pendulum.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )

        worker = SingleThreadedWorker(
            env_spec=lambda: env,
            agent=agent,
            worker_executes_preprocessing=False,
            render=self.is_windows
        )
        # Note: SAC is more computationally expensive.
        episodes = 50
        results = worker.execute_episodes(episodes)

        print(results)

        self.assertTrue(results["timesteps_executed"] == episodes * 200)
        self.assertTrue(results["episodes_executed"] == episodes)
        self.assertGreater(results["mean_episode_reward"], -800)
Example #6
    def test_ppo_on_cart_pole(self):
        """
        Creates a PPO Agent and runs it via a Runner on the CartPole env.
        """
        env = OpenAIGymEnv("CartPole-v0", seed=36)
        agent = PPOAgent.from_spec(
            config_from_path("configs/ppo_agent_for_cartpole.json"),
            state_space=env.state_space,
            action_space=env.action_space)

        time_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=lambda: env,
            agent=agent,
            worker_executes_preprocessing=False,
            render=False,  # self.is_windows
            episode_finish_callback=lambda episode_return, duration, timesteps, env_num:
            print("episode return {}; steps={}".format(episode_return, timesteps)))
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print(results)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertLessEqual(results["episodes_executed"], time_steps / 10)
        # Assume we have learned something.
        self.assertGreaterEqual(results["mean_episode_reward"], 40.0)
Example #7
    def test_dqn_on_pong(self):
        """
        Creates a DQNAgent and runs it via a Runner on an OpenAI Pong Env.
        """
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True,
                           visualize=False)
        agent_config = config_from_path("configs/dqn_agent_for_pong.json")
        preprocessing_spec = agent_config.pop("preprocessor_spec")
        agent = Agent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=self.pong_preprocessed_state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=env.action_space)

        time_steps = 4000000
        worker = SingleThreadedWorker(env_spec=lambda: env,
                                      agent=agent,
                                      render=True,
                                      preprocessing_spec=preprocessing_spec,
                                      worker_executes_preprocessing=True)
        results = worker.execute_timesteps(time_steps, use_exploration=True)
Example #8
    def test_ppo_on_cart_pole(self):
        """
        Creates a PPO Agent and runs it via a Runner on the CartPole Env.
        """
        env = OpenAIGymEnv("CartPole-v0", seed=36)
        agent = PPOAgent.from_spec(
            config_from_path("configs/ppo_agent_for_cartpole.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )

        time_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=lambda: env,
            agent=agent,
            worker_executes_preprocessing=False,
            render=self.is_windows
        )
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print(results)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        #self.assertGreaterEqual(results["mean_episode_reward"], 23)
        #self.assertGreaterEqual(results["max_episode_reward"], 100.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 10)
Example #9
    def test_readme_example(self):
        """
        Tests deterministic functionality of RandomEnv.
        """
        from rlgraph.agents import DQNAgent
        from rlgraph.environments import OpenAIGymEnv

        environment = OpenAIGymEnv('CartPole-v0')
        config = config_from_path("../../examples/configs/dqn_cartpole.json")

        # Create from .json file or dict, see agent API for all
        # possible configuration parameters.
        agent = DQNAgent.from_spec(config,
                                   state_space=environment.state_space,
                                   action_space=environment.action_space)

        # Get an action, take a step, observe reward.
        state = environment.reset()
        preprocessed_state, action = agent.get_action(
            states=state, extra_returns="preprocessed_states")

        # Execute step in environment.
        next_state, reward, terminal, info = environment.step(action)

        # Observe result.
        agent.observe(preprocessed_states=preprocessed_state,
                      actions=action,
                      internals=[],
                      next_states=next_state,
                      rewards=reward,
                      terminals=terminal)

        # Call update when desired:
        loss = agent.update()
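The README example above runs a single act/observe/update cycle. A minimal sketch of how those same calls could compose into a full interaction loop, assuming nothing beyond the API shown above (the loop bounds are illustrative):

    # Illustrative loop built only from the calls demonstrated above.
    for _ in range(10):  # hypothetical episode count
        state = environment.reset()
        terminal = False
        while not terminal:
            preprocessed_state, action = agent.get_action(
                states=state, extra_returns="preprocessed_states")
            next_state, reward, terminal, info = environment.step(action)
            agent.observe(preprocessed_states=preprocessed_state,
                          actions=action,
                          internals=[],
                          next_states=next_state,
                          rewards=reward,
                          terminals=terminal)
            state = next_state
        loss = agent.update()  # e.g. update once per episode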
Example #10
    def test_post_processing(self):
        """
        Tests external batch post-processing for the PPO agent.
        """
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)
        agent_config = config_from_path("configs/ppo_agent_for_pong.json")
        agent = PPOAgent.from_spec(agent_config,
                                   state_space=env.state_space,
                                   action_space=env.action_space)
        num_samples = 200
        states = agent.preprocessed_state_space.sample(num_samples)
        reward_space = FloatBox(add_batch_rank=True)
        terminal_space = BoolBox(add_batch_rank=True)
        sequence_indices_space = BoolBox(add_batch_rank=True)

        # GAE is separately tested, just testing if this API method returns results.
        pg_advantages = agent.post_process(
            dict(states=states,
                 rewards=reward_space.sample(num_samples),
                 terminals=terminal_space.sample(num_samples, fill_value=0),
                 sequence_indices=sequence_indices_space.sample(num_samples,
                                                                fill_value=0)))
Example #11
    def test_memory_compilation(self):
        # Builds a memory and returns build stats.
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)

        record_space = Dict(states=env.state_space,
                            actions=env.action_space,
                            rewards=float,
                            terminals=BoolBox(),
                            add_batch_rank=True)
        input_spaces = dict(
            # insert: records
            records=record_space,
            # get_records: num_records
            num_records=int,
            # update_records: indices, update
            indices=IntBox(add_batch_rank=True),
            update=FloatBox(add_batch_rank=True))

        input_spaces.pop("num_records")
        memory = MemPrioritizedReplay(capacity=20000)
        test = ComponentTest(component=memory,
                             input_spaces=input_spaces,
                             auto_build=False)
        return test.build()
Example #12
    def test_impala_on_outbreak(self):
        """
        Creates an IMPALAAgent and runs it via direct update calls on an OpenAI Breakout Env.
        """
        env = OpenAIGymEnv("Breakout-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True,
                           visualize=False)
        config_ = config_from_path("configs/impala_agent_for_breakout.json")
        agent = IMPALAAgent.from_spec(
            config_,
            state_space=env.state_space,
            action_space=env.action_space,
        )

        learn_updates = 4000000
        mean_returns = []
        for i in range(learn_updates):
            ret = agent.update()
            mean_return = self._calc_mean_return(ret)
            mean_returns.append(mean_return)
            print("i={} Loss={:.4} Avg-reward={:.2}".format(
                i, float(ret[1]), mean_return))

        time.sleep(3)
        agent.terminate()
        time.sleep(3)
Example #13
    def test_act(self):
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        agent = DQNAgent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions, up, down, no-op)
            action_space=env.action_space
        )
        state = env.reset()
        action = agent.get_action(state)
        print("Component call count = {}".format(Component.call_count))

        state_space = env.state_space
        count = 200

        samples = state_space.sample(count)
        start = time.perf_counter()
        for s in samples:
            action = agent.get_action(s)
        end = time.perf_counter() - start

        print("Took {} s for {} separate actions, mean = {}".format(end, count, end / count))

        # Now test the same number of actions in a single batched call.
        samples = state_space.sample(count)
        start = time.perf_counter()
        action = agent.get_action(samples)
        end = time.perf_counter() - start
        print("Took {} s for {} batched actions.".format(end, count))
        profile = Component.call_times
        print_call_chain(profile, False, 0.03)
Example #14
    def test_sac_on_pendulum(self):
        """
        Creates an SAC-Agent and runs it on Pendulum.
        """
        env = OpenAIGymEnv("Pendulum-v0")
        agent = SACAgent.from_spec(
            config_from_path("configs/sac_agent_for_pendulum.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )

        worker = SingleThreadedWorker(
            env_spec=lambda: env,
            agent=agent,
            worker_executes_preprocessing=False,
            render=False,  # self.is_windows
            episode_finish_callback=lambda episode_return, duration, timesteps, **kwargs:
            print("episode: return={} ts={}".format(episode_return, timesteps))
        )
        # Note: SAC is more computationally expensive.
        episodes = 50
        results = worker.execute_episodes(episodes)

        print(results)

        self.assertTrue(results["timesteps_executed"] == episodes * 200)
        self.assertTrue(results["episodes_executed"] == episodes)
        self.assertGreater(results["mean_episode_reward_last_10_episodes"], -700)
        self.assertGreater(results["max_episode_reward"], -100)
Example #15
    def test_moving_standardize_python(self):
        env = OpenAIGymEnv("Pong-v0")
        space = env.state_space

        moving_standardize = MovingStandardize(backend="python")
        moving_standardize.create_variables(
            input_spaces=dict(preprocessing_inputs=space), action_space=None)
        samples = [space.sample() for _ in range(100)]
        out = None
        for sample in samples:
            out = moving_standardize._graph_fn_apply(sample)

        # Assert shape remains intact.
        expected_shape = (1, ) + space.shape
        self.assertEqual(expected_shape, moving_standardize.mean_est.shape)
        # Assert mean estimate.
        expected_mean = np.mean(samples, axis=0)
        self.assertTrue(np.allclose(moving_standardize.mean_est,
                                    expected_mean))

        expected_variance = np.var(samples, ddof=1, axis=0)
        variance_estimate = moving_standardize.std_sum_est / (
            moving_standardize.sample_count - 1.0)
        self.assertEqual(expected_shape, variance_estimate.shape)
        self.assertTrue(np.allclose(variance_estimate, expected_variance))

        std = np.sqrt(variance_estimate) + SMALL_NUMBER

        # Final output.
        expected_out = (samples[-1] - moving_standardize.mean_est) / std
        self.assertTrue(np.allclose(out, expected_out))
Example #16
    def test_value_function_weights(self):
        """
        Tests changing of value function weights.
        """
        env = OpenAIGymEnv("Pong-v0")
        agent_config = config_from_path("configs/ppo_agent_for_pong.json")
        agent = PPOAgent.from_spec(agent_config,
                                   state_space=env.state_space,
                                   action_space=env.action_space)
        weights = agent.get_weights()
        assert "value_function_weights" in weights
        assert "policy_weights" in weights

        policy_weights = weights["policy_weights"]
        value_function_weights = weights["value_function_weights"]

        # Just change vf weights.
        for key, weight in value_function_weights.items():
            value_function_weights[key] = weight + 0.01
        agent.set_weights(policy_weights, value_function_weights)
        new_actual_weights = agent.get_weights()

        recursive_assert_almost_equal(
            new_actual_weights["value_function_weights"],
            value_function_weights)
Example #17
    def test_subgraph_components(self):
        # TODO: re-enable once selective subgraph fetching is built correctly.
        return
        # Create agent.
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        # Do not build yet.
        agent = ApexAgent.from_spec(agent_config,
                                    state_space=environment.state_space,
                                    action_space=environment.action_space,
                                    auto_build=False)

        # Prepare all steps until build device strategy so we can test subgraph fetching.
        agent.graph_executor.init_execution()
        agent.graph_executor.setup_graph()

        # Meta graph must be built for sub-graph tracing.
        agent.graph_builder.build_meta_graph(agent.input_spaces)

        sub_graph = agent.graph_builder.get_subgraph(
            "update_from_external_batch")
        print("Sub graph components:")
        print(sub_graph.sub_components)
        print("Sub graph API: ")
        print(sub_graph.api_methods)
Example #18
class TestSingleThreadedDQN(unittest.TestCase):

    # TODO test on the relevant Atari environments.
    env = OpenAIGymEnv(gym_env='Pong-v0')

    # TODO define classic atari dqn network.
    network = list()

    def test_replay_memory_atari_throughput(self):
        """
        Tests throughput on standard Atari environments using the replay memory.
        """
        agent = DQNAgent(
            states_spec=self.env.state_space,
            action_spec=self.env.action_space,
            network_spec=self.network,
            memory_spec=dict(
                type='replay_memory',
                capacity=100000,
                next_states=True
            )
        )
        worker = SingleThreadedWorker(
            env_spec=lambda: self.env,
            agent=agent,
            frameskip=1
        )

        result = worker.execute_timesteps(num_timesteps=1000000, use_exploration=True)
        print('Agent throughput = {} ops/s'.format(result['ops_per_second']))
        print('Environment throughput = {} frames/s'.format(result['env_frames_per_second']))

    def test_prioritized_replay_atari_throughput(self):
        """
        Tests throughput on standard Atari environments using the prioritized replay memory.
        """
        agent = DQNAgent(
            states_spec=self.env.state_space,
            action_spec=self.env.action_space,
            network_spec=self.network,
            memory_spec=dict(
                type='prioritized',
                capacity=100000,
                next_states=True
            )
        )
        worker = SingleThreadedWorker(
            env_spec=lambda: self.env,
            agent=agent,
            frameskip=1
        )

        result = worker.execute_timesteps(num_timesteps=1000000, use_exploration=True)
        print('Agent throughput = {} ops/s'.format(result['ops_per_second']))
        print('Environment throughput = {} frames/s'.format(result['env_frames_per_second']))
Example #19
    def test_double_dueling_dqn_on_cart_pole(self):
        """
        Creates a double and dueling DQNAgent and runs it via a Runner on the CartPole Env.
        """
        gym_env = "CartPole-v0"
        dummy_env = OpenAIGymEnv(gym_env)
        config_ = config_from_path("configs/dqn_agent_for_cartpole.json")
        # Add dueling config to agent.
        config_["policy_spec"] = {
            "units_state_value_stream": 3,
            "action_adapter_spec": {
                "pre_network_spec": [{
                    "type": "dense",
                    "units": 3
                }]
            }
        }
        agent = DQNAgent.from_spec(config_,
                                   double_q=True,
                                   dueling_q=True,
                                   state_space=dummy_env.state_space,
                                   action_space=dummy_env.action_space,
                                   execution_spec=dict(seed=13),
                                   update_spec=dict(update_interval=4,
                                                    batch_size=64,
                                                    sync_interval=16),
                                   optimizer_spec=dict(type="adam",
                                                       learning_rate=0.01),
                                   store_last_q_table=True)

        time_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=lambda: OpenAIGymEnv(gym_env, seed=10),
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], 25)
        self.assertLessEqual(results["episodes_executed"], 150)
Example #20
    def test_ppo_compilation(self):
        """
        Tests PPO agent compilation.
        """
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/ppo_agent_for_pong.json")
        agent = PPOAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )
        print("Compiled {}".format(agent))
Example #21
    def test_apex_compilation(self):
        """
        Tests agent compilation without Ray to ease debugging on Windows.
        """
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        agent = ApexAgent.from_spec(agent_config,
                                    state_space=environment.state_space,
                                    action_space=environment.action_space)
        print("Compiled {}".format(agent))
Example #22
    def test_dqn_compilation(self):
        """
        Tests DQNAgent compilation with the PyTorch test config on an OpenAI Pong Env.
        """
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/dqn_pytorch_test.json")
        agent = DQNAgent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions: up, down, no-op)
            action_space=env.action_space
        )
Example #23
    def test_dqn_compilation(self):
        """
        Tests DQN Agent compilation.
        """
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/dqn_agent_for_pong.json")
        agent = DQNAgent.from_spec(
            # Uses 2015 DQN parameters as closely as possible.
            agent_config,
            state_space=env.state_space,
            # Try with "reduced" action space (actually only 3 actions: up, down, no-op)
            action_space=env.action_space
        )
Example #24
    def test_actor_critic_compilation(self):
        """
        Tests actor-critic agent compilation.
        """
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)
        agent_config = config_from_path(
            "configs/actor_critic_agent_for_pong.json")
        agent = ActorCriticAgent.from_spec(agent_config,
                                           state_space=env.state_space,
                                           action_space=env.action_space)
Example #25
    def test_multi_gpu_apex_agent_compilation(self):
        """
        Tests if the multi gpu strategy can compile successfully on a multi gpu system, but
        also runs on a CPU-only system using fake-GPU logic for testing purposes.
        """
        root_logger.setLevel(DEBUG)
        agent_config = config_from_path("configs/multi_gpu_ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        agent = ApexAgent.from_spec(
            agent_config, state_space=environment.state_space, action_space=environment.action_space
        )
        print("Compiled Apex agent")
Example #26
    def test_multi_gpu_apex_agent_compilation(self):
        """
        Tests if the multi-GPU strategy can compile successfully on a multi-GPU system.

        THIS TEST REQUIRES A MULTI-GPU SYSTEM.
        """
        root_logger.setLevel(DEBUG)
        agent_config = config_from_path("configs/multi_gpu_ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        agent = ApexAgent.from_spec(
            agent_config, state_space=environment.state_space, action_space=environment.action_space
        )
        print("Compiled Apex agent")
Example #27
    def test_apex_compilation(self):
        """
        Tests agent compilation without Ray to ease debugging on Windows.
        """
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        agent_config["execution_spec"].pop("ray_spec")
        # TODO remove after unified.
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        environment = OpenAIGymEnv("Pong-v0", frameskip=4)

        agent = ApexAgent.from_spec(agent_config,
                                    state_space=environment.state_space,
                                    action_space=environment.action_space)
        print("Compiled Apex agent")
Example #28
class TestSingleThreadedWorker(unittest.TestCase):

    environment = OpenAIGymEnv(gym_env='CartPole-v0')

    def test_timesteps(self):
        """
        Simply tests if timestep execution loop works and returns a result.
        """
        agent = RandomAgent(
            action_space=self.environment.action_space,
            state_space=self.environment.state_space
        )
        worker = SingleThreadedWorker(
            env_spec=lambda: self.environment,
            agent=agent,
            frameskip=1,
            worker_executes_preprocessing=False
        )

        result = worker.execute_timesteps(100)
        self.assertEqual(result['timesteps_executed'], 100)
        self.assertGreater(result['episodes_executed'], 0)
        self.assertLessEqual(result['episodes_executed'], 100)
        self.assertGreaterEqual(result['env_frames'], 100)
        self.assertGreaterEqual(result['runtime'], 0.0)

    def test_episodes(self):
        """
        Simply tests if episode execution loop works and returns a result.
        """
        agent = RandomAgent(
            action_space=self.environment.action_space,
            state_space=self.environment.state_space
        )
        worker = SingleThreadedWorker(
            env_spec=lambda: self.environment,
            agent=agent,
            frameskip=1,
            worker_executes_preprocessing=False
        )

        result = worker.execute_episodes(5, max_timesteps_per_episode=10)
        # Max 5 * 10.
        self.assertLessEqual(result['timesteps_executed'], 50)
        self.assertEqual(result['episodes_executed'], 5)
        self.assertLessEqual(result['env_frames'], 50)
        self.assertGreaterEqual(result['runtime'], 0.0)
Example #29
    def test_ppo_on_pendulum(self):
        """
        Creates a PPO Agent and runs it via a Runner on the Pendulum env.
        """
        env = OpenAIGymEnv("Pendulum-v0")
        agent = PPOAgent.from_spec(
            config_from_path("configs/ppo_agent_for_pendulum.json"),
            state_space=env.state_space,
            action_space=env.action_space)

        worker = SingleThreadedWorker(env_spec=lambda: env,
                                      agent=agent,
                                      worker_executes_preprocessing=False,
                                      render=self.is_windows)
        results = worker.execute_episodes(500, use_exploration=True)

        print(results)
Example #30
    def test_sac_on_cartpole(self):
        """
        Creates an SAC-Agent and runs it on CartPole.
        """
        env = OpenAIGymEnv("CartPole-v0")
        agent = SACAgent.from_spec(
            config_from_path("configs/sac_agent_for_cartpole.json"),
            state_space=env.state_space,
            action_space=env.action_space)

        worker = SingleThreadedWorker(env_spec=lambda: env,
                                      agent=agent,
                                      worker_executes_preprocessing=False,
                                      render=self.is_windows)

        time_steps = 10000
        results = worker.execute_timesteps(time_steps)

        print(results)