Python OpenAIGymEnv.reset примеры использования

Язык программирования: Python

Пространство имен/Пакет: rlgraph.environments

Класс/Тип: OpenAIGymEnv

Метод/Функция: reset

Примеров на hotexamples.com: 3

Python OpenAIGymEnv.reset - 3 примера найдено. Это лучшие примеры Python кода для rlgraph.environments.OpenAIGymEnv.reset, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

OpenAIGymEnv(30)

from_spec(16)

reset(3)

step(2)

terminate(1)

Пример #1

Показать файл

    def test_act(self):
        """
        Times `agent.get_action` on a DQN agent built for "Pong-v0".

        Compares `count` single-state calls against one call on the whole
        batch of sampled states, printing both timings, the Component call
        count and the call chain profile.
        """
        pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        spec = config_from_path("configs/ray_apex_for_pong.json")
        # The pytorch backend is given a different memory type.
        if get_backend() == "pytorch":
            spec["memory_spec"]["type"] = "mem_prioritized_replay"
        agent = DQNAgent.from_spec(
            spec,
            state_space=pong_env.state_space,
            action_space=pong_env.action_space
        )

        # One action on the freshly reset env before any timing starts.
        initial_state = pong_env.reset()
        agent.get_action(initial_state)
        print("Component call count = {}".format(Component.call_count))

        state_space = pong_env.state_space
        count = 200

        # Timing 1: one `get_action` call per sampled state.
        single_states = state_space.sample(count)
        t0 = time.perf_counter()
        for single_state in single_states:
            agent.get_action(single_state)
        elapsed = time.perf_counter() - t0

        print("Took {} s for {} separate actions, mean = {}".format(elapsed, count, elapsed / count))

        # Timing 2: a single `get_action` call on all `count` sampled states.
        batch = state_space.sample(count)
        t0 = time.perf_counter()
        agent.get_action(batch)
        elapsed = time.perf_counter() - t0
        print("Took {} s for {} batched actions.".format(elapsed, count))
        print_call_chain(Component.call_times, False, 0.03)

Пример #2

Показать файл

Файл: test_readme_example.py Проект: MegaYEye/rlgraph

    def test_readme_example(self):
        """
        Runs the README walkthrough: build a DQNAgent for 'CartPole-v0', take
        one action, step the environment once, observe the result and update.
        """
        from rlgraph.agents import DQNAgent
        from rlgraph.environments import OpenAIGymEnv

        env = OpenAIGymEnv('CartPole-v0')
        agent_config = config_from_path("../../examples/configs/dqn_cartpole.json")

        # The agent is built from the loaded config plus the env's spaces.
        agent = DQNAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )

        # Reset the env and ask for an action; the preprocessed state is
        # requested as an extra return value.
        first_state = env.reset()
        preprocessed, chosen_action = agent.get_action(
            states=first_state,
            extra_returns="preprocessed_states"
        )

        # One environment step with the chosen action.
        following_state, step_reward, is_terminal, info = env.step(chosen_action)

        # Hand the transition to the agent.
        agent.observe(
            preprocessed_states=preprocessed,
            actions=chosen_action,
            internals=[],
            next_states=following_state,
            rewards=step_reward,
            terminals=is_terminal
        )

        # Trigger an update; its return value is kept but not checked here.
        loss = agent.update()

Пример #3

Показать файл

    def test_openai_atari_env(self):
        """
        Smoke-tests OpenAIGymEnv on "Pong-v0" with sampled actions.

        Resets the environment, then takes 100 steps using actions sampled
        from the env's action space. After the reset and after every step it
        checks that the mean of the returned state lies in [0, 255]; for each
        step it also checks that the reward is a float32 `np.ndarray` and the
        terminal flag is a `bool`. The accumulated reward is printed at the end.
        """
        env = OpenAIGymEnv("Pong-v0")

        # Simple test run with actions sampled from the action space.
        s = env.reset()
        # Assert we have pixels.
        self.assertGreaterEqual(np.mean(s), 0)
        self.assertLessEqual(np.mean(s), 255)
        accum_reward = 0.0
        for _ in range(100):
            s, r, t, _ = env.step(env.action_space.sample())
            # unittest assertions (not bare `assert`) so the checks are not
            # stripped under `python -O` and match the rest of this method.
            self.assertIsInstance(r, np.ndarray)
            self.assertEqual(r.dtype, np.float32)
            self.assertIsInstance(t, bool)
            self.assertGreaterEqual(np.mean(s), 0)
            self.assertLessEqual(np.mean(s), 255)
            accum_reward += r

        # The format string needs a placeholder, otherwise the value is dropped.
        print("Accumulated Reward: {}".format(accum_reward))