Примеры использования generate_episodes в Python

Язык программирования: Python

Пространство имен/Пакет: tst.utilities

Метод/Функция: generate_episodes

Примеров на hotexamples.com: 7

Python generate_episodes — найдено 7 примеров. Это лучшие примеры Python-кода для tst.utilities.generate_episodes, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Пример #1

Показать файл

Файл: cumulative_test.py Проект: djjh/reinforcement-learning-labs

    def test_works(self):
        """Check Cumulative.get_advantages against a hand-written expectation.

        Five rows of three values each are turned into episodes, and the
        result is compared with a flat list of fifteen expected values.
        """
        reward_table = [
            [0, 0, 0],
            [0, 1, 2],
            [2, 1, 0],
            [1, 1, 1],
            [0, 0, 0],
        ]
        expected = [0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0]

        estimator = Cumulative()
        computed = estimator.get_advantages(generate_episodes(reward_table))

        assert computed == expected

Пример #2

Показать файл

    def test_works(self, discount, rewards, expected_advantages):
        """Check RewardToGo.get_advantages for one externally supplied case.

        ``discount``, ``rewards`` and ``expected_advantages`` come from outside
        this method (presumably a pytest parametrization that is not visible
        here -- confirm against the enclosing test class).
        """
        estimator = RewardToGo(discount=discount)

        computed = estimator.get_advantages(generate_episodes(rewards))

        assert computed == expected_advantages

Пример #3

Показать файл

Файл: gae_test.py Проект: djjh/reinforcement-learning-labs

    def test_update(self):
        """Smoke-test Gae.update with a mocked value function.

        Nothing is asserted; the test passes as long as update() does not raise.
        """
        # Stand-in value function with context-manager hooks:
        # __enter__ hands back a (Mock, None) pair, __exit__ returns None.
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)

        reward_table = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]
        estimator = Gae(value_function=critic, gamma=1, lambduh=1)

        estimator.update(generate_episodes(reward_table))

Пример #4

Показать файл

Файл: gae_test.py Проект: djjh/reinforcement-learning-labs

    def test_works(self, gamma, lambduh, rewards, expected_advantages):
        """Check Gae.get_advantages for one externally supplied case.

        The value function is a mock: it carries context-manager hooks and a
        ``get_values`` mock whose successive calls yield the successive items
        of ``np.asarray(rewards)``.
        """
        critic = Mock()
        critic.__enter__ = Mock(return_value=(Mock(), None))
        critic.__exit__ = Mock(return_value=None)
        # An iterable side_effect makes each call return the next item.
        critic.get_values = Mock(side_effect=np.asarray(rewards))

        estimator = Gae(value_function=critic, gamma=gamma, lambduh=lambduh)

        computed = estimator.get_advantages(generate_episodes(rewards))

        assert computed == expected_advantages

Пример #5

Показать файл

Файл: cumulative_test.py Проект: djjh/reinforcement-learning-labs

    def test_update(self):
        """Smoke-test Cumulative.update: it must accept episodes without raising."""
        estimator = Cumulative()
        reward_table = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        estimator.update(generate_episodes(reward_table))

Пример #6

Показать файл

    def test_update(self):
        """Smoke-test RewardToGo.update: it must accept episodes without raising."""
        estimator = RewardToGo(discount=1)
        reward_table = [[0, 0, 0], [0, 1, 2], [2, 1, 0], [1, 1, 1]]

        estimator.update(generate_episodes(reward_table))

Пример #7

Показать файл

Файл: advantage_function_test.py Проект: djjh/reinforcement-learning-labs

    def test_update(self):
        """Verify that AdvantageFunctionImpl.update raises NotImplementedError."""
        impl = AdvantageFunctionImpl()
        batch = generate_episodes([[0]])

        # Only the update() call itself is expected to raise.
        with pytest.raises(NotImplementedError):
            impl.update(batch)