Python Environment: примеры использования

Язык программирования: Python

Пространство имен/Пакет: bandit.environment

Класс/Тип: Environment

Примеров на hotexamples.com: 6

Python Environment - 6 примеров найдено. Это лучшие примеры Python кода для bandit.environment.Environment, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

Environment(5)

expected_rewards(3)

action(1)

moments(1)

Пример #1

Показать файл

Файл: test_environment.py Проект: tmcclintock/MultiArmedBandits

 def test_action(self):
     """Check ``Environment.action`` on an invalid and on a valid arm index.

     Builds an environment of ``PoissonReward`` arms with random rates in
     [1, 2), expects an ``AssertionError`` for an index past the last arm,
     and expects the reward for arm 0 to be a ``float`` or an ``int``.
     """
     n_arms = 10
     rewards = [PoissonReward(mu) for mu in np.random.rand(n_arms) + 1]
     e = Environment(rewards)
     # Any index >= n_arms is out of range; n_arms + 1 matches the original 11.
     with pytest.raises(AssertionError):
         e.action(n_arms + 1)
     r = e.action(0)
     # isinstance also accepts subclasses, unlike an exact ``type(r) in [...]``.
     assert isinstance(r, (float, int))

Пример #2

Показать файл

Файл: test_environment.py Проект: tmcclintock/MultiArmedBandits

 def test_poisson_rewards(self):
     """Check length and expected rewards of an all-Poisson environment.

     Ten ``PoissonReward`` arms are built with rates evenly spaced from 1 to
     5; the environment must report ten arms, and each entry returned by
     ``expected_rewards()`` must equal the matching arm's
     ``expected_reward()``.
     """
     n_arms = 10
     rates = np.linspace(1, 5, n_arms)
     arms = [PoissonReward(rate) for rate in rates]
     env = Environment(arms)
     assert len(env) == n_arms
     for idx, expected in enumerate(env.expected_rewards()):
         assert expected == arms[idx].expected_reward()

Пример #3

Показать файл

Файл: test_environment.py Проект: tmcclintock/MultiArmedBandits

 def test_gaussian_rewards(self):
     """Check length and expected rewards of an all-Gaussian environment.

     Ten ``GaussianReward`` arms are built from two evenly spaced grids
     (first argument from -1 to 1, second from 0.1 to 3); the environment
     must report ten arms, and each entry returned by ``expected_rewards()``
     must equal the matching arm's ``expected_reward()``.
     """
     n_arms = 10
     m_values = np.linspace(-1, 1, n_arms)
     v_values = np.linspace(0.1, 3, n_arms)
     arms = [GaussianReward(m, v) for m, v in zip(m_values, v_values)]
     env = Environment(arms)
     assert len(env) == n_arms
     for idx, expected in enumerate(env.expected_rewards()):
         assert expected == arms[idx].expected_reward()

Пример #4

Показать файл

 def setUp(self):
     """Create the shared fixture: an environment of five default Gaussian arms.

     Stores the arm count on ``self.n_rewards`` and the environment on
     ``self.env``.
     """
     super().setUp()
     arm_count = 5
     self.n_rewards = arm_count
     default_arms = [GaussianReward() for _ in range(arm_count)]
     self.env = Environment(default_arms)

Пример #5

Показать файл

class PosseTestCase(TestCase):
    """Tests for ``Posse`` run against a small all-Gaussian ``Environment``."""

    # Sizes shared by every test. Expected values in the assertions below are
    # derived from these (and from each test's ``n_actions``) instead of being
    # repeated as independent literals.
    N_REWARDS = 5
    N_BANDITS = 20

    def setUp(self):
        """Create a fresh environment of ``N_REWARDS`` default Gaussian arms."""
        super().setUp()
        self.n_rewards = self.N_REWARDS
        self.env = Environment(
            [GaussianReward() for _ in range(self.N_REWARDS)]
        )

    def _make_posse(self, bandit_cls, **bandit_kwargs):
        """Return a ``Posse`` of ``N_BANDITS`` ``bandit_cls`` bandits on ``self.env``."""
        return Posse(
            self.env, bandit_cls, n_bandits=self.N_BANDITS, **bandit_kwargs
        )

    def _greedy_posse_after(self, n_actions):
        """Return a ``GreedyBandit`` posse that has already taken ``n_actions``."""
        posse = self._make_posse(GreedyBandit)
        posse.take_actions(n_actions)
        return posse

    def test_smoke(self):
        """Constructing a posse yields a ``Posse`` instance."""
        posse = self._make_posse(GreedyBandit)
        assert isinstance(posse, Posse)

    def test_numbers(self):
        """A new posse reports the configured sizes and zero actions taken."""
        posse = self._make_posse(GreedyBandit)
        assert len(posse.bandits) == self.N_BANDITS
        assert posse.n_actions_taken == 0
        assert posse.len_env == len(self.env)
        assert posse.n_rewards == self.n_rewards

    def test_actions(self):
        """``take_actions`` advances the posse counter and each bandit's history."""
        n_actions = 2
        posse = self._greedy_posse_after(n_actions)
        assert posse.n_actions_taken == n_actions
        assert len(posse.bandits[0].choice_history) == n_actions

    def test_bandit_kwargs(self):
        """Extra keyword arguments are forwarded to the bandits."""
        eps = 0.1
        posse = self._make_posse(EpsGreedyBandit, eps=eps)
        assert posse.bandits[0].eps == eps

    def test_mean_var_reward(self):
        """Mean and variance of the reward have one entry per action taken."""
        n_actions = 100
        posse = self._greedy_posse_after(n_actions)
        expected_shape = (n_actions,)
        assert posse.mean_reward().shape == expected_shape
        assert posse.var_reward().shape == expected_shape

    def test_mean_best_choice_with_int(self):
        """Best-choice statistics accept the ``argmax`` of the expected rewards."""
        n_actions = 100
        posse = self._greedy_posse_after(n_actions)
        expected_shape = (n_actions,)
        bc = self.env.expected_rewards().argmax()
        assert posse.mean_best_choice(bc).shape == expected_shape
        assert posse.var_best_choice(bc).shape == expected_shape

    def test_mean_best_choice(self):
        """``mean_best_choice`` accepts an array, an int or a list; a float raises."""
        n_actions = 100
        posse = self._greedy_posse_after(n_actions)
        expected_shape = (n_actions,)
        # One "best choice" entry per action, all pointing at arm 0.
        bc = np.zeros(n_actions)
        assert posse.mean_best_choice(bc).shape == expected_shape
        assert posse.mean_best_choice(0).shape == expected_shape
        assert posse.mean_best_choice(bc.tolist()).shape == expected_shape
        with pytest.raises(TypeError):
            posse.mean_best_choice(3.1415)

    def test_var_best_choice(self):
        """``var_best_choice`` accepts an array, an int or a list; a float raises."""
        n_actions = 100
        posse = self._greedy_posse_after(n_actions)
        expected_shape = (n_actions,)
        # One "best choice" entry per action, all pointing at arm 0.
        bc = np.zeros(n_actions)
        assert posse.var_best_choice(bc).shape == expected_shape
        assert posse.var_best_choice(0).shape == expected_shape
        assert posse.var_best_choice(bc.tolist()).shape == expected_shape
        with pytest.raises(TypeError):
            posse.var_best_choice(3.1415)

Пример #6

Показать файл

Файл: test_environment.py Проект: tmcclintock/MultiArmedBandits

 def test_moments(self):
     """Check that ``moments()`` is 4 everywhere for five ``PoissonReward(4)`` arms."""
     rate = 4
     arms = [PoissonReward(rate) for _ in range(5)]
     env = Environment(arms)
     result = env.moments()
     assert np.all(result == 4)