def test_action(self):
     """Exercise ``Environment.action`` on a ten-armed Poisson environment.

     An index past the last arm is expected to trip an ``AssertionError``;
     a valid index is expected to yield a plain numeric reward.
     """
     # np.random.rand(10) + 1 draws the ten reward parameters from [1, 2).
     rewards = [PoissonReward(mu) for mu in np.random.rand(10) + 1]
     e = Environment(rewards)
     with pytest.raises(AssertionError):
         e.action(11)
     r = e.action(0)
     # isinstance (rather than an exact type() match) also accepts numeric
     # subclasses such as numpy.float64, which subclasses the builtin float.
     assert isinstance(r, (float, int))
 def test_poisson_rewards(self):
     """Check the size and expected rewards of a Poisson-armed ``Environment``.

     Ten ``PoissonReward`` arms are created with parameters evenly spaced
     over [1, 5]; each value reported by ``expected_rewards()`` must equal
     the matching arm's own ``expected_reward()``.
     """
     arm_count = 10
     arms = [PoissonReward(rate) for rate in np.linspace(1, 5, arm_count)]
     env = Environment(arms)
     assert len(env) == arm_count
     reported = env.expected_rewards()
     for idx, value in enumerate(reported):
         assert value == arms[idx].expected_reward()
 def test_gaussian_rewards(self):
     """Check the size and expected rewards of a Gaussian-armed ``Environment``.

     Ten ``GaussianReward`` arms are created by pairing first arguments
     evenly spaced over [-1, 1] with second arguments evenly spaced over
     [0.1, 3]; each value reported by ``expected_rewards()`` must equal the
     matching arm's own ``expected_reward()``.
     """
     arm_count = 10
     first_args = np.linspace(-1, 1, arm_count)
     second_args = np.linspace(0.1, 3, arm_count)
     arms = [GaussianReward(a, b) for a, b in zip(first_args, second_args)]
     env = Environment(arms)
     assert len(env) == arm_count
     reported = env.expected_rewards()
     for idx, value in enumerate(reported):
         assert value == arms[idx].expected_reward()
Пример #4
0
 def setUp(self):
     """Create the per-test fixture: five default ``GaussianReward`` arms.

     Stores the arm count on ``self.n_rewards`` and the resulting
     ``Environment`` on ``self.env``.
     """
     super().setUp()
     arm_count = 5
     self.n_rewards = arm_count
     arms = [GaussianReward() for _ in range(arm_count)]
     self.env = Environment(arms)
Пример #5
0
class PosseTestCase(TestCase):
    """Tests for ``Posse`` running over a small Gaussian ``Environment``.

    Sizes used by several tests live in class constants, and every assertion
    is written in terms of the value that drives it (number of bandits,
    number of actions) rather than a repeated literal, so changing a size
    in one place cannot silently desynchronise a check.
    """

    N_REWARDS = 5
    N_BANDITS = 20

    def setUp(self):
        """Build a fresh environment of default ``GaussianReward`` arms."""
        super().setUp()
        self.n_rewards = self.N_REWARDS
        self.env = Environment(
            [GaussianReward() for _ in range(self.N_REWARDS)]
        )

    def _greedy_posse(self, n_actions=0):
        """Return a ``GreedyBandit`` posse, optionally after taking actions.

        ``take_actions`` is only invoked when ``n_actions`` is non-zero, so
        a freshly built posse is returned untouched by default.
        """
        posse = Posse(self.env, GreedyBandit, n_bandits=self.N_BANDITS)
        if n_actions:
            posse.take_actions(n_actions)
        return posse

    @staticmethod
    def _check_best_choice_inputs(statistic, n_actions):
        """Assert the input handling shared by the best-choice statistics.

        ``statistic`` is a bound ``mean_best_choice`` / ``var_best_choice``.
        It must return a ``(n_actions,)``-shaped result for an array, a
        scalar int and a list, and raise ``TypeError`` for a float.
        """
        zeros = np.zeros(n_actions)
        for best_choice in (zeros, 0, zeros.tolist()):
            assert statistic(best_choice).shape == (n_actions, )
        with pytest.raises(TypeError):
            statistic(3.1415)

    def test_smoke(self):
        """A posse can be constructed at all."""
        posse = self._greedy_posse()
        assert isinstance(posse, Posse)

    def test_numbers(self):
        """A new posse reports the configured sizes and no actions taken."""
        posse = self._greedy_posse()
        assert len(posse.bandits) == self.N_BANDITS
        assert posse.n_actions_taken == 0
        assert posse.len_env == len(self.env)
        assert posse.n_rewards == self.n_rewards

    def test_actions(self):
        """Taking actions updates the counter and each bandit's history."""
        n_actions = 2
        posse = self._greedy_posse(n_actions)
        assert posse.n_actions_taken == n_actions
        assert len(posse.bandits[0].choice_history) == n_actions

    def test_bandit_kwargs(self):
        """Extra keyword arguments are forwarded to the bandit class."""
        eps = 0.1
        posse = Posse(
            self.env, EpsGreedyBandit, n_bandits=self.N_BANDITS, eps=eps
        )
        assert posse.bandits[0].eps == eps

    def test_mean_var_reward(self):
        """Reward statistics have one entry per action taken."""
        n_actions = 100
        posse = self._greedy_posse(n_actions)
        assert posse.mean_reward().shape == (n_actions, )
        assert posse.var_reward().shape == (n_actions, )

    def test_mean_best_choice_with_int(self):
        """Best-choice statistics accept the environment's argmax index."""
        n_actions = 100
        posse = self._greedy_posse(n_actions)
        bc = self.env.expected_rewards().argmax()
        assert posse.mean_best_choice(bc).shape == (n_actions, )
        assert posse.var_best_choice(bc).shape == (n_actions, )

    def test_mean_best_choice(self):
        """``mean_best_choice`` accepts array/int/list and rejects float."""
        n_actions = 100
        posse = self._greedy_posse(n_actions)
        self._check_best_choice_inputs(posse.mean_best_choice, n_actions)

    def test_var_best_choice(self):
        """``var_best_choice`` accepts array/int/list and rejects float."""
        n_actions = 100
        posse = self._greedy_posse(n_actions)
        self._check_best_choice_inputs(posse.var_best_choice, n_actions)
 def test_moments(self):
     """Check ``Environment.moments()`` for five identical Poisson arms.

     Every arm is ``PoissonReward(4)``, and every entry of the returned
     moments is expected to equal 4.
     """
     arms = [PoissonReward(4) for _ in range(5)]
     env = Environment(arms)
     result = env.moments()
     assert np.all(result == 4)