def test_action(self):
    """Check that ``Environment.action`` rejects a bad index and returns a number.

    Builds an environment of ten ``PoissonReward`` arms, expects an
    ``AssertionError`` when action index 11 is requested, and expects
    action 0 to return a ``float`` or ``int`` (subclasses included).
    """
    n_arms = 10
    # np.random.rand yields values in [0, 1), so every mu falls in [1, 2).
    rewards = [PoissonReward(mu) for mu in np.random.rand(n_arms) + 1]
    env = Environment(rewards)

    # Index 11 lies beyond the ten arms created above.
    with pytest.raises(AssertionError):
        env.action(n_arms + 1)

    reward = env.action(0)
    # isinstance instead of an exact type() match so that float/int
    # subclasses are accepted as valid numeric rewards.
    assert isinstance(reward, (float, int))
def test_poisson_rewards(self):
    """Check an ``Environment`` built from ``PoissonReward`` arms.

    The environment's length must equal the number of arms, and every
    entry of ``expected_rewards()`` must equal the value reported by the
    corresponding reward's own ``expected_reward()``.
    """
    n_arms = 10
    arms = [PoissonReward(mu) for mu in np.linspace(1, 5, n_arms)]
    env = Environment(arms)

    assert len(env) == n_arms

    # Compare position by position against the arm each entry came from.
    for idx, expected in enumerate(env.expected_rewards()):
        assert expected == arms[idx].expected_reward()
def test_gaussian_rewards(self):
    """Check an ``Environment`` built from ``GaussianReward`` arms.

    The environment's length must equal the number of arms, and every
    entry of ``expected_rewards()`` must equal the value reported by the
    corresponding reward's own ``expected_reward()``.
    """
    n_arms = 10
    # NOTE(review): the two arguments are presumably mean and variance --
    # confirm against the GaussianReward signature.
    first_args = np.linspace(-1, 1, n_arms)
    second_args = np.linspace(0.1, 3, n_arms)
    arms = [GaussianReward(m, v) for m, v in zip(first_args, second_args)]
    env = Environment(arms)

    assert len(env) == n_arms

    # Compare position by position against the arm each entry came from.
    for idx, expected in enumerate(env.expected_rewards()):
        assert expected == arms[idx].expected_reward()
def setUp(self):
    """Give each test a five-arm environment of default ``GaussianReward``s.

    Stores the arm count on ``self.n_rewards`` and the environment on
    ``self.env``.
    """
    super().setUp()
    self.n_rewards = 5
    self.env = Environment([GaussianReward() for _ in range(self.n_rewards)])
class PosseTestCase(TestCase):
    """Tests for ``Posse``: construction, action bookkeeping and statistics."""

    # Number of bandits used by every test in this class.
    _N_BANDITS = 20

    def setUp(self):
        """Create a five-arm environment of default ``GaussianReward``s."""
        super().setUp()
        self.n_rewards = 5
        self.env = Environment(
            [GaussianReward() for _ in range(self.n_rewards)]
        )

    def _greedy_posse(self, n_actions=0):
        """Build a ``GreedyBandit`` posse, optionally taking ``n_actions`` first."""
        posse = Posse(self.env, GreedyBandit, n_bandits=self._N_BANDITS)
        if n_actions:
            posse.take_actions(n_actions)
        return posse

    def test_smoke(self):
        """Constructing a posse yields a ``Posse`` instance."""
        assert isinstance(self._greedy_posse(), Posse)

    def test_numbers(self):
        """A fresh posse reports the expected counts."""
        posse = self._greedy_posse()
        assert len(posse.bandits) == self._N_BANDITS
        assert posse.n_actions_taken == 0
        assert posse.len_env == len(self.env)
        assert posse.n_rewards == self.n_rewards

    def test_actions(self):
        """Taking two actions is reflected in the posse and bandit history."""
        posse = self._greedy_posse(n_actions=2)
        assert posse.n_actions_taken == 2
        assert len(posse.bandits[0].choice_history) == 2

    def test_bandit_kwargs(self):
        """Extra keyword arguments reach the bandits (checked on the first one)."""
        eps = 0.1
        posse = Posse(
            self.env, EpsGreedyBandit, n_bandits=self._N_BANDITS, eps=eps
        )
        assert posse.bandits[0].eps == eps

    def test_mean_var_reward(self):
        """Reward mean and variance have one entry per action taken."""
        n_steps = 100
        posse = self._greedy_posse(n_actions=n_steps)
        expected_shape = (n_steps,)
        assert posse.mean_reward().shape == expected_shape
        assert posse.var_reward().shape == expected_shape

    def test_mean_best_choice_with_int(self):
        """Best-choice statistics accept the argmax of the expected rewards."""
        n_steps = 100
        posse = self._greedy_posse(n_actions=n_steps)
        best = self.env.expected_rewards().argmax()
        expected_shape = (n_steps,)
        assert posse.mean_best_choice(best).shape == expected_shape
        assert posse.var_best_choice(best).shape == expected_shape

    def test_mean_best_choice(self):
        """``mean_best_choice`` takes an array, an int or a list; a float raises."""
        n_steps = 100
        posse = self._greedy_posse(n_actions=n_steps)
        best = np.zeros(n_steps)
        expected_shape = (n_steps,)
        assert posse.mean_best_choice(best).shape == expected_shape
        assert posse.mean_best_choice(0).shape == expected_shape
        assert posse.mean_best_choice(best.tolist()).shape == expected_shape
        with pytest.raises(TypeError):
            posse.mean_best_choice(3.1415)

    def test_var_best_choice(self):
        """``var_best_choice`` takes an array, an int or a list; a float raises."""
        n_steps = 100
        posse = self._greedy_posse(n_actions=n_steps)
        best = np.zeros(n_steps)
        expected_shape = (n_steps,)
        assert posse.var_best_choice(best).shape == expected_shape
        assert posse.var_best_choice(0).shape == expected_shape
        assert posse.var_best_choice(best.tolist()).shape == expected_shape
        with pytest.raises(TypeError):
            posse.var_best_choice(3.1415)
def test_moments(self):
    """Check ``Environment.moments`` for five identical ``PoissonReward(4)`` arms.

    Every value returned by ``moments()`` is expected to equal 4.
    """
    mu = 4
    n_arms = 5
    env = Environment([PoissonReward(mu) for _ in range(n_arms)])

    result = env.moments()

    assert np.all(result == mu)