def test_should_update_an_arm_with_feedback(self):
    """A reward of 1 sent to 'arm1' must move its reward_sum and n from 1 to 2."""
    arms = [
        EpsilonGreedyArm(arm_name, n=1, reward_sum=1)
        for arm_name in ('arm1', 'arm2')
    ]
    bandit = EGreedy('my_bandit', arms, epsilon=0.1)

    bandit.update(Feedback('arm1', reward=1))

    # Checked separately so a failure pinpoints which counter is off.
    assert bandit.arms_dict['arm1'].reward_sum == 2
    assert bandit.arms_dict['arm1'].n == 2
def update(self, bandit_id: str, arm_id: str, reward: float, payload: str = None) -> str:
    """
    Forwards one piece of feedback to the bandit stored under ``bandit_id``.

    :param bandit_id: [str], key used to look the bandit up in ``self.bandits``
    :param arm_id: [str], name of the arm the feedback refers to
    :param reward: [float], reward observed for that arm
    :param payload: [str, default=None], passed through unchanged as the third
        argument of ``Feedback``
    """
    # NOTE(review): the signature is annotated ``-> str`` but this body has no
    # return statement, so callers receive None -- confirm the intended contract.
    # The bandit is looked up first so an unknown id fails before any Feedback
    # object is built.
    target: OnlineBandit = self.bandits[bandit_id]
    feedback = Feedback(arm_id, reward, payload)
    target.update(feedback)
def test_should_update_an_arm_with_feedback(self):
    """A reward of 1 sent to 'arm2' must bump n_rewards from 1 to 2 and n from 2 to 3."""
    names = ['arm1', 'arm2', 'arm3']
    bandit = BetaThompsonBandit(
        'my_bandit',
        [BetaArm(arm_name, n=2, n_rewards=1) for arm_name in names],
    )

    bandit.update(Feedback('arm2', reward=1))

    updated = bandit.arms_dict['arm2']
    assert updated.n_rewards == 2
    assert updated.n == 3
def test_should_update_an_arm_with_feedback(self):
    """A reward of 10 sent to 'arm2' must be reflected in its three statistics."""
    # Positional arguments are presumably (n, reward_sum, squared_reward_sum)
    # -- TODO confirm against the GaussianArm signature.
    arms = []
    for arm_name in ('arm1', 'arm2'):
        arms.append(GaussianArm(arm_name, 0.0001, 0, 1))
    bandit = GaussianThompsonBandit('my_bandit', arms)

    bandit.update(Feedback('arm2', reward=10))

    updated = bandit.arms_dict['arm2']
    assert updated.reward_sum == 10
    assert updated.squared_reward_sum == 101
    assert updated.n == 1.0001
def test_should_run_for_several_iterations_and_sample_all_arms(self):
    """Over 100 choose/update rounds every configured arm must be chosen at least once."""
    arm_names = ['arm1', 'arm2', 'arm3']
    bandit = EGreedy(
        'my_bandit',
        [EpsilonGreedyArm(arm_name, n=1, reward_sum=1) for arm_name in arm_names],
        epsilon=0.1,
    )

    seen = set()
    for _ in range(100):
        chosen = bandit.choose()
        seen.add(chosen.arm_id)
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        if random.random() < 0.2:
            reward = 1.0
        else:
            reward = 0.0
        bandit.update(Feedback(chosen.arm_id, reward))

    assert seen == set(arm_names)
def test_should_reset_value_to_default(self):
    """After 20 choose/update rounds, reset() must leave every arm at n_rewards == 1 and n == 2."""
    bandit = BetaThompsonBandit(
        'my_bandit',
        [BetaArm(arm_name, n=2, n_rewards=1) for arm_name in ('arm1', 'arm2', 'arm3')],
    )

    for _ in range(20):
        chosen = bandit.choose()
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        reward = float(random.random() < 0.2)
        bandit.update(Feedback(arm_id=chosen.arm_id, reward=reward))

    bandit.reset()

    for current in bandit.arms_dict.values():
        assert current.n_rewards == 1 and current.n == 2
def test_should_reset_value_to_default(self):
    """After 20 choose/update rounds, reset() must leave all three statistics of every arm at 2."""
    bandit = GaussianThompsonBandit(
        'my_bandit',
        [GaussianArm(arm_name, 2, 2, 2) for arm_name in ('arm1', 'arm2', 'arm3')],
    )

    for _ in range(20):
        chosen = bandit.choose()
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        reward = float(random.random() < 0.2)
        bandit.update(Feedback(arm_id=chosen.arm_id, reward=reward))

    bandit.reset()

    for current in bandit.arms_dict.values():
        assert current.squared_reward_sum == 2
        assert current.reward_sum == 2
        assert current.n == 2
def test_should_run_for_several_iterations_and_sample_all_arms(self):
    """Over 1000 choose/update rounds every configured arm must be chosen at least once."""
    arm_names = ['arm1', 'arm2', 'arm3']
    bandit = GaussianThompsonBandit(
        'my_bandit',
        [
            GaussianArm(arm_name, n=5, reward_sum=5, squared_reward_sum=5)
            for arm_name in arm_names
        ],
    )

    seen = set()
    for _ in range(1000):
        chosen = bandit.choose()
        seen.add(chosen.arm_id)
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        if random.random() < 0.2:
            reward = 1.0
        else:
            reward = 0.0
        bandit.update(Feedback(chosen.arm_id, reward))

    assert seen == set(arm_names)
def test_should_run_for_several_iterations_and_sample_all_arms(self):
    """Over 1000 choose/update rounds every configured arm must be chosen at least once."""
    arm_names = ['arm1', 'arm2', 'arm3']
    bandit = BetaThompsonBandit(
        'my_bandit',
        [BetaArm(arm_name, n=2, n_rewards=1) for arm_name in arm_names],
    )

    seen = set()
    for _ in range(1000):
        chosen = bandit.choose()
        seen.add(chosen.arm_id)
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        reward = float(random.random() < 0.2)
        bandit.update(Feedback(chosen.arm_id, reward))

    assert seen == set(arm_names)
def test_should_reset_value_to_default(self):
    """After 20 choose/update rounds, reset() must leave every arm at reward_sum == 0 and n == 0.001."""
    bandit = EGreedy(
        'my_bandit',
        [
            EpsilonGreedyArm(arm_name, n=1, reward_sum=1)
            for arm_name in ('arm1', 'arm2', 'arm3')
        ],
        epsilon=0.1,
    )

    for _ in range(20):
        chosen = bandit.choose()
        # Simulated reward: 1.0 with probability 0.2, otherwise 0.0.
        if random.random() < 0.2:
            reward = 1.0
        else:
            reward = 0.0
        bandit.update(Feedback(arm_id=chosen.arm_id, reward=reward))

    bandit.reset()

    # The expected post-reset values differ from the constructor arguments
    # used above (n=1, reward_sum=1).
    for current in bandit.arms_dict.values():
        assert current.reward_sum == 0 and current.n == 0.001