def test_features_manipulated_to_maximum_limit_no_control_epsilon_greedy(self):
    """Checks max manipulated features for an epsilon-greedy RobustJury.

    With unlimited gaming (`gaming_control=np.inf`), selected applicants are
    assumed to raise their scores by the group-specific maximum; the expected
    values below are those manipulated scores for the selected applicants.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            'num_applicants': 5,
            'gaming_control': np.inf,
            'group_cost': {0: 2, 1: 4},
        })
    agent = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
        gaming_control=env.initial_params.gaming_control,
        epsilon_greedy=True,
        initial_epsilon_prob=0.2)
    observations = {
        'test_scores_y': np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
        'selected_applicants': np.asarray([0, 1, 0, 1, 1]),
        'selected_ground_truth': np.asarray([2, 0, 2, 1, 1]),
        'applicant_groups': np.asarray([0, 1, 1, 0, 1]),
    }
    manipulated = agent._get_maximum_manipulated_features(observations)
    self.assertTrue(
        np.all(np.isclose(manipulated, [0.5, 0.9, 0.6], atol=1e-4)))
def test_features_manipulated_to_maximum_limit_with_no_control(self):
    """Checks max manipulated features with unlimited gaming (no epsilon).

    After one `act` call the agent's stored `_features` should equal the
    maximum-manipulated features recomputed from the same observations.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            'num_applicants': 5,
            'gaming_control': np.inf,
            'group_cost': {0: 2, 1: 4},
        })
    agent = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost)
    observations = {
        'test_scores_y': np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
        'selected_applicants': np.asarray([0, 1, 0, 1, 1]),
        'selected_ground_truth': np.asarray([2, 0, 2, 1, 1]),
        'applicant_groups': np.asarray([0, 1, 1, 0, 1]),
    }
    agent.act(observations, done=False)
    manipulated = agent._get_maximum_manipulated_features(observations)
    self.assertTrue(
        np.all(np.isclose(manipulated, [0.55, 1.0, 0.65], atol=1e-4)))
    self.assertEqual(
        agent._features, agent._get_maximum_manipulated_features(observations))
def build_scenario(self):
    """Returns agent and env according to provided params."""
    env = college_admission.CollegeAdmissionsEnv(user_params=self.env_config)
    # Keyword arguments shared by every jury type.
    shared_kwargs = dict(
        action_space=env.action_space,
        reward_fn=(lambda x: 0),
        observation_space=env.observation_space,
        epsilon_greedy=self.epsilon_greedy,
        initial_epsilon_prob=self.initial_epsilon_prob,
        epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
        decay_steps=self.decay_steps)
    if self.agent_type == 'robust':
        agent = college_admission_jury.RobustJury(
            group_cost=env.initial_params.group_cost,
            subsidize=env.initial_params.subsidize,
            subsidy_beta=env.initial_params.subsidy_beta,
            gaming_control=env.initial_params.gaming_control,
            burnin=self.burnin,
            **shared_kwargs)
    elif self.agent_type in ('static', 'continuous'):
        # 'static' freezes its classifier after burnin; 'continuous' keeps
        # retraining. Everything else about the NaiveJury setup is identical.
        agent = college_admission_jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=(self.agent_type == 'static'),
            burnin=self.burnin,
            **shared_kwargs)
    else:
        agent = college_admission_jury.FixedJury(
            threshold=self.agent_threshold,
            **shared_kwargs)
    return env, agent
def test_robust_classifier_simulation_runs_successfully(self):
    """Smoke test: a RobustJury completes a Stackelberg simulation."""
    environment = college_admission.CollegeAdmissionsEnv()
    robust_agent = college_admission_jury.RobustJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=environment.initial_params.group_cost,
        burnin=10)
    test_util.run_test_simulation(
        env=environment, agent=robust_agent, stackelberg=True)
def test_assertion_raised_when_burnin_less_than_2(self):
    """A RobustJury with burnin < 2 must raise ValueError at construction."""
    environment = college_admission.CollegeAdmissionsEnv()
    with self.assertRaises(ValueError):
        college_admission_jury.RobustJury(
            action_space=environment.action_space,
            observation_space=environment.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=environment.initial_params.group_cost,
            burnin=1)  # below the minimum of 2
def test_correct_robust_threshold_returned(self):
    """Training on hand-set features/labels yields the expected threshold."""
    env = college_admission.CollegeAdmissionsEnv()
    agent = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost)
    # Inject a small training set directly and retrain the internal model.
    training_features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
    training_labels = [0, 0, 1, 0, 0, 1, 1, 1]
    agent._features = training_features
    agent._labels = training_labels
    agent._train_model()
    self.assertEqual(agent._threshold, 0.6)
def test_correct_max_score_change_calculated_with_subsidy(self):
    """Tests that the max gaming steps gives output as expected."""
    subsidized_params = {
        'group_cost': {0: 2, 1: 4},
        'subsidize': True,
        'subsidy_beta': 0.8,
        'gaming_control': np.inf,
    }
    env = college_admission.CollegeAdmissionsEnv(user_params=subsidized_params)
    agent = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
        subsidize=env.initial_params.subsidize,
        subsidy_beta=env.initial_params.subsidy_beta,
        gaming_control=env.initial_params.gaming_control)
    # Step once so the agent sees a real observation before computing limits.
    obs, _, _, _ = env.step(agent.initial_action())
    max_change = agent._get_max_allowed_score_change(obs)
    self.assertEqual(max_change, [0.5, 0.3125])