def build_scenario(self):
    """Builds the college admissions environment and the matching jury agent.

    The jury class is chosen from ``self.agent_type``:
      * ``'robust'``: a ``RobustJury`` configured with the cost/subsidy/gaming
        settings read from the environment's ``initial_params``.
      * ``'static'``: a ``NaiveJury`` created with
        ``freeze_classifier_after_burnin=True``.
      * ``'continuous'``: a ``NaiveJury`` created with
        ``freeze_classifier_after_burnin=False``.
      * any other value: a ``FixedJury`` using ``self.agent_threshold``.

    Returns:
      A tuple ``(env, agent)``.
    """
    env = college_admission.CollegeAdmissionsEnv(user_params=self.env_config)
    jury = college_admission_jury

    # Keyword arguments that every jury type receives.
    shared_kwargs = dict(
        action_space=env.action_space,
        reward_fn=(lambda x: 0),
        observation_space=env.observation_space,
        epsilon_greedy=self.epsilon_greedy,
        initial_epsilon_prob=self.initial_epsilon_prob,
        decay_steps=self.decay_steps,
        epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
    )

    if self.agent_type == 'robust':
        env_params = env.initial_params
        return env, jury.RobustJury(
            group_cost=env_params.group_cost,
            subsidize=env_params.subsidize,
            subsidy_beta=env_params.subsidy_beta,
            gaming_control=env_params.gaming_control,
            burnin=self.burnin,
            **shared_kwargs)

    if self.agent_type == 'static':
        return env, jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=True,
            burnin=self.burnin,
            **shared_kwargs)

    if self.agent_type == 'continuous':
        return env, jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=False,
            burnin=self.burnin,
            **shared_kwargs)

    # Fallback for every other agent type; FixedJury is not given a burnin.
    return env, jury.FixedJury(threshold=self.agent_threshold, **shared_kwargs)
def test_agent_returns_correct_threshold(self):
    """Checks the threshold in the last history entry is within 1e-2 of 0.55.

    Runs a 10-step simulation with gaming and subsidies disabled, using a
    NaiveJury created with burnin=9 and freeze_classifier_after_burnin=True.
    """
    noise_dist = params.BoundedGaussian(max=0.3, min=0, sigma=0, mu=0.1)
    feature_dist = params.GMM(
        mix_weight=[0.5, 0.5], mu=[0.5, 0.5], sigma=[0.1, 0.1])
    user_params = {
        'gaming': False,
        'subsidize': False,
        'noise_params': noise_dist,
        'feature_params': feature_dist,
    }
    env = college_admission.CollegeAdmissionsEnv(user_params=user_params)

    agent = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0,
        burnin=9,
        freeze_classifier_after_burnin=True)

    test_util.run_test_simulation(
        env=env, agent=agent, num_steps=10, stackelberg=True)

    last_step = env.history[-1]
    learned_threshold = last_step.action['threshold']
    threshold_matches = np.isclose(learned_threshold, 0.55, atol=1e-2)
    self.assertTrue(threshold_matches)
def test_simple_classifier_simulation_runs_successfully(self):
    """Smoke test: a default env with a NaiveJury runs a test simulation."""
    env = college_admission.CollegeAdmissionsEnv()
    jury_kwargs = dict(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    agent = college_admission_jury.NaiveJury(**jury_kwargs)

    # No explicit assertion: the test passes if the simulation does not raise.
    test_util.run_test_simulation(env=env, agent=agent, stackelberg=True)
def test_jury_successfully_initializes(self):
    """Checks the initial action of a NaiveJury built with threshold=0.7."""
    env = college_admission.CollegeAdmissionsEnv()
    agent = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7)

    # Expected entries of the initial action, checked in this order.
    expected_entries = (('threshold', 0.7), ('epsilon_prob', 0))
    for key, expected_value in expected_entries:
        # initial_action() is requested afresh for each entry.
        self.assertEqual(agent.initial_action()[key], expected_value)
def test_label_fn_returns_correct_labels(self):
    """Verifies `_label_fn` returns [1, 0, 1] for the sample observations."""
    env = college_admission.CollegeAdmissionsEnv()
    agent = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7)

    observations = dict([
        ('test_scores_y', [0.2, 0.3, 0.4, 0.5, 0.6]),
        ('selected_ground_truth', [1, 0, 2, 1, 2]),
        ('selected_applicants', [1, 1, 0, 1, 0]),
    ])
    expected_labels = [1, 0, 1]

    actual_labels = agent._label_fn(observations)
    self.assertListEqual(actual_labels, expected_labels)
def test_get_default_features_returns_same_features(self):
    """Verifies `_get_default_features` returns [0.2, 0.3, 0.5] for the sample."""
    env = college_admission.CollegeAdmissionsEnv()
    agent = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7)

    observations = dict([
        ('test_scores_y', [0.2, 0.3, 0.4, 0.5, 0.6]),
        ('selected_ground_truth', [1, 0, 2, 1, 2]),
        ('selected_applicants', [1, 1, 0, 1, 0]),
    ])
    expected_features = [0.2, 0.3, 0.5]

    actual_features = agent._get_default_features(observations)
    self.assertListEqual(actual_features, expected_features)
def test_agent_returns_same_threshold_till_burnin_learns_and_freezes(self):
    """Checks threshold history for a NaiveJury with burnin=4 and freezing on.

    After a 10-step simulation, the first four recorded thresholds must all be
    the initial 0.3, and the full history must contain exactly three distinct
    threshold values.
    """
    env = college_admission.CollegeAdmissionsEnv()
    agent = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.3,
        burnin=4,
        freeze_classifier_after_burnin=True)
    test_util.run_test_simulation(
        env=env, agent=agent, num_steps=10, stackelberg=True)

    # Collect the threshold of every recorded action as a plain float.
    thresholds = []
    for _, action in env.history:
        thresholds.append(float(action['threshold']))

    burnin_thresholds = set(thresholds[:4])
    distinct_thresholds = set(thresholds)
    self.assertEqual(burnin_thresholds, {0.3})
    self.assertLen(distinct_thresholds, 3)