def scenario_builder(self):
  """Returns an agent and environment pair."""
  env_params = lending_params.DelayedImpactParams(
      applicant_distribution=lending_params.two_group_credit_clusters(
          cluster_probabilities=self.cluster_probabilities,
          group_likelihoods=[self.group_0_prob, 1 - self.group_0_prob]),
      bank_starting_cash=self.bank_starting_cash,
      interest_rate=self.interest_rate,
      cluster_shift_increment=self.cluster_shift_increment,
  )
  env = lending.DelayedImpactEnv(env_params)

  agent_params = classifier_agents.ScoringAgentParams(
      feature_keys=['applicant_features'],
      group_key='group',
      default_action_fn=(lambda: 1),
      burnin=self.burnin,
      convert_one_hot_to_integer=True,
      threshold_policy=self.threshold_policy,
      skip_retraining_fn=lambda action, observation: action == 0,
      cost_matrix=params.CostMatrix(
          fn=0, fp=-1, tp=env_params.interest_rate, tn=0))

  agent = oracle_lending_agent.OracleThresholdAgent(
      action_space=env.action_space,
      reward_fn=rewards.BinarizedScalarDeltaReward(
          'bank_cash', baseline=env.initial_params.bank_starting_cash),
      observation_space=env.observation_space,
      params=agent_params,
      env=env)
  agent.seed(100)
  return env, agent
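# Note on the cost matrix above, offered as a hedged reading rather than
# documented behavior: a false positive (a loan that defaults) presumably
# forfeits the principal, scaled to -1, while a true positive earns the
# interest rate, and rejections cost nothing. Under that accounting, lending
# is worthwhile once the estimated repayment probability p satisfies
# p * r > (1 - p). Illustrative break-even arithmetic for a hypothetical
# rate r = 0.3:
r = 0.3
break_even = 1.0 / (1.0 + r)  # accept applicants scored above ~0.77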
class ScoringAgentParams(core.Params):
  """Parameter class for ScoringAgents."""

  default_action_fn = attr.ib()  # type: Callable[[], Any]
  feature_keys = attr.ib(factory=list)  # type: List[Text]

  # Some environments use features which are one-hot and can be "thresholded"
  # by converting the one-hot vector to an integer and applying the threshold
  # in that way.
  convert_one_hot_to_integer = attr.ib(default=False)

  # Whether to freeze the classifier once the burn-in period ends.
  # Classifiers will still accumulate labeled data while they are frozen.
  freeze_classifier_after_burnin = attr.ib(default=False)

  cost_matrix = attr.ib(default=params.CostMatrix(tp=1, tn=1, fp=-1, fn=-1))
  burnin = attr.ib(default=-1)
  threshold_policy = attr.ib(
      default=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD)
  use_propensity_score_weighting = attr.ib(default=False)
  group_key = attr.ib(default="")

  # A function which takes the last action and last observation as inputs and
  # determines whether to skip training the classifier on this step.
  skip_retraining_fn = attr.ib(
      default=None)  # type: Optional[Callable[[Any, Dict[Text, Any]], bool]]
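# A hypothetical configuration sketch tying the fields above together: train
# during a fixed burn-in, then freeze the learned classifier (it still
# accumulates labeled data), and skip retraining on steps where the applicant
# was rejected. The concrete values are illustrative, not taken from any
# experiment in this repo.
frozen_params = classifier_agents.ScoringAgentParams(
    feature_keys=['applicant_features'],
    group_key='group',
    default_action_fn=lambda: 1,   # accept everyone until a model exists.
    burnin=200,                    # illustrative burn-in length.
    freeze_classifier_after_burnin=True,
    skip_retraining_fn=lambda action, observation: action == 0)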
def test_oracle_maxutil_classifier_is_stable(self):
  env = lending.DelayedImpactEnv()

  agent_params = classifier_agents.ScoringAgentParams(
      feature_keys=['applicant_features'],
      group_key='group',
      default_action_fn=(lambda: 1),
      burnin=1,
      threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
      convert_one_hot_to_integer=True,
      cost_matrix=params.CostMatrix(
          fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0))

  agent = oracle_lending_agent.OracleThresholdAgent(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
      params=agent_params,
      env=env)
  test_util.run_test_simulation(env=env, agent=agent)

  # Drop 0 threshold associated with burn-in.
  first_nonzero_threshold = None
  for thresh in agent.global_threshold_history:
    if thresh > 0:
      if first_nonzero_threshold is None:
        first_nonzero_threshold = thresh
      self.assertAlmostEqual(first_nonzero_threshold, thresh)
  # Make sure there is at least one non-zero threshold.
  self.assertIsNotNone(first_nonzero_threshold)
def test_confusion_as_array_and_cost_as_array_line_up(self):
  cost_matrix = params.CostMatrix(tp=1, tn=2, fp=3, fn=4).as_array()
  confusion_matrix = error_metrics.ConfusionMatrix(tp=1, tn=2, fp=3,
                                                   fn=4).as_array()
  self.assertAlmostEqual(np.linalg.norm(cost_matrix - confusion_matrix), 0)
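# A small illustration (not library code) of why the two layouts must line
# up: with matching layouts, a classifier's total cost is just the sum of the
# elementwise product of its confusion counts and the cost matrix. The counts
# below are made up for the example.
confusion_counts = error_metrics.ConfusionMatrix(tp=30, tn=50, fp=10,
                                                 fn=10).as_array()
costs = params.CostMatrix(tp=1, tn=2, fp=3, fn=4).as_array()
total_cost = np.sum(confusion_counts * costs)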
def __init__(
    self,
    action_space,
    reward_fn,
    observation_space,
    feature_selection_fn=None,
    label_fn=None,
    freeze_classifier_after_burnin=False,
    threshold=0.5,
    burnin=-1,
    cost_matrix=None,
    epsilon_greedy=False,
    initial_epsilon_prob=0.7,
    decay_steps=10,
    epsilon_prob_decay_rate=0.02,
):
  """Initializes the jury.

  Args:
    action_space: a `gym.Space` that contains valid actions.
    reward_fn: a `RewardFn` object.
    observation_space: a `gym.Space` that contains valid observations.
    feature_selection_fn: Function that returns a feature vector suitable for
      training from observations.
    label_fn: Function that returns a label from observations and reward.
    freeze_classifier_after_burnin: If True, the classifier is frozen after it
      has learned a model during the burn-in period.
    threshold: Initial threshold.
    burnin: Number of steps before using a learned policy.
    cost_matrix: a params.CostMatrix object.
    epsilon_greedy: Bool. Whether to use an epsilon-greedy agent.
    initial_epsilon_prob: Float. Initial value of the exploration probability
      for an epsilon-greedy agent.
    decay_steps: A positive integer.
    epsilon_prob_decay_rate: A positive float.
  """
  super(NaiveJury, self).__init__(
      action_space,
      reward_fn,
      observation_space,
      threshold=threshold,
      epsilon_greedy=epsilon_greedy,
      initial_epsilon_prob=initial_epsilon_prob,
      decay_steps=decay_steps,
      epsilon_prob_decay_rate=epsilon_prob_decay_rate,
  )
  self._initial_threshold = threshold
  self._features = []
  self._labels = []
  self._burnin = burnin
  self._freeze_classifier_after_burnin = freeze_classifier_after_burnin
  self._feature_selection_fn = (
      feature_selection_fn or self._get_default_features)
  self._label_fn = label_fn or self._label_fn

  if not cost_matrix:
    self._cost_matrix = params.CostMatrix(tp=1, tn=1, fp=-1, fn=-1)
  else:
    self._cost_matrix = cost_matrix
def test_weighted_single_threshold(self):
  # With a vanilla cost matrix, this should result in accepting
  # only predictions >= 0.75.
  predictions = np.array([0.25, 0.25, 0.33, 0.33, 0.75, 0.75])
  labels = np.array([0, 1, 0, 1, 0, 1])
  weights = np.array([3., 1., 2., 1., 1., 3.])
  weights = weights / sum(weights)
  vanilla_cost_matrix = params.CostMatrix(tn=0., fp=-1., fn=0., tp=1.)
  weighted_threshold = threshold_policies.single_threshold(
      predictions, labels, weights, vanilla_cost_matrix)
  self.assertAlmostEqual(weighted_threshold, 0.75)
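# Worked arithmetic behind the expected 0.75 threshold above. This is an
# illustrative utility calculation, not the internal single_threshold
# implementation: accepted positives earn the tp reward (+1), accepted
# negatives pay the fp cost (-1), and fn/tn contribute nothing.
predictions = np.array([0.25, 0.25, 0.33, 0.33, 0.75, 0.75])
labels = np.array([0, 1, 0, 1, 0, 1])
weights = np.array([3., 1., 2., 1., 1., 3.])

def weighted_utility(threshold):
  accepted = predictions >= threshold
  return np.sum(weights[accepted] * np.where(labels[accepted] == 1, 1., -1.))

# weighted_utility(0.25) == -1, weighted_utility(0.33) == 1,
# weighted_utility(0.75) == 2, weighted_utility(1.0) == 0.
# Accepting only the 0.75 predictions maximizes the weighted utility, so the
# learned threshold lands at 0.75.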
def test_weighted_single_threshold(self):
  # Weighted labels/predictions are perfectly calibrated.
  # With a vanilla cost matrix, this should result in threshold > 0.5.
  predictions = np.array([0.25, 0.25, 0.5, 0.5, 0.75, 0.75])
  labels = np.array([0, 1, 0, 1, 0, 1])
  weights = np.array([3., 1., 1., 1., 1., 3.])
  weights = weights / sum(weights)
  vanilla_cost_matrix = params.CostMatrix(tn=1., fp=-1., fn=-1., tp=1.)
  weighted_threshold = threshold_policies.single_threshold(
      predictions, labels, weights, vanilla_cost_matrix)
  self.assertEqual(weighted_threshold, 0.75)
def test_oracle_lending_agent_interacts(self):
  env = lending.DelayedImpactEnv()

  agent_params = classifier_agents.ScoringAgentParams(
      feature_keys=['applicant_features'],
      group_key='group',
      default_action_fn=(lambda: 1),
      burnin=1,
      convert_one_hot_to_integer=True,
      cost_matrix=params.CostMatrix(
          fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0))

  agent = oracle_lending_agent.OracleThresholdAgent(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
      params=agent_params,
      env=env)
  test_util.run_test_simulation(env=env, agent=agent)
def __init__(
    self,
    action_space,
    reward_fn,
    observation_space,
    group_cost,
    subsidize=False,
    subsidy_beta=0.8,
    gaming_control=np.inf,
    label_fn=None,
    burnin=10,
    epsilon_greedy=False,
    initial_epsilon_prob=0.7,
    decay_steps=10,
    epsilon_prob_decay_rate=0.02,
):
  super(RobustJury, self).__init__(
      action_space,
      reward_fn,
      observation_space,
      threshold=0,
      feature_selection_fn=self._get_maximum_manipulated_features,
      label_fn=label_fn,
      cost_matrix=params.CostMatrix(tp=1, tn=1, fp=-1, fn=-1),
      freeze_classifier_after_burnin=True,
      burnin=burnin,
      epsilon_greedy=epsilon_greedy,
      initial_epsilon_prob=initial_epsilon_prob,
      decay_steps=decay_steps,
      epsilon_prob_decay_rate=epsilon_prob_decay_rate,
  )
  self._group_cost = group_cost
  self._subsidize = subsidize
  self._subsidy_beta = subsidy_beta
  self._gaming_control = gaming_control

  if burnin < 2:
    raise ValueError(
        "This agent expects a burn-in period of at least 2 steps to work as "
        "expected.")
def test_cost_metric_correct_for_atomic_prediction_rule(self):

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  cost_metric = error_metrics.CostedConfusionMetric(
      env=env,
      prediction_fn=lambda x: 1,
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: 1,
      cost_matrix=params.CostMatrix(tp=1, fp=-2, tn=-1, fn=-1))
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=cost_metric)
  logging.info('Cost measurement: %s.', measurement)
  self.assertEqual(measurement[1], -5)
  self.assertNotIn(0, measurement)
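# Where the expected -5 plausibly comes from, under two assumptions made here
# (not stated in the test): the simulation runs for 10 steps and the
# deterministic dummy env alternates its ground-truth bit between 0 and 1.
# The always-accept rule then produces 5 true positives and 5 false
# positives, and the costed score follows from the array layout checked in
# test_as_array below:
cost = params.CostMatrix(tp=1, fp=-2, tn=-1, fn=-1).as_array()  # [[tn, fp], [fn, tp]]
expected = 5 * cost[1, 1] + 5 * cost[0, 1]  # 5 * 1 + 5 * (-2) = -5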
def test_as_array(self):
  # as_array() lays the matrix out as [[tn, fp], [fn, tp]].
  array = params.CostMatrix(tp=1, tn=10, fp=3, fn=100).as_array()
  self.assertAlmostEqual(
      np.linalg.norm(array - np.array([[10, 3], [100, 1]])), 0)
# Lint as: python2, python3
"""Tests for fairness_gym.agents.threshold_policies."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl.testing import absltest
import params
from agents import threshold_policies
import numpy as np
from six.moves import range
from sklearn import metrics as sklearn_metrics

COST_MATRIX = params.CostMatrix(tp=1.5, fp=-1.0, fn=-0.3, tn=2.0)
EPSILON = 1e-6


class ThresholdPoliciesTest(absltest.TestCase):

  def test_equality_of_opportunity_holds(self):
    rng = np.random.RandomState(100)
    group_a_predictions = rng.rand(100000)
    group_a_labels = rng.choice([0, 1], p=[0.5, 0.5], size=100000)
    group_b_predictions = rng.normal(size=100000)
    group_b_labels = rng.choice([0, 1], p=[0.2, 0.8], size=100000)
    thresholds = threshold_policies.equality_of_opportunity_thresholds(
        group_predictions={