def test_mitigation_quick_start_example(self):
    # Data
    labels = [1, 1, 0, 1, 0, 0, 1, 0]
    predictions = [0, 0, 0, 1, 1, 1, 1, 0]
    likelihoods = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.1]
    is_member = [0, 0, 0, 0, 1, 1, 1, 1]

    # Bias Mitigation
    mitigation = BinaryMitigation.EqualizedOdds()

    # Training: Learn mixing rates from the labeled data
    mitigation.fit(labels, predictions, likelihoods, is_member)

    # Testing: Mitigate bias in predictions
    fair_predictions, fair_likelihoods = mitigation.transform(predictions, likelihoods, is_member)

    # Results: Fairness before and after
    before_scores = BinaryFairnessMetrics().get_all_scores(labels, predictions, is_member)
    after_scores = BinaryFairnessMetrics().get_all_scores(labels, fair_predictions, is_member)

    before_scores_check = {'Average Odds': 0.667,
                           'Disparate Impact': 3.0,
                           'Equal Opportunity': 0.667,
                           'FNR difference': -0.667,
                           'Generalized Entropy Index': 0.25,
                           'Predictive Equality': 0.667,
                           'Statistical Parity': 0.5,
                           'Theil Index': 0.347}

    after_scores_check = {'Average Odds': 0.0,
                          'Disparate Impact': 1.0,
                          'Equal Opportunity': 0.333,
                          'FNR difference': -0.333,
                          'Generalized Entropy Index': 0.14,
                          'Predictive Equality': -0.333,
                          'Statistical Parity': 0.0,
                          'Theil Index': 0.193}

    self.assertDictEqual(before_scores["Value"].to_dict(), before_scores_check)
    self.assertDictEqual(after_scores["Value"].to_dict(), after_scores_check)

def test_gei_alpha_more_than_one(self):
    np.random.seed(1)

    # test bounds
    metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()
    my_dict = {}
    for _ in range(1000):
        random = np.random.choice([0, 1], 10)
        if str(random) in my_dict:
            continue
        my_dict[str(random)] = (random[:5], random[5:])

    # division by 0 in certain instances
    with self.assertWarns(RuntimeWarning):
        # enumerate through all the sampled combinations
        alpha = 3
        results = []
        for y_true, y_pred in my_dict.values():
            results.append(metric.get_score(y_true, y_pred, alpha=alpha))

    assert min(results) == 0
    assert max(results) == (np.power(5, alpha - 1) - 1) / (alpha * (alpha - 1))

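# Why the maximum hits (5 ** (alpha - 1) - 1) / (alpha * (alpha - 1)): with benefits
# b_i = y_pred_i - y_true_i + 1, inequality peaks when one individual out of n = 5
# holds all the benefit, giving GEI(alpha) = (n ** alpha - n) / (n * alpha * (alpha - 1)).
# A minimal sketch of that worked case, assuming the standard GEI formula; the helper
# name is illustrative and not part of the original suite.
def test_gei_upper_bound_matches_closed_form(self):
    n, alpha = 5, 3
    benefits = np.array([1.0] + [0.0] * (n - 1))  # one individual holds all benefit
    mu = benefits.mean()
    gei = np.sum((benefits / mu) ** alpha - 1) / (n * alpha * (alpha - 1))
    assert np.isclose(gei, (n ** (alpha - 1) - 1) / (alpha * (alpha - 1)))
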
def test_theil_boundaries(self):
    np.random.seed(1)

    # test bounds
    metric = BinaryFairnessMetrics.TheilIndex()
    my_dict = {}
    for _ in range(1000):
        random = np.random.choice([0, 1], 10)
        if str(random) in my_dict:
            continue
        my_dict[str(random)] = (random[:5], random[5:])

    # division by 0 in certain instances
    with self.assertWarns(RuntimeWarning):
        # enumerate through all the sampled combinations
        results_theil = []
        for y_true, y_pred in my_dict.values():
            results_theil.append(metric.get_score(y_true, y_pred))

    assert min(results_theil) == 0
    assert max(results_theil) == np.log(5)

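# Why the maximum hits log(5): the Theil index is GEI at alpha = 1, i.e.
# T = (1/n) * sum((b_i / mu) * log(b_i / mu)). When one individual holds all the
# benefit, that individual contributes (n * log(n)) / n = log(n) and the 0 * log(0)
# terms vanish in the limit. A minimal sketch of that worked case, assuming the
# standard Theil formula; the helper name is illustrative and not part of the
# original suite.
def test_theil_upper_bound_matches_log_n(self):
    n = 5
    benefits = np.array([1.0] + [0.0] * (n - 1))  # one individual holds all benefit
    ratios = benefits / benefits.mean()           # [n, 0, 0, 0, 0]
    terms = np.zeros(n)
    terms[0] = ratios[0] * np.log(ratios[0])      # remaining 0 * log(0) terms are 0 in the limit
    assert np.isclose(terms.mean(), np.log(n))
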
def test_fairness_quick_start_example(self):
    # Data
    binary_predictions = [1, 1, 0, 1, 0, 0]
    multi_class_predictions = ["a", "b", "c", "b", "a", "a"]
    multi_class_multi_label_predictions = [["a", "b"], ["b", "c"], ["b"],
                                           ["a", "b"], ["c", "a"], ["c"]]
    is_member = [0, 0, 0, 1, 1, 1]
    classes = ["a", "b", "c"]

    # Metric (see also other available metrics)
    metric = BinaryFairnessMetrics.StatisticalParity()
    multi_metric = MultiClassFairnessMetrics.StatisticalParity(list_of_classes=classes)

    # Score
    binary_score = metric.get_score(binary_predictions, is_member)
    multi_scores = multi_metric.get_scores(multi_class_predictions, is_member)
    multi_label_scores = multi_metric.get_scores(multi_class_multi_label_predictions, is_member)

    # Results
    self.assertEqual(metric.description,
                     "Measures the difference in statistical parity between two groups")
    self.assertEqual(metric.lower_bound, -0.2)
    self.assertEqual(metric.upper_bound, 0.2)
    self.assertEqual(metric.ideal_value, 0)
    self.assertEqual(binary_score, -0.3333333333333333)
    self.assertListEqual(multi_scores, [0.3333333333333333, 0.0, -0.3333333333333333])
    self.assertListEqual(multi_label_scores,
                         [0.3333333333333333, -0.6666666666666667, 0.3333333333333333])

def test_disp_impact_multilabel(self):
    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = [['a', 'b'], ['b', 'c'], ['b'], ['a', 'b'], ['c', 'a'],
              ['c'], ['a', 'b'], [], ['a', 'b'], ['c']]

    # Classes for multi-label classification
    classes = ['a', 'b', 'c']

    # Multiclass Fairness Metric
    multi_metric = MultiClassFairnessMetrics.DisparateImpact(list_of_classes=classes)
    result = multi_metric.get_scores(y_pred, is_member)

    # Binary metric on each one-hot encoded class should match the multiclass output
    one_hot = multi_metric._one_hot_encode_classes(y_pred)
    binary_metric = BinaryFairnessMetrics.DisparateImpact()
    assert np.isclose(binary_metric.get_score(one_hot['a'], is_member), result[0], atol=0.001)
    assert np.isclose(binary_metric.get_score(one_hot['b'], is_member), result[1], atol=0.001)
    assert np.isclose(binary_metric.get_score(one_hot['c'], is_member), result[2], atol=0.001)

def test_disp_impact_multiclass_list(self):
    # Renamed from test_disp_impact_normal_list: that name is already used by the
    # binary test below, and a duplicate method name would silently shadow one test.

    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = ['a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'a', 'c']

    # Classes for multi-class classification
    classes = ['a', 'b', 'c']

    # Multiclass Fairness Metric
    multi_metric = MultiClassFairnessMetrics.DisparateImpact(list_of_classes=classes)
    result = multi_metric.get_scores(y_pred, is_member)

    # Get one-hot encoded 0-1 arrays for each class
    y_pred_a = convert_one_vs_rest('a', y_pred)
    y_pred_b = convert_one_vs_rest('b', y_pred)
    y_pred_c = convert_one_vs_rest('c', y_pred)

    # Binary metric should give the same output as the multiclass one
    binary_metric = BinaryFairnessMetrics.DisparateImpact()
    assert binary_metric.get_score(y_pred_a, is_member) == result[0]
    assert binary_metric.get_score(y_pred_b, is_member) == result[1]
    assert binary_metric.get_score(y_pred_c, is_member) == result[2]

def test_all_scores_valid(self):
    # test standard pandas table creation
    y_true = np.array([1, 1, 1, 0, 1, 0, 1, 1, 1, 1])
    y_pred = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 1])
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    df = BinaryFairnessMetrics.get_all_scores(y_true, y_pred, is_member)
    assert isinstance(df, pd.DataFrame)
    assert self.extract_metric_from_df('Statistical Parity', df) == 0.
    assert self.extract_metric_from_df('Average Odds', df) == 0.375
    assert self.extract_metric_from_df('Disparate Impact', df) == 1.
    assert self.extract_metric_from_df('FNR difference', df) == 0.25
    assert self.extract_metric_from_df('Predictive Equality', df) == 1.00
    assert self.extract_metric_from_df('Generalized Entropy Index', df) == 1.375
    assert np.isclose(self.extract_metric_from_df('Theil Index', df), 1.263, atol=0.01)

    attr = 'Ideal Value'
    assert self.extract_metric_from_df('Average Odds', df, attr) == 0
    assert self.extract_metric_from_df('Disparate Impact', df, attr) == 1
    assert self.extract_metric_from_df('Equal Opportunity', df, attr) == 0
    assert self.extract_metric_from_df('FNR difference', df, attr) == 0
    assert self.extract_metric_from_df('Generalized Entropy Index', df, attr) == 0
    assert self.extract_metric_from_df('Predictive Equality', df, attr) == 0
    assert self.extract_metric_from_df('Statistical Parity', df, attr) == 0
    assert self.extract_metric_from_df('Theil Index', df, attr) == 0

    attr = 'Lower Bound'
    assert self.extract_metric_from_df('Average Odds', df, attr) == -.2
    assert self.extract_metric_from_df('Disparate Impact', df, attr) == .8
    assert self.extract_metric_from_df('Equal Opportunity', df, attr) == -.2
    assert self.extract_metric_from_df('FNR difference', df, attr) == -.2
    assert self.extract_metric_from_df('Generalized Entropy Index', df, attr) == 0.
    assert self.extract_metric_from_df('Predictive Equality', df, attr) == -.2
    assert self.extract_metric_from_df('Statistical Parity', df, attr) == -.2
    assert self.extract_metric_from_df('Theil Index', df, attr) == 0.

    attr = 'Upper Bound'
    assert self.extract_metric_from_df('Average Odds', df, attr) == .2
    assert self.extract_metric_from_df('Disparate Impact', df, attr) == 1.2
    assert self.extract_metric_from_df('Equal Opportunity', df, attr) == .2
    assert self.extract_metric_from_df('FNR difference', df, attr) == .2
    assert not np.isfinite(self.extract_metric_from_df('Generalized Entropy Index', df, attr))
    assert self.extract_metric_from_df('Predictive Equality', df, attr) == .2
    assert self.extract_metric_from_df('Statistical Parity', df, attr) == .2
    assert not np.isfinite(self.extract_metric_from_df('Theil Index', df, attr))

def test_theil_normal_invalid(self):
    # Metric
    metric = BinaryFairnessMetrics.TheilIndex()

    # Data: -1 is not a valid binary label
    y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, -1]
    y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    with self.assertRaises(ValueError):
        metric.get_score(y_true, y_pred)

def test_gei_normal_invalid(self):
    # Metric
    metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()

    # Data: -1 is not a valid binary label
    y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, -1])
    y_pred = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    with self.assertRaises(ValueError):
        metric.get_score(y_true, y_pred)

def test_pred_equality_edge_5(self):
    # Data: edge case of -1
    y_pred = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.PredictiveEquality()

    # Score
    assert metric.get_score(y_true, y_pred, is_member) == -1

def test_stat_parity_edge_2(self):
    # Data: edge case of stat parity == -1
    y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
    is_member = np.array([0, 0, 0, 0, 1, 1, 1, 1])

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    score = metric.get_score(y_pred, is_member)
    assert score == -1

def test_disp_impact_normal_list(self):
    # Metric
    metric = BinaryFairnessMetrics.DisparateImpact()

    # Data
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    # test a medium number
    y_pred = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0])
    assert metric.get_score(y_pred, is_member) == 2

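# The score of 2 above follows from disparate impact being the ratio of positive
# prediction rates, protected over unprotected. A minimal sketch recomputing it by
# hand under that assumption; the helper name is illustrative and not part of the
# original suite.
def test_disp_impact_matches_rate_ratio(self):
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    y_pred = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0])

    protected_rate = y_pred[is_member == 1].mean()    # 2/5
    unprotected_rate = y_pred[is_member == 0].mean()  # 1/5

    metric = BinaryFairnessMetrics.DisparateImpact()
    assert np.isclose(metric.get_score(y_pred, is_member),
                      protected_rate / unprotected_rate)
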
def test_theil_normal_list(self):
    # Metric
    metric = BinaryFairnessMetrics.TheilIndex()

    # Data
    y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
    y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    # Score
    score = metric.get_score(y_true, y_pred)
    assert isinstance(score, float)
    assert np.isclose(score, 0.413, atol=0.01)

def test_disp_impact_edge3(self):
    # Metric
    metric = BinaryFairnessMetrics.DisparateImpact()

    # Data
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    # test a ratio of exactly 1
    y_pred = np.array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0])
    assert metric.get_score(y_pred, is_member) == 1

def test_gei_normal_list(self):
    # Metric
    metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()

    # Data
    y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
    y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

    # Score
    score = metric.get_score(y_true, y_pred)
    assert isinstance(score, float)
    assert np.isclose(score, 0.302, atol=0.01)

def test_avg_odds_diff_edge_4(self):
    # Data: edge case of 1
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
    y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.AverageOdds()

    # Score
    assert metric.get_score(y_true, y_pred, is_member) == 1

def test_fnr_diff_edge2(self):
    # Metric
    metric = BinaryFairnessMetrics.FNRDifference()

    # Data: edge case of -1
    y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, 0])
    y_pred = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    is_member = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    assert metric.get_score(y_true, y_pred, is_member) == -1

def test_fnr_diff_normal_invalid(self):
    # Metric
    metric = BinaryFairnessMetrics.FNRDifference()

    # Data: 2 is not a valid binary label
    y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, 2])
    y_pred = np.array([0, 0, 1, 0, 0, 1, 1, 1, 0, 0])
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    with self.assertRaises(ValueError):
        metric.get_score(y_true, y_pred, is_member)

def test_stat_parity_invalid_np(self):
    # Data: mismatched input lengths should raise InputShapeError
    y_pred = np.array([1, 1, 0, 1, 0])
    is_member = np.array([0, 0, 0, 1, 1, 1])

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    with self.assertRaises(InputShapeError):
        metric.get_score(y_pred, is_member)

def test_pred_equality_edge_1(self):
    # Data: edge case - homogeneous ground truth within group - returns None
    # unprivileged homogeneous
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1])
    y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.PredictiveEquality()

    with self.assertWarns(UserWarning):
        assert metric.get_score(y_true, y_pred, is_member) is None

def test_pred_equality_edge_4(self):
    # Data: edge case of 1
    y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
    y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.PredictiveEquality()

    # Score
    assert metric.get_score(y_true, y_pred, is_member) == 1

def test_disp_impact_edge2(self):
    # Metric
    metric = BinaryFairnessMetrics.DisparateImpact()

    # Data
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    # test no positive predictions in the unprotected group
    y_pred = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])
    with self.assertWarns(UserWarning):
        assert metric.get_score(y_pred, is_member) == 1

def test_equal_opp_normal_invalid(self):
    # Data: 2 is not a valid binary label
    y_true = np.array([1, 0, 0, 0, 1, 1, 0, 2])
    y_pred = np.array([0, 1, 1, 1, 1, 1, 1, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.EqualOpportunity()

    # Score
    with self.assertRaises(ValueError):
        metric.get_score(y_true, y_pred, is_member)

def test_fnr_diff_normal_list(self):
    # Metric
    metric = BinaryFairnessMetrics.FNRDifference()

    # Data
    y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
    y_pred = [0, 0, 1, 0, 0, 1, 1, 1, 0, 0]
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    assert np.isclose(metric.get_score(y_true, y_pred, is_member), 0.333, atol=0.001)

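# The 0.333 above follows from the FNR difference being FN / (FN + TP) computed
# within each group, protected minus unprotected. A minimal sketch recomputing it
# by hand under that assumption; the helper name is illustrative and not part of
# the original suite.
def test_fnr_diff_matches_manual_rates(self):
    y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, 0])
    y_pred = np.array([0, 0, 1, 0, 0, 1, 1, 1, 0, 0])
    is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

    def fnr(mask):
        positives = (y_true == 1) & mask
        return np.sum(positives & (y_pred == 0)) / np.sum(positives)

    expected = fnr(is_member == 1) - fnr(is_member == 0)  # 2/3 - 1/3
    metric = BinaryFairnessMetrics.FNRDifference()
    assert np.isclose(metric.get_score(y_true, y_pred, is_member), expected)
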
def test_disp_impact_normal_df(self):
    # Metric
    metric = BinaryFairnessMetrics.DisparateImpact()

    # Data: test a medium number
    my_df = pd.DataFrame.from_dict({'y_pred': [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
                                    'is_member': [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]})

    # Score
    assert metric.get_score(my_df['y_pred'], my_df['is_member']) == 2

def test_stat_parity_normal_list(self):
    # Data
    y_pred = [1, 1, 0, 1, 0, 0]
    is_member = [0, 0, 0, 1, 1, 1]

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    score = metric.get_score(y_pred, is_member)
    assert np.isclose(score, -0.3333, atol=0.001)

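# The -0.3333 above follows from statistical parity being the difference in
# positive prediction rates, protected minus unprotected. A minimal sketch
# recomputing it by hand under that assumption; the helper name is illustrative
# and not part of the original suite.
def test_stat_parity_matches_rate_difference(self):
    y_pred = np.array([1, 1, 0, 1, 0, 0])
    is_member = np.array([0, 0, 0, 1, 1, 1])

    protected_rate = y_pred[is_member == 1].mean()    # 1/3
    unprotected_rate = y_pred[is_member == 0].mean()  # 2/3

    metric = BinaryFairnessMetrics.StatisticalParity()
    assert np.isclose(metric.get_score(y_pred, is_member),
                      protected_rate - unprotected_rate)
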
def test_binary_matches_multiclass_disp_impact(self):
    # Data
    binary_predictions = [0, 1, 0, 0, 1, 1]
    is_member = [0, 1, 1, 0, 0, 1]

    # Binary score
    metric = BinaryFairnessMetrics.DisparateImpact()
    score = metric.get_score(binary_predictions, is_member)

    # Multiclass score over classes [0, 1]; index 1 corresponds to the positive class
    classes = [0, 1]
    multi_metric = MultiClassFairnessMetrics.DisparateImpact(list_of_classes=classes)
    multi_score = multi_metric.get_scores(binary_predictions, is_member)

    assert score == multi_score[1]

def test_equal_opp_edge_3(self):
    # Data: ground truth is homogeneous within both groups - returns nan
    y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
    y_pred = np.array([0, 1, 1, 1, 1, 1, 1, 0])
    is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

    # Metric
    metric = BinaryFairnessMetrics.EqualOpportunity()

    # division by zero caught inside numpy
    with self.assertWarns(UserWarning):
        metric.get_score(y_true, y_pred, is_member)

def test_stat_parity_invalid_df(self):
    # Data: 2 is not a valid binary prediction
    my_df = pd.DataFrame.from_dict({'y_pred': [1, 1, 0, 1, 0, 2],
                                    'is_member': [0, 0, 0, 1, 1, 1]})

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    with self.assertRaises(ValueError):
        metric.get_score(my_df['y_pred'], my_df['is_member'])

def test_theil_normal_df(self):
    # Data
    my_df = pd.DataFrame.from_dict({'y_true': [0, 1, 1, 0, 1, 1, 1, 0, 1, 0],
                                    'y_pred': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]})

    # Metric
    metric = BinaryFairnessMetrics.TheilIndex()

    # Score
    score = metric.get_score(my_df['y_true'], my_df['y_pred'])
    assert isinstance(score, float)
    assert np.isclose(score, 0.413, atol=0.01)