# Module-level imports these test methods rely on. The jurity.fairness path is
# shown in the helpers below; the BinaryMitigation and Constants paths follow
# the library's documented API and are stated here as assumptions.
import gc
import io
import pickle

import numpy as np
import pandas as pd

from jurity.fairness import BinaryFairnessMetrics
from jurity.mitigation import BinaryMitigation
from jurity.utils import Constants


def test_mitigation_quick_start_example(self):
    # Data
    labels = [1, 1, 0, 1, 0, 0, 1, 0]
    predictions = [0, 0, 0, 1, 1, 1, 1, 0]
    likelihoods = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.1]
    is_member = [0, 0, 0, 0, 1, 1, 1, 1]

    # Bias Mitigation
    mitigation = BinaryMitigation.EqualizedOdds()

    # Training: Learn mixing rates from the labeled data
    mitigation.fit(labels, predictions, likelihoods, is_member)

    # Testing: Mitigate bias in predictions
    fair_predictions, fair_likelihoods = mitigation.transform(predictions, likelihoods, is_member)

    # Results: Fairness before and after
    before_scores = BinaryFairnessMetrics().get_all_scores(labels, predictions, is_member)
    after_scores = BinaryFairnessMetrics().get_all_scores(labels, fair_predictions, is_member)

    before_scores_check = {'Average Odds': 0.667,
                           'Disparate Impact': 3.0,
                           'Equal Opportunity': 0.667,
                           'FNR difference': -0.667,
                           'Generalized Entropy Index': 0.25,
                           'Predictive Equality': 0.667,
                           'Statistical Parity': 0.5,
                           'Theil Index': 0.347}

    after_scores_check = {'Average Odds': 0.0,
                          'Disparate Impact': 1.0,
                          'Equal Opportunity': 0.333,
                          'FNR difference': -0.333,
                          'Generalized Entropy Index': 0.14,
                          'Predictive Equality': -0.333,
                          'Statistical Parity': 0.0,
                          'Theil Index': 0.193}

    self.assertDictEqual(before_scores["Value"].to_dict(), before_scores_check)
    self.assertDictEqual(after_scores["Value"].to_dict(), after_scores_check)
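
# Sanity-check sketch (not part of the original test): the pre-mitigation
# Statistical Parity (0.5) and Disparate Impact (3.0) expected above can be
# recomputed by hand from the quick-start arrays, assuming Statistical Parity
# is the positive-prediction-rate difference (group 1 minus group 0) and
# Disparate Impact is the corresponding ratio, which matches the check values.
# The helper name is illustrative only.
def _manual_parity_check():
    predictions = [0, 0, 0, 1, 1, 1, 1, 0]
    is_member = [0, 0, 0, 0, 1, 1, 1, 1]

    # Positive prediction rate per group
    rate_group1 = sum(p for p, m in zip(predictions, is_member) if m == 1) / is_member.count(1)  # 3/4
    rate_group0 = sum(p for p, m in zip(predictions, is_member) if m == 0) / is_member.count(0)  # 1/4

    assert rate_group1 - rate_group0 == 0.5  # Statistical Parity before mitigation
    assert rate_group1 / rate_group0 == 3.0  # Disparate Impact before mitigation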
def _binary_score(self, predictions, is_member):
    # Per-metric helper: score binary predictions with Disparate Impact
    from jurity.fairness import BinaryFairnessMetrics
    return BinaryFairnessMetrics().DisparateImpact().get_score(predictions, is_member)
def _binary_score(self, predictions, is_member):
    # Per-metric helper: score binary predictions with Statistical Parity
    from jurity.fairness import BinaryFairnessMetrics
    return BinaryFairnessMetrics().StatisticalParity().get_score(predictions, is_member)
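
# Usage sketch (illustrative, not from the repo): the two `_binary_score`
# helpers above presumably live in metric-specific test classes and differ
# only in which metric they wrap. Called directly on the quick-start data,
# the same metrics would be invoked as below; the function name is hypothetical.
def _direct_metric_calls_sketch():
    from jurity.fairness import BinaryFairnessMetrics

    predictions = [0, 0, 0, 1, 1, 1, 1, 0]
    is_member = [0, 0, 0, 0, 1, 1, 1, 1]

    di = BinaryFairnessMetrics().DisparateImpact().get_score(predictions, is_member)
    sp = BinaryFairnessMetrics().StatisticalParity().get_score(predictions, is_member)
    return di, sp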
def test_usage_pickle(self):
    # Fit a mitigation object and serialize it to pickle,
    # then unserialize the pickle and transform the input.
    # It should give the same results as using the object without pickling.
    df = self.generate_random_population(num_individuals=500, threshold=0.5)

    np.random.seed(Constants.default_seed)

    # Randomly split the data into two sets: one for computing the mixing rates
    # and one for evaluating the fairness
    order = np.random.permutation(len(df))
    train_indices = order[0::2]
    test_indices = order[1::2]
    train_data = df.iloc[train_indices].copy()
    test_data = df.iloc[test_indices].copy()

    # Mitigation object
    mitigation = BinaryMitigation.EqualizedOdds(seed=Constants.default_seed)

    # Train data
    train_labels = train_data.label.to_numpy()
    train_predictions = train_data.y_pred_binary.to_numpy()
    train_likelihoods = train_data.prediction.to_numpy()
    train_is_member = train_data.group.to_numpy()

    # Fit
    mitigation.fit(train_labels, train_predictions, train_likelihoods, train_is_member)

    # Pickle the fitted model object, drop the original, and restore it from bytes
    bytes_io = io.BytesIO()
    pickle.dump(mitigation, bytes_io, pickle.HIGHEST_PROTOCOL)
    del mitigation
    gc.collect()
    bytes_io.seek(0)
    mitigation = pickle.load(bytes_io)

    # Test data
    test_labels = test_data.label.to_numpy()
    test_predictions = test_data.y_pred_binary.to_numpy()
    test_likelihoods = test_data.prediction.to_numpy()
    test_is_member = test_data.group.to_numpy()

    fair_predictions, fair_likelihoods = mitigation.transform(test_predictions, test_likelihoods, test_is_member)

    fair_df = pd.DataFrame.from_dict({'label': test_labels,
                                      'group': test_is_member,
                                      'y_pred': fair_predictions.astype(int)})

    fairness_metrics = BinaryFairnessMetrics().get_all_scores(fair_df.label.tolist(),
                                                              fair_df.y_pred.tolist(),
                                                              fair_df.group.tolist()).reset_index()

    ao = fairness_metrics.loc[fairness_metrics.Metric == 'Average Odds', 'Value'].to_numpy().item()
    di = fairness_metrics.loc[fairness_metrics.Metric == 'Disparate Impact', 'Value'].to_numpy().item()
    eq = fairness_metrics.loc[fairness_metrics.Metric == 'Equal Opportunity', 'Value'].to_numpy().item()
    fnr = fairness_metrics.loc[fairness_metrics.Metric == 'FNR difference', 'Value'].to_numpy().item()
    gei = fairness_metrics.loc[fairness_metrics.Metric == 'Generalized Entropy Index', 'Value'].to_numpy().item()
    pe = fairness_metrics.loc[fairness_metrics.Metric == 'Predictive Equality', 'Value'].to_numpy().item()
    sp = fairness_metrics.loc[fairness_metrics.Metric == 'Statistical Parity', 'Value'].to_numpy().item()
    ti = fairness_metrics.loc[fairness_metrics.Metric == 'Theil Index', 'Value'].to_numpy().item()

    self.assertEqual(ao, 0.049)
    self.assertEqual(di, 1.072)
    self.assertEqual(eq, 0.060)
    self.assertEqual(fnr, -0.060)
    self.assertEqual(gei, 0.102)
    self.assertEqual(pe, 0.037)
    self.assertEqual(sp, 0.062)
    self.assertEqual(ti, 0.133)
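
# Side note (illustrative): the BytesIO round-trip in the test above is
# equivalent to the in-memory shorthand below; both rely on the mitigation
# object being fully picklable after fit(). The helper name is hypothetical.
def _pickle_roundtrip_sketch(mitigation):
    blob = pickle.dumps(mitigation, protocol=pickle.HIGHEST_PROTOCOL)
    return pickle.loads(blob)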
def test_mitigation_fit_transform(self):
    # Randomly generated 500 data points with biased outcome
    df = self.generate_random_population(num_individuals=500, threshold=0.5)

    # Mitigation
    mitigation = BinaryMitigation.EqualizedOdds(seed=Constants.default_seed)

    # Train data
    labels = df.label.to_numpy()
    predictions = df.y_pred_binary.to_numpy()
    likelihoods = df.prediction.to_numpy()
    is_member = df.group.to_numpy()

    np.random.seed(Constants.default_seed)
    fair_predictions, fair_likelihoods = mitigation.fit_transform(labels, predictions, likelihoods, is_member)

    fair_df = pd.DataFrame.from_dict({'label': labels,
                                      'group': is_member,
                                      'y_pred': (fair_likelihoods > 0.5) * 1})

    fairness_metrics = BinaryFairnessMetrics().get_all_scores(fair_df.label.tolist(),
                                                              fair_df.y_pred.tolist(),
                                                              fair_df.group.tolist()).reset_index()

    ao = fairness_metrics.loc[fairness_metrics.Metric == 'Average Odds', 'Value'].to_numpy().item()
    di = fairness_metrics.loc[fairness_metrics.Metric == 'Disparate Impact', 'Value'].to_numpy().item()
    eq = fairness_metrics.loc[fairness_metrics.Metric == 'Equal Opportunity', 'Value'].to_numpy().item()
    fnr = fairness_metrics.loc[fairness_metrics.Metric == 'FNR difference', 'Value'].to_numpy().item()
    gei = fairness_metrics.loc[fairness_metrics.Metric == 'Generalized Entropy Index', 'Value'].to_numpy().item()
    pe = fairness_metrics.loc[fairness_metrics.Metric == 'Predictive Equality', 'Value'].to_numpy().item()
    sp = fairness_metrics.loc[fairness_metrics.Metric == 'Statistical Parity', 'Value'].to_numpy().item()
    ti = fairness_metrics.loc[fairness_metrics.Metric == 'Theil Index', 'Value'].to_numpy().item()

    self.assertEqual(ao, 0.056)
    self.assertEqual(di, 1.059)
    self.assertEqual(eq, 0.046)
    self.assertEqual(fnr, -0.046)
    self.assertEqual(gei, 0.091)
    self.assertEqual(pe, 0.066)
    self.assertEqual(sp, 0.052)
    self.assertEqual(ti, 0.113)
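
# Note (assumption based on the other tests in this file, not a documented
# guarantee): fit_transform(labels, predictions, likelihoods, is_member) is
# expected to behave like fit(...) on the same data followed by
# transform(predictions, likelihoods, is_member), as sketched below.
# The helper name is illustrative only.
def _fit_then_transform_sketch(labels, predictions, likelihoods, is_member):
    mitigation = BinaryMitigation.EqualizedOdds(seed=Constants.default_seed)
    mitigation.fit(labels, predictions, likelihoods, is_member)
    return mitigation.transform(predictions, likelihoods, is_member)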
def test_numerical_stability_bias_mitigation(self):
    # Randomly generated 500 data points with biased outcome
    df = self.generate_random_population(num_individuals=500, threshold=0.5)

    np.random.seed(Constants.default_seed)

    # Randomly split the data into two sets: one for computing the mixing rates
    # and one for evaluating the fairness
    order = np.random.permutation(len(df))
    train_indices = order[0::2]
    test_indices = order[1::2]
    train_data = df.iloc[train_indices].copy()
    test_data = df.iloc[test_indices].copy()

    # Mitigation object
    mitigation = BinaryMitigation.EqualizedOdds(seed=Constants.default_seed)

    # Train data
    train_labels = train_data.label.to_numpy()
    train_predictions = train_data.y_pred_binary.to_numpy()
    train_likelihoods = train_data.prediction.to_numpy()
    train_is_member = train_data.group.to_numpy()

    # Fit
    mitigation.fit(train_labels, train_predictions, train_likelihoods, train_is_member)

    # Test data
    test_labels = test_data.label.to_numpy()
    test_predictions = test_data.y_pred_binary.to_numpy()
    test_likelihoods = test_data.prediction.to_numpy()
    test_is_member = test_data.group.to_numpy()

    fair_predictions, fair_likelihoods = mitigation.transform(test_predictions, test_likelihoods, test_is_member)

    # Evaluate prior to mitigation
    test_data['y_pred'] = test_data['prediction'].apply(lambda x: (x > 0.5) * 1)

    fairness_metrics = BinaryFairnessMetrics().get_all_scores(test_data.label.tolist(),
                                                              test_data.y_pred.tolist(),
                                                              test_data.group.tolist()).reset_index()

    ao = fairness_metrics.loc[fairness_metrics.Metric == 'Average Odds', 'Value'].to_numpy().item()
    di = fairness_metrics.loc[fairness_metrics.Metric == 'Disparate Impact', 'Value'].to_numpy().item()
    eq = fairness_metrics.loc[fairness_metrics.Metric == 'Equal Opportunity', 'Value'].to_numpy().item()
    fnr = fairness_metrics.loc[fairness_metrics.Metric == 'FNR difference', 'Value'].to_numpy().item()
    gei = fairness_metrics.loc[fairness_metrics.Metric == 'Generalized Entropy Index', 'Value'].to_numpy().item()
    pe = fairness_metrics.loc[fairness_metrics.Metric == 'Predictive Equality', 'Value'].to_numpy().item()
    sp = fairness_metrics.loc[fairness_metrics.Metric == 'Statistical Parity', 'Value'].to_numpy().item()
    ti = fairness_metrics.loc[fairness_metrics.Metric == 'Theil Index', 'Value'].to_numpy().item()

    self.assertEqual(ao, -0.302)
    self.assertEqual(di, 0.579)
    self.assertEqual(eq, -0.285)
    self.assertEqual(fnr, 0.285)
    self.assertEqual(gei, 0.199)
    self.assertEqual(pe, -0.318)
    self.assertEqual(sp, -0.311)
    self.assertEqual(ti, 0.276)

    # Evaluate post mitigation
    fair_df = pd.DataFrame.from_dict({'label': test_labels,
                                      'group': test_is_member,
                                      'y_pred': (fair_likelihoods > 0.5) * 1})

    fairness_metrics = BinaryFairnessMetrics().get_all_scores(fair_df.label.tolist(),
                                                              fair_df.y_pred.tolist(),
                                                              fair_df.group.tolist()).reset_index()

    ao = fairness_metrics.loc[fairness_metrics.Metric == 'Average Odds', 'Value'].to_numpy().item()
    di = fairness_metrics.loc[fairness_metrics.Metric == 'Disparate Impact', 'Value'].to_numpy().item()
    eq = fairness_metrics.loc[fairness_metrics.Metric == 'Equal Opportunity', 'Value'].to_numpy().item()
    fnr = fairness_metrics.loc[fairness_metrics.Metric == 'FNR difference', 'Value'].to_numpy().item()
    gei = fairness_metrics.loc[fairness_metrics.Metric == 'Generalized Entropy Index', 'Value'].to_numpy().item()
    pe = fairness_metrics.loc[fairness_metrics.Metric == 'Predictive Equality', 'Value'].to_numpy().item()
    sp = fairness_metrics.loc[fairness_metrics.Metric == 'Statistical Parity', 'Value'].to_numpy().item()
    ti = fairness_metrics.loc[fairness_metrics.Metric == 'Theil Index', 'Value'].to_numpy().item()

    self.assertEqual(ao, 0.049)
    self.assertEqual(di, 1.072)
    self.assertEqual(eq, 0.060)
    self.assertEqual(fnr, -0.060)
    self.assertEqual(gei, 0.102)
    self.assertEqual(pe, 0.037)
    self.assertEqual(sp, 0.062)
    self.assertEqual(ti, 0.133)

    # Use fair predictions instead of fair likelihoods (should give the same results)
    fair_df = pd.DataFrame.from_dict({'label': test_labels,
                                      'group': test_is_member,
                                      'y_pred': fair_predictions.astype(int)})

    fairness_metrics = BinaryFairnessMetrics().get_all_scores(fair_df.label.tolist(),
                                                              fair_df.y_pred.tolist(),
                                                              fair_df.group.tolist()).reset_index()

    ao = fairness_metrics.loc[fairness_metrics.Metric == 'Average Odds', 'Value'].to_numpy().item()
    di = fairness_metrics.loc[fairness_metrics.Metric == 'Disparate Impact', 'Value'].to_numpy().item()
    eq = fairness_metrics.loc[fairness_metrics.Metric == 'Equal Opportunity', 'Value'].to_numpy().item()
    fnr = fairness_metrics.loc[fairness_metrics.Metric == 'FNR difference', 'Value'].to_numpy().item()
    gei = fairness_metrics.loc[fairness_metrics.Metric == 'Generalized Entropy Index', 'Value'].to_numpy().item()
    pe = fairness_metrics.loc[fairness_metrics.Metric == 'Predictive Equality', 'Value'].to_numpy().item()
    sp = fairness_metrics.loc[fairness_metrics.Metric == 'Statistical Parity', 'Value'].to_numpy().item()
    ti = fairness_metrics.loc[fairness_metrics.Metric == 'Theil Index', 'Value'].to_numpy().item()

    self.assertEqual(ao, 0.049)
    self.assertEqual(di, 1.072)
    self.assertEqual(eq, 0.060)
    self.assertEqual(fnr, -0.060)
    self.assertEqual(gei, 0.102)
    self.assertEqual(pe, 0.037)
    self.assertEqual(sp, 0.062)
    self.assertEqual(ti, 0.133)
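
# The tests above rely on a generate_random_population(num_individuals, threshold)
# helper defined elsewhere in this test class; its actual implementation is not
# shown here. The sketch below is only a hypothetical stand-in illustrating the
# columns the tests consume: 'label', 'group', 'prediction' (a likelihood in
# [0, 1]), and 'y_pred_binary' (the prediction thresholded at `threshold`),
# with group-dependent likelihoods so that the outcome is biased against the
# protected group, as the pre-mitigation metrics above suggest.
def _generate_random_population_sketch(num_individuals, threshold, seed=Constants.default_seed):
    rng = np.random.RandomState(seed)
    group = rng.randint(0, 2, size=num_individuals)
    label = rng.randint(0, 2, size=num_individuals)
    # Shift likelihoods down for the protected group (group == 1) to induce bias
    prediction = np.clip(rng.uniform(0, 1, size=num_individuals) - 0.2 * group, 0, 1)
    y_pred_binary = (prediction > threshold).astype(int)
    return pd.DataFrame({'label': label,
                         'group': group,
                         'prediction': prediction,
                         'y_pred_binary': y_pred_binary})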