def entropy_index(random_data, predicted_data, target_variable,
                  protected_variable, unprivileged_input, threshold=0.2):
    """Print the between-all-groups generalized entropy index (alpha=2) and a
    plain-language bias verdict.

    A random Bernoulli(0.5) prediction column is attached to ``random_data``
    to serve as the "true" dataset, and ``predicted_data`` is treated as the
    classified dataset; the index is computed between all groups of the
    protected attribute.

    Parameters
    ----------
    random_data : pandas.DataFrame
        Data to receive a random 'Pred' column (mutated in place).
    predicted_data : pandas.DataFrame
        Data holding the model's predictions.
    target_variable : str
        Name of the label column.
    protected_variable : str
        Name of the protected-attribute column.
    unprivileged_input : scalar
        Protected-attribute value identifying the unprivileged group.
    threshold : float, optional
        Absolute index value below which the algorithm is reported as not
        biased (default 0.2, matching the original hard-coded cutoff).
    """
    # One Bernoulli(0.5) draw per row. The original hard-coded 1000 draws,
    # which breaks for any dataset that is not exactly 1000 rows; sizing by
    # len(random_data) works for any input.
    random_data['Pred'] = np.random.binomial(1, .5, len(random_data))
    dataset = BinaryLabelDataset(
        df=random_data,
        label_names=[target_variable],
        protected_attribute_names=[protected_variable])
    classified_dataset = BinaryLabelDataset(
        df=predicted_data,
        label_names=[target_variable],
        protected_attribute_names=[protected_variable])
    # Every protected-attribute value other than the unprivileged one is
    # treated as privileged.
    unique_values = predicted_data[protected_variable].unique()
    privileged_group = [{protected_variable: v}
                        for v in unique_values if v != unprivileged_input]
    unprivileged_group = [{protected_variable: unprivileged_input}]  # female=0
    metric = ClassificationMetric(dataset, classified_dataset,
                                  unprivileged_group, privileged_group)
    # Compute the index once (the original recomputed it for the print and
    # again for the comparison — the metric call is not free).
    entropy = metric.between_all_groups_generalized_entropy_index(alpha=2)
    print(entropy)
    if abs(np.round(entropy, 3)) < threshold:
        print('The algorithm can be considered to be not biased')
    else:
        print('There is a potential bias')
def test_between_all_groups():
    """Check between_all_groups_generalized_entropy_index against the
    hand-computed benefit vector for a small synthetic dataset."""
    truth = np.array([
        [0, 1], [0, 0],
        [1, 0], [1, 1], [1, 0], [1, 0],
        [2, 1], [2, 0], [2, 1], [2, 1],
    ])
    predictions = truth.copy()
    # Flip a handful of labels so truth and predictions disagree.
    predictions[[3, 9], -1] = 0
    predictions[[4, 5], -1] = 1

    truth_df = pd.DataFrame(truth, columns=['feat', 'label'])
    pred_df = pd.DataFrame(predictions, columns=['feat', 'label'])
    truth_bld = BinaryLabelDataset(df=truth_df, label_names=['label'],
                                   protected_attribute_names=['feat'])
    pred_bld = BinaryLabelDataset(df=pred_df, label_names=['label'],
                                  protected_attribute_names=['feat'])
    metric = ClassificationMetric(truth_bld, pred_bld)

    # Per-group benefit values expected for this truth/prediction split.
    benefits = np.array([1, 1, 1.25, 1.25, 1.25, 1.25,
                         0.75, 0.75, 0.75, 0.75])
    expected = 1 / 20 * np.sum(benefits ** 2 - 1)
    assert metric.between_all_groups_generalized_entropy_index() == expected
# (3) think about a way to visualize these values # Statistical Parity difference (SPD) spd_pre_race = fairness_metrics.statistical_parity_difference() # Disparate Impact Ratio dir_pre_race = fairness_metrics.disparate_impact() # Average Odds Difference and Average absolute odds difference aod_pre_race = fairness_metrics.average_odds_difference() aaod_pre_race = fairness_metrics.average_abs_odds_difference() # Equal Opportunity Difference aka true positive rate difference eod_pre_race = fairness_metrics.equal_opportunity_difference() # Generealized entropy index with various alpha's fairness_metrics.between_all_groups_generalized_entropy_index(alpha=2) ClassificationMetric(dataset=bld_true, classified_dataset=bld_pred, unprivileged_groups=None, privileged_groups=None).false_positive_rate() df_fm.head() # TO DELETE # ============================================================================= # bld_pred.align_datasets # bld_true.temporarily_ignore('score_cat') # ============================================================================= # TO DELETE: NOT YET NECESSARY # =============================================================================