def __init__(self, step_name, step, df, sensitive_att, target_col, input_score=True, clf_threshold=0.5):
    """Wrap and fit an AIF 360 post-processor on ground-truth vs. predicted data.

    :param step_name: str, name of the current input step.
    :param step: object of the initialized class (an AIF 360 post-processor
                 exposing ``fit(true_dataset, pred_dataset)``).
    :param df: pandas dataframe, stores the data. Must contain a column named
               ``"pred_" + target_col`` holding the model predictions.
    :param sensitive_att: str, the name of a sensitive attribute.
    :param target_col: str, the name of the target attribute.
    :param input_score: boolean, whether the post-processor takes predicted
                        scores (True) or hard labels (False) as input. Default True.
    :param clf_threshold: float in [0, 1], threshold used to derive class labels
                          from predicted scores when ``input_score`` is False.
    :raises ValueError: if the prediction column is absent from ``df``.
    """
    pred_col = "pred_" + target_col
    if pred_col not in df.columns:
        # Raise with a descriptive message instead of print + bare raise,
        # so callers see the reason in the exception itself.
        raise ValueError("Require the predictions for " + target_col + " existing in the data!")
    super().__init__(step_name=step_name, df=df, sensitive_att=sensitive_att, target_col=target_col)

    # Assume the data set has been encoded to numerical values;
    # initialize a BinaryLabelDataset from AIF 360 for the ground truth
    # (the prediction column is dropped so only true labels remain).
    aif_true_df = BinaryLabelDataset(df=df.drop(columns=[pred_col]),
                                     label_names=[target_col],
                                     protected_attribute_names=[sensitive_att])
    aif_pred_df = aif_true_df.copy()
    if input_score:
        # Post-processor consumes raw predicted scores.
        aif_pred_df.scores = df[pred_col]
    else:
        # Post-processor consumes hard labels thresholded from the scores.
        aif_pred_df.labels = np.array([int(x >= clf_threshold) for x in df[pred_col]])

    self.input_score = input_score
    # Fit the AIF 360 post-processor on the (truth, prediction) pair.
    self.step = step.fit(aif_true_df, aif_pred_df)
    self.clf_threshold = clf_threshold
def fairness_IBM(y_pred, Ztr, ytr, verbose=0):
    """Compute a suite of group-fairness metrics via IBM's AIF 360 toolkit.

    All difference-style metrics are reported as unprivileged minus
    privileged (U - P); disparate impact is shifted by -1 so its ideal
    value is 0 like the others.

    :param y_pred: array-like of predicted binary labels.
    :param Ztr: array-like of binary sensitive-attribute values; must
                contain exactly the values {0, 1}.
    :param ytr: array-like of true binary labels.
    :param verbose: int, print input shapes when nonzero.
    :return: single-row pandas DataFrame with columns EA, EO, DI, DP, EQ, TH.
    :raises ValueError: if ``Ztr`` contains values other than {0, 1}.
    """
    from aif360.datasets import BinaryLabelDataset
    from aif360.metrics import ClassificationMetric

    # Validate with an explicit exception instead of `assert`, which is
    # silently stripped when Python runs with -O.
    if not np.array_equal(np.unique(Ztr), np.array([0, 1])):
        raise ValueError("Z must contain either 0 or 1")

    Ztr = np.squeeze(Ztr)
    if verbose:
        print(ytr.shape)
        print(Ztr.shape)

    unprivileged_groups = [{"zs": [0]}]
    privileged_groups = [{"zs": [1]}]

    metric_arrs = defaultdict(list)
    df = pd.DataFrame({"y_true": ytr, "zs": Ztr})
    dataset = BinaryLabelDataset(df=df,
                                 label_names=["y_true"],
                                 protected_attribute_names=["zs"],
                                 unprivileged_protected_attributes=[[0]],
                                 privileged_protected_attributes=[[1]])
    # Copy the ground-truth dataset and overwrite its labels with the
    # model predictions to build the comparison dataset.
    dataset_pred = dataset.copy()
    dataset_pred.labels = y_pred

    metric = ClassificationMetric(dataset, dataset_pred,
                                  unprivileged_groups=unprivileged_groups,
                                  privileged_groups=privileged_groups)

    # Equality of accuracy: unprivileged accuracy minus privileged accuracy.
    metric_arrs["EA"].append(metric.accuracy(privileged=False)
                             - metric.accuracy(privileged=True))
    metric_arrs['EO'].append(metric.average_odds_difference())
    # Disparate impact is a ratio whose ideal value is 1.0 (a value < 1
    # implies higher benefit for the privileged group); subtract 1 so it is
    # centered at 0 like the difference-style metrics.
    metric_arrs['DI'].append(metric.disparate_impact() - 1)
    metric_arrs['DP'].append(metric.statistical_parity_difference())
    metric_arrs['EQ'].append(metric.equal_opportunity_difference())
    # Theil index is scaled by 10 to bring it to a magnitude comparable
    # with the other metrics.
    metric_arrs['TH'].append(metric.between_group_theil_index() * 10)

    return pd.DataFrame(metric_arrs)