# Compute the statistical parity difference for a dataset with a single
# protected attribute and flag a potential bias if it exceeds 0.1 in
# absolute value.
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric


def calc_stat_parity(data, target_variable, protected_variable, unprivileged_input):
    df_aif = BinaryLabelDataset(df=data,
                                label_names=[target_variable],
                                protected_attribute_names=[protected_variable])
    # Every value of the protected attribute other than the unprivileged one
    # is treated as privileged.
    privileged_group = []
    for v in data[protected_variable].unique()[data[protected_variable].unique() != unprivileged_input]:
        privileged_group.append({protected_variable: v})
    unprivileged_group = [{protected_variable: unprivileged_input}]  # female = 0
    metric_orig = BinaryLabelDatasetMetric(df_aif, unprivileged_group, privileged_group)
    print(metric_orig.statistical_parity_difference().round(3))
    if abs(metric_orig.statistical_parity_difference().round(3)) < 0.1:
        print('The algorithm can be considered unbiased')
    else:
        print('There is a potential bias')
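# Usage sketch (assumption): a tiny synthetic hiring table, with 'hired' as the
# binary target and 'sex' as the protected attribute where 0 (female) is taken
# as the unprivileged value, matching the comment in calc_stat_parity. The
# column names and values are made up purely for illustration.
import pandas as pd

toy_hiring = pd.DataFrame({
    'sex':   [0, 0, 0, 0, 1, 1, 1, 1],
    'hired': [0, 1, 0, 1, 1, 1, 0, 1],
})
calc_stat_parity(toy_hiring, target_variable='hired',
                 protected_variable='sex', unprivileged_input=0)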
# Compute the statistical parity difference for COMPAS-style data, accepting
# either a raw pandas DataFrame or an already-constructed AIF360 dataset.
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric


def compute_statistical_parity(data, unpriv_group, priv_group):
    if isinstance(data, pd.DataFrame):
        # Not reoffending within two years is the favorable outcome.
        transformed_data = BinaryLabelDataset(
            df=data,
            label_names=["two_year_recid"],
            protected_attribute_names=["race"],
            favorable_label=0,
            unfavorable_label=1)
    else:
        transformed_data = data
    metric_test_data = BinaryLabelDatasetMetric(
        transformed_data,
        unprivileged_groups=unpriv_group,
        privileged_groups=priv_group)
    parity_difference = metric_test_data.statistical_parity_difference()
    print(f"Mean difference (statistical parity difference) = {parity_difference}")
    return parity_difference
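# Usage sketch (assumption): a toy COMPAS-style frame. `race` is assumed to be
# numerically encoded already (here 1 = privileged group, 0 = unprivileged
# group, purely for illustration); not reoffending (two_year_recid == 0) is the
# favorable outcome, matching favorable_label=0 above.
toy_compas = pd.DataFrame({
    'race':           [0, 0, 0, 0, 1, 1, 1, 1],
    'two_year_recid': [1, 1, 0, 1, 0, 0, 1, 0],
})
spd = compute_statistical_parity(toy_compas,
                                 unpriv_group=[{'race': 0}],
                                 priv_group=[{'race': 1}])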
# Explainer method (from a model-serving explainer class): rebuild a labelled
# DataFrame from the incoming instances and the model outputs, then return a
# set of AIF360 group-fairness metrics alongside the predictions.
from typing import Dict

import numpy as np
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric


def explain(self, request: Dict) -> Dict:
    inputs = request["instances"]
    predictions = np.array(request["outputs"])
    dataframe_predicted = pd.DataFrame(inputs, columns=self.feature_names)
    dataframe_predicted[self.label_names[0]] = predictions
    dataset_predicted = BinaryLabelDataset(
        favorable_label=self.favorable_label,
        unfavorable_label=self.unfavorable_label,
        df=dataframe_predicted,
        label_names=self.label_names,
        protected_attribute_names=['age'])
    metrics = BinaryLabelDatasetMetric(
        dataset_predicted,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)
    return {
        "predictions": predictions.tolist(),
        "metrics": {
            "base_rate": metrics.base_rate(),
            "consistency": metrics.consistency().tolist(),
            "disparate_impact": metrics.disparate_impact(),
            "num_instances": metrics.num_instances(),
            "num_negatives": metrics.num_negatives(),
            "num_positives": metrics.num_positives(),
            "statistical_parity_difference": metrics.statistical_parity_difference(),
        }
    }
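# Request-shape sketch (assumption): `explain` is a method of an explainer
# class whose attributes (self.feature_names, self.label_names, the favorable /
# unfavorable labels and the group definitions) are set elsewhere. A request
# carrying two already-scored instances might look like the following; the
# feature values and the `explainer` instance are hypothetical.
sample_request = {
    "instances": [[25, 1, 0],   # one row per instance, columns match self.feature_names
                  [52, 0, 1]],
    "outputs": [1, 0],          # model predictions for those instances
}
# response = explainer.explain(sample_request)
# response["metrics"]["statistical_parity_difference"]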
      % (train_data.privileged_protected_attributes,
         train_data.unprivileged_protected_attributes))
print("Feature names: %s" % train_data.feature_names)

# %% [markdown]
# Now, let's take a look at the test data and compute the following difference:
#
# $$P(Y = \text{favorable} \mid D = \text{unprivileged}) - P(Y = \text{favorable} \mid D = \text{privileged})$$

# %%
metric_test_data = BinaryLabelDatasetMetric(test_data,
                                            unprivileged_groups=unpriv_group,
                                            privileged_groups=priv_group)
print("Mean difference (statistical parity difference) = %f"
      % metric_test_data.statistical_parity_difference())


def compute_statistical_parity(data, unpriv_group, priv_group):
    if isinstance(data, pd.DataFrame):
        transformed_data = BinaryLabelDataset(
            df=data,
            label_names=["two_year_recid"],
            protected_attribute_names=["race"],
            favorable_label=0,
            unfavorable_label=1)
    else:
        transformed_data = data
    metric_test_data = BinaryLabelDatasetMetric(
        transformed_data,
        unprivileged_groups=unpriv_group,
        privileged_groups=priv_group)
    parity_difference = metric_test_data.statistical_parity_difference()
    print(f"Mean difference (statistical parity difference) = {parity_difference}")
    return parity_difference
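# %%
# Usage sketch (assumption): reuse the helper on the AIF360 test split. The
# group dictionaries are defined earlier in the notebook and are not shown in
# this excerpt; for the COMPAS race split they typically look like
# [{'race': 0}] for the unprivileged group and [{'race': 1}] for the
# privileged group, depending on how `race` was encoded.
compute_statistical_parity(test_data, unpriv_group, priv_group)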
# Return the basic BinaryLabelDatasetMetric fairness metrics as a dictionary.
# Note: relies on `dataset`, `unprivileged_group` and `privileged_group`
# being defined in the enclosing (module) scope.
from aif360.metrics import BinaryLabelDatasetMetric


def get_bldm_metrics():
    metric_BLDM = BinaryLabelDatasetMetric(
        dataset, unprivileged_group, privileged_group)
    return {"Statistical Parity Difference": metric_BLDM.statistical_parity_difference(),
            "Disparate Impact": metric_BLDM.disparate_impact()}
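# Usage sketch (assumption): get_bldm_metrics reads its inputs from the
# enclosing scope, so `dataset` and the group definitions must exist before
# the call. The tiny synthetic dataset and the 'sex' encoding below are
# illustrative only.
import pandas as pd
from aif360.datasets import BinaryLabelDataset

toy = pd.DataFrame({'sex':   [0, 0, 1, 1, 0, 1],
                    'label': [0, 1, 1, 1, 0, 1]})
dataset = BinaryLabelDataset(df=toy, label_names=['label'],
                             protected_attribute_names=['sex'])
unprivileged_group = [{'sex': 0}]
privileged_group = [{'sex': 1}]
print(get_bldm_metrics())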