def explain(self, request: Dict) -> Dict:
    """Compute AIF360 group-fairness metrics for a batch of predictions.

    Args:
        request: payload containing "instances" (the feature rows that were
            scored) and "outputs" (the model's predictions for those rows).

    Returns:
        Dict with the echoed predictions under "predictions" and a "metrics"
        dict of binary-label dataset fairness metrics.
    """
    inputs = request["instances"]
    predictions = np.array(request["outputs"])

    # Rebuild a labelled dataframe: feature columns plus the predicted label.
    dataframe_predicted = pd.DataFrame(inputs, columns=self.feature_names)
    dataframe_predicted[self.label_names[0]] = predictions

    # Previously hard-coded to 'age', unlike every other dataset property
    # which is read from self.  Honour a configured list when the explainer
    # provides one; the default preserves the old behaviour.
    protected = getattr(self, "protected_attribute_names", ["age"])

    dataset_predicted = BinaryLabelDataset(
        favorable_label=self.favorable_label,
        unfavorable_label=self.unfavorable_label,
        df=dataframe_predicted,
        label_names=self.label_names,
        protected_attribute_names=protected)

    metrics = BinaryLabelDatasetMetric(
        dataset_predicted,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)

    return {
        "predictions": predictions.tolist(),
        "metrics": {
            "base_rate": metrics.base_rate(),
            "consistency": metrics.consistency().tolist(),
            "disparate_impact": metrics.disparate_impact(),
            "num_instances": metrics.num_instances(),
            "num_negatives": metrics.num_negatives(),
            "num_positives": metrics.num_positives(),
            "statistical_parity_difference":
                metrics.statistical_parity_difference(),
        }
    }
def main() -> None:
    """Load the German credit data, build an AIF360 StandardDataset and
    report fairness metrics for the configured protected attribute."""

    # Location of the 'Data' inventory entry.
    data_path = ait_input.get_inventory_path('Data')

    # Column names as documented in german.doc.
    column_names = [
        'status', 'month', 'credit_history', 'purpose', 'credit_amount',
        'savings', 'employment', 'investment_as_income_percentage',
        'personal_status', 'other_debtors', 'residence_since', 'property',
        'age', 'installment_plans', 'housing', 'number_of_credits',
        'skill_level', 'people_liable_for', 'telephone', 'foreign_worker',
        'credit'
    ]

    credit_df = data_loading(filepath=data_path, column_names=column_names,
                             na_values=None)

    # Value -> human-readable mappings for labels and protected attributes.
    mappings = {
        'label_maps': [{1.0: 'Good Credit', 2.0: 'Bad Credit'}],
        'protected_attribute_maps': [
            {1.0: 'Male', 0.0: 'Female'},
            {1.0: 'Old', 0.0: 'Young'},
        ],
    }

    categorical_features = [
        'status', 'credit_history', 'purpose', 'savings', 'employment',
        'other_debtors', 'property', 'installment_plans', 'housing',
        'skill_level', 'telephone', 'foreign_worker'
    ]

    # Runtime parameters, validated before use.
    protected_attribute = ait_input.get_method_param_value(
        'protected_attribute')
    priv_threshold = ait_input.get_method_param_value('privileged_classes')
    ait_input_check(protected_attribute, priv_threshold)

    # Build the structured dataset; values >= the threshold count as the
    # privileged class for the protected attribute.
    german_dataset = StandardDataset(
        df=credit_df,
        label_name='credit',
        favorable_classes=[1],
        protected_attribute_names=[protected_attribute],
        privileged_classes=[lambda x: x >= priv_threshold],
        instance_weights_name=None,
        categorical_features=categorical_features,
        features_to_keep=None,
        features_to_drop=['personal_status', 'sex'],
        na_values=None,
        custom_preprocessing=preprocessing,
        metadata=mappings)

    # 1 marks the privileged group, 0 the unprivileged group.
    privileged_groups = [{protected_attribute: 1}]
    unprivileged_groups = [{protected_attribute: 0}]

    # Fairness metrics on the original training data.
    fairness_metric = BinaryLabelDatasetMetric(
        german_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups)

    print("Original training dataset: German Credit Data")
    print(
        "Difference in mean outcomes between unprivileged and privileged groups = %f"
        % fairness_metric.mean_difference())
    print("unprivileged groups = %f"
          % fairness_metric.base_rate(privileged=False))
    print("privileged groups = %f"
          % fairness_metric.base_rate(privileged=True))

    # Resource: fairness plot.
    save_metric_fairness_plot(fairness_metric, protected_attribute)

    # Measure: mean difference.
    measure_mean_difference(fairness_metric.mean_difference())

    # ait.log
    move_log()
        unprivileged_groups=unpriv_group,
        privileged_groups=priv_group,
    )
    # NOTE(review): this is the tail of a function whose `def` line and the
    # metric-construction call above are outside this chunk — presumably a
    # ClassificationMetric over the test data; confirm against the full file.
    # Note the spelling "priviledged" is kept as-is from the original names.
    tpr_difference = metric_test_data.true_positive_rate_difference()
    tpr_priviledged = metric_test_data.true_positive_rate(True)
    tpr_unpriviledged = metric_test_data.true_positive_rate(False)
    return tpr_difference, tpr_priviledged, tpr_unpriviledged


# convert_to_dataframe() returns a (dataframe, attributes) pair; only the
# dataframe is passed on here.
compute_statistical_parity(test_data.convert_to_dataframe()[0], unpriv_group,
                           priv_group)

# %% [markdown]
# To be clear, because we're looking at the original label distribution this is the base rate difference between the two groups

# %%
metric_test_data.base_rate(False)  # Base rate of the unprivileged group

# %%
metric_test_data.base_rate(True)  # Base rate of the privileged group

# %% [markdown]
# To explore the data, it can also help to convert it to a dataframe.
# Note that we get the same numbers as the reported base rates above,
# but because when calculating base rates the favorable label is taken (which is actually 0), it's 1-...

# %% [markdown]
# **Report**
#
# Report basic statistics in your report, such as the size of the training and test set.
#
# Now let's explore the *training* data further.