def aequitas_group(df, score_column, label_column, protected_class):
    # To measure bias towards protected_class, filter the DataFrame
    # to the score, label (ground truth), and protected class columns
    data_scored = df[[score_column, label_column, protected_class]]

    # Aequitas expects the score under 'score' and the ground truth
    # under 'label_value'
    data_scored = data_scored.rename(
        columns={score_column: "score", label_column: "label_value"})

    # Process DataFrame
    data_scored_processed, _ = preprocess_input_df(data_scored)

    # Group metrics
    g = Group()
    xtab, _ = g.get_crosstabs(data_scored_processed)

    # Absolute metrics, such as 'tpr', 'tnr', 'precision', etc.
    absolute_metrics = g.list_absolute_metrics(xtab)

    # DataFrame of calculated absolute metrics for each sample population group
    absolute_metrics_df = xtab[
        ["attribute_name", "attribute_value"] + absolute_metrics
    ].round(2)
    # For example:
    """
      attribute_name attribute_value   tpr   tnr  ...  precision
    0         gender          female  0.60  0.88  ...       0.75
    1         gender            male  0.49  0.90  ...       0.64
    """
    return absolute_metrics_df
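# Hedged usage sketch for aequitas_group() above. The DataFrame and its
# column names ('model_score', 'approved', 'gender') are made-up examples,
# not part of the original snippet; Aequitas expects binary scores/labels
# and string-typed attribute columns.
import pandas as pd

example_df = pd.DataFrame({
    "model_score": [1, 0, 1, 1, 0, 1],
    "approved":    [1, 0, 0, 1, 0, 1],
    "gender":      ["female", "male", "female", "male", "female", "male"],
})
group_metrics_df = aequitas_group(example_df, "model_score", "approved", "gender")
print(group_metrics_df)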
def get_bias_metrics(data):
    bias = Bias()
    group = Group()

    # Rename the model output and ground-truth columns to the names Aequitas expects
    old_columns = ['predictions', 'loan_status', 'forty_plus_indicator']
    new_columns = ['score', 'label_value', 'forty_plus_indicator']
    scored_data = data.loc[:, old_columns]
    renamer = dict(zip(scored_data.columns, new_columns))
    scored_data = scored_data.rename(columns=renamer)

    data_processed, _ = preprocess_input_df(scored_data)
    xtab, _ = group.get_crosstabs(data_processed)

    # Absolute metrics for each group of the protected attribute
    attribute_columns = ['attribute_name', 'attribute_value']
    absolute_metrics = group.list_absolute_metrics(xtab)
    absolute_metrics_df = xtab[attribute_columns + absolute_metrics].round(2)

    # Disparities relative to the 'Under Forty' reference group
    bias_df = bias.get_disparity_predefined_groups(
        xtab,
        original_df=data_processed,
        ref_groups_dict={'forty_plus_indicator': 'Under Forty'},
        alpha=0.05,
        mask_significance=True)
    calculated_disparities = bias.list_disparities(bias_df)
    disparity_metrics_df = bias_df[attribute_columns + calculated_disparities]

    # Replace NaN with None so the records serialize cleanly to JSON
    abs_metrics = absolute_metrics_df.where(
        pd.notnull(absolute_metrics_df), None).to_dict(orient='records')
    disp_metrics = disparity_metrics_df.where(
        pd.notnull(disparity_metrics_df), None).to_dict(orient='records')

    return dict(absolute_metrics=abs_metrics, disparity_metrics=disp_metrics)
def metrics(data):
    data = pd.DataFrame(data)

    # To measure bias towards gender, filter the DataFrame
    # to "score", "label_value" (ground truth), and
    # "gender" (protected attribute)
    data_scored = data[["score", "label_value", "gender"]]

    # Process DataFrame
    data_scored_processed, _ = preprocess_input_df(data_scored)

    # Group metrics
    g = Group()
    xtab, _ = g.get_crosstabs(data_scored_processed)

    # Absolute metrics, such as 'tpr', 'tnr', 'precision', etc.
    absolute_metrics = g.list_absolute_metrics(xtab)

    # DataFrame of calculated absolute metrics for each sample population group
    absolute_metrics_df = xtab[
        ['attribute_name', 'attribute_value'] + absolute_metrics
    ].round(2)
    # For example:
    """
      attribute_name attribute_value   tpr   tnr  ...  precision
    0         gender          female  0.60  0.88  ...       0.75
    1         gender            male  0.49  0.90  ...       0.64
    """

    # Bias metrics
    b = Bias()

    # Disparities calculated in relation to gender, with "male" as the reference group
    bias_df = b.get_disparity_predefined_groups(
        xtab,
        original_df=data_scored_processed,
        ref_groups_dict={'gender': 'male'},
        alpha=0.05,
        mask_significance=True)

    # Disparity metrics added to bias DataFrame
    calculated_disparities = b.list_disparities(bias_df)
    disparity_metrics_df = bias_df[
        ['attribute_name', 'attribute_value'] + calculated_disparities
    ]
    # For example:
    """
      attribute_name attribute_value  ppr_disparity  precision_disparity
    0         gender          female       0.714286              1.41791
    1         gender            male       1.000000              1.00000
    """

    output_metrics_df = disparity_metrics_df  # or absolute_metrics_df

    # Yield a JSON-serializable list of calculated metrics
    yield output_metrics_df.to_dict(orient="records")
def preprocess_df(df_pandas, cheat=1):
    if cheat:
        # Inflate the sample 5x and attach random labels/scores so the audit
        # has enough rows per group to run (a demo shortcut, not real data).
        # pd.concat replaces the deprecated DataFrame.append chain.
        df_pandas = pd.concat([df_pandas] * 5, ignore_index=True)
        df_pandas['label_value'] = np.random.choice([0, 1], df_pandas.shape[0])
        df_pandas['score'] = np.random.choice([0, 1], df_pandas.shape[0])

    # preprocess: Aequitas expects attribute columns as strings
    df_pandas['originwac'] = df_pandas['originwac'].astype(str)
    df_pandas['distance'] = df_pandas['distance'].astype(int)
    df, _ = preprocess_input_df(df_pandas)
    return df
def audit(df, configs, model_id=1, preprocessed=False):
    """
    :param df: a pandas DataFrame with scores, labels, and protected attributes
    :param configs: audit configuration (attribute columns, reference groups,
        fairness measures, thresholds, and report flag)
    :param model_id: identifier attached to the crosstab rows
    :param preprocessed: skip preprocess_input_df() if the input is already processed
    :return: (group_value_df, report) tuple
    """
    if not preprocessed:
        df, attr_cols_input = preprocess_input_df(df)
        if not configs.attr_cols:
            configs.attr_cols = attr_cols_input

    g = Group()
    print('Welcome to Aequitas-Audit')
    print('Fairness measures requested:', ','.join(configs.fair_measures_requested))
    groups_model, attr_cols = g.get_crosstabs(
        df,
        score_thresholds=configs.score_thresholds,
        model_id=model_id,
        attr_cols=configs.attr_cols)
    print('audit: df shape from the crosstabs:', groups_model.shape)

    b = Bias()
    # TODO: move this to the new configs object; the attr_cols are now passed
    # through the configs object
    ref_groups_method = configs.ref_groups_method
    if ref_groups_method == 'predefined' and configs.ref_groups:
        bias_df = b.get_disparity_predefined_groups(groups_model, configs.ref_groups)
    elif ref_groups_method == 'majority':
        bias_df = b.get_disparity_major_group(groups_model)
    else:
        bias_df = b.get_disparity_min_metric(groups_model)
    print('Any NaN?: ', bias_df.isnull().values.any())
    print('bias_df shape:', bias_df.shape)

    f = Fairness(tau=configs.fairness_threshold)
    print('Fairness Threshold:', configs.fairness_threshold)
    print('Fairness Measures:', configs.fair_measures_requested)
    group_value_df = f.get_group_value_fairness(
        bias_df, fair_measures_requested=configs.fair_measures_requested)
    group_attribute_df = f.get_group_attribute_fairness(
        group_value_df, fair_measures_requested=configs.fair_measures_requested)
    fair_results = f.get_overall_fairness(group_attribute_df)
    print(fair_results)

    report = None
    if configs.report is True:
        report = audit_report_markdown(configs, group_value_df,
                                       f.fair_measures_depend, fair_results)
    return group_value_df, report
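# Hedged usage sketch for audit() above. The SimpleNamespace below only
# mimics the attributes audit() actually reads; it is a stand-in, not the
# project's real Configs class, and the DataFrame is made-up example data.
from types import SimpleNamespace

import pandas as pd

configs = SimpleNamespace(
    attr_cols=None,                 # filled in from preprocess_input_df()
    score_thresholds=None,          # use the binary scores as-is
    ref_groups_method='majority',   # compare against each attribute's largest group
    ref_groups=None,
    fairness_threshold=0.8,
    fair_measures_requested=['Statistical Parity', 'Impact Parity'],
    report=False,
)
df = pd.DataFrame({
    "score":       [1, 0, 1, 1, 0, 1],
    "label_value": [1, 0, 0, 1, 0, 1],
    "gender":      ["female", "male", "female", "male", "female", "male"],
})
group_value_df, _ = audit(df, configs)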
def aequitas_bias(df, score_column, label_column, protected_class, reference_group):
    # To measure bias towards protected_class, filter the DataFrame
    # to the score, label (ground truth), and protected class columns
    data_scored = df[[score_column, label_column, protected_class]]

    # Aequitas expects the score under 'score' and the ground truth
    # under 'label_value'
    data_scored = data_scored.rename(
        columns={score_column: "score", label_column: "label_value"})

    # Process DataFrame
    data_scored_processed, _ = preprocess_input_df(data_scored)

    # Bias metrics
    b = Bias()
    g = Group()
    xtab, _ = g.get_crosstabs(data_scored_processed)

    # Disparities calculated in relation to <protected_class>, with
    # <reference_group> as the reference group
    bias_df = b.get_disparity_predefined_groups(
        xtab,
        original_df=data_scored_processed,
        ref_groups_dict={protected_class: reference_group},
        alpha=0.05,
        mask_significance=True,
    )

    # Disparity metrics added to bias DataFrame
    calculated_disparities = b.list_disparities(bias_df)
    disparity_metrics_df = bias_df[
        ["attribute_name", "attribute_value"] + calculated_disparities
    ]
    # For example:
    """
      attribute_name attribute_value  ppr_disparity  precision_disparity
    0         gender          female       0.714286              1.41791
    1         gender            male       1.000000              1.00000
    """
    return disparity_metrics_df
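# Hedged usage sketch for aequitas_bias() above, reusing the made-up
# example_df from the aequitas_group() sketch; choosing "male" as the
# reference group is likewise an assumption for illustration.
disparities_df = aequitas_bias(
    example_df,
    score_column="model_score",
    label_column="approved",
    protected_class="gender",
    reference_group="male",
)
print(disparities_df)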
def get_bias_metrics(data):
    # To measure bias towards gender, filter the DataFrame
    # to "score", "label_value" (ground truth), and
    # "gender" (protected attribute)
    data_scored = data[["score", "label_value", "gender"]]

    # Process DataFrame
    data_scored_processed, _ = preprocess_input_df(data_scored)

    # Group metrics
    g = Group()
    xtab, _ = g.get_crosstabs(data_scored_processed)

    # Absolute metrics, such as 'tpr', 'tnr', 'precision', etc.
    absolute_metrics = g.list_absolute_metrics(xtab)

    # DataFrame of calculated absolute metrics for each sample population group
    absolute_metrics_df = xtab[
        ["attribute_name", "attribute_value"] + absolute_metrics
    ].round(2)

    # Bias metrics
    b = Bias()

    # Disparities calculated in relation to gender, with "male" as the reference group
    bias_df = b.get_disparity_predefined_groups(
        xtab,
        original_df=data_scored_processed,
        ref_groups_dict={"gender": "male"},
        alpha=0.05,
        mask_significance=True,
    )

    # Disparity metrics added to bias DataFrame
    calculated_disparities = b.list_disparities(bias_df)
    disparity_metrics_df = bias_df[
        ["attribute_name", "attribute_value"] + calculated_disparities
    ]

    output_metrics_df = disparity_metrics_df  # or absolute_metrics_df

    # Return a JSON-serializable list of calculated metrics
    return output_metrics_df.to_dict(orient="records")
def _preprocess(self, df):
    df = self._create_groups(df)
    score_col = self.config['score_col']
    label_col = self.config['label_col']

    # Rename columns to use Aequitas' names
    if label_col != 'label_value':
        df = df.assign(label_value=df[label_col])
        df = df.drop(columns=[label_col])
    if score_col != 'score':
        df = df.assign(score=df[score_col])
        df = df.drop(columns=[score_col])

    # Make sure the group column is a string
    df['majority_demo'] = df['majority_demo'].astype(str)

    # Filter to only what Aequitas needs
    columns = ['label_value', 'score', 'majority_demo']
    df, _ = preprocess_input_df(df[columns])
    return df
def audit_file(name, dirname):
    upload_path = os.path.join(tempfile.gettempdir(), dirname)
    data_path = os.path.join(upload_path, name + '.csv')
    if not os.path.exists(data_path):
        abort(404)

    try:
        df = pd.read_csv(data_path)
    except pd.errors.ParserError:
        flash('Bad CSV file – could not parse', 'warning')
        return redirect(url_for('home'))

    (df, groups) = preprocess_input_df(df)

    if "submit" not in request.form:
        subgroups = {col: list(set(df[col])) for col in groups}
        # set defaults: sort common reference values to the front
        for (key, values) in (
            ('race', ('White', 'Caucasian')),
            ('sex', ('Male',)),
            ('gender', ('Male',)),
            ('age_cat', ('25 - 45',)),
            ('education', ('HS-grad',)),
        ):
            if key in subgroups:
                subgroups[key].sort(key=lambda value: int(value not in values))

        supported_fairness_measures = Fairness().get_fairness_measures_supported(df)
        fairness_measures = [x for x in FAIR_MAP_ORDER
                             if FAIR_MAP[x].issubset(set(supported_fairness_measures))]
        return render_template('audit.html',
                               categories=groups,
                               subcategories=subgroups,
                               fairness=fairness_measures)

    rgm = request.form["ref_groups_method"]
    if rgm == 'predefined':
        group_variables = request.form.getlist('group_variable1')
    else:
        group_variables = request.form.getlist('group_variable2')

    # check if user forgot to select anything; return all
    if len(group_variables) == 0:
        group_variables = groups

    # remove unwanted cols from df
    subgroups = {g: request.form[g] for g in group_variables}
    # majority_groups = request.form.getlist('use_majority_group')

    raw_fairness_measures = request.form.getlist('fairness_measures')
    if len(raw_fairness_measures) == 0:
        fairness_measures = list(Fairness().get_fairness_measures_supported(df))
    else:
        # map selected measures to input
        fairness_measures = [y for x in raw_fairness_measures for y in FAIR_MAP[x]]

    try:
        fv = float(request.form['fairness_pct'])
    except (KeyError, ValueError):
        fv = None
    fp = fv / 100.0 if fv else 0.8

    configs = Configs(ref_groups=subgroups,
                      ref_groups_method=rgm,
                      fairness_threshold=fp,
                      fairness_measures=fairness_measures,
                      attr_cols=group_variables)

    (_gv_df, report) = audit(df,
                             # model_id=1,
                             configs=configs,
                             preprocessed=True)

    # find the first unused report id in the upload directory
    for reportid in itertools.count(1):
        report_path = os.path.join(upload_path, str(reportid))
        if not os.path.exists(report_path):
            break
    with open(report_path, 'w') as fd:
        fd.write(report)

    return redirect(url_for("report", dirname=dirname, name=name, reportid=reportid))
def _write_audit_to_db(self,
                       model_id,
                       protected_df,
                       predictions_proba,
                       labels,
                       tie_breaker,
                       subset_hash,
                       matrix_type,
                       evaluation_start_time,
                       evaluation_end_time,
                       matrix_uuid):
    """ Runs the bias audit and saves the result in the bias table.

    Args:
        model_id (int) primary key of the model
        protected_df (pandas.DataFrame) A dataframe with protected group attributes
        predictions_proba (np.array) List of prediction probabilities
        labels (pandas.Series): List of labels
        tie_breaker: 'best' or 'worst' case tiebreaking rule that the
            predictions and labels were sorted by
        subset_hash (str) the hash of the subset, if any, that the evaluation
            is made on
        matrix_type (triage.component.catwalk.storage.MatrixType) The type of
            matrix used
        evaluation_start_time (pandas._libs.tslibs.timestamps.Timestamp)
            first as_of_date included in the evaluation period
        evaluation_end_time (pandas._libs.tslibs.timestamps.Timestamp)
            last as_of_date included in the evaluation period
        matrix_uuid: the uuid of the matrix

    Returns:
    """
    if protected_df.empty:
        return

    # to preprocess, aequitas requires the following columns:
    # score, label_value, model_id, protected attributes.
    # fill out the protected_df, which has only protected attributes at this point
    protected_df = protected_df.copy()
    protected_df['model_id'] = model_id
    protected_df['score'] = predictions_proba
    protected_df['label_value'] = labels
    aequitas_df, attr_cols_input = preprocess_input_df(protected_df)

    # create group crosstabs
    g = Group()
    score_thresholds = {}
    score_thresholds['rank_abs'] = self.bias_config['thresholds'].get('top_n', [])
    # convert 0-100 percentiles to the 0-1 range that Aequitas expects
    score_thresholds['rank_pct'] = [
        value / 100.0
        for value in self.bias_config['thresholds'].get('percentiles', [])
    ]
    groups_model, attr_cols = g.get_crosstabs(
        aequitas_df,
        score_thresholds=score_thresholds,
        attr_cols=attr_cols_input)

    # analyze bias from reference groups
    bias = Bias()
    ref_groups_method = self.bias_config.get('ref_groups_method', None)
    if ref_groups_method == 'predefined' and self.bias_config['ref_groups']:
        bias_df = bias.get_disparity_predefined_groups(
            groups_model, aequitas_df, self.bias_config['ref_groups'])
    elif ref_groups_method == 'majority':
        bias_df = bias.get_disparity_major_group(groups_model, aequitas_df)
    else:
        bias_df = bias.get_disparity_min_metric(groups_model, aequitas_df)

    # analyze fairness for each group
    f = Fairness(tau=0.8)  # the default fairness threshold is 0.8
    group_value_df = f.get_group_value_fairness(bias_df)
    group_value_df['subset_hash'] = subset_hash
    group_value_df['tie_breaker'] = tie_breaker
    group_value_df['evaluation_start_time'] = evaluation_start_time
    group_value_df['evaluation_end_time'] = evaluation_end_time
    group_value_df['matrix_uuid'] = matrix_uuid
    group_value_df = group_value_df.rename(
        index=str, columns={"score_threshold": "parameter"})

    if group_value_df.empty:
        raise ValueError(f"""
            Bias audit: aequitas_audit() failed. Returned empty dataframe for
            model_id = {model_id}, subset_hash = {subset_hash},
            and matrix_type = {matrix_type}""")

    with scoped_session(self.db_engine) as session:
        for index, row in group_value_df.iterrows():
            # replace any existing audit rows for this evaluation
            session.query(matrix_type.aequitas_obj).filter_by(
                model_id=row['model_id'],
                evaluation_start_time=row['evaluation_start_time'],
                evaluation_end_time=row['evaluation_end_time'],
                subset_hash=row['subset_hash'],
                parameter=row['parameter'],
                tie_breaker=row['tie_breaker'],
                matrix_uuid=row['matrix_uuid'],
                attribute_name=row['attribute_name'],
                attribute_value=row['attribute_value']).delete()
        session.bulk_insert_mappings(
            matrix_type.aequitas_obj,
            group_value_df.to_dict(orient="records"))
def get_bias_metrics(data):
    bias = Bias()
    group = Group()

    old_columns = ['predictions', 'loan_status', 'forty_plus_indicator']
    new_columns = ['score', 'label_value', 'forty_plus_indicator']
    scored_data = data.loc[:, old_columns]
    renamer = dict(zip(scored_data.columns, new_columns))
    scored_data = scored_data.rename(columns=renamer)

    data_processed, _ = preprocess_input_df(scored_data)
    xtab, _ = group.get_crosstabs(data_processed)

    attribute_columns = ['attribute_name', 'attribute_value']
    absolute_metrics = group.list_absolute_metrics(xtab)
    absolute_metrics_df = xtab[attribute_columns + absolute_metrics].round(2)

    bias_df = bias.get_disparity_predefined_groups(
        xtab,
        original_df=data_processed,
        ref_groups_dict={'forty_plus_indicator': 'Under Forty'},
        alpha=0.05,
        mask_significance=True)
    calculated_disparities = bias.list_disparities(bias_df)
    disparity_metrics_df = bias_df[attribute_columns + calculated_disparities]

    # Replace NaN with None so the records serialize cleanly to JSON
    abs_metrics = absolute_metrics_df.where(
        pd.notnull(absolute_metrics_df), None).to_dict(orient='records')
    disp_metrics = disparity_metrics_df.where(
        pd.notnull(disparity_metrics_df), None).to_dict(orient='records')

    # NOTE: the 'result' values below are hardcoded for this example;
    # they are not derived from the calculated disparities
    return {
        "attributeAudited": "forty_plus_indicator",
        "referenceGroup": "Under Forty",
        "fairnessThreshold": "80%",
        "fairnessMeasures": [
            {"label": "Predicted Positive Group Rate Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['pprev_disparity']},
            {"label": "Predicted Positive Rate Parity", "result": "Failed",
             "group": "Over Forty", "disparity": disp_metrics[0]['ppr_disparity']},
            {"label": "Proportional Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['precision_disparity']},
            {"label": "False Positive Rate Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['fpr_disparity']},
            {"label": "False Discovery Rate Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['fdr_disparity']},
            {"label": "False Negative Rate Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['fnr_disparity']},
            {"label": "False Omission Rate Parity", "result": "Passed",
             "group": "Over Forty", "disparity": disp_metrics[0]['for_disparity']},
        ]
    }
import joblib
import pandas as pd

from aequitas.group import Group
from aequitas.preprocessing import preprocess_input_df


def load_model(path: str):
    # the joblib file stores the fitted estimator as the first element
    return joblib.load(path)[0]


def load_features(path: str) -> pd.DataFrame:
    return pd.read_csv(path)


if __name__ == "__main__":
    model_path = "best_models_DecisionTreeClassifier-max_depth1.joblib"
    feature_path = "best_model_test.csv"
    model = load_model(model_path)
    df = load_features(feature_path)
    df = (df.rename(columns={"not_renewed_2yrs": "label_value"})
            .drop(columns=['Unnamed: 0', 'ACCOUNT NUMBER', 'SITE NUMBER', 'YEAR'])
            .fillna(0))

    # score with the positive-class probability
    fixed = df[df.columns[df.columns != "label_value"]]
    scores = model.predict_proba(fixed)
    df["score"] = [row[1] for row in scores]
    df.to_csv("aequitas_scored.csv")

    df, cols = preprocess_input_df(df)
    g = Group()
    xtab, _ = g.get_crosstabs(df, attr_cols=["num_renewals"])
    print(xtab)
    absolute_metrics = g.list_absolute_metrics(xtab)
    print(xtab[[col for col in xtab.columns if col not in absolute_metrics]])
def audit(df, configs, preprocessed=False):
    """
    :param df: a pandas DataFrame with scores, labels, and protected attributes
    :param configs: audit configuration (attribute columns, reference groups,
        fairness measures, thresholds, plotting options, and report flag)
    :param preprocessed: skip preprocess_input_df() if the input is already processed
    :return: (group_value_df, report) tuple
    """
    if not preprocessed:
        df, attr_cols_input = preprocess_input_df(df)
        if not configs.attr_cols:
            configs.attr_cols = attr_cols_input

    g = Group()
    print('Welcome to Aequitas-Audit')
    print('Fairness measures requested:', ','.join(configs.fair_measures_requested))
    groups_model, attr_cols = g.get_crosstabs(
        df,
        score_thresholds=configs.score_thresholds,
        attr_cols=configs.attr_cols)
    print('audit: df shape from the crosstabs:', groups_model.shape)

    b = Bias()
    # TODO: move this to the new configs object; the attr_cols are now passed
    # through the configs object
    ref_groups_method = configs.ref_groups_method
    if ref_groups_method == 'predefined' and configs.ref_groups:
        bias_df = b.get_disparity_predefined_groups(
            groups_model, df, configs.ref_groups,
            check_significance=configs.check_significance,
            alpha=configs.alpha,
            selected_significance=configs.selected_significance,
            mask_significance=configs.mask_significance)
    elif ref_groups_method == 'majority':
        bias_df = b.get_disparity_major_group(
            groups_model, df,
            check_significance=configs.check_significance,
            alpha=configs.alpha,
            selected_significance=configs.selected_significance,
            mask_significance=configs.mask_significance)
    else:
        bias_df = b.get_disparity_min_metric(
            df=groups_model,
            original_df=df,
            check_significance=configs.check_significance,
            alpha=configs.alpha,
            label_score_ref='fpr',
            selected_significance=configs.selected_significance,
            mask_significance=configs.mask_significance)
    print('Any NaN?: ', bias_df.isnull().values.any())
    print('bias_df shape:', bias_df.shape)

    aqp = Plot()
    if configs.plot_bias_metrics:
        if len(configs.plot_bias_metrics) == 1:
            fig1 = aqp.plot_group_metric(
                bias_df, group_metric=configs.plot_bias_metrics[0])
        elif len(configs.plot_bias_metrics) > 1:
            fig1 = aqp.plot_group_metric_all(
                bias_df, metrics=configs.plot_disparity_attributes)

        if (len(configs.plot_bias_disparities) == 1) and (
                len(configs.plot_disparity_attributes) == 1):
            fig2 = aqp.plot_disparity(
                bias_df,
                group_metric=configs.plot_bias_disparities[0],
                attribute_name=configs.plot_disparity_attributes[0])
        elif (len(configs.plot_bias_disparities) > 1) or (
                len(configs.plot_disparity_attributes) > 1):
            fig2 = aqp.plot_disparity_all(
                bias_df,
                metrics=configs.plot_bias_disparities,
                attributes=configs.plot_disparity_attributes)

    f = Fairness(tau=configs.fairness_threshold)
    print('Fairness Threshold:', configs.fairness_threshold)
    print('Fairness Measures:', configs.fair_measures_requested)
    group_value_df = f.get_group_value_fairness(
        bias_df, fair_measures_requested=configs.fair_measures_requested)
    group_attribute_df = f.get_group_attribute_fairness(
        group_value_df, fair_measures_requested=configs.fair_measures_requested)
    fair_results = f.get_overall_fairness(group_attribute_df)

    if configs.plot_bias_metrics:
        if len(configs.plot_bias_metrics) == 1:
            fig3 = aqp.plot_fairness_group(
                group_value_df, group_metric=configs.plot_bias_metrics[0])
        elif len(configs.plot_bias_metrics) > 1:
            fig3 = aqp.plot_fairness_group_all(
                group_value_df, metrics=configs.plot_bias_metrics)

        if (len(configs.plot_bias_disparities) == 1) and (
                len(configs.plot_disparity_attributes) == 1):
            fig4 = aqp.plot_fairness_disparity(
                group_value_df,
                group_metric=configs.plot_bias_disparities[0],
                attribute_name=configs.plot_disparity_attributes[0])
        elif (len(configs.plot_bias_disparities) > 1) or (
                len(configs.plot_disparity_attributes) > 1):
            fig4 = aqp.plot_fairness_disparity_all(
                group_value_df,
                metrics=configs.plot_bias_disparities,
                attributes=configs.plot_disparity_attributes)

    print(fair_results)
    report = None
    if configs.report is True:
        report = audit_report_markdown(configs, group_value_df,
                                       f.fair_measures_depend, fair_results)
    return group_value_df, report
# ## Prepare Data For Aequitas Bias Toolkit

# Using the gender and race fields, we will prepare the data for the Aequitas Toolkit.

# In[165]:

# Aequitas
from aequitas.preprocessing import preprocess_input_df
from aequitas.group import Group
from aequitas.plotting import Plot
from aequitas.bias import Bias
from aequitas.fairness import Fairness

ae_subset_df = pred_test_df[['race', 'gender', 'score', 'label_value']]
ae_df, _ = preprocess_input_df(ae_subset_df)
g = Group()
xtab, _ = g.get_crosstabs(ae_df)
absolute_metrics = g.list_absolute_metrics(xtab)
clean_xtab = xtab.fillna(-1)
aqp = Plot()
b = Bias()

# ## Reference Group Selection

# Below we have chosen the reference group for our analysis, but feel free to select another one.

# In[166]:
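# A hedged sketch of what this cell might contain, not the original code:
# compute disparities against predefined reference groups, then apply the
# fairness determinations. The 'Caucasian' and 'Male' reference values are
# assumptions; substitute the group values present in your own data.

bias_df = b.get_disparity_predefined_groups(
    clean_xtab,
    original_df=ae_df,
    ref_groups_dict={'race': 'Caucasian', 'gender': 'Male'},
    alpha=0.05,
    mask_significance=True,
)
print(bias_df[['attribute_name', 'attribute_value'] + b.list_disparities(bias_df)])

# Fairness determinations at the default 80% threshold
f = Fairness()
fairness_df = f.get_group_value_fairness(bias_df)
fig = aqp.plot_fairness_group_all(fairness_df, metrics=['ppr', 'pprev'], ncols=2)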