# Third-party imports used below; project-local helpers (load_data,
# load_optimization_info, split_test_sets, utils, the distance functions,
# the *_all_approaches functions, etc.) are assumed to be imported
# elsewhere in this package.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from seaborn import boxplot, stripplot
from sklearn.metrics import confusion_matrix


def f1_score_test_sets(location, indices_info, n_splits, k_info, threshold_info,
                       best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    splitted_test_sets = split_test_sets(n_splits, location)
    i = 0
    f1_results = []
    for test_set in splitted_test_sets:
        print("Split: " + str(i))
        f1_baseline, f1_luong, f1_zhang, f1_euclidean, f1_mahalanobis = f1_score_all_approaches(
            loaded_train_data, test_set, loaded_optimization_info,
            indices_info, k_info, threshold_info)
        f1_results.append({
            'baseline': f1_baseline,
            'luong': f1_luong,
            'zhang': f1_zhang,
            'euclidean': f1_euclidean,
            'mahalanobis': f1_mahalanobis
        })
        print(f1_results)
        i += 1
    print("F1")
    utils.print_avg_results_from_dictionary(f1_results)

def f1_score_val_set(location, indices_info, k_info, possible_thresholds,
                     best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    for threshold in possible_thresholds:
        print("threshold: " + str(threshold))
        threshold_info = {
            'baseline': threshold,
            'luong': threshold,
            'zhang': threshold,
            'euclidean': threshold,
            'mahalanobis': threshold
        }
        f1_baseline, f1_luong, f1_zhang, f1_euclidean, f1_mahalanobis = f1_score_all_approaches(
            loaded_train_data, loaded_val_data, loaded_optimization_info,
            indices_info, k_info, threshold_info)
        print(f1_baseline)
        print(f1_luong)
        print(f1_zhang)
        print(f1_euclidean)
        print(f1_mahalanobis)
    return

def give_disc_scores_validation_set(location, indices_info, k_info,
                                    best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    protected_info_val = loaded_val_data['protected_info']
    protected_indices_val = np.where(protected_info_val == 1)[0]
    baseline_scores, luong_scores, zhang_scores, weighted_euclidean_scores, mahalanobis_scores = give_all_disc_scores(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        indices_info, k_info, "adult")
    data_frame_protected_indices_disc_scores = pd.DataFrame(
        list(zip(baseline_scores, luong_scores, zhang_scores,
                 weighted_euclidean_scores, mahalanobis_scores)),
        index=protected_indices_val,
        columns=['Baseline', 'Luong', 'Zhang', 'Weighted Euclidean', 'Mahalanobis'])
    return data_frame_protected_indices_disc_scores

def inspect_mistakes(location, k, threshold):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location)

    train_data = loaded_train_data['data']
    train_data_standardized = loaded_train_data['standardized_data']
    train_protected_info = loaded_train_data['protected_info']
    train_class_label = loaded_train_data['class_label']
    train_protected_indices = list(np.where(train_protected_info == 1)[0])
    train_unprotected_indices = list(np.where(train_protected_info == 2)[0])

    val_data = loaded_val_data['data']
    val_data_standardized = loaded_val_data['standardized_data']
    val_ground_truth = loaded_val_data['ground_truth']
    val_protected_info = loaded_val_data['protected_info']
    val_class_label = loaded_val_data['class_label']
    val_protected_indices = list(np.where(val_protected_info == 1)[0])

    indices_info = loaded_optimization_info['indices_info']
    weights_euclidean = loaded_optimization_info['weights_euclidean']

    discrimination_scores = give_all_disc_scores_Luong(
        k,
        class_info_train=train_class_label,
        protected_indices_train=train_protected_indices,
        unprotected_indices_train=train_unprotected_indices,
        training_set=train_data_standardized,
        protected_indices_test=val_protected_indices,
        class_info_test=val_class_label,
        test_set=val_data_standardized,
        indices_info=indices_info)
    disc_labels = utils.give_disc_label(discrimination_scores, threshold)
    print(confusion_matrix(val_ground_truth, disc_labels))

def find_best_reject_threshold_based_on_k_distance_plot(location, indices_info, k,
                                                        technique, lambda_l1, title):
    loaded_train_data = load_data(location, "train")
    loaded_test_data = load_data(location, "val")

    train_data_standardized = loaded_train_data['standardized_data']
    train_protected_info = loaded_train_data['protected_info']
    train_unprotected_indices = list(np.where(train_protected_info == 2)[0])

    val_data_standardized = loaded_test_data['standardized_data']
    # class labels of the validation set (used to select negative-class instances)
    val_class_label = loaded_test_data['class_label']
    val_protected_info = loaded_test_data['protected_info']
    val_indices_negative_class_label = set(np.where(val_class_label == 0)[0])
    val_protected_indices = set(np.where(val_protected_info == 1)[0])
    val_indices_protected_and_negative = val_indices_negative_class_label.intersection(val_protected_indices)

    train_data_standardized_unprotected = train_data_standardized.iloc[train_unprotected_indices]
    val_data_standardized_protected = val_data_standardized.iloc[list(val_indices_protected_and_negative)]

    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)
    if technique == 'euclidean':
        weights_euclidean = loaded_optimization_info['weights_euclidean']
        distance_matrix = cdist(val_data_standardized_protected.values,
                                train_data_standardized_unprotected.values,
                                weighted_euclidean_distance,
                                weights=weights_euclidean,
                                indices_info=indices_info)
    elif technique == 'mahalanobis':
        mahalanobis_matrix = loaded_optimization_info['mahalanobis_matrix']
        distance_matrix = cdist(val_data_standardized_protected.values,
                                train_data_standardized_unprotected.values,
                                mahalanobis_distance,
                                weights=mahalanobis_matrix,
                                indices_info=indices_info)

    sorted_distances = get_sorted_distances_to_k_neighbour(distance_matrix, k)
    best_threshold = find_knee_point_of_sorted_k_distance_plot_geometric(sorted_distances)
    # best_threshold = find_knee_point_of_sorted_k_distance_plot_largest_slope(sorted_distances)
    print(best_threshold)
    sorted_k_distance_plot(sorted_distances, len(sorted_distances), k, title)
    return best_threshold

def find_best_threshold_based_on_demographic_parity(location, indices_info, k,
                                                    technique, lambda_l1=0):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)

    discrimination_scores = np.array(
        give_disc_scores_one_technique(loaded_train_data, loaded_val_data,
                                       loaded_optimization_info, technique,
                                       indices_info, k))
    discrimination_scores_negative_class_labels_only = discrimination_scores[discrimination_scores != -1]
    sorted_discrimination_scores_negative_class_labels = np.sort(discrimination_scores_negative_class_labels_only)
    reverse_sorted_disc_scores_neg_class_labels = sorted_discrimination_scores_negative_class_labels[::-1]

    loaded_test_data = load_data(location, "val")
    val_protected_info = loaded_test_data['protected_info']
    val_class_label = loaded_test_data['class_label']
    val_protected_indices = list(np.where(val_protected_info == 1)[0])
    val_unprotected_indices = list(np.where(val_protected_info == 2)[0])

    class_labels_unprotected = val_class_label[val_unprotected_indices]
    amount_of_positive_class_labels_unprotected = sum(class_labels_unprotected)
    print("Amount of positive class labels unprotected: " + str(amount_of_positive_class_labels_unprotected))

    class_labels_protected = val_class_label[val_protected_indices]
    amount_of_positive_class_labels_protected = sum(class_labels_protected)
    print("Amount of positive class labels protected: " + str(amount_of_positive_class_labels_protected))

    estimate_amount_of_discriminated_people = amount_of_positive_class_labels_unprotected - amount_of_positive_class_labels_protected
    print("Estimated amount of discriminated people: " + str(estimate_amount_of_discriminated_people))

    best_threshold = find_best_threshold_helper_function(
        reverse_sorted_disc_scores_neg_class_labels,
        estimate_amount_of_discriminated_people)
    print(best_threshold)
    return best_threshold

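# The function below is NOT the project's find_best_threshold_helper_function;
# it is only a minimal sketch of the selection rule implied by its call sites
# here and in find_best_threshold_based_on_estimated_number_of_discriminated_people:
# given discrimination scores sorted in descending order and an estimated number
# of discriminated individuals, pick a threshold that flags roughly that many.
# The name and the tie-handling are assumptions made for illustration only.
def _sketch_threshold_from_estimate(scores_descending, estimated_count):
    if estimated_count <= 0:
        # No one is estimated to be discriminated: set the threshold just above
        # the highest observed score so that nothing is flagged.
        return float(scores_descending[0]) + 1e-9
    # Clamp to the number of available scores and take the score at that rank.
    idx = min(int(estimated_count), len(scores_descending)) - 1
    return float(scores_descending[idx])
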
def compare_disc_detection_with_and_without_reject_option(
        location, indices_info, lambda_l1, k, reject_threshold, technique):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)
    val_ground_truth = np.array(loaded_val_data['ground_truth'])

    disc_scores_without_reject, _ = give_disc_scores_one_technique(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        technique, indices_info, k)
    disc_scores_with_reject, rejected_indices = give_disc_scores_with_reject_one_technique(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        technique, indices_info, k, reject_threshold)

    rejected_indices = np.array(rejected_indices)
    print("Number of rejected indices: " + str(len(rejected_indices)))
    print(rejected_indices)

    disc_scores_without_reject = np.array(disc_scores_without_reject)
    disc_scores_with_reject = np.array(disc_scores_with_reject)
    rejected_protected_info_indices = np.where(disc_scores_with_reject == -1000)[0]
    not_rejected_protected_info_indices = np.where(disc_scores_with_reject != -1000)[0]

    val_ground_truth_of_non_rejected_indices = val_ground_truth[not_rejected_protected_info_indices]
    disc_scores_of_non_rejected_indices = disc_scores_without_reject[not_rejected_protected_info_indices]

    print("AUC Scores with reject option")
    print(utils.get_auc_scores(val_ground_truth_of_non_rejected_indices,
                               disc_scores_of_non_rejected_indices))
    print("AUC Scores without reject option")
    print(utils.get_auc_scores(val_ground_truth, disc_scores_without_reject))

    ground_truth_of_rejected_indices = np.array(val_ground_truth[rejected_protected_info_indices])
    disc_scores_normally_given_to_rejected_indices = disc_scores_without_reject[rejected_protected_info_indices]
    indices_where_ground_truth_discriminated = np.where(ground_truth_of_rejected_indices == 1)[0]
    indices_where_ground_truth_not_discriminated = np.where(ground_truth_of_rejected_indices == 0)[0]
    disc_scores_normally_given_to_discriminated_instances = disc_scores_normally_given_to_rejected_indices[
        indices_where_ground_truth_discriminated]
    disc_scores_normally_given_to_not_discriminated_instances = disc_scores_normally_given_to_rejected_indices[
        indices_where_ground_truth_not_discriminated]
    print(disc_scores_normally_given_to_discriminated_instances)
    print(disc_scores_normally_given_to_not_discriminated_instances)
    # print(sum(disc_scores_normally_given_to_discriminated_instances)/len(disc_scores_normally_given_to_discriminated_instances))
    # print(sum(disc_scores_normally_given_to_not_discriminated_instances)/len(disc_scores_normally_given_to_not_discriminated_instances))
    return rejected_indices

def comparing_algorithms_on_specific_indices(location, indices_info, k_info,
                                             best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "test")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    indices_of_interest = [772]
    luong_scores, zhang_scores, weighted_euclidean_scores, mahalanobis_scores = give_disc_scores_to_given_indices(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        indices_info, k_info, "adult", indices_of_interest)
    return

def discrimination_detection_with_reject_option(location, indices_info, lambda_l1,
                                                k, reject_threshold, technique):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)
    disc_scores_with_reject, rejected_indices = give_disc_scores_with_reject_one_technique(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        technique, indices_info, k, reject_threshold)
    print(len(rejected_indices))
    return rejected_indices

def area_under_curve_validation_set(location, indices_info, k_info,
                                    best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    print(area_under_curve_all_approaches(loaded_train_data, loaded_val_data,
                                          loaded_optimization_info, indices_info, k_info))
    return

def find_best_threshold_based_on_unprotected_region(
        location, indices_info, k, technique, lambda_l1=0, adult_or_admission="admission"):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)

    disc_scores_protected = give_disc_scores_one_technique(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        technique, indices_info, k, 1, adult_or_admission)
    disc_scores_unprotected = give_disc_scores_one_technique(
        loaded_train_data, loaded_val_data, loaded_optimization_info,
        technique, indices_info, k, 2, adult_or_admission)

    disc_scores_protected = np.array(disc_scores_protected)
    disc_scores_unprotected = np.array(disc_scores_unprotected)
    disc_scores_protected_neg_class = disc_scores_protected[np.where(disc_scores_protected != -1)[0]]
    disc_scores_unprotected_neg_class = disc_scores_unprotected[np.where(disc_scores_unprotected != -1)[0]]

    first_quartile = np.quantile(disc_scores_unprotected_neg_class, 0.25)
    third_quartile = np.quantile(disc_scores_unprotected_neg_class, 0.75)
    inter_quartile_range = third_quartile - first_quartile
    max_non_outlier = third_quartile + (1.5 * inter_quartile_range)
    print(max_non_outlier)

    boxplot(data=[disc_scores_unprotected_neg_class, disc_scores_protected_neg_class],
            showmeans=True,
            meanprops={
                "marker": "o",
                "markerfacecolor": "grey",
                "markeredgecolor": "black",
                "markersize": "8"
            })
    stripplot(data=[disc_scores_unprotected_neg_class, disc_scores_protected_neg_class],
              color=".3")
    plt.show()
    return max_non_outlier

def find_best_reject_threshold_based_on_percentage_rejected(location, indices_info, k,
                                                            technique, lambda_l1,
                                                            desired_percent_rejected):
    loaded_train_data = load_data(location, "train")
    loaded_test_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)
    disc_scores, dist_to_closest_neighbours = give_disc_scores_one_technique(
        loaded_train_data, loaded_test_data, loaded_optimization_info,
        technique, indices_info, k)
    dist_to_closest_neighbours = np.array(dist_to_closest_neighbours)
    print(disc_scores)
    print(len(disc_scores))
    print(len(dist_to_closest_neighbours))
    desired_amount_rejected = round(len(dist_to_closest_neighbours) * desired_percent_rejected)
    print(desired_amount_rejected)
    sorted_dist_to_closest_neighbours = -np.sort(-dist_to_closest_neighbours)
    print(sorted_dist_to_closest_neighbours)
    threshold = sorted_dist_to_closest_neighbours[desired_amount_rejected - 1]
    return threshold

def visualize_mahalanobis(data_location, lambda_l1_norm, title):
    data_dict = load_data(data_location, "train")
    loaded_optimization_info = load_optimization_info(
        data_location,
        lambda_l1_norm_euclidean=lambda_l1_norm,
        lambda_l1_norm_mahalanobis=lambda_l1_norm)
    mahalanobis_matrix = loaded_optimization_info['mahalanobis_matrix']

    standardized_data = data_dict['standardized_data']
    protected_info = data_dict['protected_info']
    class_label = data_dict['class_label']
    discriminated_instances = data_dict['discriminated_instances']
    # mahalanobis_array = loaded_optimization_info['mahalanobis_matrix']
    print(mahalanobis_matrix)
    projected_data = utils.project_to_mahalanobis(standardized_data, mahalanobis_matrix)
    print(projected_data)

def area_under_curve_test_sets(location, indices_info, n_splits, k_info,
                               best_lambda_euclidean, best_lambda_mahalanobis,
                               adult_or_admission="admission"):
    loaded_train_data = load_data(location, "train")
    # loaded_optimization_info = {}
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    splitted_test_sets = split_test_sets(n_splits, location)
    i = 0
    roc_results = []
    pr_results = []
    for test_set in splitted_test_sets:
        print("Split: " + str(i))
        pr_aucs, roc_aucs = area_under_curve_all_approaches(
            loaded_train_data, test_set, loaded_optimization_info,
            indices_info, k_info, adult_or_admission)
        roc_results.append({
            'baseline': roc_aucs['baseline'],
            'luong': roc_aucs['luong'],
            'zhang': roc_aucs['zhang'],
            'euclidean': roc_aucs['euclidean'],
            'mahalanobis': roc_aucs['mahalanobis']
        })
        pr_results.append({
            'baseline': pr_aucs['baseline'],
            'luong': pr_aucs['luong'],
            'zhang': pr_aucs['zhang'],
            'euclidean': pr_aucs['euclidean'],
            'mahalanobis': pr_aucs['mahalanobis']
        })
        i += 1
        print(pr_aucs)
        print(roc_aucs)
    utils.print_avg_results_from_dictionary(roc_results)
    utils.print_avg_results_from_dictionary(pr_results)
    return

def find_best_threshold_based_on_estimated_number_of_discriminated_people(
        location, indices_info, k, technique, estimated_percent_of_discrimination, lambda_l1=0):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, lambda_l1, lambda_l1)

    discrimination_scores = np.array(
        give_disc_scores_one_technique(loaded_train_data, loaded_val_data,
                                       loaded_optimization_info, technique,
                                       indices_info, k))
    print("Amount of women: " + str(len(discrimination_scores)))
    discrimination_scores_negative_class_labels_only = discrimination_scores[discrimination_scores != -1]
    amount_of_positive_class_labels = len(discrimination_scores) - len(discrimination_scores_negative_class_labels_only)
    print("Amount of positive class labels: " + str(amount_of_positive_class_labels))
    print("Amount of negative class labels: " + str(len(discrimination_scores_negative_class_labels_only)))

    estimate_amount_of_actual_positive_class_labels = int(
        1 / (1 - estimated_percent_of_discrimination) * amount_of_positive_class_labels)
    estimate_amount_of_discriminated_people = estimate_amount_of_actual_positive_class_labels - amount_of_positive_class_labels
    print("Estimated number of discriminated people: " + str(estimate_amount_of_discriminated_people))

    sorted_discrimination_scores_negative_class_labels = np.sort(discrimination_scores_negative_class_labels_only)
    reverse_sorted_disc_scores_neg_class_labels = sorted_discrimination_scores_negative_class_labels[::-1]
    best_threshold = find_best_threshold_helper_function(
        reverse_sorted_disc_scores_neg_class_labels,
        estimate_amount_of_discriminated_people)
    print(best_threshold)
    return best_threshold

def visualize_euclidean(data_location, lambda_l1_norm, title):
    data_dict = load_data(data_location, "train")
    loaded_optimization_info = load_optimization_info(
        data_location,
        lambda_l1_norm_euclidean=lambda_l1_norm,
        lambda_l1_norm_mahalanobis=lambda_l1_norm)

    standardized_data = data_dict['standardized_data']
    protected_info = data_dict['protected_info']
    class_label = data_dict['class_label']
    discriminated_instances = data_dict['discriminated_instances']
    weights_euclidean = loaded_optimization_info['weights_euclidean']

    projected_data = utils.project_to_weighted_euclidean(standardized_data, weights_euclidean)
    visualize_positive_vs_negative(projected_data, protected_info, class_label, title)
    return

def area_under_curve_validation_set_different_k(location, indices_info, possible_ks,
                                                best_lambda_euclidean, best_lambda_mahalanobis):
    loaded_train_data = load_data(location, "train")
    loaded_val_data = load_data(location, "val")
    loaded_optimization_info = load_optimization_info(location, best_lambda_euclidean, best_lambda_mahalanobis)
    for k in possible_ks:
        print(k)
        k_info = {
            'baseline': k,
            'luong': k,
            'zhang': k,
            'euclidean': k,
            'mahalanobis': k
        }
        print(area_under_curve_all_approaches(loaded_train_data, loaded_val_data,
                                              loaded_optimization_info, indices_info, k_info))
    return

def visualize_inter_and_intra_distances(location, lambda_l1_euclidean,
                                        lambda_l1_mahalanobis, indices_info):
    loaded_data = load_data(location, "train")
    loaded_optimization_info = load_optimization_info(location, lambda_l1_euclidean, lambda_l1_mahalanobis)

    standardized_data = loaded_data['standardized_data']
    protected_info = loaded_data['protected_info']
    euclidean_weights = loaded_optimization_info['weights_euclidean']
    mahalanobis_matrix = loaded_optimization_info['mahalanobis_matrix']

    luong_distances = utils.make_distance_matrix_based_on_distance_function(
        standardized_data, luong_distance, [], indices_info)
    weighted_euclidean_distances = utils.make_distance_matrix_based_on_distance_function(
        standardized_data, weighted_euclidean_distance, euclidean_weights, indices_info)
    mahalanobis_distances = utils.make_distance_matrix_based_on_distance_function(
        standardized_data, mahalanobis_distance, mahalanobis_matrix, indices_info)

    # print("BASELINE")
    # inter_prot_base, inter_unprot_base, intra_base = utils.get_inter_and_intra_sens_distances(
    #     baseline_distances, protected_info, protected_label)
    print("Luong")
    inter_prot_luong, inter_unprot_luong, intra_luong = utils.get_inter_and_intra_sens_distances(
        luong_distances, protected_info, 1)
    # print("Zhang")
    # inter_prot_zhang, inter_unprot_zhang, intra_zhang = utils.get_inter_and_intra_sens_distances(
    #     zhang_distances, protected_info, protected_label)
    print("Weighted Euclidean")
    inter_prot_euclidean, inter_unprot_euclidean, intra_euclidean = utils.get_inter_and_intra_sens_distances(
        weighted_euclidean_distances, protected_info, 1)
    print("Mahalanobis")
    inter_prot_mahalanobis, inter_unprot_mahalanobis, intra_mahalanobis = utils.get_inter_and_intra_sens_distances(
        mahalanobis_distances, protected_info, 1)

    distances_inter_prot = pd.DataFrame(columns=["Luong", "Euclidean", "Mahalanobis"])
    # distances_inter_prot['Baseline'] = inter_prot_base
    distances_inter_prot['Luong'] = inter_prot_luong
    # distances_inter_prot['Zhang'] = inter_prot_zhang
    distances_inter_prot['Euclidean'] = inter_prot_euclidean
    distances_inter_prot['Mahalanobis'] = inter_prot_mahalanobis
    distances_inter_prot_melted = pd.melt(distances_inter_prot)
    distances_inter_prot_melted['Cluster'] = "Women vs. Women"

    distances_inter_unprot = pd.DataFrame(columns=["Luong", "Euclidean", "Mahalanobis"])
    # distances_inter_unprot['Baseline'] = inter_unprot_base
    distances_inter_unprot['Luong'] = inter_unprot_luong
    # distances_inter_unprot['Zhang'] = inter_unprot_zhang
    distances_inter_unprot['Euclidean'] = inter_unprot_euclidean
    distances_inter_unprot['Mahalanobis'] = inter_unprot_mahalanobis
    distances_inter_unprot_melted = pd.melt(distances_inter_unprot)
    distances_inter_unprot_melted['Cluster'] = "Men vs. Men"

    distances_intra = pd.DataFrame(columns=["Luong", "Euclidean", "Mahalanobis"])
    # distances_intra['Baseline'] = intra_base
    distances_intra['Luong'] = intra_luong
    # distances_intra['Zhang'] = intra_zhang
    distances_intra['Euclidean'] = intra_euclidean
    distances_intra['Mahalanobis'] = intra_mahalanobis
    distances_intra_melted = pd.melt(distances_intra)
    distances_intra_melted['Cluster'] = "Men vs. Women"

    all_distances = distances_inter_prot_melted.append(distances_inter_unprot_melted)
    all_distances = all_distances.append(distances_intra_melted)
    all_distances = all_distances.rename(columns={'variable': 'Measure', 'value': 'Distance'},
                                         inplace=False)

    boxplot(x="Cluster", y="Distance", hue='Measure', data=all_distances,
            showmeans=True,
            meanprops={
                "marker": "o",
                "markerfacecolor": "white",
                "markeredgecolor": "black",
                "markersize": "10"
            })
    plt.xlabel("Gender clusters", size=14)
    plt.ylabel("Distance", size=14)
    plt.title("Inter- and intra distances within and between genders", size=18)
    plt.legend(loc='upper right')
    plt.show()

def decision_labels_properties_for_unprotected_group(location, indices_info, k,
                                                     technique, unprotected_label,
                                                     lambda_l1=0):
    loaded_train_data = load_data(location, "train")
    loaded_test_data = load_data(location, "val")

    train_data = loaded_train_data['data']
    train_data_standardized = loaded_train_data['standardized_data']
    train_protected_info = loaded_train_data['protected_info']
    train_class_label = loaded_train_data['class_label']
    train_unprotected_indices = list(np.where(train_protected_info == 2)[0])

    val_data = loaded_test_data['data']
    val_data_standardized = loaded_test_data['standardized_data']
    val_protected_info = loaded_test_data['protected_info']
    val_class_label = loaded_test_data['class_label']
    val_unprotected_indices = list(np.where(val_protected_info == unprotected_label)[0])

    if technique == 'baseline':
        predictions_negative_class, predictions_positive_class = give_decision_labels_unprotected_group(
            k,
            class_info_train=train_class_label,
            unprotected_indices_train=train_unprotected_indices,
            training_set=train_data,
            unprotected_indices_test=val_unprotected_indices,
            class_info_test=val_class_label,
            test_set=val_data,
            indices_info=indices_info,
            distance_function=luong_distance)
        return predictions_negative_class, predictions_positive_class
    elif technique == 'luong':
        predictions_negative_class, predictions_positive_class = give_decision_labels_unprotected_group(
            k,
            class_info_train=train_class_label,
            unprotected_indices_train=train_unprotected_indices,
            training_set=train_data_standardized,
            unprotected_indices_test=val_unprotected_indices,
            class_info_test=val_class_label,
            test_set=val_data_standardized,
            indices_info=indices_info,
            distance_function=luong_distance)
        return predictions_negative_class, predictions_positive_class
    elif technique == 'zhang':
        predictions_negative_class, predictions_positive_class = get_zhang_decision_scores_unprotected_group(
            "adult",
            k=k,
            train_data=train_data,
            train_sens_attribute=train_protected_info,
            train_decision_attribute=train_class_label,
            test_data=val_data,
            test_sens_attribute=val_protected_info,
            test_decision_attribute=val_class_label)
        print(predictions_positive_class)
        print(predictions_negative_class)
        return predictions_negative_class, predictions_positive_class
    elif technique == 'euclidean':
        weights_euclidean = load_optimization_info(location, lambda_l1, 0.09)['weights_euclidean']
        predictions_negative_class, predictions_positive_class = give_decision_labels_unprotected_group(
            k,
            class_info_train=train_class_label,
            unprotected_indices_train=train_unprotected_indices,
            training_set=train_data_standardized,
            unprotected_indices_test=val_unprotected_indices,
            class_info_test=val_class_label,
            test_set=val_data_standardized,
            indices_info=indices_info,
            distance_function=weighted_euclidean_distance,
            weights=weights_euclidean)
        return predictions_negative_class, predictions_positive_class
    elif technique == 'mahalanobis':
        mahalanobis_matrix = load_optimization_info(location, lambda_l1, lambda_l1)['mahalanobis_matrix']
        predictions_negative_class, predictions_positive_class = give_decision_labels_unprotected_group(
            k,
            class_info_train=train_class_label,
            unprotected_indices_train=train_unprotected_indices,
            training_set=train_data_standardized,
            unprotected_indices_test=val_unprotected_indices,
            class_info_test=val_class_label,
            test_set=val_data_standardized,
            indices_info=indices_info,
            distance_function=mahalanobis_distance,
            weights=mahalanobis_matrix)
        return predictions_negative_class, predictions_positive_class
    return 0
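

# --- Usage sketch (illustrative only) ---
# A minimal example of how the analysis entry points above might be driven.
# The dataset location, the indices_info/k_info/threshold_info dictionaries,
# and the lambda values below are hypothetical placeholders, not values taken
# from the experiments; the real structure of indices_info depends on the
# project's data loader.
if __name__ == "__main__":
    example_location = "data/adult"  # hypothetical dataset folder
    example_indices_info = {'interval': [0, 1], 'nominal': [2, 3]}  # hypothetical column-type info
    example_k_info = {'baseline': 10, 'luong': 10, 'zhang': 10,
                      'euclidean': 10, 'mahalanobis': 10}
    example_threshold_info = {'baseline': 0.1, 'luong': 0.1, 'zhang': 0.1,
                              'euclidean': 0.1, 'mahalanobis': 0.1}

    # AUC on the validation set for all approaches with a single k per approach.
    area_under_curve_validation_set(example_location, example_indices_info,
                                    example_k_info,
                                    best_lambda_euclidean=0.05,
                                    best_lambda_mahalanobis=0.05)

    # F1 over the test splits with a fixed threshold per approach.
    f1_score_test_sets(example_location, example_indices_info, n_splits=5,
                       k_info=example_k_info,
                       threshold_info=example_threshold_info,
                       best_lambda_euclidean=0.05,
                       best_lambda_mahalanobis=0.05)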