def test_synthetic_data():
    """Run the cross-validated fairness demo on the synthetic data.

    Generates the synthetic data set, computes the p-rule of the original
    labels, and then cross-validates (``NUM_FOLDS`` folds) a classifier using
    ``lf._logistic_loss`` twice: once with all constraint flags set to 0 and
    once with the fairness constraint enabled and a covariance factor of 0 for
    the sensitive attribute ``"s1"``.  Fairness statistics are printed after
    each run, and finally the covariance-threshold vs. accuracy trade-off
    plot is requested from ``ut``.

    Returns:
        None.  All results are reported via ``print`` and the ``ut`` helpers.
    """
    # Generate the synthetic data.
    X, y, x_control = generate_synthetic_data(plot_data=False)
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Classify the data without any constraints.
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0

    loss_function = lf._logistic_loss
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier

    # Only the test-side results are reported; the train-side ones are unused.
    test_acc_arr, _, correlation_dict_test_arr, _, cov_dict_test_arr, _ = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'], [{} for _ in range(NUM_FOLDS)])
    # print("") emits one blank line on both Python 2 and Python 3, whereas a
    # bare `print` statement is a silent no-op expression on Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now classify such that we achieve perfect fairness.
    apply_fairness_constraints = 1
    cov_factor = 0
    test_acc_arr, _, correlation_dict_test_arr, _, cov_dict_test_arr, _ = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'], [{'s1': cov_factor} for _ in range(NUM_FOLDS)])
    print("")
    print("== Constrained (fair) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now plot a tradeoff between the fairness and accuracy.
    # Note: the fairness flag is still 1 at this point.
    ut.plot_cov_thresh_vs_acc_pos_ratio(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'])
def test_synthetic_data():
    """Run the cross-validated fairness demo on the synthetic data.

    Prints ``sys.path``, generates the synthetic data set, computes the
    p-rule of the original labels, and then cross-validates (``NUM_FOLDS``
    folds) a classifier using ``lf._logistic_loss`` twice: once with all
    constraint flags set to 0 and once with the fairness constraint enabled
    and a covariance factor of 0 for the sensitive attribute ``"s1"``.
    Fairness statistics are printed after each run, and finally the
    covariance-threshold vs. accuracy trade-off plot is requested from ``ut``.

    Returns:
        None.  All results are reported via ``print`` and the ``ut`` helpers.
    """
    # NOTE(review): this dump of the import search path looks like leftover
    # debugging output -- confirm with the author before removing it.
    print(sys.path)

    # Generate the synthetic data.
    X, y, x_control = generate_synthetic_data(plot_data=False)
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Classify the data without any constraints.
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0

    loss_function = lf._logistic_loss
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier

    # Only the test-side results are reported; the train-side ones are unused.
    test_acc_arr, _, correlation_dict_test_arr, _, cov_dict_test_arr, _ = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'], [{} for _ in range(NUM_FOLDS)])
    # print("") emits one blank line on both Python 2 and Python 3, whereas a
    # bare `print` statement is a silent no-op expression on Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now classify such that we achieve perfect fairness.
    apply_fairness_constraints = 1
    cov_factor = 0
    test_acc_arr, _, correlation_dict_test_arr, _, cov_dict_test_arr, _ = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'], [{'s1': cov_factor} for _ in range(NUM_FOLDS)])
    print("")
    print("== Constrained (fair) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now plot a tradeoff between the fairness and accuracy.
    # Note: the fairness flag is still 1 at this point.
    ut.plot_cov_thresh_vs_acc_pos_ratio(
        X, y, x_control, NUM_FOLDS, loss_function,
        apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
        ['s1'])
def test_adult_data():
    """Train and report four classifier variants on the Adult data.

    Loads a 10000-row subsample of the Adult data, computes the p-rule of the
    original labels with respect to ``"sex"``, splits the data 70/30 into
    train and test, and then trains/evaluates a classifier using
    ``lf._logistic_loss`` under four settings:

    1. no constraints (the original classifier),
    2. fairness constraint with a covariance threshold of 0 for ``"sex"``,
    3. accuracy constraint with ``gamma = 0.5``,
    4. accuracy constraint plus the separate constraint, ``gamma = 1000.0``.

    Returns:
        None.  Statistics are printed by the ``ut`` helpers.
    """
    # Load the adult data.  Set load_data_size to None (or pass no argument)
    # to test with the whole data -- we are subsampling for performance speedup.
    X, y, x_control = load_adult_data(load_data_size=10000)
    ut.compute_p_rule(x_control["sex"], y)  # compute the p-rule in the original data

    # Split the data into train and test.
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    # These settings are read by train_test_classifier() through its closure
    # at call time, so each section below rebinds them before calling it.
    apply_fairness_constraints = None
    apply_accuracy_constraint = None
    sep_constraint = None

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]
    sensitive_attrs_to_cov_thresh = {}
    gamma = None

    def train_test_classifier():
        """Train with the current settings; return (w, p_rule, test_score)."""
        w = ut.train_model(
            x_train, y_train, x_control_train, loss_function,
            apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
            sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
        # Only the test score is used; the other three results are discarded.
        _, test_score, _, _ = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(
            None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(
            None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])
        return w, p_rule, test_score

    # Classify the data while optimizing for accuracy.
    # print("") emits one blank line on both Python 2 and Python 3, whereas a
    # bare `print` statement is a silent no-op expression on Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    # all constraint flags are set to 0 since we want to train an
    # unconstrained (original) classifier
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0
    # The returned (w, p_rule, accuracy) triple is not used further in this
    # function, so it is not bound to names.
    train_test_classifier()

    # Now classify such that we optimize for accuracy while achieving perfect
    # fairness.
    apply_fairness_constraints = 1  # optimize accuracy subject to fairness constraints
    apply_accuracy_constraint = 0
    sep_constraint = 0
    sensitive_attrs_to_cov_thresh = {"sex": 0}
    print("")
    print("== Classifier with fairness constraint ==")
    train_test_classifier()

    # Classify such that we optimize for fairness subject to a certain loss in
    # accuracy.
    apply_fairness_constraints = 0  # set back to 0 since we apply the accuracy constraint now
    apply_accuracy_constraint = 1  # optimize fairness subject to accuracy constraints
    sep_constraint = 0
    # gamma controls how much loss in accuracy we are willing to incur to
    # achieve fairness -- increase gamma to allow more loss in accuracy
    gamma = 0.5
    print("== Classifier with accuracy constraint ==")
    train_test_classifier()

    # Classify such that we optimize for fairness subject to a certain loss in
    # accuracy.  In addition, make sure that no points classified as positive
    # by the unconstrained (original) classifier are misclassified.
    apply_fairness_constraints = 0  # set back to 0 since we apply the accuracy constraint now
    apply_accuracy_constraint = 1  # optimize fairness subject to accuracy constraints
    # set the separate constraint flag to one, since in addition to accuracy
    # constraints, we also want no misclassifications for certain points
    # (details in demo README.md)
    sep_constraint = 1
    gamma = 1000.0
    print("== Classifier with accuracy constraint (no +ve misclassification) ==")
    train_test_classifier()
def test_synthetic_data():
    """Train, report and plot four classifier variants on the synthetic data.

    Generates (and plots) the synthetic data, computes the p-rule of the
    original labels with respect to ``"s1"``, splits the data 70/30 into train
    and test, and then trains/evaluates a classifier using
    ``lf._logistic_loss`` under four settings:

    1. no constraints (the original classifier),
    2. fairness constraint with a covariance threshold of 0 for ``"s1"``,
    3. accuracy constraint with ``gamma = 0.5``,
    4. accuracy constraint plus the separate constraint, ``gamma = 2000.0``.

    After each constrained run the decision boundary is drawn next to the
    unconstrained one and saved under ``img/``.

    Returns:
        None.  Statistics are printed by the ``ut`` helpers; figures are saved
        and shown via ``plt``.
    """
    # Generate the synthetic data.
    X, y, x_control = generate_synthetic_data(plot_data=True)  # set plot_data to False to skip the data plot
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Split the data into train and test.
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    train_fold_size = 0.7
    x_train, y_train, x_control_train, x_test, y_test, x_control_test = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    # These settings are read by train_test_classifier() through its closure
    # at call time, so each section below rebinds them before calling it.
    apply_fairness_constraints = None
    apply_accuracy_constraint = None
    sep_constraint = None

    loss_function = lf._logistic_loss
    sensitive_attrs = ["s1"]
    sensitive_attrs_to_cov_thresh = {}
    gamma = None

    def train_test_classifier():
        """Train with the current settings; return (w, p_rule, test_score)."""
        w = ut.train_model(
            x_train, y_train, x_control_train, loss_function,
            apply_fairness_constraints, apply_accuracy_constraint, sep_constraint,
            sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
        # Only the test score is used; the other three results are discarded.
        _, test_score, _, _ = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
        distances_boundary_test = (np.dot(x_test, w)).tolist()
        all_class_labels_assigned_test = np.sign(distances_boundary_test)
        correlation_dict_test = ut.get_correlations(
            None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
        cov_dict_test = ut.print_covariance_sensitive_attrs(
            None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])
        return w, p_rule, test_score

    def plot_boundaries(w1, w2, p1, p2, acc1, acc2, fname):
        """Scatter a sample of the data and draw the boundaries of w1 and w2.

        ``w1``/``p1``/``acc1`` are labelled "Original" and
        ``w2``/``p2``/``acc2`` "Constrained" in the legend.  The figure is
        saved to ``fname`` and then shown.
        """
        num_to_draw = 200  # we will only draw a small number of points to avoid clutter
        x_draw = X[:num_to_draw]
        y_draw = y[:num_to_draw]
        x_control_draw = x_control["s1"][:num_to_draw]

        X_s_0 = x_draw[x_control_draw == 0.0]
        X_s_1 = x_draw[x_control_draw == 1.0]
        y_s_0 = y_draw[x_control_draw == 0.0]
        y_s_1 = y_draw[x_control_draw == 1.0]

        # Columns 1 and 2 are plotted; presumably column 0 is the intercept
        # added by ut.add_intercept above -- TODO confirm.
        # s1 == 0 points are drawn as crosses, s1 == 1 points as hollow
        # circles; green marks label +1 and red marks label -1.
        plt.scatter(X_s_0[y_s_0==1.0][:, 1], X_s_0[y_s_0==1.0][:, 2], color='green', marker='x', s=30, linewidth=1.5)
        plt.scatter(X_s_0[y_s_0==-1.0][:, 1], X_s_0[y_s_0==-1.0][:, 2], color='red', marker='x', s=30, linewidth=1.5)
        plt.scatter(X_s_1[y_s_1==1.0][:, 1], X_s_1[y_s_1==1.0][:, 2], color='green', marker='o', facecolors='none', s=30)
        plt.scatter(X_s_1[y_s_1==-1.0][:, 1], X_s_1[y_s_1==-1.0][:, 2], color='red', marker='o', facecolors='none', s=30)

        x1, x2 = max(x_draw[:, 1]), min(x_draw[:, 1])
        y1, y2 = ut.get_line_coordinates(w1, x1, x2)
        plt.plot([x1, x2], [y1, y2], 'c-', linewidth=3, label="Acc=%0.2f; p%% rule=%0.0f%% - Original"%(acc1, p1))
        y1, y2 = ut.get_line_coordinates(w2, x1, x2)
        plt.plot([x1, x2], [y1, y2], 'b--', linewidth=3, label="Acc=%0.2f; p%% rule=%0.0f%% - Constrained"%(acc2, p2))

        # dont need the ticks to see the data distribution.
        # Booleans are used instead of the legacy 'off' strings: current
        # matplotlib treats any non-empty string as truthy, which would leave
        # the ticks and labels switched on.
        plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
        plt.legend(loc=2, fontsize=15)
        plt.xlim((-15, 10))
        plt.ylim((-10, 15))
        plt.savefig(fname)
        plt.show()

    # Classify the data while optimizing for accuracy.
    # print("") emits one blank line on both Python 2 and Python 3, whereas a
    # bare `print` statement is a silent no-op expression on Python 3.
    print("")
    print("== Unconstrained (original) classifier ==")
    # all constraint flags are set to 0 since we want to train an
    # unconstrained (original) classifier
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0
    w_uncons, p_uncons, acc_uncons = train_test_classifier()

    # Now classify such that we optimize for accuracy while achieving perfect
    # fairness.
    apply_fairness_constraints = 1  # optimize accuracy subject to fairness constraints
    apply_accuracy_constraint = 0
    sep_constraint = 0
    sensitive_attrs_to_cov_thresh = {"s1": 0}
    print("")
    print("== Classifier with fairness constraint ==")
    w_f_cons, p_f_cons, acc_f_cons = train_test_classifier()
    plot_boundaries(w_uncons, w_f_cons, p_uncons, p_f_cons, acc_uncons, acc_f_cons, "img/f_cons.png")

    # Classify such that we optimize for fairness subject to a certain loss in
    # accuracy.
    apply_fairness_constraints = 0  # set back to 0 since we apply the accuracy constraint now
    apply_accuracy_constraint = 1  # optimize fairness subject to accuracy constraints
    sep_constraint = 0
    # gamma controls how much loss in accuracy we are willing to incur to
    # achieve fairness -- increase gamma to allow more loss in accuracy
    gamma = 0.5
    print("== Classifier with accuracy constraint ==")
    w_a_cons, p_a_cons, acc_a_cons = train_test_classifier()
    plot_boundaries(w_uncons, w_a_cons, p_uncons, p_a_cons, acc_uncons, acc_a_cons, "img/a_cons.png")

    # Classify such that we optimize for fairness subject to a certain loss in
    # accuracy.  In addition, make sure that no points classified as positive
    # by the unconstrained (original) classifier are misclassified.
    apply_fairness_constraints = 0  # set back to 0 since we apply the accuracy constraint now
    apply_accuracy_constraint = 1  # optimize fairness subject to accuracy constraints
    # set the separate constraint flag to one, since in addition to accuracy
    # constraints, we also want no misclassifications for certain points
    # (details in demo README.md)
    sep_constraint = 1
    gamma = 2000.0
    print("== Classifier with accuracy constraint (no +ve misclassification) ==")
    w_a_cons_fine, p_a_cons_fine, acc_a_cons_fine = train_test_classifier()
    plot_boundaries(w_uncons, w_a_cons_fine, p_uncons, p_a_cons_fine, acc_uncons, acc_a_cons_fine, "img/a_cons_fine.png")
def test_adult_data():
    """Sweep fairness covariance thresholds on the Adult data.

    Loads the Adult data, computes the p-rule of the original labels with
    respect to ``"sex"``, splits 70/30 into train and test, trains one
    unconstrained classifier and then one fairness-constrained classifier per
    covariance threshold (``np.arange(0, 0.51, 0.1)`` followed by ``1``).
    For every trained model four fairness metrics are computed by the ``ut``
    helpers on the *training* split.

    Returns:
        An 8-tuple: the four metrics of the unconstrained model
        (``get_eq_op_acc``, ``get_eq_odds_acc``, ``get_pred_rate_par_acc``,
        ``get_dem_par_acc`` results, in that order) followed by four lists
        holding the same metrics for each constrained model, in sweep order.
    """
    # Load the adult data.  load_data_size=None is passed here; per the
    # upstream note, None (or no argument) selects the whole data set --
    # the behaviour itself lives in load_adult_data.
    X, y, x_control = load_adult_data(load_data_size=None)
    ut.compute_p_rule(x_control["sex"], y)  # p-rule in the original data

    # Split the data into train and test.
    X = ut.add_intercept(X)  # add intercept before applying the linear classifier
    train_fold_size = 0.7
    (x_train, y_train, x_control_train,
     x_test, y_test, x_control_test) = ut.split_into_train_test(
        X, y, x_control, train_fold_size)

    # Settings below are picked up by train_test_classifier() via its closure
    # each time it is called.
    apply_fairness_constraints = None
    apply_accuracy_constraint = None
    sep_constraint = None

    loss_function = lf._logistic_loss
    sensitive_attrs = ["sex"]
    sensitive_attrs_to_cov_thresh = {}
    gamma = None

    def train_test_classifier():
        """Train with the current settings and collect score + metrics."""
        w = ut.train_model(
            x_train, y_train, x_control_train, loss_function,
            apply_fairness_constraints, apply_accuracy_constraint,
            sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh,
            gamma)

        _, test_score, _, _ = ut.check_accuracy(
            w, x_train, y_train, x_test, y_test, None, None)

        boundary_distances = (np.dot(x_test, w)).tolist()
        predicted_labels = np.sign(boundary_distances)
        correlations = ut.get_correlations(
            None, None, predicted_labels, x_control_test, sensitive_attrs)
        covariances = ut.print_covariance_sensitive_attrs(
            None, x_test, boundary_distances, x_control_test,
            sensitive_attrs)
        p_rule = ut.print_classifier_fairness_stats(
            [test_score], [correlations], [covariances], sensitive_attrs[0])

        # Fairness metrics, all evaluated on the training split.
        eq_op_acc, _, _ = ut.get_eq_op_acc(
            w, x_train, y_train, x_control_train, None)
        eq_odds_acc = ut.get_eq_odds_acc(
            w, x_train, y_train, x_control_train, None)
        pred_rate_par_acc = ut.get_pred_rate_par_acc(
            w, x_train, y_train, x_control_train, None)
        dem_par_acc = ut.get_dem_par_acc(
            w, x_train, y_train, x_control_train, None)

        return (w, p_rule, test_score, eq_op_acc, eq_odds_acc,
                pred_rate_par_acc, dem_par_acc)

    # Classify the data while optimizing for accuracy.
    print()
    print("== Unconstrained (original) classifier ==")
    # all constraint flags are 0: train an unconstrained (original) classifier
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0
    (_, _, _, eq_op_acc_uncons, eq_odds_acc_uncons,
     pred_rate_par_acc_uncons, demo_par_acc_uncons) = train_test_classifier()

    # One result list per metric, filled in sweep order.
    eq_op_history = []
    eq_odds_history = []
    pred_rate_par_history = []
    demo_par_history = []

    def train_and_record():
        """Train once with the current settings and append its four metrics."""
        _, _, _, eq_op, eq_odds, pred_rate_par, demo_par = train_test_classifier()
        eq_op_history.append(eq_op)
        eq_odds_history.append(eq_odds)
        pred_rate_par_history.append(pred_rate_par)
        demo_par_history.append(demo_par)

    # Now classify such that we optimize for accuracy subject to the fairness
    # constraint, for each covariance threshold in turn.
    apply_fairness_constraints = 1
    apply_accuracy_constraint = 0
    sep_constraint = 0

    for num in np.arange(0, 0.51, 0.1):
        sensitive_attrs_to_cov_thresh = {"sex": num}
        print()
        print("== Classifier with fairness constraint, cov: ", num, " ==")
        train_and_record()

    # Final run with the threshold set to 1.
    sensitive_attrs_to_cov_thresh = {"sex": 1}
    print()
    print("== Classifier with fairness constraint, cov: 1 ==")
    train_and_record()

    return (eq_op_acc_uncons, eq_odds_acc_uncons, pred_rate_par_acc_uncons,
            demo_par_acc_uncons, eq_op_history, eq_odds_history,
            pred_rate_par_history, demo_par_history)