def test_synthetic_data():
    """Run the fairness-vs-accuracy demo on synthetic data.

    Trains an unconstrained logistic-regression classifier, then a
    fairness-constrained one (covariance threshold 0 => perfect fairness),
    prints p-rule / accuracy statistics for both, and finally plots the
    fairness-accuracy tradeoff.

    Relies on module-level names: ``generate_synthetic_data``, ``ut``
    (utils), ``lf`` (loss functions) and ``NUM_FOLDS``.
    """
    # Generate the synthetic data
    X, y, x_control = generate_synthetic_data(plot_data=False)
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Classify the data without any constraints
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0

    loss_function = lf._logistic_loss
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    # NOTE: was Python 2 `print` statements; converted to print() calls for
    # Python 3 compatibility (the sibling copy of this function already uses them).
    test_acc_arr, train_acc_arr, correlation_dict_test_arr, correlation_dict_train_arr, cov_dict_test_arr, cov_dict_train_arr = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function, apply_fairness_constraints,
        apply_accuracy_constraint, sep_constraint, ['s1'],
        [{} for i in range(0, NUM_FOLDS)])
    print()
    print("== Unconstrained (original) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now classify such that we achieve perfect fairness
    apply_fairness_constraints = 1
    cov_factor = 0  # covariance threshold 0 -> maximally fair solution
    test_acc_arr, train_acc_arr, correlation_dict_test_arr, correlation_dict_train_arr, cov_dict_test_arr, cov_dict_train_arr = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function, apply_fairness_constraints,
        apply_accuracy_constraint, sep_constraint, ['s1'],
        [{'s1': cov_factor} for i in range(0, NUM_FOLDS)])
    print()
    print("== Constrained (fair) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now plot a tradeoff between the fairness and accuracy
    ut.plot_cov_thresh_vs_acc_pos_ratio(X, y, x_control, NUM_FOLDS, loss_function,
                                        apply_fairness_constraints,
                                        apply_accuracy_constraint, sep_constraint, ['s1'])
# Пример #2 (Example #2) — snippet separator from the source listing
# 0  (vote-count artifact from the scrape)
def test_synthetic_data():
    """Run the fairness-vs-accuracy demo on synthetic data.

    Trains an unconstrained logistic-regression classifier, then a
    fairness-constrained one (covariance threshold 0 => perfect fairness),
    prints p-rule / accuracy statistics for both, and finally plots the
    fairness-accuracy tradeoff.

    Relies on module-level names: ``generate_synthetic_data``, ``ut``
    (utils), ``lf`` (loss functions), ``sys`` and ``NUM_FOLDS``.
    """
    # Generate the synthetic data
    print(sys.path)
    X, y, x_control = generate_synthetic_data(plot_data=False)
    ut.compute_p_rule(x_control["s1"], y)  # compute the p-rule in the original data

    # Classify the data without any constraints
    apply_fairness_constraints = 0
    apply_accuracy_constraint = 0
    sep_constraint = 0

    loss_function = lf._logistic_loss
    X = ut.add_intercept(X)  # add intercept to X before applying the linear classifier
    # NOTE: the original mixed print() calls with Python 2 `print` statements;
    # unified on print() so the function is valid Python 3.
    test_acc_arr, train_acc_arr, correlation_dict_test_arr, correlation_dict_train_arr, cov_dict_test_arr, cov_dict_train_arr = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function, apply_fairness_constraints, apply_accuracy_constraint,
        sep_constraint, ['s1'], [{} for i in range(0, NUM_FOLDS)])
    print()
    print("== Unconstrained (original) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now classify such that we achieve perfect fairness
    apply_fairness_constraints = 1
    cov_factor = 0  # covariance threshold 0 -> maximally fair solution
    test_acc_arr, train_acc_arr, correlation_dict_test_arr, correlation_dict_train_arr, cov_dict_test_arr, cov_dict_train_arr = ut.compute_cross_validation_error(
        X, y, x_control, NUM_FOLDS, loss_function, apply_fairness_constraints, apply_accuracy_constraint,
        sep_constraint, ['s1'], [{'s1': cov_factor} for i in range(0, NUM_FOLDS)])
    print()
    print("== Constrained (fair) classifier ==")
    ut.print_classifier_fairness_stats(test_acc_arr, correlation_dict_test_arr, cov_dict_test_arr, "s1")

    # Now plot a tradeoff between the fairness and accuracy
    ut.plot_cov_thresh_vs_acc_pos_ratio(X, y, x_control, NUM_FOLDS, loss_function, apply_fairness_constraints,
                                        apply_accuracy_constraint, sep_constraint, ['s1'])
 def train_test_classifier():
     """Fit a model under the current constraint settings and score it.

     Reads the train/test splits and constraint flags from the enclosing
     scope, prints fairness statistics as a side effect, and returns the
     weight vector together with the p-rule, test accuracy and several
     fairness-accuracy measures.
     """
     model_w = ut.train_model(x_train, y_train, x_control_train,
                              loss_function, apply_fairness_constraints,
                              apply_accuracy_constraint, sep_constraint,
                              sensitive_attrs, sensitive_attrs_to_cov_thresh,
                              gamma)
     train_score, test_score, correct_answers_train, correct_answers_test = \
         ut.check_accuracy(model_w, x_train, y_train, x_test, y_test, None, None)

     # Signed distances to the decision boundary on the test split; their
     # signs are the predicted class labels.
     test_distances = np.dot(x_test, model_w).tolist()
     predicted_test_labels = np.sign(test_distances)

     test_correlations = ut.get_correlations(None, None, predicted_test_labels,
                                             x_control_test, sensitive_attrs)
     test_cov = ut.print_covariance_sensitive_attrs(None, x_test, test_distances,
                                                    x_control_test, sensitive_attrs)
     p_rule = ut.print_classifier_fairness_stats([test_score], [test_correlations],
                                                 [test_cov], sensitive_attrs[0])

     # Fairness/accuracy measures evaluated on the training split.
     eq_op_acc, chance_bin_zero, chance_bin_one = ut.get_eq_op_acc(
         model_w, x_train, y_train, x_control_train, None)
     eq_odds_acc = ut.get_eq_odds_acc(model_w, x_train, y_train,
                                      x_control_train, None)
     pred_rate_par_acc = ut.get_pred_rate_par_acc(model_w, x_train, y_train,
                                                  x_control_train, None)
     demo_par_acc_f_cons = ut.get_dem_par_acc(model_w, x_train, y_train,
                                              x_control_train, None)

     return (model_w, p_rule, test_score, eq_op_acc, eq_odds_acc,
             pred_rate_par_acc, demo_par_acc_f_cons)
def test_classifier(w, x, y, control, sensitive_attrs):
    """Evaluate weight vector ``w`` on (x, y) and print fairness stats.

    Predictions are the signs of the distances to the decision boundary.
    Returns a (p_rule, test_score) pair as reported by the ``ut`` helpers.
    """
    boundary_distances = np.dot(x, w).tolist()
    predicted_labels = np.sign(boundary_distances)
    test_score, _ = check_accuracy(w, x, y)
    correlations = ut.get_correlations(None, None, predicted_labels,
                                       control, sensitive_attrs)
    covariances = ut.print_covariance_sensitive_attrs(None, x, boundary_distances,
                                                      control, sensitive_attrs)
    p_rule = ut.print_classifier_fairness_stats([test_score], [correlations],
                                                [covariances], sensitive_attrs[0])
    return p_rule, test_score
	def train_test_classifier():
		"""Train a model with the current constraint settings and report fairness.

		Uses closure variables from the enclosing scope (x_train, y_train,
		x_control_train, x_test, y_test, x_control_test, loss_function,
		constraint flags, sensitive_attrs, sensitive_attrs_to_cov_thresh,
		gamma). Prints fairness statistics and returns
		(w, p_rule, test_score).
		"""
		w = ut.train_model(x_train, y_train, x_control_train, loss_function, apply_fairness_constraints, apply_accuracy_constraint, sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
		train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
		# Signed distances to the decision boundary; their signs are the predictions.
		distances_boundary_test = (np.dot(x_test, w)).tolist()
		all_class_labels_assigned_test = np.sign(distances_boundary_test)
		correlation_dict_test = ut.get_correlations(None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
		cov_dict_test = ut.print_covariance_sensitive_attrs(None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
		# Stats are printed for the first sensitive attribute only.
		p_rule = ut.print_classifier_fairness_stats([test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])	
		return w, p_rule, test_score
# Пример #6 (Example #6) — snippet separator from the source listing
# 0  (vote-count artifact from the scrape)
 def train_test_classifier():
     """Train a constrained model and return predicted probabilities.

     Variant of train_test_classifier that, instead of the weight vector
     and scores, returns sigmoid-transformed boundary distances for the
     train and test splits. Uses closure variables from the enclosing
     scope (x_train, y_train, x_control_train, x_test, y_test,
     x_control_test, loss_function, constraint flags, sensitive_attrs,
     sensitive_attrs_to_cov_thresh, gamma).
     """
     w = ut.train_model(x_train, y_train, x_control_train, loss_function, apply_fairness_constraints, apply_accuracy_constraint, sep_constraint, sensitive_attrs, sensitive_attrs_to_cov_thresh, gamma)
     train_score, test_score, correct_answers_train, correct_answers_test = ut.check_accuracy(w, x_train, y_train, x_test, y_test, None, None)
     # Signed distances to the decision boundary for both splits.
     distances_boundary_test = np.dot(x_test, w)
     distances_boundary_train = np.dot(x_train, w)
     # NOTE(review): `sigmoid` is not defined in this snippet — presumably a
     # module-level helper mapping distances to probabilities; confirm.
     prob_test = [sigmoid(x) for x in distances_boundary_test]
     prob_train = [sigmoid(x) for x in distances_boundary_train]
     all_class_labels_assigned_test = np.sign(distances_boundary_test)
     correlation_dict_test = ut.get_correlations(None, None, all_class_labels_assigned_test, x_control_test, sensitive_attrs)
     cov_dict_test = ut.print_covariance_sensitive_attrs(None, x_test, distances_boundary_test, x_control_test, sensitive_attrs)
     # Printed for the first sensitive attribute; p_rule itself is unused here.
     p_rule = ut.print_classifier_fairness_stats([test_score], [correlation_dict_test], [cov_dict_test], sensitive_attrs[0])	
     # return w, p_rule, test_score
     return prob_train, prob_test