def run_clean(fairness_constraints):
    """Report the best accuracy reachable under each fairness threshold.

    An unconstrained logistic regression and every predictor produced by a
    ``GridSearch`` sweep (``EqualizedOdds`` constraint, 71 grid points) are
    fitted on ``(X_train, Y_train)``. Each model is then scored on both
    splits with the module-level ``accuracy`` and ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of violation thresholds.

    Returns:
        ``(best_train, best_test)``: two lists aligned with
        ``fairness_constraints``. Each entry is the highest accuracy among
        the models whose violation on that split is at or below the
        threshold, and ``0.0`` when no model qualifies.
    """
    print("Start running experiment with clean data.")

    def new_base_learner():
        return LogisticRegression(solver='liblinear', fit_intercept=True)

    def score(predicted, labels, groups):
        return {
            'accuracy': accuracy(predicted, labels),
            'violation': violation(predicted, labels, groups),
        }

    def best_feasible(scores, limit):
        # Seeding with 0.0 keeps the floor used when nothing is feasible;
        # max() keeps the first maximal value, like a strict ">" scan.
        return max([0.0] + [
            entry['accuracy'] for entry in scores
            if entry['violation'] <= limit
        ])

    baseline = new_base_learner()
    baseline.fit(X_train, Y_train)

    grid = GridSearch(new_base_learner(),
                      constraints=EqualizedOdds(),
                      grid_size=71)
    grid.fit(X_train, Y_train, sensitive_features=A_train)

    # The unmitigated model goes first, followed by every grid candidate.
    candidates = [baseline]
    candidates.extend(entry.predictor for entry in grid.all_results)

    train_scores, test_scores = [], []
    for model in candidates:
        fitted_train = model.predict(X_train)
        fitted_test = model.predict(X_test)
        train_scores.append(score(fitted_train, Y_train, A_train))
        test_scores.append(score(fitted_test, Y_test, A_test))

    best_train, best_test = [], []
    for limit in fairness_constraints:
        best_train.append(best_feasible(train_scores, limit))
        best_test.append(best_feasible(test_scores, limit))

    return best_train, best_test
示例#2
0
def run_corrupt(fairness_constraints):
    """Fit equalized-odds mitigators on the noisy labels, one per ``eps``.

    For every ``eps`` an ``ExponentiatedGradient`` reduction (logistic
    regression base learner, ``EqualizedOdds`` constraint) is fitted on
    ``(X_train, Y_noised, A_train)`` and scored against ``Y_train`` and
    ``Y_test`` with the module-level ``accuracy`` / ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of ``eps`` values passed to
            ``ExponentiatedGradient``.

    Returns:
        dict with ``'eps'`` (the input) and, for each of ``'accuracy'``,
        ``'violation'``, ``'violation_male'`` (``grp=1``) and
        ``'violation_female'`` (``grp=0``), a ``{'train': [...],
        'test': [...]}`` dict with one entry per ``eps``. A fit that raises
        contributes ``nan`` to every list. ``'accuracy'`` and
        ``'violation'`` additionally carry ``'mean'`` / ``'std'`` of the
        test values, computed over the fits that succeeded.
    """
    metric_names = ('accuracy', 'violation', 'violation_male',
                    'violation_female')
    all_results = {'eps': fairness_constraints}
    for metric in metric_names:
        all_results[metric] = {'train': [], 'test': []}

    print("[INFO][RUN] Corrupt")
    for eps in fairness_constraints:
        begin = time.time()

        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=EqualizedOdds(),
            eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception as exc:
            # A failed fit must not be scored with the predictions of a
            # previous eps (or hit an unbound name on the first one).
            # Record NaN so every list stays aligned with all_results['eps'].
            print(f"Fairlearn can't fit at fairness constraint {eps}: {exc!r}")
            for metric in metric_names:
                for split in ('train', 'test'):
                    all_results[metric][split].append(float('nan'))
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    # nan-aware statistics: NaN placeholders of failed fits are ignored.
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = np.nanmean(acc)
    all_results['accuracy']['std'] = np.nanstd(acc)
    all_results['violation']['mean'] = np.nanmean(v)
    all_results['violation']['std'] = np.nanstd(v)
    return all_results
def run(fairness_constraints, use_proxy=False):
    """Fit one mitigator per ``eps`` on the noisy labels and score it.

    Each ``ExponentiatedGradient`` wraps a logistic regression and is
    fitted on ``(X_train, Y_noised, A_train)``. Predictions are scored
    against ``Y_train`` / ``Y_test`` with the module-level ``accuracy`` and
    ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of ``eps`` values passed to
            ``ExponentiatedGradient``.
        use_proxy: When true the constraint is
            ``ProxyEqualizedOdds(error_rate=error_rate)``; otherwise plain
            ``EqualizedOdds()``.

    Returns:
        dict with ``'eps'`` (the input) and, for each of ``'accuracy'``,
        ``'violation'``, ``'violation_male'`` (``grp=1``) and
        ``'violation_female'`` (``grp=0``), a ``{'train': [...],
        'test': [...]}`` dict with one entry per ``eps``. A fit that raises
        contributes ``nan`` to every list.
    """
    print(f"Start running experiment with Proxy: {use_proxy}.")
    metric_names = ('accuracy', 'violation', 'violation_male',
                    'violation_female')
    all_results = {'eps': fairness_constraints}
    for metric in metric_names:
        all_results[metric] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        if use_proxy:
            constraints = ProxyEqualizedOdds(error_rate=error_rate)
        else:
            constraints = EqualizedOdds()
        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=constraints,
            eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception as exc:
            # A failed fit must not be scored with the predictions of a
            # previous eps (or hit an unbound name on the first one).
            # Record NaN so every list stays aligned with all_results['eps'].
            print(f"Fairlearn can't fit at fairness constraint {eps}: {exc!r}")
            for metric in metric_names:
                for split in ('train', 'test'):
                    all_results[metric][split].append(float('nan'))
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        # Report the values for this eps only, not the accumulated lists.
        print(
            f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}"
        )

    return all_results
示例#4
0
def run_surrogate(fairness_constraints, est=False):
    """Fit a surrogate-loss learner under a proxy fairness constraint per ``eps``.

    Each ``ExponentiatedGradient`` wraps a ``SurrogateLoss`` around a
    logistic regression, uses ``ProxyEqualizedOdds(error_rate=error_rate)``
    and is fitted on ``(X_train, Y_noised, A_train)``. Predictions are
    scored against ``Y_train`` / ``Y_test`` with the module-level
    ``accuracy`` and ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of ``eps`` values passed to
            ``ExponentiatedGradient``.
        est: When false the module-level ``noise_matrix`` is handed to
            ``SurrogateLoss``; when true it is omitted (presumably
            ``SurrogateLoss`` then estimates it itself - confirm).

    Returns:
        dict with ``'eps'`` (the input) and, for each of ``'accuracy'``,
        ``'violation'``, ``'violation_male'`` (``grp=1``) and
        ``'violation_female'`` (``grp=0``), a ``{'train': [...],
        'test': [...]}`` dict with one entry per ``eps``. ``'accuracy'``
        and ``'violation'`` additionally carry ``'mean'`` / ``'std'`` of
        the test values.
    """
    print("[INFO][RUN] Surrogate Loss.")
    all_results = {'eps': fairness_constraints}
    for metric in ('accuracy', 'violation', 'violation_male',
                   'violation_female'):
        all_results[metric] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        base_clf = LogisticRegression(solver='liblinear', fit_intercept=True)
        if not est:
            surrogate_clf = SurrogateLoss(clf=base_clf,
                                          noise_matrix=noise_matrix)
        else:
            surrogate_clf = SurrogateLoss(clf=base_clf)

        sweep = ExponentiatedGradient(
            surrogate_clf,
            constraints=ProxyEqualizedOdds(error_rate=error_rate),
            eps=eps)

        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        # Per-group entries hold the group-restricted violation, as in the
        # sibling runners; they were previously filled with accuracy.
        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
示例#5
0
def run_peerloss(fairness_constraints, alpha=0.5, est=False):
    """Fit a peer-loss learner under an equalized-odds constraint per ``eps``.

    Each ``ExponentiatedGradient`` wraps
    ``PeerLoss(A_train, delta=delta, alpha=alpha)``, uses
    ``EqualizedOdds()`` and is fitted on ``(X_train, Y_noised, A_train)``.
    Predictions are scored against ``Y_train`` / ``Y_test`` with the
    module-level ``accuracy`` and ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of ``eps`` values passed to
            ``ExponentiatedGradient``.
        alpha: Forwarded unchanged to ``PeerLoss``.
        est: Selects the rate table used to derive ``delta``: the
            module-level ``est_error_rate`` when true, ``error_rate``
            otherwise.

    Returns:
        dict with ``'eps'`` (the input) and, for each of ``'accuracy'``,
        ``'violation'``, ``'violation_male'`` (``grp=1``) and
        ``'violation_female'`` (``grp=0``), a ``{'train': [...],
        'test': [...]}`` dict with one entry per ``eps``. ``'accuracy'``
        and ``'violation'`` additionally carry ``'mean'`` / ``'std'`` of
        the test values.
    """
    print(f"[INFO][RUN] Peer Loss with alpha = {alpha}")
    all_results = {'eps': fairness_constraints}
    for metric in ('accuracy', 'violation', 'violation_male',
                   'violation_female'):
        all_results[metric] = {'train': [], 'test': []}

    # delta[g] = 1 - rates[g][0] - rates[g][1]. Indexing by position rather
    # than iterating keeps this working if the table is keyed by 0..n-1.
    rates = est_error_rate if est else error_rate
    delta = [1 - rates[g][0] - rates[g][1] for g in range(len(rates))]

    for eps in fairness_constraints:
        begin = time.time()

        sweep = ExponentiatedGradient(
            PeerLoss(A_train, delta=delta, alpha=alpha),
            constraints=EqualizedOdds(),
            eps=eps)

        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        # Per-group entries hold the group-restricted violation, as in the
        # sibling runners; they were previously filled with accuracy.
        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
def run_estimation(fairness_constraints, isEstimate=True):
    """Run the ``ProxyEqualizedOdds2`` constraint with an estimated or known delta.

    Each ``ExponentiatedGradient`` wraps a logistic regression, uses
    ``ProxyEqualizedOdds2(delta=delta)`` and is fitted on
    ``(X_train, Y_noised, A_train)``. Predictions are scored against
    ``Y_train`` / ``Y_test`` with the module-level ``accuracy`` and
    ``violation`` helpers.

    Args:
        fairness_constraints: Iterable of ``eps`` values passed to
            ``ExponentiatedGradient``.
        isEstimate: When true, ``delta`` is estimated from the noisy
            training data with a same-group nearest-neighbour statistic;
            otherwise it is ``1 - error_rate[g][0] - error_rate[g][1]``
            for groups 0 and 1.

    Returns:
        dict with ``'eps'`` (the input) and, for each of ``'accuracy'``,
        ``'violation'``, ``'violation_male'`` (``grp=1``) and
        ``'violation_female'`` (``grp=0``), a ``{'train': [...],
        'test': [...]}`` dict with one entry per ``eps``. A fit that raises
        contributes ``nan`` to every list.
    """

    def NearestNeighbor(X, A, i):
        """Return the index of the sample nearest to ``X[i]`` in its own group.

        Only samples ``j != i`` with ``A[j] == A[i]`` are considered and
        ties go to the lowest index. Returns ``None`` when the group has no
        other member.
        """
        same_group = np.flatnonzero(A == A[i])
        same_group = same_group[same_group != i]
        if same_group.size == 0:
            return None
        offsets = (X[same_group] - X[i]).reshape(same_group.size, -1)
        distances = np.linalg.norm(offsets, axis=1)
        return int(same_group[np.argmin(distances)])

    def estimate_delta(X, A, Y):
        """Estimate the per-group delta for the two groups 0 and 1.

        For each group, ``c1`` is twice the fraction of samples with
        ``Y == 1`` and ``c2`` is twice the fraction of samples with
        ``Y == 1`` whose same-group nearest neighbour also has ``Y == 1``.
        The result is ``sqrt(2 * c2 - c1 ** 2)`` per group.
        """
        # Work on positional arrays so that X, A and Y are indexed alike
        # even if a pandas object with a non-default index is passed.
        X = np.asarray(X, dtype=float)
        A = np.asarray(A).ravel()
        Y = np.asarray(Y).ravel()

        c1 = np.zeros(2)
        t = np.zeros(2)
        num = np.zeros(2)
        for i in range(len(X)):
            group = int(A[i])
            num[group] += 1.
            if Y[i] == 1:
                c1[group] += 1
                j = NearestNeighbor(X, A, i)
                # A sample without a same-group neighbour adds no agreement.
                if j is not None:
                    t[group] += Y[i] == Y[j]
        c1 = 2 * c1 / num
        c2 = 2 * t / num
        print(f"c1: {c1}, c2: {c2}")
        return np.sqrt(2 * c2 - c1 * c1)

    if isEstimate:
        print("Start running proxy fairness constraint with estimated delta.")
        delta = estimate_delta(X_train.values, A_train.values, Y_noised)
        print(f"Estimated delta is {delta}.")
    else:
        print("Start running proxy fairness constraint with known delta.")
        delta = np.array([
            1 - error_rate[0][0] - error_rate[0][1],
            1 - error_rate[1][0] - error_rate[1][1]
        ])
        print(f"The known delta is {delta}.")

    metric_names = ('accuracy', 'violation', 'violation_male',
                    'violation_female')
    all_results = {'eps': fairness_constraints}
    for metric in metric_names:
        all_results[metric] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=ProxyEqualizedOdds2(delta=delta),
            eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception as exc:
            # A failed fit must not be scored with the predictions of a
            # previous eps (or hit an unbound name on the first one).
            # Record NaN so every list stays aligned with all_results['eps'].
            print(f"Fairlearn can't fit at fairness constraint {eps}: {exc!r}")
            for metric in metric_names:
                for split in ('train', 'test'):
                    all_results[metric][split].append(float('nan'))
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        # Report the values for this eps only, not the accumulated lists.
        print(
            f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}"
        )

    return all_results
示例#7
0
# Sweep every (alpha, eps) pair: fit a PeerLoss learner under the proxy
# equalized-odds constraint on the noisy labels, score it on both splits,
# and log one row per pair. `alphas`, `delta`, `fp`, `all_results_train`
# and `all_results_test` are set up earlier in the script (not shown here).
for i in range(len(alphas)):
    alpha = alphas[i]
    for eps in fairness_constraints:
        sweep = ExponentiatedGradient(
            PeerLoss(A_train, delta, alpha=alpha),
            constraints=ProxyEqualizedOdds(error_rate=error_rate),
            # constraints=EqualizedOdds(),
            eps=eps)
        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        # Predictions are scored against Y_train / Y_test, not Y_noised.
        accuracy_train = accuracy(prediction_train, Y_train)
        accuracy_test = accuracy(prediction_test, Y_test)
        violation_train = violation(prediction_train, Y_train, A_train)
        violation_test = violation(prediction_test, Y_test, A_test)
        # Only the accuracies are kept in memory (row i belongs to alphas[i]);
        # the violations are just printed and written to fp below.
        all_results_train[i].append(accuracy_train)
        all_results_test[i].append(accuracy_test)

        print(
            f"Running alpha {alpha}, fairness constraint {eps}, Train Accuracy {accuracy_train}, Test Accuracy {accuracy_test}, Train Violation {violation_train}, Test Violation {violation_test}."
        )
        # One comma-separated row per (alpha, eps) pair:
        # alpha, eps, train accuracy, test accuracy, train violation, test violation.
        fp.write(
            f"{alpha},{eps},{accuracy_train},{accuracy_test},{violation_train},{violation_test}\n"
        )
# fp was opened before this chunk; it is closed once the sweep is done.
fp.close()

plt.style.use('seaborn')
for i in range(len(alphas)):
    plt.plot(fairness_constraints,