Example #1
def test_threshold_optimization_equalized_odds_e2e(
        sensitive_features, sensitive_feature_names, expected_positive_p0,
        expected_positive_p1, expected_negative_p0, expected_negative_p1,
        X_transform, y_transform, sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor._pmf_predict(
        X, sensitive_features=sensitive_features_)

    # assert equalized odds
    for a in sensitive_feature_names:
        positive_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 1)
        negative_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 0)
        average_probs_positive_indices = np.average(
            predictions[positive_indices], axis=0)
        average_probs_negative_indices = np.average(
            predictions[negative_indices], axis=0)
        assert np.isclose(average_probs_positive_indices[0],
                          expected_positive_p0)
        assert np.isclose(average_probs_positive_indices[1],
                          expected_positive_p1)
        assert np.isclose(average_probs_negative_indices[0],
                          expected_negative_p0)
        assert np.isclose(average_probs_negative_indices[1],
                          expected_negative_p1)
def test_predict_different_argument_lengths(data_X_y_sf, constraints):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex),
        constraints=constraints,
        predict_method="predict",
    )
    adjusted_predictor.fit(data_X_y_sf.X,
                           data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    with pytest.raises(
            ValueError,
            match="Found input variables with inconsistent numbers of samples"
    ):
        adjusted_predictor.predict(
            data_X_y_sf.X,
            sensitive_features=data_X_y_sf.sensitive_features[:-1])

    with pytest.raises(
            ValueError,
            match="Found input variables with inconsistent numbers of samples"
    ):
        adjusted_predictor.predict(
            data_X_y_sf.X[:-1],
            sensitive_features=data_X_y_sf.sensitive_features)
Example #3
def test_threshold_optimization_different_input_lengths(
        X_transform, y_transform, sensitive_features_transform, constraints):
    n = len(sensitive_features_ex1)
    for permutation in [(0, 1), (1, 0)]:
        with pytest.raises(ValueError,
                           match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE.format(
                               "X, sensitive_features, and y")):
            X = X_transform(
                _format_as_list_of_lists(sensitive_features_ex1)
                [:n - permutation[0]])
            y = y_transform(labels_ex[:n - permutation[1]])
            sensitive_features = sensitive_features_transform(
                sensitive_features_ex1)

            adjusted_predictor = ThresholdOptimizer(
                unconstrained_predictor=ExamplePredictor(),
                constraints=constraints)
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)

    # try providing empty lists in all combinations
    for permutation in [(0, n), (n, 0)]:
        X = X_transform(
            _format_as_list_of_lists(sensitive_features_ex1)[:n -
                                                             permutation[0]])
        y = y_transform(labels_ex[:n - permutation[1]])
        sensitive_features = sensitive_features_transform(
            sensitive_features_ex1)

        adjusted_predictor = ThresholdOptimizer(
            unconstrained_predictor=ExamplePredictor(),
            constraints=constraints)
        with pytest.raises(ValueError, match=EMPTY_INPUT_ERROR_MESSAGE):
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
def test_threshold_optimization_different_input_lengths(data_X_y_sf, constraints):
    n = len(X_ex)
    expected_exception_messages = {
        "inconsistent": 'Found input variables with inconsistent numbers of samples',
        "empty": 'Found array with 0 sample'
    }
    for permutation in [(0, 1), (1, 0)]:
        with pytest.raises(ValueError,
                           match=expected_exception_messages['inconsistent']):
            adjusted_predictor = ThresholdOptimizer(
                estimator=ExamplePredictor(scores_ex),
                constraints=constraints,
                predict_method='predict')
            adjusted_predictor.fit(data_X_y_sf.X[:n - permutation[0]],
                                   data_X_y_sf.y[:n - permutation[1]],
                                   sensitive_features=data_X_y_sf.sensitive_features)

    # try providing empty lists in all combinations
    for permutation in [(0, n, 'inconsistent'), (n, 0, 'empty')]:
        adjusted_predictor = ThresholdOptimizer(
            estimator=ExamplePredictor(scores_ex),
            constraints=constraints,
            predict_method='predict')
        with pytest.raises(ValueError, match=expected_exception_messages[permutation[2]]):
            adjusted_predictor.fit(data_X_y_sf.X[:n - permutation[0]],
                                   data_X_y_sf.y[:n - permutation[1]],
                                   sensitive_features=data_X_y_sf.sensitive_features)
def test_constraints_objective_pairs(constraints, objective):
    X = pd.Series(
        [0, 1, 2, 3, 4, 0, 1, 2, 3]).to_frame()
    sf = pd.Series(
        [0, 0, 0, 0, 0, 1, 1, 1, 1])
    y = pd.Series(
        [1, 0, 1, 1, 1, 0, 1, 1, 1])
    thr_optimizer = ThresholdOptimizer(
        estimator=PassThroughPredictor(),
        constraints=constraints,
        objective=objective,
        grid_size=20,
        predict_method='predict')
    expected = results[constraints+", "+objective]
    if type(expected) is str:
        with pytest.raises(ValueError) as error_info:
            thr_optimizer.fit(X, y, sensitive_features=sf)
        assert str(error_info.value).startswith(expected)
    else:
        thr_optimizer.fit(X, y, sensitive_features=sf)
        res = thr_optimizer.interpolated_thresholder_.interpolation_dict
        for key in [0, 1]:
            assert res[key]['p0'] == pytest.approx(expected[key]['p0'], PREC)
            assert res[key]['operation0']._operator == expected[key]['op0']
            assert res[key]['operation0']._threshold == pytest.approx(expected[key]['thr0'], PREC)
            assert res[key]['p1'] == pytest.approx(expected[key]['p1'], PREC)
            assert res[key]['operation1']._operator == expected[key]['op1']
            assert res[key]['operation1']._threshold == pytest.approx(expected[key]['thr1'], PREC)
            if 'p_ignore' in expected[key]:
                assert res[key]['p_ignore'] == pytest.approx(expected[key]['p_ignore'], PREC)
                assert res[key]['prediction_constant'] == \
                    pytest.approx(expected[key]['prediction_constant'], PREC)
            else:
                assert 'p_ignore' not in res[key]
Example #6
def test_threshold_optimization_equalized_odds_e2e(data_X_y_sf):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex), constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(data_X_y_sf.X,
                           data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)

    expected_ps = _expected_ps_equalized_odds[data_X_y_sf.example_name]
    mapped_sensitive_features = _map_into_single_column(
        data_X_y_sf.sensitive_features)

    # assert equalized odds
    for a in data_X_y_sf.feature_names:
        pos_indices = (mapped_sensitive_features == a) * (labels_ex == 1)
        neg_indices = (mapped_sensitive_features == a) * (labels_ex == 0)
        average_probs_positive_indices = np.average(predictions[pos_indices],
                                                    axis=0)
        average_probs_negative_indices = np.average(predictions[neg_indices],
                                                    axis=0)
        assert np.isclose(average_probs_positive_indices[0],
                          expected_ps[_POS_P0])
        assert np.isclose(average_probs_positive_indices[1],
                          expected_ps[_POS_P1])
        assert np.isclose(average_probs_negative_indices[0],
                          expected_ps[_NEG_P0])
        assert np.isclose(average_probs_negative_indices[1],
                          expected_ps[_NEG_P1])
def test_random_state_threshold_optimizer():
    """Test that the random_state argument works as expected.

    This test case reproduces the problem reported in issue 588 if the
    random_state does not work as intended within ThresholdOptimizer.
    https://github.com/fairlearn/fairlearn/issues/588
    """
    X_train, X_test, y_train, y_test, race_train, race_test = _get_test_data()

    # Train a simple logistic regression model
    lr = LogisticRegression(max_iter=1000, random_state=0)
    lr.fit(X_train, y_train)

    # Train threshold optimizer
    to = ThresholdOptimizer(estimator=lr,
                            constraints='equalized_odds',
                            grid_size=1000)
    to.fit(X_train, y_train, sensitive_features=race_train)

    # score groups
    y_pred_test = to.predict(X_test,
                             sensitive_features=race_test,
                             random_state=0)
    for _ in range(100):
        assert (y_pred_test == to.predict(X_test,
                                          sensitive_features=race_test,
                                          random_state=0)).all()
    assert (y_pred_test != to.predict(
        X_test, sensitive_features=race_test, random_state=1)).any()
def thresholdOptimizer(X_train, Y_train, A_train, model, constraint):
    """
    Parameters:
    X_train: training features (pandas DataFrame)
    Y_train: training labels (0/1 ground truth)
    A_train: sensitive features for the training data
    model: estimator to post-process
    constraint: either "demographic_parity" or "equalized_odds"

    Returns the predictions of the post-processed model on the test set
    (X_test and A_test must be available in the enclosing scope).
    """
    postprocess_est = ThresholdOptimizer(estimator=model,
                                         constraints=constraint)

    # A balanced data set is obtained by sampling the same number of points
    # from the majority class (Y=0) as there are points in the minority
    # class (Y=1).

    Y_train = pd.Series(Y_train)
    balanced_idx1 = X_train[Y_train == 1].index
    pp_train_idx = balanced_idx1.union(Y_train[Y_train == 0].sample(
        n=balanced_idx1.size, random_state=1234).index)

    X_train_balanced = X_train.loc[pp_train_idx, :]
    Y_train_balanced = Y_train.loc[pp_train_idx]
    A_train_balanced = A_train.loc[pp_train_idx]

    postprocess_est.fit(X_train_balanced,
                        Y_train_balanced,
                        sensitive_features=A_train_balanced)

    postprocess_preds = postprocess_est.predict(X_test,
                                                sensitive_features=A_test)

    return postprocess_preds
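A hypothetical call of the helper above, for illustration only: the synthetic data, the LogisticRegression estimator, and the 300/100 split are assumptions, not part of the original snippet. Note that X_test and A_test have to live in the same module, because the helper reads them from the enclosing scope.

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Synthetic data: one informative feature plus a binary sensitive attribute.
rng = np.random.RandomState(0)
n = 400
group = pd.Series(rng.randint(0, 2, n), name="group")
features = pd.DataFrame({"x1": rng.normal(size=n), "group": group})
labels = pd.Series((rng.rand(n) < 0.25 + 0.15 * group).astype(int))

# X_test and A_test are module-level on purpose: thresholdOptimizer() predicts on them.
X_train, X_test = features.iloc[:300], features.iloc[300:]
Y_train = labels.iloc[:300]
A_train, A_test = group.iloc[:300], group.iloc[300:]

preds = thresholdOptimizer(X_train, Y_train, A_train,
                           LogisticRegression(), "demographic_parity")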
Example #9
def test_predict_different_argument_lengths(sensitive_features,
                                            sensitive_feature_names,
                                            X_transform, y_transform,
                                            sensitive_features_transform,
                                            constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    with pytest.raises(ValueError,
                       match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE.format(
                           "X and sensitive_features")):
        adjusted_predictor.predict(
            X,
            sensitive_features=sensitive_features_transform(
                sensitive_features[:-1]))

    with pytest.raises(ValueError,
                       match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE.format(
                           "X and sensitive_features")):
        adjusted_predictor.predict(X_transform(
            _format_as_list_of_lists(sensitive_features))[:-1],
                                   sensitive_features=sensitive_features_)
Example #10
def run_thresholdoptimizer_classification(estimator):
    """Run classification test with ThresholdOptimizer."""
    X, Y, A = fetch_adult()

    to = ThresholdOptimizer(estimator=estimator, prefit=False)
    to.fit(X, Y, sensitive_features=A)

    results = to.predict(X, sensitive_features=A)
    assert results is not None
Example #11
def _fit_and_plot(constraints, plotting_data):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex), constraints=constraints)
    adjusted_predictor.fit(plotting_data.X,
                           plotting_data.y,
                           sensitive_features=plotting_data.sensitive_features)
    fig, (ax) = plt.subplots(1, 1)
    plot_threshold_optimizer(adjusted_predictor, ax=ax, show_plot=False)
    return fig
def test_threshold_optimization_degenerate_labels(data_X_sf, y_transform, constraints):
    y = y_transform(degenerate_labels_ex)

    adjusted_predictor = ThresholdOptimizer(estimator=ExamplePredictor(scores_ex),
                                            constraints=constraints,
                                            predict_method='predict')

    feature_name = _degenerate_labels_feature_name[data_X_sf.example_name]
    with pytest.raises(ValueError, match=DEGENERATE_LABELS_ERROR_MESSAGE.format(feature_name)):
        adjusted_predictor.fit(data_X_sf.X, y,
                               sensitive_features=data_X_sf.sensitive_features)
def test_predict_output_0_or_1(data_X_y_sf, constraints):
    adjusted_predictor = ThresholdOptimizer(estimator=ExamplePredictor(scores_ex),
                                            constraints=constraints,
                                            predict_method='predict')
    adjusted_predictor.fit(data_X_y_sf.X, data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor.predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)
    for prediction in predictions:
        assert prediction in [0, 1]
def test_threshold_optimization_non_binary_labels(data_X_y_sf, constraints):
    non_binary_y = deepcopy(data_X_y_sf.y)
    non_binary_y[0] = 2

    adjusted_predictor = ThresholdOptimizer(estimator=ExamplePredictor(scores_ex),
                                            constraints=constraints,
                                            predict_method='predict')

    with pytest.raises(ValueError, match=_LABELS_NOT_0_1_ERROR_MESSAGE):
        adjusted_predictor.fit(data_X_y_sf.X, non_binary_y,
                               sensitive_features=data_X_y_sf.sensitive_features)
Example #15
def test_inconsistent_input_data_types(X, y, sensitive_features, constraints):
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)

    error_message = INPUT_DATA_FORMAT_ERROR_MESSAGE.format(
        type(X).__name__,
        type(y).__name__,
        type(sensitive_features).__name__)

    if X is None or y is None or sensitive_features is None:
        with pytest.raises(TypeError) as exception:
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
        assert str(exception.value) == error_message
Example #16
def test_threshold_optimization_degenerate_labels(X_transform, y_transform,
                                                  sensitive_features_transform,
                                                  constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features_ex1))
    y = y_transform(degenerate_labels_ex)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)

    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)

    with pytest.raises(ValueError,
                       match=DEGENERATE_LABELS_ERROR_MESSAGE.format('A')):
        adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
Example #17
def test_predict_output_0_or_1(sensitive_features, sensitive_feature_names,
                               X_transform, y_transform,
                               sensitive_features_transform, constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor.predict(
        X, sensitive_features=sensitive_features_)
    for prediction in predictions:
        assert prediction in [0, 1]
Example #18
def test_threshold_optimization_non_binary_labels(X_transform, y_transform,
                                                  sensitive_features_transform,
                                                  constraints):
    non_binary_labels = copy.deepcopy(labels_ex)
    non_binary_labels[0] = 2

    X = X_transform(_format_as_list_of_lists(sensitive_features_ex1))
    y = y_transform(non_binary_labels)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)

    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)

    with pytest.raises(ValueError, match=NON_BINARY_LABELS_ERROR_MESSAGE):
        adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
Example #19
def test_no_matplotlib(constraints):
    n_samples = 50
    n_features = 50
    n_sensitive_feature_values = 2
    n_classes = 2

    threshold_optimizer = ThresholdOptimizer(unconstrained_predictor=FakePredictor(),
                                             constraints=constraints,
                                             plot=True)
    with pytest.raises(RuntimeError) as exc:
        threshold_optimizer.fit(X=np.random.random((n_samples, n_features)),
                                y=np.random.randint(n_classes, size=n_samples),
                                sensitive_features=np.random.randint(n_sensitive_feature_values,
                                                                     size=n_samples))
    assert str(exc.value) == _MATPLOTLIB_IMPORT_ERROR_MESSAGE
Example #20
def test_predict_multiple_sensitive_features_columns_error(
        sensitive_features, sensitive_feature_names, X_transform, y_transform,
        constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = pd.DataFrame({
        "A1": sensitive_features,
        "A2": sensitive_features
    })
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(), constraints=constraints)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    with pytest.raises(ValueError,
                       match=MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE.format(
                           "sensitive_features")):
        adjusted_predictor.predict(X, sensitive_features=sensitive_features_)
Example #21
def test_threshold_optimization_demographic_parity_e2e(data_X_y_sf):
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex), constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(data_X_y_sf.X,
                           data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)
    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)

    expected_ps = _expected_ps_demographic_parity[data_X_y_sf.example_name]

    # assert demographic parity
    for sensitive_feature_name in data_X_y_sf.feature_names:
        average_probs = np.average(predictions[_map_into_single_column(
            data_X_y_sf.sensitive_features) == sensitive_feature_name],
                                   axis=0)
        assert np.isclose(average_probs[0], expected_ps[_P0])
        assert np.isclose(average_probs[1], expected_ps[_P1])
class demographic_parity_classifier(base_binary_classifier):
    def fit(self, _X, _Y, _classifier_name="logistic", _predictor="hard"):
        my_erm_classifier = erm_classifier(self.train_X, self.train_Y)
        my_erm_classifier.fit(self.train_X, self.train_Y, classifier_name=_classifier_name)
        self.model = ThresholdOptimizer(estimator=my_erm_classifier,
                                        constraints="demographic_parity",
                                        prefit=True)
        self.model.fit(self.train_X, self.train_Y,
                       sensitive_features=self.sensitive_train, _predictor=_predictor)

    def predict(self, x_samples, sensitive_features):
        y_samples = self.model.predict(x_samples, sensitive_features=sensitive_features)
        return y_samples

    def get_accuracy(self, X, y_true, sensitive_features):
        y_pred = self.predict(X, sensitive_features)
        return 1 - np.sum(np.power(y_pred - y_true, 2)) / len(y_true)

    def predict_proba(self, x_samples, sensitive_features):
        y_samples = self.model._pmf_predict(x_samples, sensitive_features=sensitive_features)
        return y_samples
def test_predict_method(predict_method):
    class Dummy(BaseEstimator, ClassifierMixin):
        def fit(self, X, y):
            return self

        def predict(self, X):
            raise Exception("predict")

        def predict_proba(self, X):
            raise Exception("predict_proba")

        def decision_function(self, X):
            raise Exception("decision_function")

    X, y = make_classification()
    sensitive_feature = np.random.randint(0, 2, len(y))
    clf = ThresholdOptimizer(estimator=Dummy(), predict_method=predict_method)
    exception = "predict_proba" if predict_method == "auto" else predict_method
    with pytest.raises(Exception, match=exception):
        clf.fit(X, y, sensitive_features=sensitive_feature)
Example #24
def test_threshold_optimization_demographic_parity_e2e(
        sensitive_features, sensitive_feature_names, expected_p0, expected_p1,
        X_transform, y_transform, sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(
        unconstrained_predictor=ExamplePredictor(),
        constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor._pmf_predict(
        X, sensitive_features=sensitive_features_)

    # assert demographic parity
    for sensitive_feature_name in sensitive_feature_names:
        average_probs = np.average(predictions[np.array(sensitive_features) ==
                                               sensitive_feature_name],
                                   axis=0)
        assert np.isclose(average_probs[0], expected_p0)
        assert np.isclose(average_probs[1], expected_p1)
Example #25
def run_thresholdoptimizer_classification(estimator):
    """Run classification test with ThresholdOptimizer."""
    X_train, Y_train, A_train, X_test, Y_test, A_test = fetch_adult()

    unmitigated = copy.deepcopy(estimator)
    unmitigated.fit(X_train, Y_train)
    unmitigated_predictions = unmitigated.predict(X_test)

    to = ThresholdOptimizer(estimator=estimator,
                            prefit=False,
                            predict_method='predict')
    to.fit(X_train, Y_train, sensitive_features=A_train)

    mitigated_predictions = to.predict(X_test, sensitive_features=A_test)

    dp_diff_unmitigated = demographic_parity_difference(
        Y_test, unmitigated_predictions, sensitive_features=A_test)

    dp_diff_mitigated = demographic_parity_difference(
        Y_test, mitigated_predictions, sensitive_features=A_test)
    assert dp_diff_mitigated <= dp_diff_unmitigated
def test_none_input_data(X, y, sensitive_features, constraints):
    adjusted_predictor = ThresholdOptimizer(estimator=ExamplePredictor(scores_ex),
                                            constraints=constraints,
                                            predict_method='predict')

    if y is None:
        with pytest.raises(ValueError) as exception:
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
        assert str(exception.value) == _MESSAGE_Y_NONE
    elif X is None:
        with pytest.raises(ValueError) as exception:
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
        assert "Expected 2D array, got scalar array instead" in str(exception.value)
    elif sensitive_features is None:
        with pytest.raises(ValueError) as exception:
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)
        assert str(exception.value) == _MESSAGE_SENSITIVE_FEATURES_NONE
    else:
        # skip since no arguments are None
        pass
def test_threshold_optimization_equalized_odds(score_transform, y_transform,
                                               sensitive_features_transform):
    y = y_transform(labels_ex)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)
    # PassThroughPredictor takes scores_ex as input in predict and
    # returns score_transform(scores_ex) as output
    estimator = ThresholdOptimizer(estimator=PassThroughPredictor(score_transform),
                                   constraints='equalized_odds',
                                   flip=True,
                                   predict_method='predict')
    estimator.fit(pd.DataFrame(scores_ex), y, sensitive_features=sensitive_features)

    def prob_pred(sensitive_features, scores):
        return estimator._pmf_predict(
            pd.DataFrame(scores), sensitive_features=sensitive_features)[0, 1]

    # For Equalized Odds we need to factor in that the output is calculated by
    # p_ignore * prediction_constant + (1 - p_ignore) * (p0 * pred0(x) + p1 * pred1(x))
    # with p_ignore != 0 and prediction_constant != 0 for at least some sensitive feature values.
    prediction_constant = 0.334

    # sensitive feature value A
    # p_ignore is almost 0 which means there's almost no adjustment
    p_ignore = 0.001996007984031716
    base_value = prediction_constant * p_ignore
    value_for_less_than_2_5 = base_value + (1 - p_ignore) * 0.668

    assert np.isclose(value_for_less_than_2_5,
                      prob_pred([sensitive_feature_names_ex1[0]], [0]))
    assert np.isclose(value_for_less_than_2_5,
                      prob_pred([sensitive_feature_names_ex1[0]], [2.499]))
    assert base_value == prob_pred([sensitive_feature_names_ex1[0]], [2.5])
    assert base_value == prob_pred([sensitive_feature_names_ex1[0]], [100])

    # sensitive feature value B
    # p_ignore is the largest among the three classes indicating a large adjustment
    p_ignore = 0.1991991991991991
    base_value = prediction_constant * p_ignore
    value_for_less_than_0_5 = base_value + (1 - p_ignore) * 0.001
    assert np.isclose(value_for_less_than_0_5,
                      prob_pred([sensitive_feature_names_ex1[1]], [0]))
    assert np.isclose(value_for_less_than_0_5,
                      prob_pred([sensitive_feature_names_ex1[1]], [0.5]))
    assert base_value + 1 - \
        p_ignore == prob_pred([sensitive_feature_names_ex1[1]], [0.51])
    assert base_value + 1 - \
        p_ignore == prob_pred([sensitive_feature_names_ex1[1]], [1])
    assert base_value + 1 - \
        p_ignore == prob_pred([sensitive_feature_names_ex1[1]], [100])

    # sensitive feature value C
    # p_ignore is 0 which means there's no adjustment
    p_ignore = 0
    base_value = prediction_constant * p_ignore
    value_between_0_5_and_1_5 = base_value + (1 - p_ignore) * 0.501
    assert base_value == prob_pred([sensitive_feature_names_ex1[2]], [0])
    assert base_value == prob_pred([sensitive_feature_names_ex1[2]], [0.5])
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [0.51]))
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [1]))
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [1.5]))
    assert base_value + 1 - \
        p_ignore == prob_pred([sensitive_feature_names_ex1[2]], [1.51])
    assert base_value + 1 - \
        p_ignore == prob_pred([sensitive_feature_names_ex1[2]], [100])

    # Assert Equalized Odds actually holds
    predictions_by_sensitive_feature = _get_predictions_by_sensitive_feature(
        prob_pred, sensitive_features_ex1, scores_ex, labels_ex)

    def _average_prediction_for_label(label, sensitive_feature_value,
                                      predictions_by_sensitive_feature):
        relevant_predictions = predictions_by_sensitive_feature[sensitive_feature_value]
        predictions_for_label = [lp.prediction for lp in relevant_predictions if lp.label == label]
        sum_of_predictions_for_label = np.sum(predictions_for_label)
        n_predictions_for_label = len([lp for lp in relevant_predictions if lp.label == label])
        return sum_of_predictions_for_label / n_predictions_for_label

    predictions_based_on_label = {0: [], 1: []}
    for label in [0, 1]:
        for sensitive_feature_value in sorted(predictions_by_sensitive_feature):
            predictions_based_on_label[label] \
                .append(_average_prediction_for_label(label, sensitive_feature_value,
                        predictions_by_sensitive_feature))

    # assert average predicted probability of a positive outcome for negative labels
    assert np.isclose(predictions_based_on_label[0], [0.334] * 3).all()
    # assert average predicted probability of a positive outcome for positive labels
    assert np.isclose(predictions_based_on_label[1], [0.66733333] * 3).all()
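As a quick standalone check of the interpolation formula quoted in the comments above, the numbers for sensitive feature value A can be reproduced by hand. The constants below are copied from the test; 0.668 is the value that p0 * pred0(x) + p1 * pred1(x) takes for scores below that group's threshold.

import numpy as np

prediction_constant = 0.334
p_ignore = 0.001996007984031716
p_below_threshold = 0.668  # p0 * pred0(x) + p1 * pred1(x) below the group's threshold

# output = p_ignore * prediction_constant + (1 - p_ignore) * (p0 * pred0(x) + p1 * pred1(x))
base_value = prediction_constant * p_ignore
value_for_less_than_2_5 = base_value + (1 - p_ignore) * p_below_threshold

assert np.isclose(base_value, 0.000666667)
assert np.isclose(value_for_less_than_2_5, 0.667333333)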
def test_threshold_optimization_demographic_parity(score_transform, y_transform,
                                                   sensitive_features_transform):
    y = y_transform(labels_ex)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)
    # PassThroughPredictor takes scores_ex as input in predict and
    # returns score_transform(scores_ex) as output
    estimator = ThresholdOptimizer(estimator=PassThroughPredictor(score_transform),
                                   constraints='demographic_parity',
                                   flip=True,
                                   predict_method='predict')
    estimator.fit(pd.DataFrame(scores_ex), y, sensitive_features=sensitive_features)

    def prob_pred(sensitive_features, scores):
        return estimator._pmf_predict(
            pd.DataFrame(scores), sensitive_features=sensitive_features)[0, 1]

    # For Demographic Parity we can ignore p_ignore since it's always 0.

    # sensitive feature value A
    value_for_less_than_2_5 = 0.8008
    assert np.isclose(value_for_less_than_2_5,
                      prob_pred([sensitive_feature_names_ex1[0]], [0]))
    assert np.isclose(value_for_less_than_2_5,
                      prob_pred([sensitive_feature_names_ex1[0]], [2.499]))
    assert 0 == prob_pred([sensitive_feature_names_ex1[0]], [2.5])
    assert 0 == prob_pred([sensitive_feature_names_ex1[0]], [100])

    # sensitive feature value B
    value_for_less_than_0_5 = 0.00133333333333
    assert np.isclose(value_for_less_than_0_5,
                      prob_pred([sensitive_feature_names_ex1[1]], [0]))
    assert np.isclose(value_for_less_than_0_5,
                      prob_pred([sensitive_feature_names_ex1[1]], [0.5]))
    assert 1 == prob_pred([sensitive_feature_names_ex1[1]], [0.51])
    assert 1 == prob_pred([sensitive_feature_names_ex1[1]], [1])
    assert 1 == prob_pred([sensitive_feature_names_ex1[1]], [100])

    # sensitive feature value C
    value_between_0_5_and_1_5 = 0.608
    assert 0 == prob_pred([sensitive_feature_names_ex1[2]], [0])
    assert 0 == prob_pred([sensitive_feature_names_ex1[2]], [0.5])
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [0.51]))
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [1]))
    assert np.isclose(value_between_0_5_and_1_5,
                      prob_pred([sensitive_feature_names_ex1[2]], [1.5]))
    assert 1 == prob_pred([sensitive_feature_names_ex1[2]], [1.51])
    assert 1 == prob_pred([sensitive_feature_names_ex1[2]], [100])

    # Assert Demographic Parity actually holds
    predictions_by_sensitive_feature = _get_predictions_by_sensitive_feature(
        prob_pred, sensitive_features_ex1, scores_ex, labels_ex)

    def _average_prediction(sensitive_feature_value, predictions_by_sensitive_feature):
        relevant_predictions = predictions_by_sensitive_feature[sensitive_feature_value]
        predictions = [lp.prediction for lp in relevant_predictions]
        return np.sum(predictions) / len(relevant_predictions)

    average_probabilities_by_sensitive_feature = []
    for sensitive_feature_value in sorted(predictions_by_sensitive_feature):
        average_probabilities_by_sensitive_feature \
            .append(_average_prediction(sensitive_feature_value,
                                        predictions_by_sensitive_feature))
    assert np.isclose(average_probabilities_by_sensitive_feature, [0.572] * 3).all()
class fair_classifier(pseudo_classifier):
    def __init__(self, train_X, train_y, train_score_y, sensitive_train,
                 test_X, test_y, test_score_y, sensitive_test, metric,
                 sensitive_features_dict=None, HARD=False):
        self.train_X = train_X
        self.train_Y = train_y
        if HARD:
            self.train_score_Y = np.round(train_score_y)
        else:
            self.train_score_Y = train_score_y
        self.sensitive_train = sensitive_train

        self.test_X = test_X
        self.test_Y = test_y
        if HARD:
            self.test_score_Y = np.round(test_score_y)
        else:
            self.test_score_Y = test_score_y
        self.sensitive_test = sensitive_test

        self.sensitive_features_dict = sensitive_features_dict
        self.erm_classifier = pseudo_classifier(
            self.train_X, self.train_Y, self.train_score_Y, self.sensitive_train,
            self.test_X, self.test_Y, self.test_score_Y, self.sensitive_test)
        assert (metric in ["equalized_odds", "demographic_parity"])
        self.metric = metric

    def fit(self):
        self.erm_classifier.fit(self.train_X, self.train_Y)
        self.model = ThresholdOptimizer(estimator=self.erm_classifier,
                                        constraints=self.metric,
                                        prefit=True)
        self.model.fit(self.train_X,
                       self.train_Y,
                       sensitive_features=self.sensitive_train)

    def predict(self, x_samples, sensitive_features):
        y_samples = self.model.predict(x_samples,
                                       sensitive_features=sensitive_features)
        return y_samples

    def get_accuracy(self, X, y_true, sensitive_features):
        y_pred = self.predict(X, sensitive_features)
        return 1 - np.sum(np.power(y_pred - y_true, 2)) / len(y_true)

    def predict_prob(self, x_samples, sensitive_features):
        y_samples = self.model._pmf_predict(
            x_samples, sensitive_features=sensitive_features)
        return y_samples

    def get_avg_group_confusion_matrix(self, sensitive_features, X, true_Y):
        # Produces average tp/fp/tn/fn/accuracy per group.
        # Basically get_group_confusion_matrix, but modified to return average
        # (expected) values where possible: for a trained classifier, compute the
        # average true positive and true negative rates per sensitive group
        # (currently only works for binary labels).
        groups = np.unique(sensitive_features)
        tp_rate = {}
        fp_rate = {}
        tn_rate = {}
        fn_rate = {}

        true_pos_index = np.where(true_Y == 1)
        true_neg_index = np.where(true_Y == 0)

        # Calculate probability of classification for each input
        y_pred_prob = self.predict_prob(X, sensitive_features)
        # Calculate average probability of correct classification (i.e. expected accuracy)
        avg_micro_acc = (np.sum(y_pred_prob[true_pos_index][:, 1]) + np.sum(
            y_pred_prob[true_neg_index][:, 0])) / len(true_Y)
        print("Average Overall Accuracy: ", avg_micro_acc)

        micro_auc = roc_auc_score(true_Y, y_pred_prob[:, 1])
        print("Overall AUC: ", micro_auc)

        out_dict = {}  # The format is: {group:[tp, fp, tn, fn]}

        avg_macro_acc = 0
        macro_auc = 0

        for index, group in enumerate(groups):
            indices = np.where(sensitive_features == group)[0]
            true_class = true_Y[indices]
            pred_prob = y_pred_prob[indices]

            true_pos_index = np.where(true_class == 1)[0]
            true_neg_index = np.where(true_class == 0)[0]
            if len(true_pos_index) == 0 or len(true_neg_index) == 0:
                print("No True positives or no true negatives in this group")
                continue

            # Find avg rates (i.e. avg probability of tp/tn/fp/fn)
            tp = np.sum(pred_prob[true_pos_index][:, 1]) / len(true_pos_index)
            tn = np.sum(pred_prob[true_neg_index][:, 0]) / len(true_neg_index)
            fp = np.sum(pred_prob[true_neg_index][:, 1]) / len(true_neg_index)
            fn = np.sum(pred_prob[true_pos_index][:, 0]) / len(true_pos_index)
            tp_rate[group] = tp
            tn_rate[group] = tn
            fp_rate[group] = fp
            fn_rate[group] = fn

            # Expected accuracy
            accuracy = (np.sum(pred_prob[true_pos_index][:, 1]) + np.sum(
                pred_prob[true_neg_index][:, 0])) / len(true_class)
            avg_macro_acc += accuracy

            auc = roc_auc_score(true_class, pred_prob[:, 1])
            macro_auc += auc

            out_dict[group] = [tp, tn, fp, fn, accuracy, auc]
            print(group, "average confusion matrix")
            if tp == 0 and fp == 0:
                print("None classified as Positive in group", group)
                print("\t Average Group Accuracy: ", accuracy)
            else:
                # Can't compute F1 out of these since dealing with average values
                #precision = tp / (tp + fp)
                #recall = tp / (tp + fn)
                #f1 = 2 * precision * recall / (precision + recall)
                #print("\t F1 score: ", f1)
                print("\t Average Group Accuracy: ", accuracy)
                print("\t Group AUC: ", auc)
                print("\t Average True positive rate:", tp)
                print("\t Average True negative rate:", tn)
                print("\t Average False positive rate:", fp)
                print("\t Average False negative rate:", fn)

        avg_macro_acc /= len(groups)
        macro_auc /= len(groups)

        return out_dict, {
            "Accuracy": (avg_micro_acc, avg_macro_acc),
            "AUC": (micro_auc, macro_auc)
        }
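A tiny standalone illustration of the "expected accuracy" used above, i.e. the average probability mass placed on the correct class. The numbers are made up purely for illustration.

import numpy as np

y_true = np.array([1, 0, 1, 0])
# Columns: probability of class 0, probability of class 1 (as returned by _pmf_predict).
y_pred_prob = np.array([[0.2, 0.8],
                        [0.7, 0.3],
                        [0.4, 0.6],
                        [0.9, 0.1]])

pos = np.where(y_true == 1)
neg = np.where(y_true == 0)
# Sum the probability placed on the correct class and divide by the sample count:
# (0.8 + 0.6) + (0.7 + 0.9) = 3.0 over 4 samples -> 0.75 expected accuracy.
expected_acc = (np.sum(y_pred_prob[pos][:, 1]) + np.sum(y_pred_prob[neg][:, 0])) / len(y_true)
assert np.isclose(expected_acc, 0.75)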
Example #30
    # predict() of the LogisticRegressionAsRegression wrapper used below: it exposes
    # the positive-class probability as a continuous score for ThresholdOptimizer.
    def predict(self, X):
        # use predict_proba to get real-valued scores instead of 0/1 labels,
        # selecting only the probability of class 1
        scores = self.logistic_regression_estimator_.predict_proba(X)[:, 1]
        return scores


from fairlearn.postprocessing import ThresholdOptimizer

estimator_wrapper = LogisticRegressionAsRegression(estimator).fit(
    X_train, y_train)
postprocessed_predictor_EO = ThresholdOptimizer(estimator=estimator_wrapper,
                                                constraints="equalized_odds",
                                                prefit=True)

postprocessed_predictor_EO.fit(X_train,
                               y_train,
                               sensitive_features=sensitive_features_train)

fairness_aware_predictions_EO_train = postprocessed_predictor_EO.predict(
    X_train, sensitive_features=sensitive_features_train)
fairness_aware_predictions_EO_test = postprocessed_predictor_EO.predict(
    X_test, sensitive_features=sensitive_features_test)

# show only the test-data plot by default - uncomment the lines below to see the
# training-data plot as well

# show_proportions(
#     X_train, sensitive_features_train, fairness_aware_predictions_EO_train,
#     y_train,
#     description="equalized odds with postprocessed model on training data:",
#     plot_row_index=1)
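One way to quantify how closely the postprocessed predictor satisfies its constraint is fairlearn's equalized_odds_difference metric. A short sketch, assuming y_test (the held-out labels) is available as elsewhere in the notebook:

from fairlearn.metrics import equalized_odds_difference

# Largest gap in true/false positive rates across sensitive groups;
# 0 would mean equalized odds holds exactly on the test data.
eo_gap_test = equalized_odds_difference(
    y_test,
    fairness_aware_predictions_EO_test,
    sensitive_features=sensitive_features_test)
print("equalized odds difference on test data:", eo_gap_test)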