Пример #1
0
def test_bgl_unfair(A_two_dim):
    """Pin the predictions of a bounded-group-loss grid search on two-group data.

    A ``GridSearch`` over ``LinearRegression`` constrained by
    ``BoundedGroupLoss(SquareLoss(-inf, inf))`` is fitted on the data returned
    by ``_simple_regression_data``. The test then checks the number of grid
    results, the predictions of the selected model, and the predictions of
    every individual grid-point predictor on a two-row probe frame.

    ``A_two_dim`` is forwarded unchanged to ``_simple_regression_data``.
    """
    # Per-group settings forwarded positionally to the data helper.
    a0_count, a1_count = 5, 7
    a0_label, a1_label = 2, 3
    a0_factor, a1_factor = 1, 16
    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    # Square loss with no clipping bounds, wrapped as the fairness constraint.
    unbounded_square_loss = SquareLoss(-np.inf, np.inf)
    constraint = BoundedGroupLoss(unbounded_square_loss)
    grid_search = GridSearch(
        LinearRegression(),
        constraints=constraint,
        grid_size=grid_size,
    )
    grid_search.fit(X, Y, sensitive_features=A)

    assert_n_grid_search_results(grid_size, grid_search)

    # One probe row per sensitive-feature label.
    probe = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    expected_best = [-1.91764706, 9.61176471]
    assert np.allclose(expected_best, grid_search.predict(probe))

    all_predict = []
    for predictor in grid_search.predictors_:
        all_predict.append(predictor.predict(probe))

    # TODO: investigate why the first grid point can yield two different outcomes.
    # Likely some data points at the edge are ignored, giving another solution with
    # the same least squares loss (i.e. both solutions are acceptable).
    # Reflects https://github.com/fairlearn/fairlearn/issues/265
    first_point = [all_predict[0]]
    acceptable_first_points = ([[3.2, 11.2]], [[3.03010885, 11.2]])
    # Generator + any() keeps the short-circuit: the second candidate is only
    # compared when the first one does not match.
    assert any(
        logging_all_close(candidate, first_point)
        for candidate in acceptable_first_points
    )

    expected_remaining = [
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert logging_all_close(expected_remaining, all_predict[1:])
Пример #2
0
def test_bgl_unfair(A_two_dim):
    """Pin the predictions of a group-loss grid search on two-group data.

    A ``GridSearch`` over ``LinearRegression`` constrained by
    ``GroupLossMoment(SquareLoss(-inf, inf))`` is fitted on the data returned
    by ``_simple_regression_data``. The test then checks the number of entries
    in ``all_results``, the predictions of the selected model, and the
    predictions of every stored predictor on a two-row probe frame.

    ``A_two_dim`` is forwarded unchanged to ``_simple_regression_data``.
    """
    # Per-group settings forwarded positionally to the data helper.
    a0_count, a1_count = 5, 7
    a0_label, a1_label = 2, 3
    a0_factor, a1_factor = 1, 16
    # Used both for the search itself and for the expected number of results.
    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    # Square loss with no clipping bounds, wrapped as the constraint moment.
    unbounded_square_loss = SquareLoss(-np.inf, np.inf)
    constraint = GroupLossMoment(unbounded_square_loss)
    target = GridSearch(
        LinearRegression(),
        constraints=constraint,
        grid_size=grid_size,
    )
    target.fit(X, Y, sensitive_features=A)

    assert len(target.all_results) == grid_size

    # One probe row per sensitive-feature label.
    probe = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    expected_best = [-1.91764706, 9.61176471]
    assert np.allclose(expected_best, target.predict(probe))

    all_predict = []
    for result in target.all_results:
        all_predict.append(result.predictor.predict(probe))

    # Expected predictions, one pair per grid point, in ``all_results`` order.
    expected_all = [
        [3.2, 11.2],
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert logging_all_close(expected_all, all_predict)