def test_bgl_unfair(A_two_dim):
    a0_count = 5
    a1_count = 7
    a0_label = 2
    a1_label = 3
    a0_factor = 1
    a1_factor = 16
    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    bgl_square_loss = BoundedGroupLoss(SquareLoss(-np.inf, np.inf))
    grid_search = GridSearch(
        LinearRegression(), constraints=bgl_square_loss, grid_size=grid_size
    )

    grid_search.fit(X, Y, sensitive_features=A)
    assert_n_grid_search_results(grid_size, grid_search)

    test_X = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    best_predict = grid_search.predict(test_X)
    assert np.allclose([-1.91764706, 9.61176471], best_predict)

    all_predict = [predictor.predict(test_X) for predictor in grid_search.predictors_]

    # TODO: investigate where the different outcomes for the first grid point are from, likely
    # due to some ignored data points at the edge resulting in another solution with the same
    # least squares loss (i.e. both solutions acceptable).
    # Reflects https://github.com/fairlearn/fairlearn/issues/265
    assert logging_all_close([[3.2, 11.2]], [all_predict[0]]) or logging_all_close(
        [[3.03010885, 11.2]], [all_predict[0]]
    )

    assert logging_all_close(
        [
            [-3.47346939, 10.64897959],
            [-2.68, 10.12],
            [-1.91764706, 9.61176471],
            [-1.18461538, 9.12307692],
            [-0.47924528, 8.65283019],
            [0.2, 0.7],
        ],
        all_predict[1:],
    )
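
# Hedged sketch, not part of the original test module: `logging_all_close` is a test
# utility referenced above (normally imported from the test helpers). Only its call
# signature is taken from the test; the body below is an assumption about its behavior,
# namely that it logs both arrays and delegates the comparison to numpy.allclose so that
# mismatches are visible in the test output.
#
# import logging
#
# def logging_all_close(expected, actual):
#     """Log both arrays and return whether they are element-wise close (sketch)."""
#     close = np.allclose(expected, actual)
#     if not close:
#         logging.warning(
#             "logging_all_close mismatch: expected=%s actual=%s", expected, actual
#         )
#     return close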