def test_error_rate_consistency(self, eps, ratio, pos_copies): learner = LeastSquaresBinaryClassifierLearner() if ratio is None: constraints_moment = EqualizedOdds(difference_bound=eps) else: constraints_moment = EqualizedOdds(ratio_bound=ratio, ratio_bound_slack=eps) results = {} for method in ["costs", "sampling"]: X, y, A = _get_data() if method == "sampling": select = y == 1 X = pd.concat((X,) + (X.loc[select, :],) * pos_copies).values y = pd.concat((y,) + (y[select],) * pos_copies).values A = pd.concat((A,) + (A[select],) * pos_copies).values objective_moment = ErrorRate() else: objective_moment = ErrorRate(costs={"fn": 1.0 + pos_copies, "fp": 1.0}) expgrad = ExponentiatedGradient( learner, constraints=deepcopy(constraints_moment), objective=deepcopy(objective_moment), eps=eps, nu=1e-3, ) expgrad.fit(X, y, sensitive_features=A) # select probability of predicting 1 def Q(X): return expgrad._pmf_predict(X)[:, 1] constraints_eval = deepcopy(constraints_moment) constraints_eval.load_data(X, y, sensitive_features=A) disparity = constraints_eval.gamma(Q).max() objective_eval = deepcopy(objective_moment) objective_eval.load_data(X, y, sensitive_features=A) total_error = objective_eval.gamma(Q)[0] * len(y) results[method] = { "error": objective_eval.gamma(Q)[0], "total_error": total_error, "disp": disparity, "n_predictors": len(expgrad.predictors_), "best_gap": expgrad.best_gap_, "last_iter": expgrad.last_iter_, "best_iter": expgrad.best_iter_, "n_oracle_calls": expgrad.n_oracle_calls_, "n_oracle_calls_dummy_returned": expgrad.n_oracle_calls_dummy_returned_, } self._assert_expgrad_two_states(results["costs"], results["sampling"])
def run_smoke_test(self, data, flipped=False): ratio = 1.0 if "ratio" in data.keys(): ratio = data["ratio"] expgrad = ExponentiatedGradient( self.learner, constraints=data["cons_class"](ratio=ratio), eps=data["eps"]) expgrad.fit(self.X, (self.flipped_y if flipped else self.y), sensitive_features=self.A) def Q(X): return expgrad._pmf_predict(X)[:, 1] n_predictors = len(expgrad._predictors) disparity_moment = data["cons_class"](ratio=ratio) disparity_moment.load_data(self.X, (self.flipped_y if flipped else self.y), sensitive_features=self.A) error = ErrorRate() error.load_data(self.X, (self.flipped_y if flipped else self.y), sensitive_features=self.A) disparity = disparity_moment.gamma(Q).max() error = error.gamma(Q)[0] assert expgrad._best_gap == pytest.approx(data["best_gap"], abs=self._PRECISION) assert expgrad._last_t == data["last_t"] assert expgrad._best_t == data["best_t"] assert disparity == pytest.approx(data["disp"], abs=self._PRECISION) assert error == pytest.approx(data["error"], abs=self._PRECISION) assert expgrad._n_oracle_calls == data["n_oracle_calls"] assert n_predictors == data["n_predictors"]
def run_smoke_test(self, data): expgrad = ExponentiatedGradient(self.learner, constraints=data["cons_class"](), eps=data["eps"]) expgrad.fit(self.X, self.y, sensitive_features=self.A) res = expgrad._expgrad_result._as_dict() Q = res["best_classifier"] res["n_classifiers"] = len(res["classifiers"]) disp = data["cons_class"]() disp.load_data(self.X, self.y, sensitive_features=self.A) error = ErrorRate() error.load_data(self.X, self.y, sensitive_features=self.A) res["disp"] = disp.gamma(Q).max() res["error"] = error.gamma(Q)[0] assert res["best_gap"] == pytest.approx( data["best_gap"], abs=self._PRECISION) assert res["last_t"] == data["last_t"] assert res["best_t"] == data["best_t"] assert res["disp"] == pytest.approx(data["disp"], abs=self._PRECISION) assert res["error"] == pytest.approx( data["error"], abs=self._PRECISION) assert res["n_oracle_calls"] == data["n_oracle_calls"] assert res["n_classifiers"] == data["n_classifiers"]
def run_smoke_test_binary_classification(self, data, flipped=False): learner = LeastSquaresBinaryClassifierLearner() if "ratio" in data.keys(): disparity_moment = data["constraint_class"]( ratio_bound_slack=data["eps"], ratio_bound=data["ratio"]) else: disparity_moment = data["constraint_class"]( difference_bound=data["eps"]) # Create Exponentiated Gradient object with a copy of the constraint. # The original disparity_moment object is used for validation, so the # assumption is that the moment logic is correct in these tests. expgrad = ExponentiatedGradient(learner, constraints=deepcopy(disparity_moment), eps=data["eps"]) X, y, A = _get_data(A_two_dim=False, flip_y=flipped) expgrad.fit(X, y, sensitive_features=A) self._assert_expgrad_state(expgrad, data) # select probability of predicting 1 def Q(X): return expgrad._pmf_predict(X)[:, 1] default_objective = ErrorRate() disparity_moment.load_data(X, y, sensitive_features=A) default_objective.load_data(X, y, sensitive_features=A) disparity = disparity_moment.gamma(Q).max() error = default_objective.gamma(Q)[0] assert disparity == pytest.approx(data["disp"], abs=_PRECISION) assert error == pytest.approx(data["error"], abs=_PRECISION)
def test_argument_types(self, transformX, transformY, transformA, A_two_dim): # This is an expanded-out version of one of the smoke tests X, y, A = _get_data(A_two_dim) merged_A = _map_into_single_column(A) expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(), constraints=DemographicParity(), eps=0.1) expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A)) Q = expgrad._best_classifier n_classifiers = len(expgrad._classifiers) disparity_moment = DemographicParity() disparity_moment.load_data(X, y, sensitive_features=merged_A) error = ErrorRate() error.load_data(X, y, sensitive_features=merged_A) disparity = disparity_moment.gamma(Q).max() error = error.gamma(Q)[0] assert expgrad._best_gap == pytest.approx(0.0000, abs=_PRECISION) assert expgrad._last_t == 5 assert expgrad._best_t == 5 assert disparity == pytest.approx(0.1, abs=_PRECISION) assert error == pytest.approx(0.25, abs=_PRECISION) assert expgrad._n_oracle_calls == 32 assert n_classifiers == 3
def test_argument_types(self, transformX, transformY, transformA): # This is an expanded-out version of one of the smoke tests expgrad = ExponentiatedGradient(self.learner, constraints=DemographicParity(), eps=0.1) expgrad.fit(transformX(self.X), transformY(self.y), sensitive_features=transformA(self.A)) res = expgrad._expgrad_result._as_dict() Q = res["best_classifier"] res["n_classifiers"] = len(res["classifiers"]) disp = DemographicParity() disp.load_data(self.X, self.y, sensitive_features=self.A) error = ErrorRate() error.load_data(self.X, self.y, sensitive_features=self.A) res["disp"] = disp.gamma(Q).max() res["error"] = error.gamma(Q)[0] assert res["best_gap"] == pytest.approx(0.0000, abs=self._PRECISION) assert res["last_t"] == 5 assert res["best_t"] == 5 assert res["disp"] == pytest.approx(0.1, abs=self._PRECISION) assert res["error"] == pytest.approx(0.25, abs=self._PRECISION) assert res["n_oracle_calls"] == 32 assert res["n_classifiers"] == 3
def test_argument_types(self, transformX, transformY, transformA, A_two_dim): # This is an expanded-out version of one of the smoke tests X, y, A = _get_data(A_two_dim) merged_A = _map_into_single_column(A) expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(), constraints=DemographicParity(), eps=0.1) expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A)) res = expgrad._expgrad_result._as_dict() Q = res["best_classifier"] res["n_classifiers"] = len(res["classifiers"]) disp = DemographicParity() disp.load_data(X, y, sensitive_features=merged_A) error = ErrorRate() error.load_data(X, y, sensitive_features=merged_A) res["disp"] = disp.gamma(Q).max() res["error"] = error.gamma(Q)[0] assert res["best_gap"] == pytest.approx(0.0000, abs=_PRECISION) assert res["last_t"] == 5 assert res["best_t"] == 5 assert res["disp"] == pytest.approx(0.1, abs=_PRECISION) assert res["error"] == pytest.approx(0.25, abs=_PRECISION) assert res["n_oracle_calls"] == 32 assert res["n_classifiers"] == 3
def gridSearch(model, X_train, Y_train, A_train, grid_size): """ Generates a sequence of relabellings and reweightings, and trains a predictor for each. Only applicable for binary feature. Parameters: x_train: input data for training model y_train: list of ground truths model: the unmitigated algorthmic model Returns a dataframe of the different predictors and its accuracy scores and disparity scores. """ sweep = GridSearch(model, constraints=DemographicParity(), grid_size=grid_size) # we extract the full set of predictors from the `GridSearch` object sweep.fit(X_train, Y_train, sensitive_features=A_train) predictors = sweep._predictors """ Remove the predictors which are dominated in the error-disparity space by others from the sweep (note that the disparity will only be calculated for the protected attribute; other potentially protected attributes will not be mitigated) In general, one might not want to do this, since there may be other considerations beyond the strict optimisation of error and disparity (of the given protected attribute). """ errors, disparities = [], [] for m in predictors: classifier = lambda X: m.predict(X) error = ErrorRate() error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) disparity = DemographicParity() disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) errors.append(error.gamma(classifier)[0]) disparities.append(disparity.gamma(classifier).max()) all_results = pd.DataFrame({ "predictor": predictors, "error": errors, "disparity": disparities }) non_dominated = [] for row in all_results.itertuples(): errors_for_lower_or_eq_disparity = all_results["error"][ all_results["disparity"] <= row.disparity] if row.error <= errors_for_lower_or_eq_disparity.min(): non_dominated.append(row.predictor) return non_dominated
def run_smoke_test(self, data, flipped=False): if flipped: y = self.flipped_y else: y = self.y if "ratio" in data.keys(): expgrad = ExponentiatedGradient(self.learner, constraints=data["cons_class"]( ratio_bound_slack=data["eps"], ratio_bound=data["ratio"]), eps=data["eps"]) else: expgrad = ExponentiatedGradient( self.learner, constraints=data["cons_class"](difference_bound=data["eps"]), eps=data["eps"]) expgrad.fit(self.X, y, sensitive_features=self.A) def Q(X): return expgrad._pmf_predict(X)[:, 1] n_predictors = len(expgrad.predictors_) if "ratio" in data.keys(): disparity_moment = data["cons_class"]( ratio_bound_slack=data["eps"], ratio_bound=data["ratio"]) else: disparity_moment = data["cons_class"](difference_bound=data["eps"]) disparity_moment.load_data(self.X, y, sensitive_features=self.A) error = ErrorRate() error.load_data(self.X, y, sensitive_features=self.A) disparity = disparity_moment.gamma(Q).max() error = error.gamma(Q)[0] assert expgrad.best_gap_ == pytest.approx(data["best_gap"], abs=self._PRECISION) assert expgrad.last_iter_ == data["last_iter"] assert expgrad.best_iter_ == data["best_iter"] assert expgrad.last_iter_ >= _MIN_ITER assert disparity == pytest.approx(data["disp"], abs=self._PRECISION) assert error == pytest.approx(data["error"], abs=self._PRECISION) assert expgrad.n_oracle_calls_ == data["n_oracle_calls"] assert expgrad.n_oracle_calls_dummy_returned_ == data[ "n_oracle_calls_dummy_returned"] assert n_predictors == data["n_predictors"] assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
def __remove_predictors_dominated_error_disparity_by_sweep(self, predictors, X_train, Y_train, A_train): errors, disparities = [], [] for m in predictors: def classifier(X): return m.predict(X) error = ErrorRate() error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.diabetic) disparity = DemographicParity() disparity.load_data(X_train, pd.Series( Y_train), sensitive_features=A_train.diabetic) errors.append(error.gamma(classifier)[0]) disparities.append(disparity.gamma(classifier).max()) return pd.DataFrame({"predictor": predictors, "error": errors, "disparity": disparities})
def test_argument_types_ratio_bound(self, transformX, transformY, transformA, A_two_dim): # This is an expanded-out version of one of the smoke tests X, y, A = _get_data(A_two_dim) merged_A = _map_into_single_column(A) transformed_X = transformX(X) transformed_y = transformY(y) transformed_A = transformA(A) eps = 0.1 ratio = 1.0 expgrad = ExponentiatedGradient( LeastSquaresBinaryClassifierLearner(), constraints=DemographicParity(ratio_bound_slack=eps, ratio_bound=ratio), eps=eps, ) expgrad.fit(transformed_X, transformed_y, sensitive_features=transformed_A) def Q(X): return expgrad._pmf_predict(X)[:, 1] n_predictors = len(expgrad.predictors_) disparity_moment = DemographicParity(ratio_bound_slack=eps, ratio_bound=ratio) disparity_moment.load_data(X, y, sensitive_features=merged_A) error = ErrorRate() error.load_data(X, y, sensitive_features=merged_A) disparity = disparity_moment.gamma(Q).max() disp = disparity_moment.gamma(Q) disp_eps = disparity_moment.gamma(Q) - disparity_moment.bound() error = error.gamma(Q)[0] assert expgrad.best_gap_ == pytest.approx(0.0000, abs=_PRECISION) assert expgrad.last_iter_ == 5 assert expgrad.best_iter_ == 5 assert disparity == pytest.approx(0.1, abs=_PRECISION) assert np.all(np.isclose(disp - eps, disp_eps)) assert error == pytest.approx(0.25, abs=_PRECISION) assert expgrad.n_oracle_calls_ == 32 assert n_predictors == 3
def run_smoke_test(self, data): expgrad = ExponentiatedGradient(self.learner, constraints=data["cons_class"](), eps=data["eps"]) expgrad.fit(self.X, self.y, sensitive_features=self.A) Q = expgrad._best_classifier n_classifiers = len(expgrad._classifiers) disparity_moment = data["cons_class"]() disparity_moment.load_data(self.X, self.y, sensitive_features=self.A) error = ErrorRate() error.load_data(self.X, self.y, sensitive_features=self.A) disparity = disparity_moment.gamma(Q).max() error = error.gamma(Q)[0] assert expgrad._best_gap == pytest.approx(data["best_gap"], abs=self._PRECISION) assert expgrad._last_t == data["last_t"] assert expgrad._best_t == data["best_t"] assert disparity == pytest.approx(data["disp"], abs=self._PRECISION) assert error == pytest.approx(data["error"], abs=self._PRECISION) assert expgrad._n_oracle_calls == data["n_oracle_calls"] assert n_classifiers == data["n_classifiers"]
def test_bad_costs(bad_costs): with pytest.raises(ValueError) as execInfo: _ = ErrorRate(costs=bad_costs) assert _MESSAGE_BAD_COSTS in execInfo.value.args[0]
class TestExponentiatedGradientSmoke: smoke_test_data = [ { "constraint_class": DemographicParity, "eps": 0.100, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.100000, "error": 0.250000, "n_oracle_calls": 32, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": DemographicParity, "eps": 0.100, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": -0.020000, "error": 0.250000, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 12, "n_predictors": 2, "ratio": 0.8, }, { "constraint_class": DemographicParity, "eps": 0.050, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.266522, "n_oracle_calls": 23, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": DemographicParity, "eps": 0.050, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": -0.020000, "error": 0.25, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 12, "n_predictors": 2, "ratio": 0.8, }, { "constraint_class": DemographicParity, "eps": 0.020, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.332261, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, }, { "constraint_class": DemographicParity, "eps": 0.020, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": -0.020000, "error": 0.25, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 12, "n_predictors": 2, "ratio": 0.8, }, { "constraint_class": DemographicParity, "eps": 0.010, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.010000, "error": 0.354174, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, }, { "constraint_class": DemographicParity, "eps": 0.010, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": -0.020000, "error": 0.25, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 12, "n_predictors": 2, "ratio": 0.8, }, { "constraint_class": DemographicParity, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.365130, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, }, { "constraint_class": DemographicParity, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": -0.020000, "error": 0.25, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 12, "n_predictors": 2, "ratio": 0.8, }, # ================================================ { "constraint_class": DemographicParity, "eps": 0.050, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.407142, "n_oracle_calls": 18, "n_oracle_calls_dummy_returned": 0, "n_predictors": 4, }, { "constraint_class": DemographicParity, "eps": 0.050, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.263830, "n_oracle_calls": 21, "n_oracle_calls_dummy_returned": 12, "n_predictors": 3, "ratio": 0.8, }, { "constraint_class": DemographicParity, "eps": 0.020, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.422, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, }, { "constraint_class": DemographicParity, "eps": 0.020, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.286170, "n_oracle_calls": 21, "n_oracle_calls_dummy_returned": 12, "n_predictors": 3, "ratio": 0.8, }, # ================================================ { "constraint_class": EqualizedOdds, "eps": 0.100, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.100000, "error": 0.309333, "n_oracle_calls": 21, "n_oracle_calls_dummy_returned": 0, "n_predictors": 4, }, { "constraint_class": EqualizedOdds, "eps": 0.100, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.100000, "error": 0.25, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, { "constraint_class": EqualizedOdds, "eps": 0.050, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.378827, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.050, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.277016, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, { "constraint_class": EqualizedOdds, "eps": 0.020, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.421531, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.020, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.296612, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, { "constraint_class": EqualizedOdds, "eps": 0.010, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.010000, "error": 0.435765, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.010, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.010000, "error": 0.303145, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, { "constraint_class": EqualizedOdds, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.442883, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.306411, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, # ================================================ { "constraint_class": EqualizedOdds, "eps": 0.050, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.4125, "n_oracle_calls": 23, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.050, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.324067, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, { "constraint_class": EqualizedOdds, "eps": 0.020, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.435, "n_oracle_calls": 23, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, { "constraint_class": EqualizedOdds, "eps": 0.020, "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}), "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.339179, "n_oracle_calls": 22, "n_oracle_calls_dummy_returned": 12, "n_predictors": 4, "ratio": 0.8, }, # ================================================ { "constraint_class": ErrorRateParity, "eps": 0.1, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.100000, "error": 0.25625, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": ErrorRateParity, "eps": 0.1, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.092857, "error": 0.25, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, "ratio": 0.8, }, { "constraint_class": ErrorRateParity, "eps": 0.05, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.049999, "error": 0.3, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": ErrorRateParity, "eps": 0.05, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.050000, "error": 0.253472, "n_oracle_calls": 26, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, "ratio": 0.8, }, { "constraint_class": ErrorRateParity, "eps": 0.02, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.019999, "error": 0.326250, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": ErrorRateParity, "eps": 0.02, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.020000, "error": 0.268055, "n_oracle_calls": 26, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, "ratio": 0.8, }, { "constraint_class": ErrorRateParity, "eps": 0.01, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.010000, "error": 0.325555, "n_oracle_calls": 18, "n_oracle_calls_dummy_returned": 0, "n_predictors": 4, }, { "constraint_class": ErrorRateParity, "eps": 0.01, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.010000, "error": 0.272916, "n_oracle_calls": 26, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, "ratio": 0.8, }, { "constraint_class": ErrorRateParity, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.329444, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, }, { "constraint_class": ErrorRateParity, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.275347, "n_oracle_calls": 26, "n_oracle_calls_dummy_returned": 0, "n_predictors": 5, "ratio": 0.8, }, # ================================================ { "constraint_class": TruePositiveRateParity, "eps": 0.005, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.25, "n_oracle_calls": 16, "n_oracle_calls_dummy_returned": 0, "n_predictors": 2, }, { "constraint_class": FalsePositiveRateParity, "eps": 0.005, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.427133, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, ] smoke_test_data_flipped = [ { "constraint_class": TruePositiveRateParity, "eps": 0.005, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.427133, "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": FalsePositiveRateParity, "eps": 0.005, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.25, "n_oracle_calls": 16, "n_oracle_calls_dummy_returned": 0, "n_predictors": 2, }, { "constraint_class": EqualizedOdds, "eps": 0.005, "best_gap": 0.000000, "last_iter": 5, "best_iter": 5, "disp": 0.005000, "error": 0.442883, "n_oracle_calls": 19, "n_oracle_calls_dummy_returned": 0, "n_predictors": 6, }, ] smoke_test_data_regression = [ { "constraint_class": BoundedGroupLoss, "loss": SquareLoss(0, 1), "eps": 0.01, "best_gap": 0.003905, "last_iter": 6, "best_iter": 6, "upper_bound": 0.01, # infeasible "disp": [ 0.178333, 0.178333, 0.178333, 0.178333, 0.178333, 0.178333, 0.028045, 0.178333, 0.178333, 0.178333, 0.030853, 0.178333, 0.178333, 0.178333, 0.178333, 0.178333, ], "error": [ 0.1035, 0.1035, 0.1035, 0.1035, 0.1035, 0.1035, 0.024412, 0.1035, 0.1035, 0.1035, 0.025691, 0.1035, 0.1035, 0.1035, 0.1035, 0.1035, ], "weights": [ 0, 0, 0, 0, 0, 0, 0.956748, 0, 0, 0, 0.043251, 0, 0, 0, 0, 0, 0, ], "n_oracle_calls": 23, "n_oracle_calls_dummy_returned": 0, "n_predictors": 16, }, { "constraint_class": BoundedGroupLoss, "loss": SquareLoss(0, 1), "eps": 0.01, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "upper_bound": 0.05, # feasible "disp": [ 0.178333, 0.178333, 0.036690, 0.178333, 0.178333, 0.178333, 0.178333, ], "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035], "weights": [0, 0, 1, 0, 0, 0, 0], "n_oracle_calls": 32, "n_oracle_calls_dummy_returned": 0, "n_predictors": 7, }, { "constraint_class": BoundedGroupLoss, "loss": SquareLoss(0, 1), "eps": 0.01, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "max_iter": 20, "nu": 1e-6, "upper_bound": 0.05, # feasible "disp": [ 0.178333, 0.178333, 0.036690, 0.178333, 0.178333, 0.178333, 0.178333, ], "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035], "weights": [0, 0, 1, 0, 0, 0, 0], "n_oracle_calls": 29, "n_oracle_calls_dummy_returned": 0, "n_predictors": 7, }, { "constraint_class": BoundedGroupLoss, "loss": ZeroOneLoss(), "eps": 0.01, "best_gap": 0.007185, "last_iter": 5, "best_iter": 5, "upper_bound": 0.01, # infeasible "disp": [ 0.383333, 0.383333, 0.383333, 0.383333, 0.1479, 0.383333, 0.383333, 0.383333, 0.140256, 0.383333, 0.383333, 0.383333, 0.383333, 0.383333, ], "error": [ 0.255, 0.255, 0.255, 0.255, 0.140198, 0.255, 0.255, 0.255, 0.135674, 0.255, 0.255, 0.255, 0.255, 0.255, ], "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0], "n_oracle_calls": 20, "n_oracle_calls_dummy_returned": 0, "n_predictors": 14, }, { "constraint_class": BoundedGroupLoss, "loss": ZeroOneLoss(), "eps": 0.01, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "upper_bound": 0.2, # feasible "disp": [0.383333, 0.383333, 0.166918], "error": [0.255, 0.255, 0.116949], "weights": [0, 0, 1], "n_oracle_calls": 20, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": BoundedGroupLoss, "loss": ZeroOneLoss(), "eps": 0.01, "best_gap": 0.0, "last_iter": 5, "best_iter": 5, "max_iter": 20, "nu": 1e-6, "upper_bound": 0.2, # feasible "disp": [0.383333, 0.383333, 0.166918], "error": [0.255, 0.255, 0.116949], "weights": [0, 0, 1], "n_oracle_calls": 17, "n_oracle_calls_dummy_returned": 0, "n_predictors": 3, }, { "constraint_class": BoundedGroupLoss, "loss": AbsoluteLoss(0, 1), "eps": 0.01, "best_gap": 0.007185, "last_iter": 5, "best_iter": 5, "upper_bound": 0.01, # infeasible "disp": [ 0.383333, 0.383333, 0.383333, 0.383333, 0.1479, 0.383333, 0.383333, 0.383333, 0.140256, 0.383333, 0.383333, 0.383333, 0.383333, 0.383333, ], "error": [ 0.255, 0.255, 0.255, 0.255, 0.140198, 0.255, 0.255, 0.255, 0.135674, 0.255, 0.255, 0.255, 0.255, 0.255, ], "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0], "n_oracle_calls": 20, "n_oracle_calls_dummy_returned": 0, "n_predictors": 14, }, ] def run_smoke_test_binary_classification(self, data, flipped=False): learner = LeastSquaresBinaryClassifierLearner() if "ratio" in data.keys(): disparity_moment = data["constraint_class"]( ratio_bound_slack=data["eps"], ratio_bound=data["ratio"] ) else: disparity_moment = data["constraint_class"](difference_bound=data["eps"]) if "objective" in data.keys(): objective_moment = deepcopy(data["objective"]) else: objective_moment = ErrorRate() # Create Exponentiated Gradient object with a copy of the constraint. # The original disparity_moment object is used for validation, so the # assumption is that the moment logic is correct in these tests. expgrad = ExponentiatedGradient( learner, constraints=deepcopy(disparity_moment), objective=deepcopy(objective_moment), eps=data["eps"], ) X, y, A = _get_data(A_two_dim=False, flip_y=flipped) expgrad.fit(X, y, sensitive_features=A) self._assert_expgrad_state(expgrad, data) # select probability of predicting 1 def Q(X): return expgrad._pmf_predict(X)[:, 1] disparity_moment.load_data(X, y, sensitive_features=A) objective_moment.load_data(X, y, sensitive_features=A) disparity = disparity_moment.gamma(Q).max() error = objective_moment.gamma(Q)[0] assert disparity == pytest.approx(data["disp"], abs=_PRECISION) assert error == pytest.approx(data["error"], abs=_PRECISION) @pytest.mark.parametrize("testdata", smoke_test_data) def test_smoke(self, testdata): self.run_smoke_test_binary_classification(testdata) @pytest.mark.parametrize("testdata", smoke_test_data_flipped) def test_smoke_flipped(self, testdata): self.run_smoke_test_binary_classification(testdata, flipped=True) @pytest.mark.parametrize("data", smoke_test_data_regression) def test_smoke_regression(self, data): learner = LeastSquaresRegressor() disparity_moment = data["constraint_class"]( loss=data["loss"], upper_bound=data["upper_bound"] ) # Create Exponentiated Gradient object with a copy of the constraint. # The original disparity_moment object is used for validation, so the # assumption is that the moment logic is correct in these tests. expgrad = ExponentiatedGradient( learner, constraints=deepcopy(disparity_moment), eps=data["eps"], nu=data.get("nu"), max_iter=data.get("max_iter", 50), ) X, y, A = _get_data(A_two_dim=False, y_as_scores=True) expgrad.fit(X, y, sensitive_features=A) self._assert_expgrad_state(expgrad, data) # check all predictors disparity_moment.load_data(X, y, sensitive_features=A) for i in range(len(expgrad.predictors_)): def Q(X): return expgrad._pmf_predict(X)[i] default_objective = MeanLoss(data["loss"]) default_objective.load_data(X, y, sensitive_features=A) disparity = disparity_moment.gamma(Q).max() error = default_objective.gamma(Q)[0] assert disparity == pytest.approx(data["disp"][i], abs=_PRECISION) assert error == pytest.approx(data["error"][i], abs=_PRECISION) assert expgrad.weights_[i] == pytest.approx( data["weights"][i], abs=_PRECISION ) assert sum(expgrad.weights_) == pytest.approx(1, abs=_PRECISION) @pytest.mark.parametrize( "Constraints", [ TruePositiveRateParity, FalsePositiveRateParity, DemographicParity, EqualizedOdds, ErrorRateParity, ], ) def test_simple_fit_predict_binary_classification(self, Constraints): X, y, sensitive_features = _get_data() estimator = LeastSquaresBinaryClassifierLearner() expgrad = ExponentiatedGradient(estimator, Constraints()) expgrad.fit(X, y, sensitive_features=sensitive_features) expgrad.predict(X) @pytest.mark.parametrize( "constraints", [ BoundedGroupLoss(loss=SquareLoss(0, 1), upper_bound=0.01), BoundedGroupLoss(loss=AbsoluteLoss(0, 1), upper_bound=0.01), BoundedGroupLoss(loss=ZeroOneLoss(), upper_bound=0.01), ], ) def test_simple_fit_predict_regression(self, constraints): X, y, sensitive_features = _get_data(y_as_scores=True) estimator = LeastSquaresRegressor() expgrad = ExponentiatedGradient(estimator, constraints) expgrad.fit(X, y, sensitive_features=sensitive_features) expgrad.predict(X) def test_single_y_value(self): # Setup with data designed to result in "all single class" # at some point in the grid X_dict = {"c": [10, 50, 10]} X = pd.DataFrame(X_dict) y = [1, 1, 1] A = ["a", "b", "b"] estimator = LogisticRegression( solver="liblinear", fit_intercept=True, random_state=97 ) expgrad = ExponentiatedGradient(estimator, DemographicParity()) # Following line should not throw an exception expgrad.fit(X, y, sensitive_features=A) # Check the predictors for a ConstantPredictor test_X_dict = {"c": [1, 2, 3, 4, 5, 6]} test_X = pd.DataFrame(test_X_dict) assert expgrad.n_oracle_calls_dummy_returned_ > 0 assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_ for p in expgrad.predictors_: assert isinstance(p, DummyClassifier) assert np.array_equal(p.predict(test_X), [1, 1, 1, 1, 1, 1]) def _assert_expgrad_state(self, expgrad, data): n_predictors = len(expgrad.predictors_) assert expgrad.best_gap_ == pytest.approx(data["best_gap"], abs=_PRECISION) assert expgrad.best_gap_ < expgrad.nu assert expgrad.last_iter_ == data["last_iter"] assert expgrad.best_iter_ == data["best_iter"] assert expgrad.last_iter_ >= _MIN_ITER assert expgrad.n_oracle_calls_ == data["n_oracle_calls"] assert ( expgrad.n_oracle_calls_dummy_returned_ == data["n_oracle_calls_dummy_returned"] ) assert n_predictors == data["n_predictors"] assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_ @pytest.mark.parametrize("eps", [0.05, 0.02]) @pytest.mark.parametrize("ratio", [None, 0.8]) @pytest.mark.parametrize("pos_copies", [0, 1, 2]) def test_error_rate_consistency(self, eps, ratio, pos_copies): learner = LeastSquaresBinaryClassifierLearner() if ratio is None: constraints_moment = EqualizedOdds(difference_bound=eps) else: constraints_moment = EqualizedOdds(ratio_bound=ratio, ratio_bound_slack=eps) results = {} for method in ["costs", "sampling"]: X, y, A = _get_data() if method == "sampling": select = y == 1 X = pd.concat((X,) + (X.loc[select, :],) * pos_copies).values y = pd.concat((y,) + (y[select],) * pos_copies).values A = pd.concat((A,) + (A[select],) * pos_copies).values objective_moment = ErrorRate() else: objective_moment = ErrorRate(costs={"fn": 1.0 + pos_copies, "fp": 1.0}) expgrad = ExponentiatedGradient( learner, constraints=deepcopy(constraints_moment), objective=deepcopy(objective_moment), eps=eps, nu=1e-3, ) expgrad.fit(X, y, sensitive_features=A) # select probability of predicting 1 def Q(X): return expgrad._pmf_predict(X)[:, 1] constraints_eval = deepcopy(constraints_moment) constraints_eval.load_data(X, y, sensitive_features=A) disparity = constraints_eval.gamma(Q).max() objective_eval = deepcopy(objective_moment) objective_eval.load_data(X, y, sensitive_features=A) total_error = objective_eval.gamma(Q)[0] * len(y) results[method] = { "error": objective_eval.gamma(Q)[0], "total_error": total_error, "disp": disparity, "n_predictors": len(expgrad.predictors_), "best_gap": expgrad.best_gap_, "last_iter": expgrad.last_iter_, "best_iter": expgrad.best_iter_, "n_oracle_calls": expgrad.n_oracle_calls_, "n_oracle_calls_dummy_returned": expgrad.n_oracle_calls_dummy_returned_, } self._assert_expgrad_two_states(results["costs"], results["sampling"]) def _assert_expgrad_two_states(self, state1, state2): assert state1["total_error"] == pytest.approx( state2["total_error"], abs=_PRECISION ) assert state1["disp"] == pytest.approx(state2["disp"], abs=_PRECISION) assert state1["n_predictors"] == state2["n_predictors"] assert state1["best_gap"] == pytest.approx(state2["best_gap"], abs=_PRECISION) assert state1["last_iter"] == state2["last_iter"] assert state1["best_iter"] == state2["best_iter"] assert state1["n_oracle_calls"] == state2["n_oracle_calls"] assert ( state1["n_oracle_calls_dummy_returned"] == state2["n_oracle_calls_dummy_returned"] )
# We could load these predictors into the Fairness dashboard now. # However, the plot would be somewhat confusing due to their number. # In this case, we are going to remove the predictors which are dominated in the # error-disparity space by others from the sweep (note that the disparity will only be # calculated for the sensitive feature; other potentially sensitive features will # not be mitigated). # In general, one might not want to do this, since there may be other considerations # beyond the strict optimization of error and disparity (of the given sensitive feature). errors, disparities = [], [] for m in predictors: def classifier(X): return m.predict(X) error = ErrorRate() error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) disparity = DemographicParity() disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train) errors.append(error.gamma(classifier)[0]) disparities.append(disparity.gamma(classifier).max()) all_results = pd.DataFrame({ "predictor": predictors, "error": errors, "disparity": disparities })