def test_argument_types(self, transformX, transformY, transformA, A_two_dim):
    """Expanded-out version of one of the smoke tests, exercising the
    accepted argument types for X, y, and sensitive_features."""
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)

    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(),
        eps=0.1)
    expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A))

    best_classifier = expgrad._best_classifier
    n_classifiers = len(expgrad._classifiers)

    # Re-evaluate disparity and error on the untransformed data.
    disparity_moment = DemographicParity()
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)
    disparity = disparity_moment.gamma(best_classifier).max()
    error = error_metric.gamma(best_classifier)[0]

    assert expgrad._best_gap == pytest.approx(0.0000, abs=_PRECISION)
    assert expgrad._last_t == 5
    assert expgrad._best_t == 5
    assert disparity == pytest.approx(0.1, abs=_PRECISION)
    assert error == pytest.approx(0.25, abs=_PRECISION)
    assert expgrad._n_oracle_calls == 32
    assert n_classifiers == 3
def test_threshold_optimization_equalized_odds_e2e(data_X_y_sf):
    """End-to-end check that ThresholdOptimizer with the equalized-odds
    constraint produces the expected label-conditional prediction rates
    for every sensitive-feature group."""
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex),
        constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(data_X_y_sf.X, data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)
    expected_ps = _expected_ps_equalized_odds[data_X_y_sf.example_name]
    merged_features = _map_into_single_column(
        data_X_y_sf.sensitive_features)

    # assert equalized odds: within each group, the average predicted
    # probabilities conditioned on the true label match the expected values.
    for group in data_X_y_sf.feature_names:
        in_group = (merged_features == group)
        mean_probs_pos = np.average(
            predictions[in_group * (labels_ex == 1)], axis=0)
        mean_probs_neg = np.average(
            predictions[in_group * (labels_ex == 0)], axis=0)
        assert np.isclose(mean_probs_pos[0], expected_ps[_POS_P0])
        assert np.isclose(mean_probs_pos[1], expected_ps[_POS_P1])
        assert np.isclose(mean_probs_neg[0], expected_ps[_NEG_P0])
        assert np.isclose(mean_probs_neg[1], expected_ps[_NEG_P1])
def test_argument_types(self, transformX, transformY, transformA, A_two_dim):
    """Expanded-out version of one of the smoke tests, checking the
    fitted result via the _expgrad_result dictionary representation."""
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)

    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(),
        eps=0.1)
    expgrad.fit(transformX(X), transformY(y), sensitive_features=transformA(A))

    res = expgrad._expgrad_result._as_dict()
    best_classifier = res["best_classifier"]
    res["n_classifiers"] = len(res["classifiers"])

    # Re-evaluate disparity and error on the untransformed data and
    # stash them in the result dict alongside the solver diagnostics.
    disparity_moment = DemographicParity()
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)
    res["disp"] = disparity_moment.gamma(best_classifier).max()
    res["error"] = error_metric.gamma(best_classifier)[0]

    assert res["best_gap"] == pytest.approx(0.0000, abs=_PRECISION)
    assert res["last_t"] == 5
    assert res["best_t"] == 5
    assert res["disp"] == pytest.approx(0.1, abs=_PRECISION)
    assert res["error"] == pytest.approx(0.25, abs=_PRECISION)
    assert res["n_oracle_calls"] == 32
    assert res["n_classifiers"] == 3
def _get_predictions_by_sensitive_feature(adjusted_predictor, sensitive_features,
                                          scores, labels):
    """Group (label, prediction) pairs by sensitive-feature value.

    :param adjusted_predictor: callable taking a list of sensitive-feature
        values and a list of scores, returning the adjusted prediction(s)
    :param sensitive_features: sensitive-feature data; mapped into a single
        column via _map_into_single_column before grouping
    :param scores: per-sample scores, aligned with sensitive_features
    :param labels: per-sample ground-truth labels, aligned with scores
    :return: defaultdict mapping each merged sensitive-feature value to the
        list of LabelAndPrediction tuples for the samples in that group
    """
    labels_and_predictions = defaultdict(list)
    sensitive_features_mapped = _map_into_single_column(sensitive_features)
    # Iterate the three aligned sequences in lockstep instead of indexing
    # by position (idiomatic zip rather than range(len(...))).
    for feature, score, label in zip(sensitive_features_mapped, scores, labels):
        labels_and_predictions[feature].append(
            LabelAndPrediction(
                label,
                adjusted_predictor([feature], [score])))
    return labels_and_predictions
def test_argument_types_ratio_bound(self, transformX, transformY, transformA,
                                    A_two_dim):
    """Expanded-out smoke test for ExponentiatedGradient with a ratio-bound
    DemographicParity constraint, exercising the accepted argument types.

    Fix vs. the original: ``disparity_moment.gamma(Q)`` was evaluated three
    times with the same argument; it is now computed once and reused. The
    ``ErrorRate`` instance no longer shares the ``error`` name with the
    scalar it produces.
    """
    X, y, A = _get_data(A_two_dim)
    merged_A = _map_into_single_column(A)
    transformed_X = transformX(X)
    transformed_y = transformY(y)
    transformed_A = transformA(A)
    eps = 0.1
    ratio = 1.0

    expgrad = ExponentiatedGradient(
        LeastSquaresBinaryClassifierLearner(),
        constraints=DemographicParity(ratio_bound_slack=eps, ratio_bound=ratio),
        eps=eps,
    )
    expgrad.fit(transformed_X, transformed_y, sensitive_features=transformed_A)

    def Q(X):
        # Probability of the positive class under the fitted ensemble.
        return expgrad._pmf_predict(X)[:, 1]

    n_predictors = len(expgrad.predictors_)

    # Re-evaluate disparity and error on the untransformed data.
    disparity_moment = DemographicParity(ratio_bound_slack=eps,
                                         ratio_bound=ratio)
    disparity_moment.load_data(X, y, sensitive_features=merged_A)
    error_metric = ErrorRate()
    error_metric.load_data(X, y, sensitive_features=merged_A)

    # gamma(Q) predicts over the whole dataset; compute once and reuse.
    disp = disparity_moment.gamma(Q)
    disparity = disp.max()
    disp_eps = disp - disparity_moment.bound()
    error = error_metric.gamma(Q)[0]

    assert expgrad.best_gap_ == pytest.approx(0.0000, abs=_PRECISION)
    assert expgrad.last_iter_ == 5
    assert expgrad.best_iter_ == 5
    assert disparity == pytest.approx(0.1, abs=_PRECISION)
    assert np.all(np.isclose(disp - eps, disp_eps))
    assert error == pytest.approx(0.25, abs=_PRECISION)
    assert expgrad.n_oracle_calls_ == 32
    assert n_predictors == 3
def test_threshold_optimization_demographic_parity_e2e(data_X_y_sf):
    """End-to-end check that ThresholdOptimizer with the demographic-parity
    constraint produces the expected average prediction probabilities for
    every sensitive-feature group.

    Fix vs. the original: ``_map_into_single_column`` was re-invoked inside
    the loop for every feature name even though its result is loop-invariant;
    it is now computed once up front.
    """
    adjusted_predictor = ThresholdOptimizer(
        estimator=ExamplePredictor(scores_ex),
        constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(data_X_y_sf.X, data_X_y_sf.y,
                           sensitive_features=data_X_y_sf.sensitive_features)

    predictions = adjusted_predictor._pmf_predict(
        data_X_y_sf.X, sensitive_features=data_X_y_sf.sensitive_features)
    expected_ps = _expected_ps_demographic_parity[data_X_y_sf.example_name]
    mapped_sensitive_features = _map_into_single_column(
        data_X_y_sf.sensitive_features)

    # assert demographic parity: average predicted probabilities match the
    # expected values within every sensitive-feature group.
    for sensitive_feature_name in data_X_y_sf.feature_names:
        average_probs = np.average(
            predictions[mapped_sensitive_features == sensitive_feature_name],
            axis=0)
        assert np.isclose(average_probs[0], expected_ps[_P0])
        assert np.isclose(average_probs[1], expected_ps[_P1])