def _selected_label_compare(moment, metric, selected_label):
    # Similar to _simple_compare, but we need to worry about the y label
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7132752)
    X_dummy = pd.get_dummies(X)

    estimator = LogisticRegression()
    estimator.fit(X_dummy, y)
    y_pred = estimator.predict(X_dummy)

    # Measure the constraint violation directly through the moment object;
    # gamma measures the constraint violation relative to the overall value
    target = moment()
    target.load_data(np.asarray(X_dummy), np.asarray(y),
                     sensitive_features=X['sens'],
                     control_features=X['ctrl'])
    results = target.gamma(estimator.predict)

    # Compute the constraint violation using the metrics
    mf_pred = MetricFrame(metrics=metric,
                          y_true=y,
                          y_pred=y_pred,
                          sensitive_features=X['sens'],
                          control_features=X['ctrl'])
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare (with a very small amount of wriggle room).
    # Format defined within utility_parity._combine_event_and_control
    label_format = "control={0},label={1}"
    for ib in ibs:
        label = label_format.format(ib, selected_label)
        for sf in sfs:
            expected = diffs[(ib, sf)]
            assert expected == pytest.approx(
                results[('+', label, sf)], rel=1e-10, abs=1e-12)
            assert expected == pytest.approx(
                -results[('-', label, sf)], rel=1e-10, abs=1e-12)
def _simple_compare(moment, metric):
    # Check that the moment's gamma matches the MetricFrame differences
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7632752)
    X_dummy = pd.get_dummies(X)

    estimator = LogisticRegression()
    estimator.fit(X_dummy, y)
    y_pred = estimator.predict(X_dummy)

    # gamma measures the constraint violation relative to the overall value
    target = moment()
    target.load_data(np.asarray(X_dummy), np.asarray(y),
                     sensitive_features=X['sens'],
                     control_features=X['ctrl'])
    results = target.gamma(estimator.predict)

    # Compute the constraint violation using the metrics
    mf_pred = MetricFrame(metrics=metric,
                          y_true=y,
                          y_pred=y_pred,
                          sensitive_features=X['sens'],
                          control_features=X['ctrl'])
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare (with a very small amount of wriggle room)
    event_format = "control={0},all"
    for ib in ibs:
        event = event_format.format(ib)
        for sf in sfs:
            expected = diffs[(ib, sf)]
            assert expected == pytest.approx(
                results[('+', event, sf)], rel=1e-10, abs=1e-12)
            assert expected == pytest.approx(
                -results[('-', event, sf)], rel=1e-10, abs=1e-12)
def test_equalized_odds():
    # Have to do this one longhand, since it combines tpr and fpr
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=632753)
    X_dummy = pd.get_dummies(X)
    metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}

    def _evaluate(predictions):
        # Build the grouped tpr/fpr metrics for a set of predictions
        return MetricFrame(
            metrics=metrics,
            y_true=y,
            y_pred=predictions,
            sensitive_features=X["sens"],
            control_features=X["ctrl"],
        )

    # Baseline: plain logistic regression, no mitigation
    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    mf_unmitigated = _evaluate(unmitigated.predict(X_dummy))

    # Mitigation without control features
    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X["sens"])
    mf_basic = _evaluate(expgrad_basic.predict(X_dummy, random_state=9235))

    # Mitigation with control features
    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_control.fit(X_dummy, y,
                        sensitive_features=X["sens"],
                        control_features=X["ctrl"])
    mf_control = _evaluate(expgrad_control.predict(X_dummy, random_state=8152))

    # The control-feature run should be no worse than either alternative
    compare_unmitigated = mf_control.difference(
        method="to_overall") <= mf_unmitigated.difference(method="to_overall")
    print(compare_unmitigated)

    compare_basic = mf_control.difference(
        method="to_overall") <= mf_basic.difference(method="to_overall")
    print(compare_basic)

    assert compare_basic.values.reshape(6).all()
    assert compare_unmitigated.values.reshape(6).all()
def run_comparisons(moment, metric_fn):
    """Check that mitigating with control features does at least as well as without.

    Fits an unmitigated LogisticRegression, an ExponentiatedGradient without
    control features, and an ExponentiatedGradient with control features, then
    asserts that the control-feature run's metric differences (to overall) are
    no larger than either of the other two.

    NOTE: MetricFrame arguments are now passed by keyword (metrics=, y_true=,
    y_pred=) for consistency with the other tests in this file; positional use
    of these parameters is deprecated in MetricFrame.
    """
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=163)
    X_dummy = pd.get_dummies(X)

    # Sanity print: metric computed on the true labels themselves
    mf_input = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])
    print("Metric for input:\n", mf_input.by_group)
    print("Input Metric differences:\n",
          mf_input.difference(method='to_overall'), "\n")

    # Baseline: no mitigation
    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y_pred,
                                 sensitive_features=X['sens'],
                                 control_features=X['ctrl'])
    print("Unmitigated metric:\n", mf_unmitigated.by_group)
    print("Unmitigated metric differences:\n",
          mf_unmitigated.difference(method='to_overall'), "\n")

    # Mitigation without control features
    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X['sens'])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=8235)
    mf_basic = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y_pred_basic,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])
    print("Basic expgrad metric:\n", mf_basic.by_group)
    print("Basic expgrad metric differences:\n",
          mf_basic.difference(method='to_overall'), "\n")

    # Mitigation with control features
    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_control.fit(X_dummy, y,
                        sensitive_features=X['sens'],
                        control_features=X['ctrl'])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=852)
    mf_control = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y_pred_control,
                             sensitive_features=X['sens'],
                             control_features=X['ctrl'])
    print("expgrad_control metric:\n", mf_control.by_group)
    print("expgrad_control metric differences:\n",
          mf_control.difference(method='to_overall'))

    # Control-feature mitigation should not be worse than either alternative
    assert (mf_control.difference(method='to_overall') <=
            mf_unmitigated.difference(method='to_overall')).all()
    assert (mf_control.difference(method='to_overall') <=
            mf_basic.difference(method='to_overall')).all()