def test_nb_tnr():
    """Test the TNR metric on the nonbinary toy dataset."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == 0.0
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0
    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    tnrs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert tnrs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}

    model = LR()
    predictions = model.run_test(train, test)
    # show any (prediction, label) pairs where the LR model misclassifies
    print([(k, z) for k, z in zip(predictions.hard.values, test.y.values) if k != z])
    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == approx(0.7, abs=0.1)
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == approx(0.85, abs=0.1)
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0
    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    tnrs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert tnrs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}
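# A minimal sketch (not part of the test suite) of what a one-vs-rest TNR for a
# chosen positive class computes. This is an assumption about the metric's
# definition, not EthicML's actual implementation: pool every label other than
# `pos_class` into the negative class, then report TN / (TN + FP).
def tnr_one_vs_rest(y_true: "np.ndarray", y_pred: "np.ndarray", pos_class: int) -> float:
    """TNR with all classes other than ``pos_class`` treated as negative."""
    negatives = y_true != pos_class
    true_negatives = np.count_nonzero(negatives & (y_pred != pos_class))
    return true_negatives / np.count_nonzero(negatives)


# e.g. tnr_one_vs_rest(np.array([1, 2, 3]), np.array([1, 3, 3]), pos_class=3) == 0.5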
def test_dp_flip_inverted_s(toy_train_test: TrainValPair) -> None:
    """Test the demographic parity flipping method with an inverted sensitive attribute."""
    train, test = toy_train_test
    train = train.replace(s=1 - train.s)
    test = test.replace(s=1 - test.s)
    train_test = em.concat_tt([train, test], ignore_index=True)

    in_model: InAlgorithm = LR()
    assert in_model is not None
    assert in_model.name == "Logistic Regression (C=1.0)"

    predictions: Prediction = in_model.run(train, train_test)
    # separate out predictions on the train set and predictions on the test set
    pred_train = predictions.hard.iloc[: train.y.shape[0]]
    pred_test = predictions.hard.iloc[train.y.shape[0] :].reset_index(drop=True)
    assert np.count_nonzero(pred_test.values == 1) == 44
    assert np.count_nonzero(pred_test.values == 0) == 36

    post_model: PostAlgorithm = DPFlip()
    assert post_model.name == "DemPar. Post Process"
    fair_preds = post_model.run(Prediction(pred_train), train, Prediction(pred_test), test)
    assert np.count_nonzero(fair_preds.hard.values == 1) == 57
    assert np.count_nonzero(fair_preds.hard.values == 0) == 23

    diffs = em.diff_per_sensitive_attribute(
        em.metric_per_sensitive_attribute(fair_preds, test, ProbPos())
    )
    for diff in diffs.values():
        assert pytest.approx(diff, abs=1e-2) == 0
def test_post_sep_fit_pred(
    toy_train_test: TrainValPair, post_model: PostAlgorithm, name: str, num_pos: int
) -> None:
    """Test the separate fit and predict steps of post-process algorithms."""
    train, test = toy_train_test
    train_test = em.concat_tt([train, test], ignore_index=True)

    in_model: InAlgorithm = LR()
    assert in_model is not None
    assert in_model.name == "Logistic Regression (C=1.0)"

    predictions: Prediction = in_model.run(train, train_test)
    # separate out predictions on the train set and predictions on the test set
    pred_train = predictions.hard.iloc[: train.y.shape[0]]
    pred_test = predictions.hard.iloc[train.y.shape[0] :].reset_index(drop=True)
    assert np.count_nonzero(pred_test.values == 1) == 44
    assert np.count_nonzero(pred_test.values == 0) == 36

    assert post_model.name == name
    fair_model = post_model.fit(Prediction(pred_train), train)
    fair_preds = fair_model.predict(Prediction(pred_test), test)
    assert np.count_nonzero(fair_preds.hard.values == 1) == num_pos
    assert np.count_nonzero(fair_preds.hard.values == 0) == len(fair_preds) - num_pos

    diffs = em.diff_per_sensitive_attribute(
        em.metric_per_sensitive_attribute(fair_preds, test, ProbPos())
    )
    if isinstance(post_model, DPFlip):
        for diff in diffs.values():
            assert pytest.approx(diff, abs=1e-2) == 0
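# Illustrative sketch (not the DPFlip implementation): one way a demographic
# parity post-processor can work is to flip just enough predictions in each
# group that both groups end up with the same positive rate, which is why the
# per-group ProbPos differences above come out near zero. The helper below is
# hypothetical and flips the first eligible entries; real methods typically
# flip the predictions the classifier was least confident about.
def dp_flip_sketch(preds: "np.ndarray", groups: "np.ndarray") -> "np.ndarray":
    """Equalise the positive rate of group 0 and group 1 by flipping labels."""
    flipped = preds.copy()
    target_rate = preds.mean()  # aim for the overall positive rate in both groups
    for group in (0, 1):
        mask = groups == group
        n_pos_wanted = round(target_rate * mask.sum())
        n_pos_now = int(flipped[mask].sum())
        if n_pos_now < n_pos_wanted:  # promote some negatives to positives
            idx = np.flatnonzero(mask & (flipped == 0))[: n_pos_wanted - n_pos_now]
            flipped[idx] = 1
        elif n_pos_now > n_pos_wanted:  # demote some positives to negatives
            idx = np.flatnonzero(mask & (flipped == 1))[: n_pos_now - n_pos_wanted]
            flipped[idx] = 0
    return flipped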
def test_issue_431():
    """Regression test for issue 431.

    The issue highlighted that an error would be raised because not all label
    values exist in every subset of the data.
    """
    x = pd.DataFrame(np.random.randn(100), columns=["x"])
    s = pd.DataFrame(np.random.randn(100), columns=["s"])
    y = pd.DataFrame(np.random.randint(0, 5, 100), columns=["y"])
    data = DataTuple(x=x, s=s, y=y)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = LR()
    predictions: Prediction = model.run(train, test)
    tpr_per_sens = metric_per_sensitive_attribute(
        predictions, test, TPR(pos_class=1, labels=list(range(y.nunique()[0])))
    )
    print(tpr_per_sens)
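# Why passing `labels` matters (a standalone sklearn illustration, not EthicML
# code): when a metric is computed on a subset in which some classes never
# appear, the label set must be fixed up front or the result changes shape.
from sklearn.metrics import confusion_matrix

subset_true = [0, 0, 1]  # class 2 is absent from this subset
subset_pred = [0, 1, 1]
# Inferred labels: only the classes seen in this subset -> a 2x2 matrix.
assert confusion_matrix(subset_true, subset_pred).shape == (2, 2)
# Explicit labels: always a 3x3 matrix, even for classes absent here.
assert confusion_matrix(subset_true, subset_pred, labels=[0, 1, 2]).shape == (3, 3)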
def test_plot_evals():
    """Test plotting of evaluation results."""
    results: Results = evaluate_models(
        datasets=[adult(), toy()],
        preprocess_models=[Upsampler(strategy="preferential")],
        inprocess_models=[LR(), SVM(kernel="linear"), Kamiran()],
        metrics=[Accuracy(), CV()],
        per_sens_metrics=[TPR(), ProbPos()],
        repeats=3,
        test_mode=True,
        delete_prev=True,
    )
    assert results["seed"][0] == results["seed"][1] == results["seed"][2] == 0
    assert results["seed"][3] == results["seed"][4] == results["seed"][5] == 2410
    assert results["seed"][6] == results["seed"][7] == results["seed"][8] == 4820

    figs_and_plots: List[Tuple[plt.Figure, plt.Axes]]  # type: ignore[name-defined]

    # plot with metrics
    figs_and_plots = plot_results(results, Accuracy(), ProbPos())
    # num(datasets) * num(preprocess) * num(accuracy combinations) * num(prob_pos combinations)
    assert len(figs_and_plots) == 2 * 2 * 1 * 2 + 4
    # TODO: the "+4" above should be FIXED; it comes from a column name
    # containing a hyphen being matched as a DIFF metric.

    # plot with column names
    figs_and_plots = plot_results(results, "Accuracy", "prob_pos_sensitive-attr_0")
    assert len(figs_and_plots) == 1 * 2 * 1 * 1

    with pytest.raises(
        ValueError, match='No matching columns found for Metric "NMI preds and s".'
    ):
        plot_results(results, Accuracy(), NMI())

    with pytest.raises(ValueError, match='No column named "unknown metric".'):
        plot_results(results, "unknown metric", Accuracy())
    MetricTest(model=SVM(), metric=BalancedAccuracy(), name="Balanced Accuracy", expected=0.923),
    MetricTest(model=SVM(), metric=NMI(base="s"), name="NMI preds and s", expected=0.102),
    MetricTest(model=SVM(), metric=CV(), name="CV", expected=0.630),
    MetricTest(model=SVM(), metric=AverageOddsDiff(), name="AverageOddsDiff", expected=0.105),
    MetricTest(model=SVM(), metric=Theil(), name="Theil_Index", expected=0.033),
    MetricTest(model=LR(), metric=Theil(), name="Theil_Index", expected=0.029),
    MetricTest(model=Kamiran(), metric=Theil(), name="Theil_Index", expected=0.030),
    MetricTest(model=SVM(), metric=Hsic(), name="HSIC", expected=0.020),
    MetricTest(model=SVM(), metric=AS(), name="anti_spurious", expected=0.852),
    MetricTest(model=SVM(), metric=NMI(base="y"), name="NMI preds and y", expected=0.638),
]


@pytest.mark.parametrize("model,metric,name,expected", METRIC_TESTS,
"sensitive-attr_1": 0.024, "sensitive-attr_0": 0.045 }, ), PerSensMetricTest( dataset=nonbinary_toy(), classifier=SVM(kernel="linear"), metric=Accuracy(), expected_values={ "sens_1": 1.0, "sens_0": 1.0 }, ), PerSensMetricTest( dataset=nonbinary_toy(), classifier=LR(), metric=Accuracy(), expected_values={ "sens_1": 0.667, "sens_0": 0.727 }, ), ] @pytest.mark.parametrize("dataset,classifier,metric,expected_values", PER_SENS, ids=get_id) def test_metric_per_sens_attr(dataset: Dataset, classifier: InAlgorithm, metric: Metric, expected_values: Dict[str, float]):
        model=Agarwal(dir="/tmp", classifier="SVM", fairness="EqOd", kernel="linear"),
        num_pos=42,
    ),
    InprocessTest(name="Blind", model=Blind(), num_pos=48),
    InprocessTest(name="DemPar. Oracle", model=DPOracle(), num_pos=53),
    InprocessTest(name="Dist Robust Optim", model=DRO(eta=0.5, dir="/tmp"), num_pos=45),
    InprocessTest(name="Dist Robust Optim", model=DRO(eta=5.0, dir="/tmp"), num_pos=59),
    InprocessTest(name="Kamiran & Calders LR", model=Kamiran(), num_pos=44),
    InprocessTest(name="Logistic Regression (C=1.0)", model=LR(), num_pos=44),
    InprocessTest(name="Logistic Regression Prob (C=1.0)", model=LRProb(), num_pos=44),
    InprocessTest(name="LRCV", model=LRCV(), num_pos=40),
    InprocessTest(name="Majority", model=Majority(), num_pos=80),
    InprocessTest(name="MLP", model=MLP(), num_pos=43),
    InprocessTest(name="Oracle", model=Oracle(), num_pos=41),
    InprocessTest(name="SVM", model=SVM(), num_pos=45),
    InprocessTest(name="SVM (linear)", model=SVM(kernel="linear"), num_pos=41),
]


@pytest.mark.parametrize("name,model,num_pos", INPROCESS_TESTS)
def test_inprocess(toy_train_test: TrainTestPair, name: str, model: InAlgorithm, num_pos: int):