def test_label_plot():
    """Test label plot."""
    data: DataTuple = load_data(adult())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, _ = train_test
    save_label_plot(train, "./plots/labels.png")
def test_metric_per_sens_attr(
    dataset: Dataset,
    classifier: InAlgorithm,
    metric: Metric,
    expected_values: Dict[str, float],
):
    """Test a metric per sensitive attribute."""
    data: DataTuple = load_data(dataset)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = classifier
    predictions: Prediction = model.run(train, test)

    acc_per_sens = metric_per_sensitive_attribute(predictions, test, metric)
    try:
        for key, value in acc_per_sens.items():
            assert value == approx(expected_values[key], abs=0.001)
    except AssertionError:
        # Print the actual values to make failures easier to diagnose,
        # then re-raise with the original traceback.
        print({key: round(value, 3) for key, value in acc_per_sens.items()})
        raise

    acc_per_sens = metric_per_sensitive_attribute(predictions, test, metric, use_sens_name=False)
    try:
        for key, value in expected_values.items():
            # Check that the sensitive attribute name is now just 'S'.
            assert acc_per_sens[f"S_{''.join(key.split('_')[1:])}"] == approx(value, abs=0.001)
    except AssertionError:
        print({key: round(value, 3) for key, value in acc_per_sens.items()})
        raise
def test_binning():
    """Test binning of continuous features."""
    data: DataTuple = em.load_data(em.adult())
    binned: DataTuple = em.bin_cont_feats(data)
    # Binning should add 25 new (discretized) columns and drop the raw "age" column.
    assert len([col for col in binned.x.columns if col not in data.x.columns]) == 25
    assert "age" not in binned.x.columns
def test_nb_acc():
    """Test accuracy on the non-binary toy dataset."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    acc_score = Accuracy().score(predictions, test)
    assert acc_score == 0.1
def test_sequential_split():
    """Test sequential split."""
    data: DataTuple = em.load_data(em.toy())
    train: DataTuple
    test: DataTuple
    train, test, _ = em.SequentialSplit(train_percentage=0.8)(data)

    # A sequential split preserves row order: the first row of the data starts
    # the train set and the last row of the data ends the test set.
    assert all(data.x.iloc[0] == train.x.iloc[0])
    assert all(data.x.iloc[-1] == test.x.iloc[-1])

    assert len(train) == 320
    assert len(test) == 80
def test_nb_tnr():
    """Test TNR on the non-binary toy dataset."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)

    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == 0.0
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0

    # Class 0 does not occur in the labels, so scoring it must raise.
    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    tnrs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert tnrs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}

    model = LR()
    predictions = model.run_test(train, test)
    # Print the mispredictions to aid debugging on failure.
    print([(k, z) for k, z in zip(predictions.hard.values, test.y.values) if k != z])

    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == approx(0.7, abs=0.1)
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == approx(0.85, abs=0.1)
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0

    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    tnrs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert tnrs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}
def test_train_test_split():
    """Test train test split."""
    data: DataTuple = em.load_data(em.toy())
    train_test: Tuple[DataTuple, DataTuple] = em.train_test_split(data)
    train, test = train_test
    assert train is not None
    assert test is not None
    assert train.x.shape[0] > test.x.shape[0]
    assert train.x["a1"].values[0] == 0.2365572108691669
    assert train.x["a2"].values[0] == approx(0.008603090240657633, abs=1e-6)
    assert train.x.shape[0] == train.s.shape[0]
    assert train.s.shape[0] == train.y.shape[0]

    num_samples = len(data)

    # The default split is 80% train / 20% test.
    len_default = math.floor((num_samples / 100) * 80)
    assert train.s.shape[0] == len_default
    assert test.s.shape[0] == num_samples - len_default

    # The same length bookkeeping should hold for other train percentages.
    for train_percentage in (0.9, 0.7, 0.5, 0.3, 0.1):
        len_expected = math.floor((num_samples / 100) * round(train_percentage * 100))
        train, test = em.train_test_split(data, train_percentage=train_percentage)
        assert train.s.shape[0] == len_expected
        assert test.s.shape[0] == num_samples - len_expected

    # A train percentage of zero yields an empty train set.
    train, test = em.train_test_split(data, train_percentage=0.0)
    assert train.s.shape[0] == 0
    assert train.name == "Toy - Train"
    assert test.s.shape[0] == num_samples
    assert test.name == "Toy - Test"
def test_random_seed():
    """Test random seed."""
    data: DataTuple = em.load_data(em.toy())

    def assert_split(train: DataTuple, test: DataTuple, a1: float, a2: float) -> None:
        """Check the split invariants and the expected first-row feature values."""
        assert train is not None
        assert test is not None
        assert train.x.shape[0] > test.x.shape[0]
        # Compare with a small tolerance; exact float equality is brittle.
        assert train.x["a1"].values[0] == approx(a1, abs=1e-6)
        assert train.x["a2"].values[0] == approx(a2, abs=1e-6)
        assert train.x.shape[0] == train.s.shape[0]
        assert train.s.shape[0] == train.y.shape[0]

    # Default seed.
    train_0, test_0 = em.train_test_split(data)
    assert_split(train_0, test_0, 0.2365572108691669, 0.008603090240657633)

    # Each explicit seed should reproducibly yield its own split.
    expected_first_rows = {
        1: (1.3736566330173798, 0.21742296144957174),
        2: (1.2255705960148289, -1.208089015454192),
        3: (0.21165963748018515, -2.425137404779957),
    }
    for seed, (a1, a2) in expected_first_rows.items():
        train, test = em.train_test_split(data, random_seed=seed)
        assert_split(train, test, a1, a2)
def test_corels(toy_train_test: TrainTestPair) -> None:
    """Test the CORELS model."""
    model: InAlgorithm = Corels()
    assert model is not None
    assert model.name == "CORELS"

    train_toy, test_toy = toy_train_test
    # CORELS cannot handle the toy data, so running it should raise.
    with pytest.raises(RuntimeError):
        model.run(train_toy, test_toy)

    data: DataTuple = load_data(compas())
    train, test = train_test_split(data)

    predictions: Prediction = model.run(train, test)
    expected_num_pos = 428
    assert predictions.hard.values[predictions.hard.values == 1].shape[0] == expected_num_pos
    num_neg = predictions.hard.values[predictions.hard.values == 0].shape[0]
    assert num_neg == len(predictions) - expected_num_pos
def test_dependence_measures_adult() -> None:
    """Test dependence measures."""
    data = load_data(em.adult(split="Sex"))
    train_percentage = 0.75
    unbalanced, balanced, _ = BalancedTestSplit(train_percentage=train_percentage)(data)

    fair_prediction = Prediction(hard=balanced.y["salary_>50K"])  # predict the balanced label
    unfair_prediction = Prediction(hard=unbalanced.y["salary_>50K"])  # predict the normal label
    extremely_unfair_prediction = Prediction(hard=unbalanced.s["sex_Male"])  # predict s

    # measure the dependence between s and the prediction in several ways
    assert _compute_di(fair_prediction, balanced) == approx(1, abs=1e-15)
    assert _compute_di(unfair_prediction, unbalanced) == approx(0.364, abs=3e-3)
    assert _compute_di(extremely_unfair_prediction, unbalanced) == approx(0, abs=3e-3)
    assert _compute_inv_cv(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert _compute_inv_cv(unfair_prediction, unbalanced) == approx(0.199, abs=3e-3)
    assert _compute_inv_cv(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-3)

    nmi = NMI()
    assert nmi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert nmi.score(unfair_prediction, unbalanced) == approx(0.0432, abs=3e-4)
    assert nmi.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)

    yanovich = Yanovich()
    assert yanovich.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert yanovich.score(unfair_prediction, unbalanced) == approx(0.0396, abs=3e-4)
    assert yanovich.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)

    renyi = RenyiCorrelation()
    assert renyi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert renyi.score(unfair_prediction, unbalanced) == approx(0.216, abs=3e-4)
    assert renyi.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)
def test_tpr_ratio_non_binary_race():
    """Test tpr ratio non binary race."""
    data: DataTuple = load_data(em.adult("Race"))
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)

    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    test_dict = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }
    for key, val in tprs.items():
        assert val == test_dict[key]

    tpr_diff = em.ratio_per_sensitive_attribute(tprs)
    test_dict = {
        "race_0/race_1": approx(0.32, abs=0.1),
        "race_0/race_2": approx(0.37, abs=0.1),
        "race_0/race_3": approx(0.33, abs=0.1),
        "race_0/race_4": approx(0.44, abs=0.1),
        "race_1/race_2": approx(0.88, abs=0.1),
        "race_1/race_3": approx(0.97, abs=0.1),
        "race_1/race_4": approx(0.72, abs=0.1),
        "race_2/race_3": approx(0.91, abs=0.1),
        "race_2/race_4": approx(0.74, abs=0.1),
        "race_3/race_4": approx(0.74, abs=0.1),
    }
    for key, val in tpr_diff.items():
        assert val == test_dict[key]
def test_prop_train_test_split():
    """Test proportional train/test split."""
    data: DataTuple = em.load_data(em.toy())
    train: DataTuple
    test: DataTuple
    train, test, _ = ProportionalSplit(train_percentage=0.8)(data, split_id=0)
    assert train is not None
    assert test is not None
    assert train.x.shape[0] > test.x.shape[0]
    assert train.x["a1"].values[0] == -0.7135614558562237
    assert train.x["a2"].values[0] == 1.1211390799513148
    assert train.x.shape[0] == train.s.shape[0]
    assert train.s.shape[0] == train.y.shape[0]

    num_samples = len(data)
    len_default = math.floor((num_samples / 100) * 80)
    assert train.s.shape[0] == len_default
    assert test.s.shape[0] == num_samples - len_default

    # Assert that the proportion of s=0 to s=1 has remained the same (and also test for y=0/y=1).
    assert np.count_nonzero(train.s.to_numpy() == 0) == round(
        0.8 * np.count_nonzero(data.s.to_numpy() == 0)
    )
    assert np.count_nonzero(train.y.to_numpy() == 0) == round(
        0.8 * np.count_nonzero(data.y.to_numpy() == 0)
    )

    # For these percentages, also check that the s and y proportions are preserved
    # up to rounding.
    for train_percentage in (0.9, 0.7):
        len_expected = math.floor((num_samples / 100) * round(train_percentage * 100))
        train, test, _ = ProportionalSplit(train_percentage=train_percentage)(data, split_id=0)
        assert train.s.shape[0] == len_expected
        assert test.s.shape[0] == num_samples - len_expected
        assert np.count_nonzero(train.s.to_numpy() == 0) == approx(
            round(train_percentage * np.count_nonzero(data.s.to_numpy() == 0)), abs=1
        )
        assert np.count_nonzero(train.y.to_numpy() == 0) == approx(
            round(train_percentage * np.count_nonzero(data.y.to_numpy() == 0)), abs=1
        )

    # For the remaining percentages, only check the split lengths.
    for train_percentage in (0.5, 0.3, 0.1):
        len_expected = math.floor((num_samples / 100) * round(train_percentage * 100))
        train, test, _ = ProportionalSplit(train_percentage=train_percentage)(data, split_id=0)
        assert train.s.shape[0] == len_expected
        assert test.s.shape[0] == num_samples - len_expected

    # A train percentage of zero yields an empty train set.
    train, test, _ = ProportionalSplit(train_percentage=0.0)(data, split_id=0)
    assert train.s.shape[0] == 0
    assert train.name == "Toy - Train"
    assert test.s.shape[0] == num_samples
    assert test.name == "Toy - Test"