def test_simple_inequality(self):
    """Results computed over different groupings must not compare equal."""
    result_a = group_accuracy_score(Y_true, Y_pred, groups)
    result_b = group_accuracy_score(Y_true, Y_pred, gr_inv)
    # Check equality and inequality in both operand orders
    for lhs, rhs in ((result_a, result_b), (result_b, result_a)):
        assert not (lhs == rhs)
        assert lhs != rhs
def test_specific_metrics(self):
    """Spot-check two precomputed dashboard metrics against values computed
    directly with the group metric functions."""
    y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
    y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
    s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

    # Reference values computed directly
    exp_acc = group_accuracy_score(y_t, y_p, s_f)
    exp_roc = group_roc_auc_score(y_t, y_p, s_f)

    predictions = {"some model": y_p}
    sensitive_feature = {"my sf": s_f}

    actual = _create_group_metric_set(y_t,
                                      predictions,
                                      sensitive_feature,
                                      'binary_classification')

    # Do some sanity checks
    validate_dashboard_dictionary(actual)
    assert actual['trueY'] == y_t
    assert actual['predictedY'][0] == y_p
    assert actual['precomputedFeatureBins'][0]['binVector'] == s_f
    # Binary classification produces ten precomputed metrics
    assert len(actual['precomputedMetrics'][0][0]) == 10

    # Cross check the two metrics we computed
    # Comparisons simplified because s_f was already {0,1}
    actual_acc = actual['precomputedMetrics'][0][0]['accuracy_score']
    assert actual_acc['global'] == exp_acc.overall
    assert actual_acc['bins'] == list(exp_acc.by_group.values())

    # Note: dashboard key 'balanced_accuracy_score' is backed by roc_auc here
    actual_roc = actual['precomputedMetrics'][0][0][
        'balanced_accuracy_score']
    assert actual_roc['global'] == exp_roc.overall
    assert actual_roc['bins'] == list(exp_roc.by_group.values())
def test_two_models():
    """Two models, a single sensitive feature vector, and no names supplied."""
    # Two models, single sensitive feature vector, no names
    Y_true = [0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1]
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1],
              [1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0]]
    a, b = 'a', 'b'
    sensitive_features = [[b, a, a, b, b, a, a, b, b, a, b, a, b, a, b]]
    # The dashboard remaps labels onto zero-based integer bins
    sf_int = [int(x == 'b') for x in sensitive_features[0]]

    result = create_group_metric_set('binary_classification',
                                     Y_true, Y_pred, sensitive_features)

    # Schema header
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    # One sensitive feature, remapped to integer bins with original labels
    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 1
    bin_dict = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict, dict)
    assert np.array_equal(bin_dict['binVector'], sf_int)
    assert np.array_equal(bin_dict['binLabels'], ['a', 'b'])

    # Predictions stored unchanged, one list per model
    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 2
    for y_p, expected in zip(result['predictedY'], Y_pred):
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, expected)

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 1
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 2

    # Cross-check two of the ten precomputed metrics for each model;
    # 'balanced_accuracy_score' is backed by the roc_auc group metric
    checks = [('accuracy_score', group_accuracy_score),
              ('balanced_accuracy_score', group_roc_auc_score)]
    for model_metrics, y_p in zip(metrics_group_0, Y_pred):
        assert isinstance(model_metrics, dict)
        assert len(model_metrics) == 10
        for key, metric_fn in checks:
            entry = model_metrics[key]
            assert isinstance(entry, dict)
            gmr = metric_fn(Y_true, y_p, sensitive_features[0])
            assert gmr.overall == pytest.approx(entry['global'])
            assert isinstance(entry['bins'], list)
            assert len(entry['bins']) == 2
            assert gmr.by_group['a'] == pytest.approx(entry['bins'][0])
            assert gmr.by_group['b'] == pytest.approx(entry['bins'][1])
def test_mixed_types(self):
    """Results from different metric functions must never compare equal."""
    acc_result = group_accuracy_score(Y_true, Y_pred, groups)
    cm_result = group_confusion_matrix(Y_true, Y_pred, groups)
    # Check equality and inequality in both operand orders
    for lhs, rhs in ((acc_result, cm_result), (cm_result, acc_result)):
        assert not (lhs == rhs)
        assert lhs != rhs
def test_group_accuracy_score_unnormalized():
    """Check that normalize=False is forwarded to the underlying sklearn metric."""
    result = metrics.group_accuracy_score(Y_true, Y_pred, groups,
                                          normalize=False)
    # Pass normalize by keyword: scikit-learn makes arguments after y_pred
    # keyword-only, so the original positional `False` raises a TypeError
    # on current versions.
    expected_overall = skm.accuracy_score(Y_true, Y_pred, normalize=False)
    assert result.overall == expected_overall
def test_compute_binary():
    """GroupMetricSet.compute stores inputs and the full binary metric suite."""
    target = GroupMetricSet()
    target.compute(Y_true, Y_pred, groups,
                   model_type=GroupMetricSet.BINARY_CLASSIFICATION)

    reference = group_accuracy_score(Y_true, Y_pred, groups)

    # Input data should be stored unchanged
    assert np.array_equal(Y_true, target.y_true)
    assert np.array_equal(Y_pred, target.y_pred)
    assert np.array_equal(groups, target.groups)
    assert np.array_equal(['0', '1'], target.group_names)

    # All ten binary-classification metrics should be populated
    assert len(target.metrics) == 10

    # Spot-check accuracy against a directly computed reference
    stored = target.metrics[GroupMetricSet.GROUP_ACCURACY_SCORE]
    assert stored.overall == reference.overall
    for g in np.unique(groups):
        assert stored.by_group[g] == reference.by_group[g]
def test_argument_types(transform_y_true, transform_y_pred1, transform_group_1):
    """Check create_group_metric_set gives identical results when some inputs
    are passed through the transform_* fixtures (alternative sequence types).
    """
    # Three models, two groups, no names
    Y_true = transform_y_true([0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0])
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
              transform_y_pred1([1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1]),
              [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0]]
    g = [[0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1],
         [4, 5, 6, 6, 5, 4, 4, 5, 5, 6, 6]]
    Groups = [g[0], transform_group_1(g[1])]

    result = create_group_metric_set('binary_classification',
                                     Y_true, Y_pred, Groups)

    # Schema header
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 2
    bin_dict0 = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict0, dict)
    assert np.array_equal(bin_dict0['binVector'], g[0])
    assert np.array_equal(bin_dict0['binLabels'], ['0', '1'])
    bin_dict1 = result['precomputedFeatureBins'][1]
    assert isinstance(bin_dict1, dict)
    # Bin vector is remapped to zero-based indices; labels keep the originals
    assert np.array_equal(bin_dict1['binVector'], [x - 4 for x in g[1]])
    assert np.array_equal(bin_dict1['binLabels'], ['4', '5', '6'])

    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 3
    for i in range(3):
        y_p = result['predictedY'][i]
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, Y_pred[i])

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 2

    # Check the first grouping (two binary-valued bins)
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 3
    # Loop over the models
    for i in range(3):
        m_g0 = metrics_group_0[i]
        assert isinstance(m_g0, dict)
        assert len(m_g0) == 10
        accuracy = m_g0['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], Groups[0])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 2
        assert gmr.by_group[0] == pytest.approx(accuracy['bins'][0])
        assert gmr.by_group[1] == pytest.approx(accuracy['bins'][1])

    # Check the second grouping (three unique numeric labels)
    metrics_group_1 = result['precomputedMetrics'][1]
    assert isinstance(metrics_group_1, list)
    assert len(metrics_group_1) == 3
    # Loop over the models
    for i in range(3):
        m_g1 = metrics_group_1[i]
        assert isinstance(m_g1, dict)
        assert len(m_g1) == 10
        accuracy = m_g1['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], Groups[1])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 3
        # Use the fact that the groups are integers
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(accuracy['bins'][j])
def test_multiple_models_multiple_sensitive_features():
    """Three models crossed with two sensitive feature vectors, no names.

    Checks the dashboard dictionary structure and cross-checks the
    'accuracy_score' and 'balanced_accuracy_score' entries (the latter is
    backed by group_roc_auc_score) against directly computed values.
    """
    # Three models, two sensitive feature vectors, no names
    Y_true = [0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0]
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
              [1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0]]
    # First group is just 'a' and 'b'. Second is 4, 5 and 6
    sensitive_features = [[
        'a', 'b', 'b', 'a', 'b', 'b', 'b', 'a', 'b', 'b', 'b'
    ], [4, 5, 6, 6, 5, 4, 4, 5, 5, 6, 6]]
    sf_int = [int(x == 'b') for x in sensitive_features[0]]

    result = create_group_metric_set('binary_classification',
                                     Y_true, Y_pred, sensitive_features)

    # Schema header
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    # Bin vectors are remapped to zero-based indices; labels keep originals
    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 2
    bin_dict0 = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict0, dict)
    assert np.array_equal(bin_dict0['binVector'], sf_int)
    assert np.array_equal(bin_dict0['binLabels'], ['a', 'b'])
    bin_dict1 = result['precomputedFeatureBins'][1]
    assert isinstance(bin_dict1, dict)
    assert np.array_equal(bin_dict1['binVector'],
                          [x - 4 for x in sensitive_features[1]])
    assert np.array_equal(bin_dict1['binLabels'], ['4', '5', '6'])

    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 3
    for i in range(3):
        y_p = result['predictedY'][i]
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, Y_pred[i])

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 2

    # Check the first grouping (with alphabetical labels)
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 3
    # Loop over the models
    for i in range(3):
        m_g0 = metrics_group_0[i]
        assert isinstance(m_g0, dict)
        assert len(m_g0) == 10
        accuracy = m_g0['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(accuracy['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(accuracy['bins'][1])
        roc_auc = m_g0['balanced_accuracy_score']
        assert isinstance(roc_auc, dict)
        gmr = group_roc_auc_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(roc_auc['global'])
        assert isinstance(roc_auc['bins'], list)
        assert len(roc_auc['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(roc_auc['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(roc_auc['bins'][1])

    # Check the second grouping (three unique numeric labels)
    metrics_group_1 = result['precomputedMetrics'][1]
    assert isinstance(metrics_group_1, list)
    assert len(metrics_group_1) == 3
    # Loop over the models
    for i in range(3):
        m_g1 = metrics_group_1[i]
        assert isinstance(m_g1, dict)
        assert len(m_g1) == 10
        accuracy = m_g1['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], sensitive_features[1])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 3
        # Use the fact that the groups are integers
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(accuracy['bins'][j])
        roc_auc = m_g1['balanced_accuracy_score']
        assert isinstance(roc_auc, dict)
        gmr = group_roc_auc_score(Y_true, Y_pred[i], sensitive_features[1])
        assert gmr.overall == pytest.approx(roc_auc['global'])
        assert isinstance(roc_auc['bins'], list)
        assert len(roc_auc['bins']) == 3
        # Bug fix: the original reused `i` here, shadowing the outer model
        # index; use `j` to match the accuracy check above
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(roc_auc['bins'][j])