def test_simple_inequality(self):
        a = group_accuracy_score(Y_true, Y_pred, groups)
        b = group_accuracy_score(Y_true, Y_pred, gr_inv)

        assert not (a == b)
        assert not (b == a)
        assert a != b
        assert b != a
Example #2
    def test_specific_metrics(self):
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        exp_acc = group_accuracy_score(y_t, y_p, s_f)
        exp_roc = group_roc_auc_score(y_t, y_p, s_f)

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        actual = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          'binary_classification')

        # Do some sanity checks
        validate_dashboard_dictionary(actual)
        assert actual['trueY'] == y_t
        assert actual['predictedY'][0] == y_p
        assert actual['precomputedFeatureBins'][0]['binVector'] == s_f
        assert len(actual['precomputedMetrics'][0][0]) == 10

        # Cross check the two metrics we computed
        # Comparisons simplified because s_f was already {0,1}
        actual_acc = actual['precomputedMetrics'][0][0]['accuracy_score']
        assert actual_acc['global'] == exp_acc.overall
        assert actual_acc['bins'] == list(exp_acc.by_group.values())

        actual_roc = actual['precomputedMetrics'][0][0][
            'balanced_accuracy_score']
        assert actual_roc['global'] == exp_roc.overall
        assert actual_roc['bins'] == list(exp_roc.by_group.values())
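Taken together, the assertions in these tests pin down the rough shape of the dictionary that the group-metric-set helpers return. Below is a minimal, hand-written sketch of that layout inferred from the assertions (illustrative values only, not library output; only one of the ten per-model metric entries is shown):

expected_shape = {
    'predictionType': 'binaryClassification',
    'schemaType': 'groupMetricSet',
    'schemaVersion': 0,
    'trueY': [0, 1, 1],                      # y_true echoed back as a plain list
    'predictedY': [[1, 1, 1]],               # one list per model
    'precomputedFeatureBins': [
        {'binVector': [0, 1, 0],             # sensitive feature coded as bin indices
         'binLabels': ['0', '1']},           # original values, rendered as strings
    ],
    'precomputedMetrics': [                  # outer list: one entry per sensitive feature
        [                                    # inner list: one entry per model
            {'accuracy_score': {'global': 2 / 3, 'bins': [0.5, 1.0]},
             # ... nine further metric entries, each with 'global' and 'bins'
             },
        ],
    ],
}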
Example #3
def test_two_models():
    # Two models, single sensitive feature vector, no names
    Y_true = [0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1]
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1],
              [1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0]]
    a, b = 'a', 'b'
    sensitive_features = [[b, a, a, b, b, a, a, b, b, a, b, a, b, a, b]]
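    # Re-code 'a'/'b' to 0/1 by hand; the binVector assertion below suggests the
    # sensitive feature is stored as integer bin indices in label order.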
    sf_int = [int(x == 'b') for x in sensitive_features[0]]

    result = create_group_metric_set('binary_classification', Y_true, Y_pred,
                                     sensitive_features)
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 1
    bin_dict = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict, dict)
    assert np.array_equal(bin_dict['binVector'], sf_int)
    assert np.array_equal(bin_dict['binLabels'], ['a', 'b'])

    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 2
    for i in range(2):
        y_p = result['predictedY'][i]
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, Y_pred[i])

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 1
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 2
    for i in range(2):
        metrics_g0_m0 = metrics_group_0[i]
        assert isinstance(metrics_g0_m0, dict)
        assert len(metrics_g0_m0) == 10

        accuracy = metrics_g0_m0['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(accuracy['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(accuracy['bins'][1])

        roc_auc = metrics_g0_m0['balanced_accuracy_score']
        assert isinstance(roc_auc, dict)
        gmr = group_roc_auc_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(roc_auc['global'])
        assert isinstance(roc_auc['bins'], list)
        assert len(roc_auc['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(roc_auc['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(roc_auc['bins'][1])
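A note on why group_roc_auc_score is used to cross-check the 'balanced_accuracy_score' entries: for hard 0/1 predictions the ROC curve has a single operating point, so the area under it equals (TPR + TNR) / 2, which is exactly the balanced accuracy. A standalone sanity check of that equivalence with plain scikit-learn (illustrative data, not the fixtures above):

import pytest
from sklearn.metrics import balanced_accuracy_score, roc_auc_score

y_true = [0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0]
y_hard = [0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1]   # hard 0/1 predictions, not scores

# With a single operating point, AUC = (TPR + TNR) / 2 = balanced accuracy.
assert roc_auc_score(y_true, y_hard) == pytest.approx(
    balanced_accuracy_score(y_true, y_hard))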
    def test_mixed_types(self):
        a = group_accuracy_score(Y_true, Y_pred, groups)
        b = group_confusion_matrix(Y_true, Y_pred, groups)

        assert not (a == b)
        assert not (b == a)
        assert a != b
        assert b != a
def test_group_accuracy_score_unnormalized():
    result = metrics.group_accuracy_score(Y_true,
                                          Y_pred,
                                          groups,
                                          normalize=False)

    expected_overall = skm.accuracy_score(Y_true, Y_pred, normalize=False)

    assert result.overall == expected_overall
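For reference, scikit-learn's accuracy_score with normalize=False returns the raw count of correctly classified samples rather than a fraction, so result.overall above is an integer count. A tiny standalone illustration:

import sklearn.metrics as skm

# Two of the three predictions match, so the unnormalized score is the count 2.
assert skm.accuracy_score([0, 1, 1], [0, 0, 1], normalize=False) == 2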
def test_compute_binary():
    target = GroupMetricSet()

    target.compute(Y_true,
                   Y_pred,
                   groups,
                   model_type=GroupMetricSet.BINARY_CLASSIFICATION)

    sample_expected = group_accuracy_score(Y_true, Y_pred, groups)

    assert np.array_equal(Y_true, target.y_true)
    assert np.array_equal(Y_pred, target.y_pred)
    assert np.array_equal(groups, target.groups)
    assert np.array_equal(['0', '1'], target.group_names)
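    # group_names appear to be the unique group values rendered as strings.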
    assert len(target.metrics) == 10
    assert target.metrics[
        GroupMetricSet.GROUP_ACCURACY_SCORE].overall == sample_expected.overall
    for g in np.unique(groups):
        assert (target.metrics[GroupMetricSet.GROUP_ACCURACY_SCORE].by_group[g]
                == sample_expected.by_group[g])
Example #7
def test_argument_types(transform_y_true, transform_y_pred1,
                        transform_group_1):
    # Three models, two groups, no names
    Y_true = transform_y_true([0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0])
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
              transform_y_pred1([1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1]),
              [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0]]
    g = [[0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1], [4, 5, 6, 6, 5, 4, 4, 5, 5, 6, 6]]
    Groups = [g[0], transform_group_1(g[1])]

    result = create_group_metric_set('binary_classification', Y_true, Y_pred,
                                     Groups)
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 2
    bin_dict0 = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict0, dict)
    assert np.array_equal(bin_dict0['binVector'], g[0])
    assert np.array_equal(bin_dict0['binLabels'], ['0', '1'])
    bin_dict1 = result['precomputedFeatureBins'][1]
    assert isinstance(bin_dict1, dict)
    assert np.array_equal(bin_dict1['binVector'], [x - 4 for x in g[1]])
    assert np.array_equal(bin_dict1['binLabels'], ['4', '5', '6'])
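    # The numeric groups 4, 5 and 6 are re-coded to 0-based bin indices, with the
    # original values preserved as the string binLabels.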

    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 3
    for i in range(3):
        y_p = result['predictedY'][i]
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, Y_pred[i])

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 2

    # Check the first grouping (binary 0/1 labels)
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 3
    # Loop over the models
    for i in range(3):
        m_g0 = metrics_group_0[i]
        assert isinstance(m_g0, dict)
        assert len(m_g0) == 10
        accuracy = m_g0['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], Groups[0])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 2
        assert gmr.by_group[0] == pytest.approx(accuracy['bins'][0])
        assert gmr.by_group[1] == pytest.approx(accuracy['bins'][1])

    # Check the second grouping (three unique numeric labels)
    metrics_group_1 = result['precomputedMetrics'][1]
    assert isinstance(metrics_group_1, list)
    assert len(metrics_group_1) == 3
    # Loop over the models
    for i in range(3):
        m_g1 = metrics_group_1[i]
        assert isinstance(m_g1, dict)
        assert len(m_g1) == 10
        accuracy = m_g1['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], Groups[1])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 3
        # Use the fact that the groups are integers
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(accuracy['bins'][j])
Example #8
def test_multiple_models_multiple_sensitive_features():
    # Three models, two sensitive feature vectors, no names
    Y_true = [0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0]
    Y_pred = [[0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1],
              [1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0]]
    # First group is just 'a' and 'b'. Second is 4, 5 and 6
    sensitive_features = [[
        'a', 'b', 'b', 'a', 'b', 'b', 'b', 'a', 'b', 'b', 'b'
    ], [4, 5, 6, 6, 5, 4, 4, 5, 5, 6, 6]]
    sf_int = [int(x == 'b') for x in sensitive_features[0]]

    result = create_group_metric_set('binary_classification', Y_true, Y_pred,
                                     sensitive_features)
    assert result['predictionType'] == 'binaryClassification'
    assert result['schemaType'] == 'groupMetricSet'
    assert result['schemaVersion'] == 0

    assert isinstance(result['trueY'], list)
    assert np.array_equal(result['trueY'], Y_true)

    assert isinstance(result['precomputedFeatureBins'], list)
    assert len(result['precomputedFeatureBins']) == 2
    bin_dict0 = result['precomputedFeatureBins'][0]
    assert isinstance(bin_dict0, dict)
    assert np.array_equal(bin_dict0['binVector'], sf_int)
    assert np.array_equal(bin_dict0['binLabels'], ['a', 'b'])
    bin_dict1 = result['precomputedFeatureBins'][1]
    assert isinstance(bin_dict1, dict)
    assert np.array_equal(bin_dict1['binVector'],
                          [x - 4 for x in sensitive_features[1]])
    assert np.array_equal(bin_dict1['binLabels'], ['4', '5', '6'])

    assert isinstance(result['predictedY'], list)
    assert len(result['predictedY']) == 3
    for i in range(3):
        y_p = result['predictedY'][i]
        assert isinstance(y_p, list)
        assert np.array_equal(y_p, Y_pred[i])

    assert isinstance(result['precomputedMetrics'], list)
    assert len(result['precomputedMetrics']) == 2

    # Check the first grouping (with alphabetical labels)
    metrics_group_0 = result['precomputedMetrics'][0]
    assert isinstance(metrics_group_0, list)
    assert len(metrics_group_0) == 3
    # Loop over the models
    for i in range(3):
        m_g0 = metrics_group_0[i]
        assert isinstance(m_g0, dict)
        assert len(m_g0) == 10

        accuracy = m_g0['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(accuracy['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(accuracy['bins'][1])

        roc_auc = m_g0['balanced_accuracy_score']
        assert isinstance(roc_auc, dict)
        gmr = group_roc_auc_score(Y_true, Y_pred[i], sensitive_features[0])
        assert gmr.overall == pytest.approx(roc_auc['global'])
        assert isinstance(roc_auc['bins'], list)
        assert len(roc_auc['bins']) == 2
        assert gmr.by_group['a'] == pytest.approx(roc_auc['bins'][0])
        assert gmr.by_group['b'] == pytest.approx(roc_auc['bins'][1])

    # Check the second grouping (three unique numeric labels)
    metrics_group_1 = result['precomputedMetrics'][1]
    assert isinstance(metrics_group_1, list)
    assert len(metrics_group_1) == 3
    # Loop over the models
    for i in range(3):
        m_g1 = metrics_group_1[i]
        assert isinstance(m_g1, dict)
        assert len(m_g1) == 10
        accuracy = m_g1['accuracy_score']
        assert isinstance(accuracy, dict)
        gmr = group_accuracy_score(Y_true, Y_pred[i], sensitive_features[1])
        assert gmr.overall == pytest.approx(accuracy['global'])
        assert isinstance(accuracy['bins'], list)
        assert len(accuracy['bins']) == 3
        # Use the fact that the groups are integers
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(accuracy['bins'][j])

        roc_auc = m_g1['balanced_accuracy_score']
        assert isinstance(roc_auc, dict)
        gmr = group_roc_auc_score(Y_true, Y_pred[i], sensitive_features[1])
        assert gmr.overall == pytest.approx(roc_auc['global'])
        assert isinstance(roc_auc['bins'], list)
        assert len(roc_auc['bins']) == 3
        for j in range(3):
            assert gmr.by_group[j + 4] == pytest.approx(roc_auc['bins'][j])