示例#1
0
def test_sample_bad():
    """Check that a bad clustering scores poorly.

    The sample has one mixed cluster ``[1, 1, 0, 0]`` and one singleton
    ``[0]``.  Both AUL entry points (label-based and cluster-based) are
    expected to give 0.5, and the ROC AUC is expected to be 0.6667, each
    compared to 4 places.
    """
    sample = [[1, 1, 0, 0], [0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 0.5, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 0.6667, 4)
示例#2
0
def test_sample_perverse():
    """Check a perverse case, where 0.0 < AUL < 0.5.

    Here the only multi-member cluster ``[0, 0]`` holds class 0 while the
    single class-1 point sits alone.  Both AUL entry points are expected to
    give 0.1111 and the ROC AUC is expected to be 0.0, each compared to
    4 places.
    """
    sample = [[1], [0, 0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 0.1111, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 0.0, 4)
示例#3
0
def test_sample_neg_class1():
    """Like ``test_sample_perfect``, but with a negative of class 1.

    One of the singletons holds a class-1 point (``[1]``) instead of a
    class-0 point.  Both AUL entry points are expected to give 0.8690 and
    the ROC AUC is expected to be 0.9167, each compared to 4 places.
    """
    sample = [[1, 1, 1, 1, 1], [1], [0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 0.8690, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 0.9167, 4)
示例#4
0
def test_sample_cluster0_c0():
    """Like ``test_sample_perfect``, but with a cluster of class 0.

    Two class-0 points are grouped together (``[0, 0]``) next to the
    class-1 cluster.  Both AUL entry points are expected to give 0.6667
    while the ROC AUC is still expected to be 1.0, each compared to
    4 places.
    """
    sample = [[1, 1, 1, 1], [0, 0], [0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 0.6667, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
示例#5
0
def test_sample_cluster0_nh():
    """Like ``test_sample_perfect``, but cluster 0 is not homogeneous.

    The first cluster contains a class-0 point alongside four class-1
    points.  Both AUL entry points are expected to give 0.8 and the ROC AUC
    is expected to be 0.8333, each compared to 4 places.
    """
    sample = [[1, 1, 1, 1, 0], [0], [0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 0.8, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 0.8333, 4)
示例#6
0
def test_sample_perfect():
    """Check that a perfect clustering gets the top score.

    All class-1 points share one cluster and each class-0 point is a
    singleton.  Both AUL entry points and the ROC AUC are expected to be
    1.0, each compared to 4 places.
    """
    sample = [[1, 1, 1, 1, 1], [0], [0]]

    label_args = clusters_to_labels(sample)
    aul_from_labels = aul_score_from_labels(*label_args)
    aul_from_clusters = aul_score_from_clusters(sample)
    for observed in (aul_from_labels, aul_from_clusters):
        assert_almost_equal(observed, 1.0, 4)

    roc = RocCurve.from_clusters(sample)
    assert_almost_equal(roc.auc_score(), 1.0, 4)
示例#7
0
def add_ranking_metrics(args, clusters, pairs):
    """Append ROC- and Lift-curve based metrics to ``pairs``.

    The metrics to compute are read from ``utils.METRICS``; ``args`` is
    accepted but not consulted by this function.  Every metric that is
    computed is appended to ``pairs`` as a ``(name, value)`` tuple.  Nothing
    is returned.

    :param args: not used here
    :param clusters: iterable of clusters, each an iterable of points that
        ``class_is_positive`` can classify
    :param pairs: list that receives the ``(metric_name, value)`` tuples
    """
    wanted = utils.METRICS

    # ROC-based metrics: only build the curve if at least one is requested.
    if set(utils.ROC_METRICS) & set(wanted):
        from lsh_hdc.ranking import RocCurve
        rc = RocCurve.from_clusters(clusters, is_class_pos=class_is_positive)
        roc_table = (
            ('roc_auc', 'auc_score'),
            ('roc_max_info', 'max_informedness'),
        )
        for metric_name, method_name in roc_table:
            if metric_name in wanted:
                pairs.append((metric_name, getattr(rc, method_name)()))

    # Lift-based metrics work on per-cluster lists of positive/negative flags.
    if set(utils.LIFT_METRICS) & set(wanted):
        from lsh_hdc.ranking import aul_score_from_clusters as aul_score
        # Lazy: the flags are only computed if the generator is consumed.
        flagged_clusters = (list(map(class_is_positive, cluster))
                            for cluster in clusters)
        if 'aul_score' in wanted:
            pairs.append(('aul_score', aul_score(flagged_clusters)))