Exemplo n.º 1
0
def test_des_clustering():
    """Integration check of the DESClustering score on the test split.

    Fits the estimator on the DSEL data returned by ``setup_classifiers``
    and compares ``score`` on the test split with a stored reference value.
    """
    expected_accuracy = 0.97872340425531912
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    # Seeded generator handed to the estimator through ``rng``.
    seeded_rng = np.random.RandomState(123456)
    selector = DESClustering(pool_classifiers, rng=seeded_rng)
    selector.fit(X_dsel, y_dsel)

    accuracy = selector.score(X_test, y_test)
    assert np.isclose(accuracy, expected_accuracy)
Exemplo n.º 2
0
def test_diversity_metric_ratio():
    """Fitting with ``metric='ratio'`` must set ``ratio_errors``."""
    pool = create_pool_classifiers() * 10
    des_clustering = DESClustering(pool, metric='ratio')

    # The cluster preprocessing step is not needed for this check, so it is
    # replaced by a stub.
    des_clustering._preprocess_clusters = MagicMock(return_value=1)
    des_clustering.fit(X_dsel_ex1, y_dsel_ex1)

    assert des_clustering.diversity_func_ == ratio_errors
Exemplo n.º 3
0
def test_estimate_competence(create_pool_classifiers,
                             example_estimate_competence):
    """``estimate_competence`` must return the row of the predicted cluster.

    For each mocked cluster index the returned competences are compared with
    the matching row of ``performance_cluster_``.
    """
    X, y = example_estimate_competence[0:2]
    query = np.atleast_2d([1, 1])

    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    # Stub the cluster assignment used while fitting.
    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    clustering_test.fit(X, y)

    for cluster_idx in (0, 1):
        # Force the fitted clustering to assign the query to this cluster.
        clustering_test.clustering_.predict = MagicMock(
            return_value=cluster_idx)
        competences = clustering_test.estimate_competence(query)

        expected_row = clustering_test.performance_cluster_[cluster_idx, :]
        assert np.array_equal(competences, expected_row)
Exemplo n.º 4
0
def test_fit_heterogeneous_clusters(example_estimate_competence,
                                    create_pool_classifiers):
    """Check per-cluster performance and the first selected index after fit.

    The cluster assignment is mocked with ``return_cluster_index_ex2``; the
    expected performance values are compared with a 0.01 absolute tolerance.
    """
    X, y = example_estimate_competence[0:2]
    clustering_test = DESClustering(create_pool_classifiers,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    clustering_test.fit(X, y)

    performance = clustering_test.performance_cluster_
    assert np.allclose(performance[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(performance[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier selected for each cluster must be index 0 or 2
    # (presumably the classifiers predicting label 0 - confirm with fixture).
    assert clustering_test.indices_[0, 0] in (0, 2)
    assert clustering_test.indices_[1, 0] in (0, 2)
Exemplo n.º 5
0
def test_diversity_metric_ratio(create_X_y):
    """``metric_diversity='ratio'`` must set ``ratio_errors`` on fit."""
    features, labels = create_X_y
    des_clustering = DESClustering(metric_diversity='ratio')

    # The cluster preprocessing step is not needed for this check, so it is
    # replaced by a stub.
    des_clustering._preprocess_clusters = MagicMock(return_value=1)
    des_clustering.fit(features, labels)

    assert des_clustering.diversity_func_ == ratio_errors
Exemplo n.º 6
0
def test_des_clustering():
    """Integration check of the score with an explicit seeded KMeans.

    A 5-cluster ``KMeans`` with a seeded random state is passed as the
    clustering algorithm; the resulting ``score`` on the test split is
    compared with a stored reference value.
    """
    from sklearn.cluster import KMeans

    expected_accuracy = 0.97872340425531912
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    seeded_rng = np.random.RandomState(123456)
    kmeans = KMeans(n_clusters=5, random_state=seeded_rng)

    selector = DESClustering(pool_classifiers, clustering=kmeans)
    selector.fit(X_dsel, y_dsel)

    accuracy = selector.score(X_test, y_test)
    assert np.isclose(accuracy, expected_accuracy)
Exemplo n.º 7
0
def test_diversity_metric_DF(create_X_y):
    """``metric='DF'`` must set ``negative_double_fault`` on fit."""
    features, labels = create_X_y
    des_clustering = DESClustering(metric='DF')

    # The cluster preprocessing step is not needed for this check, so it is
    # replaced by a stub.
    des_clustering._preprocess_clusters = MagicMock(return_value=1)
    des_clustering.fit(features, labels)

    assert des_clustering.diversity_func_ == negative_double_fault
Exemplo n.º 8
0
def test_des_clustering_proba():
    """Integration check of ``predict_proba`` against stored reference values.

    The reference array is loaded from a ``.npy`` file addressed relative to
    the current working directory.
    """
    reference_file = (
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    seeded_rng = np.random.RandomState(123456)
    selector = DESClustering(pool_classifiers, rng=seeded_rng)
    selector.fit(X_dsel, y_dsel)

    predicted = selector.predict_proba(X_test)
    reference = np.load(reference_file)
    assert np.allclose(predicted, reference)
Exemplo n.º 9
0
def test_des_clustering_proba():
    """Integration check of ``predict_proba`` with an explicit seeded KMeans.

    The reference array is loaded from a ``.npy`` file addressed relative to
    the current working directory.
    """
    from sklearn.cluster import KMeans

    reference_file = (
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    seeded_rng = np.random.RandomState(123456)
    kmeans = KMeans(n_clusters=5, random_state=seeded_rng)

    selector = DESClustering(pool_classifiers, clustering=kmeans)
    selector.fit(X_dsel, y_dsel)

    predicted = selector.predict_proba(X_test)
    reference = np.load(reference_file)
    assert np.allclose(predicted, reference)
Exemplo n.º 10
0
def test_fit_homogeneous_clusters():
    """Check per-cluster accuracies and selected indices after fitting.

    The cluster assignment is mocked with ``return_cluster_index_ex1``. In
    cluster 0 classifier 1 must score 0.0 and classifiers 0 and 2 must score
    1.0; in cluster 1 the values are reversed. Every index selected for
    cluster 0 must belong to ``(0, 2, 3, 5)``.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    k=2,
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices[0, :]:
        assert idx in (0, 2, 3, 5)
Exemplo n.º 11
0
def test_fit_heterogeneous_clusters():
    """Check per-cluster accuracy and the first selected index after fit.

    The cluster assignment is mocked with ``return_cluster_index_ex2``; the
    expected accuracies are compared with a 0.01 absolute tolerance.
    """
    clustering_test = DESClustering(create_pool_classifiers(),
                                    k=2,
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    assert np.allclose(accuracy[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(accuracy[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier selected for each cluster must be index 0 or 2
    # (presumably the classifiers predicting label 0 - confirm with fixture).
    assert clustering_test.indices[0, 0] in (0, 2)
    assert clustering_test.indices[1, 0] in (0, 2)
Exemplo n.º 12
0
def test_estimate_competence():
    """``estimate_competence`` must return the row of the predicted cluster.

    For each mocked cluster index the returned competences are compared with
    the matching row of ``accuracy_cluster``.
    """
    query = np.atleast_2d([1, 1])
    clustering_test = DESClustering(create_pool_classifiers(),
                                    k=2,
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    for cluster_idx in (0, 1):
        # Force the clustering algorithm to assign the query to this cluster.
        clustering_test.roc_algorithm.predict = MagicMock(
            return_value=cluster_idx)
        competences = clustering_test.estimate_competence(query)

        expected_row = clustering_test.accuracy_cluster[cluster_idx, :]
        assert np.array_equal(competences, expected_row)
Exemplo n.º 13
0
def test_fit_clusters_less_diverse():
    """Check accuracies and selected indices when ``more_diverse=False``.

    The cluster assignment is mocked with ``return_cluster_index_ex1``. In
    cluster 0 classifier 1 must score 0.0 and classifiers 0 and 2 must score
    1.0; in cluster 1 the values are reversed. Every index selected for
    cluster 0 must belong to ``(1, 3, 4, 5)``.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
Exemplo n.º 14
0
def test_fit_clusters_less_diverse(example_estimate_competence,
                                   create_pool_classifiers):
    """Check performance and selected indices when ``more_diverse=False``.

    The cluster assignment is mocked to return the labels ``y``. In cluster 0
    classifier 1 must score 0.0 and classifiers 0 and 2 must score 1.0; in
    cluster 1 the values are reversed. Every index selected for cluster 0
    must belong to ``(1, 3, 4, 5)``.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5, pct_diversity=0.33,
                                    more_diverse=False)
    X, y = example_estimate_competence[0:2]

    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    performance = clustering_test.performance_cluster_

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert performance[0, 1] == 0.0
    assert (performance[0, [0, 2]] == 1.0).all()
    assert performance[1, 1] == 1.0
    assert (performance[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
Exemplo n.º 15
0
def test_fit_homogeneous_clusters(create_pool_classifiers,
                                  example_estimate_competence):
    """Check per-cluster accuracies and selected indices after fitting.

    The cluster assignment is mocked to return the labels ``y``. In cluster 0
    classifier 1 must score 0.0 and classifiers 0 and 2 must score 1.0; in
    cluster 1 the values are reversed. Every index selected for cluster 0
    must belong to ``(0, 2, 3, 5)``.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    X, y = example_estimate_competence[0:2]
    clustering_test.clustering.predict = MagicMock(return_value=y)

    clustering_test.fit(X, y)

    accuracy = clustering_test.accuracy_cluster_

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
Exemplo n.º 16
0
def test_fit_homogeneous_clusters():
    """Check per-cluster accuracies and selected indices after fitting.

    The cluster assignment is mocked with ``return_cluster_index_ex1``. In
    cluster 0 classifier 1 must score 0.0 and classifiers 0 and 2 must score
    1.0; in cluster 1 the values are reversed. Every index selected for
    cluster 0 must belong to ``(0, 2, 3, 5)``.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
Exemplo n.º 17
0
def test_fit_heterogeneous_clusters():
    """Check per-cluster accuracy and the first selected index after fit.

    The cluster assignment is mocked with ``return_cluster_index_ex2``; the
    expected accuracies are compared with a 0.01 absolute tolerance.
    """
    kmeans = KMeans(n_clusters=2)
    clustering_test = DESClustering(create_pool_classifiers(),
                                    clustering=kmeans,
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    assert np.allclose(accuracy[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(accuracy[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier selected for each cluster must be index 0 or 2
    # (presumably the classifiers predicting label 0 - confirm with fixture).
    assert clustering_test.indices_[0, 0] in (0, 2)
    assert clustering_test.indices_[1, 0] in (0, 2)
Exemplo n.º 18
0
def test_fit_clusters_less_diverse():
    """Check accuracies and selected indices when ``more_diverse=False``.

    The cluster assignment is mocked with ``return_cluster_index_ex1``. In
    cluster 0 classifier 1 must score 0.0 and classifiers 0 and 2 must score
    1.0; in cluster 1 the values are reversed. Every index selected for
    cluster 0 must belong to ``[1, 3, 5, 4]``.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    k=2,
                                    pct_accuracy=1.0,
                                    pct_diversity=0.60,
                                    more_diverse=False)

    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster

    # Compare element-wise before reducing: ``arr.all() == 1.0`` only tests
    # truthiness (any non-zero values pass) and ``arr.all() == 0.0`` passes
    # as soon as a single element is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    assert np.isin(clustering_test.indices[0, :], np.array([1, 3, 5, 4])).all()
Exemplo n.º 19
0
def test_not_clustering_algorithm(create_X_y):
    """``fit`` must raise ``ValueError`` when ``clustering`` is a Perceptron."""
    features, labels = create_X_y
    selector = DESClustering(clustering=Perceptron())

    with pytest.raises(ValueError):
        selector.fit(features, labels)
Exemplo n.º 20
0
def test_J_higher_than_N(create_X_y):
    """``pct_diversity`` above ``pct_accuracy`` must raise ``ValueError``."""
    features, labels = create_X_y

    # Construction and fitting both stay inside the context, so an error
    # raised by either step satisfies the test.
    with pytest.raises(ValueError):
        DESClustering(pct_accuracy=0.3,
                      pct_diversity=0.5).fit(features, labels)
Exemplo n.º 21
0
def test_J_N_values(create_X_y):
    """``pct_diversity=0`` must be rejected with ``ValueError``."""
    features, labels = create_X_y

    # Construction and fitting both stay inside the context, so an error
    # raised by either step satisfies the test.
    with pytest.raises(ValueError):
        DESClustering(pct_accuracy=0.5,
                      pct_diversity=0).fit(features, labels)
Exemplo n.º 22
0
def test_input_diversity_parameter(create_X_y):
    """An unknown ``metric_diversity`` value must raise ``ValueError``."""
    features, labels = create_X_y

    # Construction and fitting both stay inside the context, so an error
    # raised by either step satisfies the test.
    with pytest.raises(ValueError):
        DESClustering(metric_diversity='abc').fit(features, labels)
Exemplo n.º 23
0
def test_not_clustering_algorithm():
    """``fit`` must raise ``ValueError`` when ``clustering`` is a Perceptron."""
    pool = create_pool_classifiers()
    selector = DESClustering(pool, clustering=Perceptron())

    with pytest.raises(ValueError):
        selector.fit(X_dsel_ex1, y_dsel_ex1)
Exemplo n.º 24
0
def test_input_diversity_parameter():
    """An unknown ``metric`` value must raise ``ValueError``."""
    # Pool creation, construction and fitting all stay inside the context,
    # so an error raised by any of these steps satisfies the test.
    with pytest.raises(ValueError):
        pool = create_pool_classifiers() * 10
        DESClustering(pool, metric='abc').fit(X_dsel_ex1, y_dsel_ex1)
Exemplo n.º 25
0
def test_J_higher_than_N():
    """``pct_diversity`` above ``pct_accuracy`` must raise ``ValueError``."""
    # Pool creation, construction and fitting all stay inside the context,
    # so an error raised by any of these steps satisfies the test.
    with pytest.raises(ValueError):
        pool = create_pool_classifiers() * 100
        selector = DESClustering(pool,
                                 pct_accuracy=0.3,
                                 pct_diversity=0.5)
        selector.fit(X_dsel_ex1, y_dsel_ex1)