示例#1
0
def test_fit_predict_default(data):
    """Cluster the first two views and validate the returned labels.

    The estimator is built with two clusters and the shared random state;
    the labels must have one entry per fitted sample and each entry must
    lie in ``[0, n_clusters)``.
    """
    expected_clusters = data['n_clusters']
    two_views = data['fit_data'][:2]

    model = MultiviewSpectralClustering(2, random_state=RANDOM_STATE)
    labels = model.fit_predict(two_views)

    # One label per fitted sample.
    assert labels.shape[0] == data['n_fit']
    # Every label must be a valid cluster index.
    for label in labels:
        assert 0 <= label < expected_clusters
示例#2
0
def test_fit_predict_info_view(data):
    """Cluster with a randomly drawn ``info_view`` and validate the labels.

    ``info_view`` is drawn uniformly from the valid view indices of the
    fixture data; the labels must have one entry per fitted sample and each
    entry must lie in ``[0, n_clusters)``.
    """
    views = data['fit_data']
    # Any index in range(len(views)) is a valid choice here.
    chosen_view = np.random.randint(len(views))
    k = data['n_clusters']

    model = MultiviewSpectralClustering(
        k, random_state=RANDOM_STATE, info_view=chosen_view)
    labels = model.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < k for label in labels)
示例#3
0
def test_fit_predict_max_iter(data):
    """Cluster with ``max_iter=5`` and validate the returned labels.

    The labels must have one entry per fitted sample and each entry must
    lie in ``[0, n_clusters)``.
    """
    views = data['fit_data']
    k = data['n_clusters']
    settings = {'random_state': RANDOM_STATE, 'max_iter': 5}

    labels = MultiviewSpectralClustering(k, **settings).fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    for label in labels:
        assert 0 <= label < k
示例#4
0
def test_not_valid_affinity(small_data):
    """An unknown affinity name, or ``None``, must raise ``ValueError``."""
    for bad_affinity in ('What', None):
        # Construction and fitting both sit inside the context, so the
        # error is accepted from either step.
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(affinity=bad_affinity)
            model.fit_predict(small_data)
示例#5
0
def test_n_clusters_not_positive_int(small_data):
    """``n_clusters`` of -1 or 0 must raise ``ValueError``."""
    for bad_count in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=bad_count)
            model.fit_predict(small_data)
示例#6
0
def test_info_view_not_valid(small_data):
    """``info_view`` of -1 or 6 must raise ``ValueError``."""
    for bad_view in (-1, 6):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=2,
                                                info_view=bad_view)
            model.fit_predict(small_data)
示例#7
0
def test_gamma_not_positive_float(small_data):
    """``gamma`` of -1.5 or 0 must raise ``ValueError``."""
    for bad_gamma in (-1.5, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(gamma=bad_gamma)
            model.fit_predict(small_data)
示例#8
0
def test_n_neighbors_not_positive_int(small_data):
    """``n_neighbors`` of -1 or 0 must raise ``ValueError``.

    The nearest-neighbors affinity is selected so that ``n_neighbors`` is
    the parameter under test.
    """
    for bad_neighbors in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(
                affinity='nearest_neighbors', n_neighbors=bad_neighbors)
            model.fit_predict(small_data)
示例#9
0
def test_affinity_neighbors(data):
    """Compare the nearest-neighbors affinity with a reference k-NN graph.

    A ``NearestNeighbors`` model with the same ``n_neighbors`` is fitted on
    the first view and its dense k-neighbors graph is used as the expected
    affinity matrix. The matrix returned by ``_affinity_mat`` must be
    ``n_fit x n_fit`` and agree with the reference to within 1e-6 in every
    entry.
    """
    v1_data = data['fit_data'][0]
    n_neighbors = 10

    # Reference graph built directly from NearestNeighbors.
    neighbors = NearestNeighbors(n_neighbors=n_neighbors)
    neighbors.fit(v1_data)
    true_kernel = neighbors.kneighbors_graph(v1_data).toarray()

    # Reuse the same n_neighbors value so the reference and the estimator
    # cannot silently drift apart.
    spectral = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                           affinity='nearest_neighbors',
                                           n_neighbors=n_neighbors)
    n_kernel = spectral._affinity_mat(v1_data)

    assert (n_kernel.shape[0] == data['n_fit'])
    assert (n_kernel.shape[1] == data['n_fit'])

    # Single vectorized comparison instead of one assert per matrix entry.
    assert np.all(np.abs(true_kernel - n_kernel) < 0.000001)
示例#10
0
def test_affinity_mat_poly(data):
    """Compare the polynomial affinity with a reference polynomial kernel.

    The reference kernel uses ``gamma = 1 / (2 * median(pairwise
    distance)**2)`` computed on the first view. The matrix returned by
    ``_affinity_mat`` must be ``n_fit x n_fit`` and agree with the reference
    to within 1e-6 in every entry.
    """
    v1_data = data['fit_data'][0]

    # Expected kernel, with gamma derived from the median pairwise distance.
    distances = cdist(v1_data, v1_data)
    gamma = 1 / (2 * np.median(distances)**2)
    true_kernel = polynomial_kernel(v1_data, gamma=gamma)

    spectral = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                           affinity='poly')
    p_kernel = spectral._affinity_mat(v1_data)

    assert (p_kernel.shape[0] == data['n_fit'])
    assert (p_kernel.shape[1] == data['n_fit'])

    # Single vectorized comparison instead of one assert per matrix entry.
    assert np.all(np.abs(true_kernel - p_kernel) < 0.000001)
示例#11
0
def test_affinity_mat_rbf2(data):
    """Compare the RBF affinity for an explicit ``gamma`` with ``rbf_kernel``.

    The estimator is built with ``gamma=1`` and the reference kernel is
    computed with the same value. The matrix returned by ``_affinity_mat``
    must be ``n_fit x n_fit`` and agree with the reference to within 1e-6
    in every entry.
    """
    v1_data = data['fit_data'][0]
    gamma = 1
    spectral = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                           gamma=gamma)

    # The reference uses the very same explicit gamma handed to the
    # estimator; no median-distance heuristic is involved in this test.
    true_kernel = rbf_kernel(v1_data, gamma=gamma)
    g_kernel = spectral._affinity_mat(v1_data)

    assert (g_kernel.shape[0] == data['n_fit'])
    assert (g_kernel.shape[1] == data['n_fit'])

    # Single vectorized comparison instead of one assert per matrix entry.
    assert np.all(np.abs(true_kernel - g_kernel) < 0.000001)
def perform_clustering(seed, m_data, labels, n_clusters, kernel='rbf'):
    """Run single-view and multi-view spectral clustering and print NMI scores.

    Parameters
    ----------
    seed : int
        Random state passed to both the single-view and the multi-view
        estimator.
    m_data : list of array-like
        The views. ``m_data[0]`` and ``m_data[1]`` are clustered on their
        own, all views are stacked column-wise for the concatenated
        baseline, and the full list is given to the multi-view estimator.
    labels : array-like
        True class labels that every clustering is scored against.
    n_clusters : int
        Number of clusters requested from each estimator.
    kernel : str, default='rbf'
        Affinity passed to both estimators.

    Returns
    -------
    m_clusters
        Cluster labels returned by the multi-view estimator's
        ``fit_predict``.
    """
    # Single-view spectral clustering
    # Cluster each view separately
    s_spectral = SpectralClustering(n_clusters=n_clusters,
                                    random_state=seed,
                                    affinity=kernel,
                                    n_init=100)
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # Compute nmi between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)
    print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spectral clustering

    # Use the MultiviewSpectralClustering instance to cluster the data
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             affinity=kernel,
                                             n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # Compute nmi between true class labels and multi-view cluster labels.
    # This score comes from the multi-view fit, not from concatenated data,
    # so the label does not say "Concatenated".
    m_nmi = nmi_score(labels, m_clusters)
    print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
示例#13
0
def data():
    """Build the shared random three-view dataset used by the tests.

    NumPy's global random generator is seeded with ``RANDOM_STATE`` before
    the views are drawn. The returned dict holds the sample count, the
    feature count of each view, the cluster count, an unfitted estimator
    and the list of views.
    """
    sample_count = 200
    feature_counts = (20, 18, 30)
    cluster_count = 2

    np.random.seed(RANDOM_STATE)
    # Views are drawn in order so the random stream is consumed exactly as
    # three consecutive rand() calls.
    views = [np.random.rand(sample_count, width) for width in feature_counts]

    estimator = MultiviewSpectralClustering(cluster_count,
                                            random_state=RANDOM_STATE)
    return {
        'n_fit': sample_count,
        'n_feats1': feature_counts[0],
        'n_feats2': feature_counts[1],
        'n_feats3': feature_counts[2],
        'n_clusters': cluster_count,
        'spectral': estimator,
        'fit_data': views
    }
示例#14
0
# Score each single-view clustering against the true class labels.
# NOTE(review): nmi_score, labels and the s_clusters* arrays are defined
# outside this excerpt; nmi_score is presumably a normalized mutual
# information metric - confirm against the import.
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)
# Report the three baseline scores with three decimals.
print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

###############################################################################
# Multiview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Use a MultiviewSpectralClustering instance to cluster all views jointly,
# here with a nearest-neighbors affinity, max_iter=12 and n_init=10.
m_spectral = MultiviewSpectralClustering(n_clusters=n_class,
                                         affinity='nearest_neighbors',
                                         max_iter=12,
                                         random_state=RANDOM_SEED,
                                         n_init=10)
m_clusters = m_spectral.fit_predict(m_data)

# Score the multi-view clustering against the same true class labels so it
# can be compared with the single-view scores printed above.
m_nmi = nmi_score(labels, m_clusters)
print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Plots of clusters produced by multi-view spectral clustering and the true
# clusters
#
# We will display the clustering results of the Multi-view spectral clustering
# algorithm below, along with the true class labels.
    # NOTE(review): the enclosing function header is not visible in this
    # excerpt. Xs, y_true, y_predicted, title and method are presumably its
    # parameters, and `score` is not defined in the visible lines - confirm
    # against the full source.
    # Marker styling shared by both scatter plots.
    sca_kwargs = {'alpha': 0.7, 's': 10}
    # Two side-by-side panels: axes[0] for true labels, axes[1] for
    # predicted labels.
    f, axes = plt.subplots(1, 2, figsize=(8, 4))
    # Both panels plot the first two columns of Xs; only the coloring differs.
    axes[0].scatter(Xs[:, 0], Xs[:, 1], c=y_true, **sca_kwargs)
    axes[0].set_title('True labels', fontsize=14)
    axes[1].scatter(Xs[:, 0], Xs[:, 1], c=y_predicted, **sca_kwargs)
    axes[1].set_title(title, fontsize=14)
    # Show the score inside the right-hand panel, anchored in axes-fraction
    # coordinates near the top-right corner.
    axes[1].annotate(f'Homogeneity\nscore = {score:.2f}',
                     xy=(0.95, 0.85),
                     xycoords='axes fraction',
                     fontsize=13,
                     ha='right')
    # Axis labels name the embedding method; ticks are removed on both axes.
    axes[0].set_ylabel(f'{method} Component 2')
    plt.setp(axes, xticks=[], yticks=[], xlabel=f'{method} Component 1')
    plt.tight_layout()
    plt.show()


# Baseline: stack all views column-wise and cluster the result with a
# single-view SpectralClustering (4 clusters, nearest-neighbors affinity).
sv_clust = SpectralClustering(n_clusters=4, affinity='nearest_neighbors')
sv_labels = sv_clust.fit_predict(np.hstack(Xs))

# Plot the baseline labels; Xs_pca and y are defined outside this excerpt.
plot_clusters(Xs_pca, y, sv_labels, 'Concatenated clustering labels', 'PCA')

# Multiview: pass the views to MultiviewSpectralClustering unstacked, using
# the same cluster count and affinity as the baseline.
mv_clust = MultiviewSpectralClustering(n_clusters=4,
                                       affinity='nearest_neighbors')
mv_labels = mv_clust.fit_predict(Xs)

# Plot the multiview labels; Xs_mvmds is defined outside this excerpt.
plot_clusters(Xs_mvmds, y, mv_labels, 'Multiview clustering labels', 'MVMDS')
示例#16
0
def test_samples_not_2D_2(small_data):
    """Fitting on one-dimensional views must raise ``ValueError``."""
    with pytest.raises(ValueError):
        # Two views, each a flat vector of length 10 rather than a 2D array.
        flat_views = [np.random.random((10, )) for _ in range(2)]
        model = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        model.fit_predict(flat_views)
示例#17
0
def test_n_views_too_small2(small_data):
    """Fitting on an empty list of views must raise ``ValueError``."""
    with pytest.raises(ValueError):
        MultiviewSpectralClustering(
            random_state=RANDOM_STATE).fit_predict([])
示例#18
0
def test_random_state_not_convertible(small_data):
    """A ``random_state`` of ``'ab'`` must raise ``ValueError``."""
    bad_settings = {'n_clusters': 5, 'random_state': 'ab'}
    with pytest.raises(ValueError):
        model = MultiviewSpectralClustering(**bad_settings)
        model.fit_predict(small_data)