def test_fit_predict_default(data):
    """Cluster the first two views with default settings and check the labels.

    The estimator is built with 2 clusters; the test expects one label per
    fitted sample and every label to lie in ``[0, data['n_clusters'])``.
    """
    first_two_views = data['fit_data'][:2]
    model = MultiviewSpectralClustering(2, random_state=RANDOM_STATE)
    labels = model.fit_predict(first_two_views)
    upper_bound = data['n_clusters']

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper_bound for label in labels)
def test_fit_predict_info_view(data):
    """Cluster all views with a randomly drawn ``info_view`` and check labels.

    ``info_view`` is drawn with ``np.random.randint`` from the valid view
    indices; the labels must have one entry per fitted sample and lie in
    ``[0, n_clusters)``.
    """
    views = data['fit_data']
    chosen_view = np.random.randint(len(views))
    upper_bound = data['n_clusters']

    model = MultiviewSpectralClustering(
        upper_bound,
        random_state=RANDOM_STATE,
        info_view=chosen_view,
    )
    labels = model.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper_bound for label in labels)
def test_fit_predict_max_iter(data):
    """Cluster all views with ``max_iter=5`` and check the returned labels.

    The labels must have one entry per fitted sample and lie in
    ``[0, n_clusters)``.
    """
    views = data['fit_data']
    iteration_cap = 5
    upper_bound = data['n_clusters']

    model = MultiviewSpectralClustering(
        upper_bound,
        random_state=RANDOM_STATE,
        max_iter=iteration_cap,
    )
    labels = model.fit_predict(views)

    assert labels.shape[0] == data['n_fit']
    assert all(0 <= label < upper_bound for label in labels)
def test_not_valid_affinity(small_data):
    """An unrecognised affinity (``'What'``) or ``None`` must raise ValueError.

    Construction and ``fit_predict`` both sit inside the ``pytest.raises``
    block, so the error may come from either call.
    """
    for bad_affinity in ('What', None):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(affinity=bad_affinity)
            model.fit_predict(small_data)
def test_n_clusters_not_positive_int(small_data):
    """``n_clusters`` of -1 or 0 must raise ValueError.

    Construction and ``fit_predict`` both sit inside the ``pytest.raises``
    block, so the error may come from either call.
    """
    for bad_count in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=bad_count)
            model.fit_predict(small_data)
def test_info_view_not_valid(small_data):
    """``info_view`` of -1 or 6 (with ``n_clusters=2``) must raise ValueError.

    NOTE(review): the value 6 presumably exceeds the number of views in
    ``small_data``; confirm against the fixture.
    """
    for bad_view in (-1, 6):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(n_clusters=2,
                                                info_view=bad_view)
            model.fit_predict(small_data)
def test_gamma_not_positive_float(small_data):
    """``gamma`` of -1.5 or 0 must raise ValueError.

    Construction and ``fit_predict`` both sit inside the ``pytest.raises``
    block, so the error may come from either call.
    """
    for bad_gamma in (-1.5, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(gamma=bad_gamma)
            model.fit_predict(small_data)
def test_n_neighbors_not_positive_int(small_data):
    """``n_neighbors`` of -1 or 0 must raise ValueError.

    Both cases use the ``'nearest_neighbors'`` affinity; construction and
    ``fit_predict`` both sit inside the ``pytest.raises`` block.
    """
    for bad_neighbors in (-1, 0):
        with pytest.raises(ValueError):
            model = MultiviewSpectralClustering(
                affinity='nearest_neighbors',
                n_neighbors=bad_neighbors,
            )
            model.fit_predict(small_data)
def test_affinity_neighbors(data):
    """Compare the nearest-neighbours affinity matrix with a reference graph.

    The reference is the dense k-neighbours graph produced by a
    ``NearestNeighbors`` instance fitted on the first view. The estimator's
    ``_affinity_mat`` output must be ``n_fit x n_fit`` and agree with the
    reference element-wise to within 1e-6.
    """
    v1_data = data['fit_data'][0]
    n_fit = data['n_fit']
    n_neighbors = 10

    neighbors = NearestNeighbors(n_neighbors=n_neighbors)
    neighbors.fit(v1_data)
    true_kernel = neighbors.kneighbors_graph(v1_data).toarray()

    # Feed the same ``n_neighbors`` to the estimator so the reference and the
    # tested matrix cannot drift apart when the value is changed.
    spectral = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                           affinity='nearest_neighbors',
                                           n_neighbors=n_neighbors)
    n_kernel = spectral._affinity_mat(v1_data)

    assert n_kernel.shape[0] == n_fit
    assert n_kernel.shape[1] == n_fit
    # One vectorised comparison instead of n_fit**2 Python-level asserts.
    assert np.all(np.abs(true_kernel - n_kernel) < 0.000001)
def test_affinity_mat_poly(data):
    """Compare the ``'poly'`` affinity matrix with ``polynomial_kernel``.

    The reference kernel uses ``gamma = 1 / (2 * median(d) ** 2)``, where
    ``d`` is the ``cdist`` pairwise distance matrix of the first view. The
    estimator's ``_affinity_mat`` output must be ``n_fit x n_fit`` and agree
    with the reference element-wise to within 1e-6.
    """
    first_view = data['fit_data'][0]

    pairwise = cdist(first_view, first_view)
    median_gamma = 1 / (2 * np.median(pairwise)**2)
    expected = polynomial_kernel(first_view, gamma=median_gamma)

    model = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                        affinity='poly')
    actual = model._affinity_mat(first_view)

    assert actual.shape[0] == data['n_fit']
    assert actual.shape[1] == data['n_fit']
    assert np.all(np.abs(expected - actual) < 0.000001)
def test_affinity_mat_rbf2(data):
    """Check the affinity matrix when ``gamma`` is supplied explicitly.

    The estimator is built with ``gamma=1`` and otherwise default settings;
    its ``_affinity_mat`` output for the first view must be ``n_fit x n_fit``
    and agree element-wise (within 1e-6) with ``rbf_kernel`` evaluated with
    the same ``gamma``.
    """
    v1_data = data['fit_data'][0]
    n_fit = data['n_fit']
    gamma = 1

    spectral = MultiviewSpectralClustering(random_state=RANDOM_STATE,
                                           gamma=gamma)
    # The reference kernel uses the same explicit gamma as the estimator.
    # No median-distance gamma is computed here: the reference never used it.
    true_kernel = rbf_kernel(v1_data, gamma=gamma)
    g_kernel = spectral._affinity_mat(v1_data)

    assert g_kernel.shape[0] == n_fit
    assert g_kernel.shape[1] == n_fit
    # One vectorised comparison instead of n_fit**2 Python-level asserts.
    assert np.all(np.abs(true_kernel - g_kernel) < 0.000001)
def perform_clustering(seed, m_data, labels, n_clusters, kernel='rbf'):
    """Run single-view and multi-view spectral clustering and print NMI scores.

    Parameters
    ----------
    seed
        Value forwarded as ``random_state`` to both estimators.
    m_data
        Sequence of views. ``m_data[0]`` and ``m_data[1]`` are clustered
        individually, and ``np.hstack(m_data)`` is clustered as one
        concatenated view.
    labels
        True class labels used as the reference for every NMI score.
    n_clusters
        Number of clusters requested from both estimators.
    kernel
        Affinity passed to both estimators (default ``'rbf'``).

    Returns
    -------
    The cluster labels produced by ``MultiviewSpectralClustering``.
    """
    # Single-view spectral clustering.
    # ``seed`` is forwarded so the caller actually controls reproducibility;
    # reading a module-level constant here would silently ignore the argument.
    s_spectral = SpectralClustering(n_clusters=n_clusters,
                                    random_state=seed,
                                    affinity=kernel,
                                    n_init=100)

    # Cluster each view separately
    s_clusters_v1 = s_spectral.fit_predict(m_data[0])
    s_clusters_v2 = s_spectral.fit_predict(m_data[1])

    # Concatenate the multiple views into a single view
    s_data = np.hstack(m_data)
    s_clusters = s_spectral.fit_predict(s_data)

    # Compute nmi between true class labels and single-view cluster labels
    s_nmi_v1 = nmi_score(labels, s_clusters_v1)
    s_nmi_v2 = nmi_score(labels, s_clusters_v2)
    s_nmi = nmi_score(labels, s_clusters)

    print('Single-view View 1 NMI Score: {0:.3f}\n'.format(s_nmi_v1))
    print('Single-view View 2 NMI Score: {0:.3f}\n'.format(s_nmi_v2))
    print('Single-view Concatenated NMI Score: {0:.3f}\n'.format(s_nmi))

    # Multi-view spectral clustering on the un-concatenated views
    m_spectral = MultiviewSpectralClustering(n_clusters=n_clusters,
                                             random_state=seed,
                                             affinity=kernel,
                                             n_init=100)
    m_clusters = m_spectral.fit_predict(m_data)

    # Compute nmi between true class labels and multi-view cluster labels
    m_nmi = nmi_score(labels, m_clusters)
    print('Multi-view Concatenated NMI Score: {0:.3f}\n'.format(m_nmi))

    return m_clusters
def data():
    """Build three random views and the metadata shared by the tests.

    Seeds NumPy's global RNG with ``RANDOM_STATE``, draws three uniform
    random matrices of 200 samples with 20, 18 and 30 features, and creates
    a 2-cluster ``MultiviewSpectralClustering`` instance.

    Returns a dict with keys ``n_fit``, ``n_feats1``, ``n_feats2``,
    ``n_feats3``, ``n_clusters``, ``spectral`` and ``fit_data``.
    """
    num_fit_samples = 200
    n_clusters = 2
    n_feats1, n_feats2, n_feats3 = 20, 18, 30

    np.random.seed(RANDOM_STATE)
    # Views are drawn in feature-count order so the RNG stream is consumed
    # in the same sequence on every run.
    fit_data = [
        np.random.rand(num_fit_samples, width)
        for width in (n_feats1, n_feats2, n_feats3)
    ]

    spectral = MultiviewSpectralClustering(n_clusters,
                                           random_state=RANDOM_STATE)

    return {
        'n_fit': num_fit_samples,
        'n_feats1': n_feats1,
        'n_feats2': n_feats2,
        'n_feats3': n_feats3,
        'n_clusters': n_clusters,
        'spectral': spectral,
        'fit_data': fit_data
    }
# Score every single-view clustering against the true class labels with NMI
s_nmi_v1 = nmi_score(labels, s_clusters_v1)
s_nmi_v2 = nmi_score(labels, s_clusters_v2)
s_nmi = nmi_score(labels, s_clusters)

for report_template, report_value in (
        ('Single-view View 1 NMI Score: {0:.3f}\n', s_nmi_v1),
        ('Single-view View 2 NMI Score: {0:.3f}\n', s_nmi_v2),
        ('Single-view Concatenated NMI Score: {0:.3f}\n', s_nmi),
):
    print(report_template.format(report_value))

###############################################################################
# Multiview spectral clustering
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Use the MultiviewSpectralClustering instance to cluster the data

m_spectral = MultiviewSpectralClustering(
    n_clusters=n_class,
    affinity='nearest_neighbors',
    max_iter=12,
    random_state=RANDOM_SEED,
    n_init=10,
)
m_clusters = m_spectral.fit_predict(m_data)

# Score the multi-view clustering against the true class labels with NMI
m_nmi = nmi_score(labels, m_clusters)
print('Multi-view NMI Score: {0:.3f}\n'.format(m_nmi))

###############################################################################
# Plots of clusters produced by multi-view spectral clustering and the true
# clusters
#
# We will display the clustering results of the Multi-view spectral clustering
# algorithm below, along with the true class labels.
# NOTE(review): `Xs`, `y_true`, `y_predicted`, `title`, `score` and `method`
# are not defined anywhere in this fragment. The statements up to `plt.show()`
# look like the tail of the `plot_clusters` helper called further down, whose
# `def` line is outside this view -- confirm and re-indent under that header.

# Two side-by-side scatter panels: true labels (left), predicted labels (right)
sca_kwargs = dict(alpha=0.7, s=10)
f, axes = plt.subplots(1, 2, figsize=(8, 4))
for panel, colours, heading in zip(axes,
                                   (y_true, y_predicted),
                                   ('True labels', title)):
    panel.scatter(Xs[:, 0], Xs[:, 1], c=colours, **sca_kwargs)
    panel.set_title(heading, fontsize=14)

# Show the score in the upper-right corner of the predicted-labels panel
axes[1].annotate(
    f'Homogeneity\nscore = {score:.2f}',
    xy=(0.95, 0.85),
    xycoords='axes fraction',
    fontsize=13,
    ha='right',
)
axes[0].set_ylabel(f'{method} Component 2')
plt.setp(axes, xticks=[], yticks=[], xlabel=f'{method} Component 1')
plt.tight_layout()
plt.show()

# Cluster concatenated data: all views stacked column-wise into one matrix
sv_clust = SpectralClustering(
    n_clusters=4,
    affinity='nearest_neighbors',
)
sv_labels = sv_clust.fit_predict(np.hstack(Xs))

plot_clusters(
    Xs_pca, y, sv_labels, 'Concatenated clustering labels', 'PCA'
)

# Cluster multiview data: the views are passed to the estimator un-stacked
mv_clust = MultiviewSpectralClustering(
    n_clusters=4,
    affinity='nearest_neighbors',
)
mv_labels = mv_clust.fit_predict(Xs)

plot_clusters(
    Xs_mvmds, y, mv_labels, 'Multiview clustering labels', 'MVMDS'
)
def test_samples_not_2D_2(small_data):
    """Two 1-D views of length 10 must make clustering raise ValueError.

    The ``small_data`` argument is requested but not used by this test.
    """
    with pytest.raises(ValueError):
        flat_views = [np.random.random((10, )) for _ in range(2)]
        model = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        model.fit_predict(flat_views)
def test_n_views_too_small2(small_data):
    """Clustering an empty list of views must raise ValueError.

    The ``small_data`` argument is requested but not used by this test.
    """
    no_views = []
    with pytest.raises(ValueError):
        model = MultiviewSpectralClustering(random_state=RANDOM_STATE)
        model.fit_predict(no_views)
def test_random_state_not_convertible(small_data):
    """A ``random_state`` of ``'ab'`` (with ``n_clusters=5``) must raise ValueError.

    Construction and ``fit_predict`` both sit inside the ``pytest.raises``
    block, so the error may come from either call.
    """
    bad_seed = 'ab'
    with pytest.raises(ValueError):
        model = MultiviewSpectralClustering(n_clusters=5,
                                            random_state=bad_seed)
        model.fit_predict(small_data)