def test_random_walk_normalized_laplacian(self):
    """Check the RandomWalk Laplacian of a four-sample affinity matrix."""
    samples = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
    affinity = utils.compute_affinity_matrix(samples)
    expected = np.array([
        [0.6, -0.2, -0.4, 0],
        [-0.2, 0.6, -0.2, -0.2],
        [-0.4, -0.2, 0.6, 0],
        [0, -0.33, 0, 0.33],
    ])
    actual = laplacian.compute_laplacian(
        affinity, laplacian_type=LaplacianType.RandomWalk)
    # Some expected entries (e.g. 0.33) are written to two decimals only,
    # so the comparison uses an absolute tolerance instead of equality.
    matches = np.allclose(expected, actual, atol=0.01)
    self.assertTrue(matches)
def test_3by2_matrix(self):
    """Check shapes and default ordering of the eigen-decomposition.

    Three 2-D samples give a 3x3 affinity matrix, so three eigenvalues and
    a 3x3 eigenvector matrix are expected, with the eigenvalues strictly
    decreasing.
    """
    samples = np.array([[1, 2], [3, 4], [1, 3]])
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(
        utils.compute_affinity_matrix(samples))
    self.assertEqual((3, ), eigenvalues.shape)
    self.assertEqual((3, 3), eigenvectors.shape)
    # Each eigenvalue must be strictly greater than its successor.
    for current, following in zip(eigenvalues[:-1], eigenvalues[1:]):
        self.assertGreater(current, following)
def test_laplacian(self):
    """Check the Unnormalized Laplacian of a four-sample affinity matrix."""
    samples = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
    expected = np.array([
        [1.5, -0.5, -1, 0],
        [-0.5, 1.5, -0.5, -0.5],
        [-1, -0.5, 1.5, 0],
        [0, -0.5, 0, 0.5],
    ])
    affinity = utils.compute_affinity_matrix(samples)
    actual = laplacian.compute_laplacian(
        affinity, laplacian_type=LaplacianType.Unnormalized)
    # Exact element-wise comparison, no tolerance.
    identical = np.array_equal(expected, actual)
    self.assertTrue(identical)
def test_ascend(self):
    """Check shapes and ordering when descend=False is requested.

    Three 2-D samples give three eigenvalues and a 3x3 eigenvector matrix;
    with descend=False the eigenvalues must be strictly increasing.
    """
    samples = np.array([[1, 2], [3, 4], [1, 3]])
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(
        utils.compute_affinity_matrix(samples), descend=False)
    self.assertEqual((3,), eigenvalues.shape)
    self.assertEqual((3, 3), eigenvectors.shape)
    # Each eigenvalue must be strictly smaller than its successor.
    for current, following in zip(eigenvalues[:-1], eigenvalues[1:]):
        self.assertLess(current, following)
def test_4by2_matrix(self):
    """Check the affinity matrix computed for four 2-D samples."""
    samples = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
    expected = np.array([
        [1, 0.5, 1, 0],
        [0.5, 1, 0.5, 0.5],
        [1, 0.5, 1, 0],
        [0, 0.5, 0, 1],
    ])
    actual = utils.compute_affinity_matrix(samples)
    # Exact element-wise comparison, no tolerance.
    identical = np.array_equal(expected, actual)
    self.assertTrue(identical)
def predict(self, X):
    """Cluster the rows of X with spectral clustering.

    Pipeline: affinity matrix -> refinement operations listed in
    `self.refinement_sequence` -> eigen-decomposition -> choice of the
    number of clusters -> K-Means on the leading eigenvector columns.

    Args:
      X: numpy array of shape (n_samples, n_features)

    Returns:
      labels: numpy array of shape (n_samples,)

    Raises:
      TypeError: if X is not a numpy array
      ValueError: if X is not 2-dimensional
    """
    # Validate the input before doing any work.
    if not isinstance(X, np.ndarray):
        raise TypeError("X must be a numpy array")
    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional")

    # Build the affinity matrix, then apply each refinement operation in
    # the configured order.
    refined = utils.compute_affinity_matrix(X)
    for name in self.refinement_sequence:
        refined = self._get_refinement_operator(name).refine(refined)

    # Eigen-decomposition of the refined affinity matrix.
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(refined)

    # Estimate the number of clusters, then clamp it to the configured
    # bounds (lower bound first, upper bound second).
    num_clusters = utils.compute_number_of_clusters(
        eigenvalues, self.stop_eigenvalue)
    if self.min_clusters is not None:
        num_clusters = max(num_clusters, self.min_clusters)
    if self.max_clusters is not None:
        num_clusters = min(num_clusters, self.max_clusters)

    # The first `num_clusters` eigenvector columns are the embeddings.
    embeddings = eigenvectors[:, :num_clusters]

    # Run K-Means++ on the spectral embeddings.
    # Note: the correct way would be a K-Means implementation that supports
    # a customized distance measure such as cosine distance. This
    # implementation from scikit-learn does NOT, which is inconsistent
    # with the paper.
    clusterer = KMeans(
        n_clusters=num_clusters,
        init="k-means++",
        max_iter=300,
        random_state=0)
    return clusterer.fit_predict(embeddings)
def get_eigen_inputs(self, X, sparse=False, **kwargs):
    """Compute the quantities that feed the K-Means step.

    Args:
      X: numpy array of shape (n_samples, n_features)
      sparse: forwarded as `sparse=` to utils.compute_sorted_eigenvectors
        (whether or not to use sparse eigen-decomposition)
      **kwargs: extra keyword arguments forwarded to
        utils.compute_sorted_eigenvectors

    Returns:
      k: predicted number of clusters
      affinity: the refined affinity matrix
      eigenvectors: eigenvectors of the refined affinity matrix
      eigenvalues: eigenvalues of the refined affinity matrix

    Raises:
      TypeError: if X is not a numpy array
      ValueError: if X is not 2-dimensional; the original documentation
        also lists an unknown refinement operation name here, which would
        come from `self._get_refinement_operator`
    """
    # Validate the input before doing any work.
    if not isinstance(X, np.ndarray):
        raise TypeError("X must be a numpy array")
    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional")

    # Build the affinity matrix, then apply each refinement operation in
    # the configured order.
    refined = utils.compute_affinity_matrix(X)
    for name in self.refinement_sequence:
        refined = self._get_refinement_operator(name).refine(refined)

    # Eigen-decomposition of the refined affinity matrix.
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(
        refined, sparse=sparse, **kwargs)

    # The upper bound is handed to the helper; only the lower bound is
    # enforced here.
    num_clusters = utils.compute_number_of_clusters(
        eigenvalues, self.max_clusters, self.stop_eigenvalue)
    if self.min_clusters is not None:
        num_clusters = max(num_clusters, self.min_clusters)

    return num_clusters, refined, eigenvectors, eigenvalues
def predict(self, X):
    """Cluster the rows of X with spectral clustering.

    Pipeline: affinity matrix -> refinement operations listed in
    `self.refinement_sequence` -> eigen-decomposition -> choice of the
    number of clusters -> K-Means on the leading eigenvector columns.

    Args:
      X: numpy array of shape (n_samples, n_features)

    Returns:
      labels: numpy array of shape (n_samples,)

    Raises:
      TypeError: if X is not a numpy array
      ValueError: if X is not 2-dimensional, or if
        `self.refinement_sequence` contains an unknown operation name
    """
    # Validate the input before doing any work.
    if not isinstance(X, np.ndarray):
        raise TypeError("X must be a numpy array")
    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional")

    # Ordered (name, factory) pairs. The factories are lazy so an operator
    # is only built, and its settings only read from `self`, when that
    # operation actually appears in the sequence.
    operator_factories = (
        ("CropDiagonal", lambda: refinement.CropDiagonal()),
        ("GaussianBlur",
         lambda: refinement.GaussianBlur(self.gaussian_blur_sigma)),
        ("RowWiseThreshold",
         lambda: refinement.RowWiseThreshold(
             self.p_percentile, self.thresholding_soft_multiplier)),
        ("Symmetrize", lambda: refinement.Symmetrize()),
        ("Diffuse", lambda: refinement.Diffuse()),
        ("RowWiseNormalize", lambda: refinement.RowWiseNormalize()),
    )

    # Build the affinity matrix, then apply each refinement operation in
    # the configured order.
    refined = utils.compute_affinity_matrix(X)
    for op in self.refinement_sequence:
        # Linear scan with `==` keeps the matching rule of an if/elif
        # chain (first equal name wins).
        make_operator = next(
            (factory for name, factory in operator_factories if op == name),
            None)
        if make_operator is None:
            raise ValueError("Unknown refinement operation: {}".format(op))
        refined = make_operator().refine(refined)

    # Eigen-decomposition of the refined affinity matrix.
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(refined)

    # Estimate the number of clusters, then clamp it to the configured
    # bounds (lower bound first, upper bound second).
    num_clusters = utils.compute_number_of_clusters(
        eigenvalues, self.stop_eigenvalue)
    if self.min_clusters is not None:
        num_clusters = max(num_clusters, self.min_clusters)
    if self.max_clusters is not None:
        num_clusters = min(num_clusters, self.max_clusters)

    # The first `num_clusters` eigenvector columns are the embeddings.
    embeddings = eigenvectors[:, :num_clusters]

    # Run K-Means++ on the spectral embeddings.
    # Note: the correct way would be a K-Means implementation that supports
    # a customized distance measure such as cosine distance. This
    # implementation from scikit-learn does NOT, which is inconsistent
    # with the paper.
    clusterer = KMeans(
        n_clusters=num_clusters,
        init="k-means++",
        max_iter=300,
        random_state=0)
    return clusterer.fit_predict(embeddings)
def test_affinity(self):
    """Check that the Affinity Laplacian type equals the affinity matrix."""
    samples = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
    affinity = utils.compute_affinity_matrix(samples)
    actual = laplacian.compute_laplacian(
        affinity, laplacian_type=LaplacianType.Affinity)
    # Exact element-wise comparison against the input affinity matrix.
    identical = np.array_equal(affinity, actual)
    self.assertTrue(identical)