示例#1
0
 def test_random_walk_normalized_laplacian(self):
     """Check the RandomWalk Laplacian computed for four 2-D points."""
     points = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
     # The expected entries are rounded to two decimals, which is why the
     # comparison below uses an absolute tolerance instead of exact equality.
     expected = np.array([
         [0.6, -0.2, -0.4, 0],
         [-0.2, 0.6, -0.2, -0.2],
         [-0.4, -0.2, 0.6, 0],
         [0, -0.33, 0, 0.33],
     ])
     actual = laplacian.compute_laplacian(
         utils.compute_affinity_matrix(points),
         laplacian_type=LaplacianType.RandomWalk)
     is_close = np.allclose(expected, actual, atol=0.01)
     self.assertTrue(is_close)
示例#2
0
 def test_3by2_matrix(self):
     """Check output shapes and strictly decreasing eigenvalues (3x2 input)."""
     samples = np.array([[1, 2], [3, 4], [1, 3]])
     eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(
         utils.compute_affinity_matrix(samples))
     self.assertEqual((3, ), eigenvalues.shape)
     self.assertEqual((3, 3), eigenvectors.shape)
     # With default arguments each eigenvalue is expected to be strictly
     # greater than the one that follows it.
     for index in range(2):
         self.assertGreater(eigenvalues[index], eigenvalues[index + 1])
示例#3
0
 def test_laplacian(self):
     """Check the Unnormalized Laplacian computed for four 2-D points."""
     points = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
     expected = np.array([
         [1.5, -0.5, -1, 0],
         [-0.5, 1.5, -0.5, -0.5],
         [-1, -0.5, 1.5, 0],
         [0, -0.5, 0, 0.5],
     ])
     actual = laplacian.compute_laplacian(
         utils.compute_affinity_matrix(points),
         laplacian_type=LaplacianType.Unnormalized)
     # Element-wise exact comparison; no tolerance is applied here.
     matches = np.array_equal(expected, actual)
     self.assertTrue(matches)
示例#4
0
 def test_ascend(self):
   """Check shapes and strictly increasing eigenvalues with descend=False."""
   samples = np.array([[1, 2], [3, 4], [1, 3]])
   eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(
       utils.compute_affinity_matrix(samples), descend=False)
   self.assertEqual((3,), eigenvalues.shape)
   self.assertEqual((3, 3), eigenvectors.shape)
   # Each eigenvalue is expected to be strictly smaller than its successor.
   for index in range(2):
     self.assertLess(eigenvalues[index], eigenvalues[index + 1])
示例#5
0
 def test_4by2_matrix(self):
     """Check the affinity matrix computed for four 2-D points."""
     points = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
     expected = np.array([
         [1, 0.5, 1, 0],
         [0.5, 1, 0.5, 0.5],
         [1, 0.5, 1, 0],
         [0, 0.5, 0, 1],
     ])
     actual = utils.compute_affinity_matrix(points)
     # Element-wise exact comparison; no tolerance is applied here.
     matches = np.array_equal(expected, actual)
     self.assertTrue(matches)
    def predict(self, X):
        """Cluster the rows of X with spectral clustering.

        The pipeline is: affinity matrix -> refinement operators listed in
        `self.refinement_sequence` -> sorted eigen-decomposition -> choice of
        the number of clusters -> K-Means on the leading eigenvectors.

        Args:
            X: numpy array of shape (n_samples, n_features)

        Returns:
            labels: numpy array of shape (n_samples,)

        Raises:
            TypeError: if X is not a numpy array
            ValueError: if X is not 2-dimensional
        """
        if not isinstance(X, np.ndarray):
            raise TypeError("X must be a numpy array")
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")

        # Build the affinity matrix, then apply every refinement in order.
        refined = utils.compute_affinity_matrix(X)
        for name in self.refinement_sequence:
            refined = self._get_refinement_operator(name).refine(refined)

        # Eigen-decomposition of the refined affinity matrix.
        eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(refined)

        # Estimate the number of clusters, then clamp it to the configured
        # bounds (a bound of None means "no limit" on that side).
        num_clusters = utils.compute_number_of_clusters(
            eigenvalues, self.stop_eigenvalue)
        if self.min_clusters is not None and self.min_clusters > num_clusters:
            num_clusters = self.min_clusters
        if self.max_clusters is not None and self.max_clusters < num_clusters:
            num_clusters = self.max_clusters

        # The first `num_clusters` eigenvector columns are the embeddings.
        embeddings = eigenvectors[:, :num_clusters]

        # Run K-Means++ on the spectral embeddings.
        # Note: the correct approach would be a K-Means implementation that
        # supports a custom distance measure such as cosine distance. The
        # scikit-learn implementation does NOT, which is inconsistent with
        # the paper.
        clusterer = KMeans(n_clusters=num_clusters,
                           init="k-means++",
                           max_iter=300,
                           random_state=0)
        return clusterer.fit_predict(embeddings)
    def get_eigen_inputs(self, X, sparse=False, **kwargs):
        """Compute the values that are used as input to K-Means.

        Args:
            X: 2-dimensional numpy array; its refined affinity matrix is what
                gets eigen-decomposed
            sparse: whether or not to use sparse eigen-decomposition
                (forwarded to utils.compute_sorted_eigenvectors)
            **kwargs: extra keyword arguments forwarded to
                utils.compute_sorted_eigenvectors

        Returns:
            k: predicted number of clusters, raised to `self.min_clusters`
                when that bound is set and larger
            affinity: the refined affinity matrix
            eigenvectors: eigenvectors returned by
                utils.compute_sorted_eigenvectors
            eigenvalues: eigenvalues returned by
                utils.compute_sorted_eigenvectors

        Raises:
            TypeError: if X is not a numpy array
            ValueError: if X is not 2-dimensional
        """
        if not isinstance(X, np.ndarray):
            raise TypeError("X must be a numpy array")
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")

        # Build the affinity matrix, then apply every refinement in order.
        refined = utils.compute_affinity_matrix(X)
        for name in self.refinement_sequence:
            refined = self._get_refinement_operator(name).refine(refined)

        # Eigen-decomposition of the refined affinity matrix.
        decomposition = utils.compute_sorted_eigenvectors(refined,
                                                          sparse=sparse,
                                                          **kwargs)
        eigenvalues, eigenvectors = decomposition

        # The upper bound is handed to the helper; only the lower bound is
        # applied here.
        num_clusters = utils.compute_number_of_clusters(
            eigenvalues, self.max_clusters, self.stop_eigenvalue)
        if self.min_clusters is not None and self.min_clusters > num_clusters:
            num_clusters = self.min_clusters

        return num_clusters, refined, eigenvectors, eigenvalues
    def predict(self, X):
        """Cluster the rows of X with spectral clustering.

        The pipeline is: affinity matrix -> refinement operations listed in
        `self.refinement_sequence` -> sorted eigen-decomposition -> choice of
        the number of clusters -> K-Means on the leading eigenvectors.

        Args:
            X: numpy array of shape (n_samples, n_features)

        Returns:
            labels: numpy array of shape (n_samples,)

        Raises:
            TypeError: if X is not a numpy array
            ValueError: if X is not 2-dimensional, or if
                `self.refinement_sequence` names an unknown refinement
                operation
        """
        if not isinstance(X, np.ndarray):
            raise TypeError("X must be a numpy array")
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")

        affinity = utils.compute_affinity_matrix(X)

        # Ordered (name, factory) table of the supported refinements. The
        # factories are lambdas so an operator (and the settings it reads
        # from self) is only touched when its name is actually requested.
        operator_factories = (
            ("CropDiagonal", lambda: refinement.CropDiagonal()),
            ("GaussianBlur",
             lambda: refinement.GaussianBlur(self.gaussian_blur_sigma)),
            ("RowWiseThreshold",
             lambda: refinement.RowWiseThreshold(
                 self.p_percentile, self.thresholding_soft_multiplier)),
            ("Symmetrize", lambda: refinement.Symmetrize()),
            ("Diffuse", lambda: refinement.Diffuse()),
            ("RowWiseNormalize", lambda: refinement.RowWiseNormalize()),
        )

        # Apply the requested refinements in sequence.
        for op in self.refinement_sequence:
            for known_name, make_operator in operator_factories:
                if op == known_name:
                    affinity = make_operator().refine(affinity)
                    break
            else:
                raise ValueError("Unknown refinement operation: {}".format(op))

        # Eigen-decomposition of the refined affinity matrix.
        eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(affinity)

        # Estimate the number of clusters, then clamp it to the configured
        # bounds (a bound of None means "no limit" on that side).
        num_clusters = utils.compute_number_of_clusters(
            eigenvalues, self.stop_eigenvalue)
        if self.min_clusters is not None and self.min_clusters > num_clusters:
            num_clusters = self.min_clusters
        if self.max_clusters is not None and self.max_clusters < num_clusters:
            num_clusters = self.max_clusters

        # The first `num_clusters` eigenvector columns are the embeddings.
        embeddings = eigenvectors[:, :num_clusters]

        # Run K-Means++ on the spectral embeddings.
        # Note: the correct approach would be a K-Means implementation that
        # supports a custom distance measure such as cosine distance. The
        # scikit-learn implementation does NOT, which is inconsistent with
        # the paper.
        clusterer = KMeans(n_clusters=num_clusters,
                           init="k-means++",
                           max_iter=300,
                           random_state=0)
        return clusterer.fit_predict(embeddings)
示例#9
0
 def test_affinity(self):
     """Check that LaplacianType.Affinity yields a matrix equal to the input."""
     points = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]])
     affinity = utils.compute_affinity_matrix(points)
     returned = laplacian.compute_laplacian(
         affinity, laplacian_type=LaplacianType.Affinity)
     # Element-wise exact comparison against the unmodified affinity matrix.
     matches = np.array_equal(affinity, returned)
     self.assertTrue(matches)