def _get_refinement_operator(self, name):
    """Build the refinement operator that corresponds to `name`.

    Args:
      name: class name of the refinement operator, as a string

    Returns:
      a newly constructed refinement operator object

    Raises:
      ValueError: if `name` does not match any supported operator
    """
    # Operators without configuration.
    if name == "CropDiagonal":
        return refinement.CropDiagonal()
    if name == "Symmetrize":
        return refinement.Symmetrize()
    if name == "Diffuse":
        return refinement.Diffuse()
    if name == "RowWiseNormalize":
        return refinement.RowWiseNormalize()

    # Operators configured from attributes of this instance.
    if name == "GaussianBlur":
        blur_cls = refinement.GaussianBlur
        return blur_cls(self.gaussian_blur_sigma)
    if name == "RowWiseThreshold":
        threshold_cls = refinement.RowWiseThreshold
        return threshold_cls(
            self.p_percentile,
            self.thresholding_soft_multiplier,
            self.thresholding_with_row_max,
        )

    message = "Unknown refinement operation: {}".format(name)
    raise ValueError(message)
def test_3by3_matrix(self):
    """RowWiseNormalize on a 3x3 matrix matches precomputed values."""
    matrix = np.array([
        [0.5, 2.0, 3.0],
        [3.0, 4.0, 5.0],
        [4.0, 2.0, 1.0],
    ])
    normalizer = refinement.RowWiseNormalize()
    refined = normalizer.refine(matrix)

    # Each expected row is the input row divided by its largest entry,
    # rounded to three decimals (hence the absolute tolerance below).
    want = np.array([
        [0.167, 0.667, 1.0],
        [0.6, 0.8, 1.0],
        [1.0, 0.5, 0.25],
    ])
    matches = np.allclose(want, refined, atol=0.001)
    self.assertTrue(matches)
def predict(self, X):
    """Perform spectral clustering on data X.

    Steps: compute the affinity matrix, apply the configured refinement
    operations in order, eigen-decompose the refined matrix, pick the
    number of clusters, and run K-Means on the spectral embeddings.

    Args:
      X: numpy array of shape (n_samples, n_features)

    Returns:
      labels: numpy array of shape (n_samples,)

    Raises:
      TypeError: if X has wrong type
      ValueError: if X has wrong shape, or we see an unknown refinement
        operation
    """
    if not isinstance(X, np.ndarray):
        raise TypeError("X must be a numpy array")
    if X.ndim != 2:
        raise ValueError("X must be 2-dimensional")

    # Compute affinity matrix.
    affinity = utils.compute_affinity_matrix(X)

    # Refinement operations on the affinity matrix.
    # Operators are built by _get_refinement_operator so that there is a
    # single place mapping names to operators: it passes every configured
    # option (including thresholding_with_row_max) and raises ValueError
    # for an unknown operation name.
    for refinement_name in self.refinement_sequence:
        op = self._get_refinement_operator(refinement_name)
        affinity = op.refine(affinity)

    # Perform eigen decomposition.
    eigenvalues, eigenvectors = utils.compute_sorted_eigenvectors(affinity)

    # Get number of clusters, then clamp it to the optional bounds.
    # max_clusters is applied last, so it wins if the two bounds conflict.
    k = utils.compute_number_of_clusters(eigenvalues, self.stop_eigenvalue)
    if self.min_clusters is not None:
        k = max(k, self.min_clusters)
    if self.max_clusters is not None:
        k = min(k, self.max_clusters)

    # Get spectral embeddings: the first k eigenvector columns.
    spectral_embeddings = eigenvectors[:, :k]

    # Run K-Means++ on spectral embeddings.
    # Note: The correct way should be using a K-Means implementation
    # that supports customized distance measure such as cosine distance.
    # This implementation from scikit-learn does NOT, which is inconsistent
    # with the paper.
    kmeans_clusterer = KMeans(
        n_clusters=k,
        init="k-means++",
        max_iter=300,
        random_state=0)
    labels = kmeans_clusterer.fit_predict(spectral_embeddings)
    return labels