def test_cosine(self):
    """Check that cosine modularity equals ``fit - div`` on several embeddings.

    Covers a bipartite graph with explicit column embedding, a plain graph
    embedded with GSVD and with LouvainEmbedding, and the error raised when
    the embedding does not match the input matrix.
    """
    # Bipartite case: row and column embeddings given explicitly.
    bigraph = test_bigraph()
    gsvd = GSVD(3, solver='lanczos')
    row_embedding = gsvd.fit_transform(bigraph)
    col_embedding = gsvd.embedding_col_
    fit, div, _ = cosine_modularity(bigraph, row_embedding, col_embedding, return_all=True)
    score = cosine_modularity(bigraph, row_embedding, col_embedding, return_all=False)
    self.assertAlmostEqual(score, fit - div)

    # Graph case, reusing the same GSVD instance.
    graph = test_graph()
    graph_embedding = gsvd.fit_transform(graph)
    fit, div, score = cosine_modularity(graph, graph_embedding, return_all=True)
    self.assertAlmostEqual(score, fit - div)

    # An embedding fitted on the graph is rejected for the bipartite matrix.
    with self.assertRaises(ValueError):
        cosine_modularity(bigraph, graph_embedding)

    # Graph case with a Louvain-based embedding.
    louvain_embedding = LouvainEmbedding().fit_transform(graph)
    fit, div, score = cosine_modularity(graph, louvain_embedding, return_all=True)
    self.assertAlmostEqual(score, fit - div)
def setUp(self):
    """Create the clustering instances shared by the tests of this case."""
    # Default-option variants: 3 clusters on a 2-component GSVD embedding.
    self.kmeans = KMeans(3, GSVD(2))
    self.bikmeans = BiKMeans(3, GSVD(2))

    # Variants exercising non-default options, with 4 clusters.
    self.kmeans_options = KMeans(4, SVD(3), sort_clusters=False)
    self.bikmeans_options = BiKMeans(
        4,
        BiSpectral(3),
        co_cluster=True,
        sort_clusters=False,
    )
def test_undirected(self):
    """Check classifiers on an undirected graph.

    Labels must be the same whether seeds are given as an array or a dict,
    and whether the adjacency is the default or the boolean test graph.
    Labels must also follow a renaming of the second seed label.
    """
    graph = test_graph()
    graph_bool = test_graph_bool()
    n_nodes = graph.shape[0]

    # Array seeds: every entry is -1 except the first two, set to 0 and 1.
    array_seeds = -np.ones(n_nodes)
    array_seeds[:2] = np.arange(2)
    dict_seeds = {0: 0, 1: 1}

    classifiers = [
        PageRankClassifier(),
        DiffusionClassifier(),
        KNN(embedding_method=GSVD(3), n_neighbors=1),
        CoPageRankClassifier(),
        Propagation(),
    ]

    for classifier in classifiers:
        from_array = classifier.fit_transform(graph, array_seeds)
        from_dict = classifier.fit_transform(graph, dict_seeds)
        from_bool = classifier.fit_transform(graph_bool, array_seeds)
        self.assertTrue((from_array == from_dict).all())
        self.assertTrue((from_array == from_bool).all())
        self.assertEqual(from_dict.shape[0], n_nodes)
        self.assertTupleEqual(classifier.membership_.shape, (n_nodes, 2))

    # Renaming the label of node 1 from 1 to 2 must rename the predictions.
    seeds_label_1 = {0: 0, 1: 1}
    seeds_label_2 = {0: 0, 1: 2}
    for classifier in classifiers:
        is_label_1 = classifier.fit_transform(graph, seeds_label_1) == 1
        is_label_2 = classifier.fit_transform(graph, seeds_label_2) == 2
        self.assertTrue((is_label_1 == is_label_2).all())
def test_undirected(self):
    """Check classifiers, including ``score``, on an undirected graph.

    ``score`` must raise ValueError on an unfitted classifier. After fitting,
    array and dict seeds must give the same labels, and labels must follow a
    renaming of the second seed label.
    """
    graph = test_graph()
    n_nodes = graph.shape[0]

    # Array seeds: every entry is -1 except the first two, set to 0 and 1.
    array_seeds = -np.ones(n_nodes)
    array_seeds[:2] = np.arange(2)
    dict_seeds = {0: 0, 1: 1}

    classifiers = [
        PageRankClassifier(),
        DiffusionClassifier(),
        KNN(embedding_method=GSVD(3), n_neighbors=1),
        Propagation(),
        DirichletClassifier(),
    ]

    # Scoring before any fit is expected to fail.
    unfitted = classifiers[0]
    with self.assertRaises(ValueError):
        unfitted.score(0)

    for classifier in classifiers:
        from_array = classifier.fit_transform(graph, array_seeds)
        from_dict = classifier.fit_transform(graph, dict_seeds)
        label_scores = classifier.score(0)
        self.assertTrue((from_array == from_dict).all())
        self.assertEqual(from_dict.shape, (n_nodes, ))
        self.assertTupleEqual(classifier.membership_.shape, (n_nodes, 2))
        self.assertEqual(label_scores.shape, (n_nodes, ))

    # Renaming the label of node 1 from 1 to 2 must rename the predictions.
    seeds_label_1 = {0: 0, 1: 1}
    seeds_label_2 = {0: 0, 1: 2}
    for classifier in classifiers:
        is_label_1 = classifier.fit_transform(graph, seeds_label_1) == 1
        is_label_2 = classifier.fit_transform(graph, seeds_label_2) == 2
        self.assertTrue((is_label_1 == is_label_2).all())
def test_bipartite(self):
    """Check bipartite classifiers on the movie-actor graph.

    Row seeds given as an array and as a dict must give the same row and
    column labels. Fitting with both row and column seeds must also run.
    """
    biadjacency = movie_actor(metadata=False)
    n_row, n_col = biadjacency.shape

    # Array seeds: every entry is -1 except the first two, set to 0 and 1.
    row_seeds_array = -np.ones(n_row)
    row_seeds_array[:2] = np.arange(2)
    row_seeds_dict = {0: 0, 1: 1}
    col_seeds_dict = {0: 0}

    classifiers = [
        BiPageRankClassifier(),
        BiDiffusionClassifier(),
        BiKNN(embedding_method=GSVD(3), n_neighbors=1),
        BiPropagation(),
        BiDirichletClassifier(),
    ]

    for classifier in classifiers:
        classifier.fit(biadjacency, row_seeds_array)
        rows_from_array = classifier.labels_row_
        cols_from_array = classifier.labels_col_

        classifier.fit(biadjacency, row_seeds_dict)
        rows_from_dict = classifier.labels_row_
        cols_from_dict = classifier.labels_col_

        self.assertTrue(np.allclose(rows_from_array, rows_from_dict))
        self.assertTrue(np.allclose(cols_from_array, cols_from_dict))
        self.assertEqual(cols_from_dict.shape[0], n_col)
        self.assertTupleEqual(classifier.membership_row_.shape, (n_row, 2))
        self.assertTupleEqual(classifier.membership_col_.shape, (n_col, 2))

        # Smoke test: seeds on both rows and columns.
        classifier.fit(biadjacency, row_seeds_dict, col_seeds_dict)
def __init__(self, embedding_method: BaseEmbedding = None, co_cluster: bool = False):
    """Store the configuration of the algorithm.

    Parameters
    ----------
    embedding_method :
        Embedding object stored as ``self.embedding_method``. When omitted
        (``None``), a new ``GSVD(10)`` is created for this instance.
    co_cluster :
        Flag stored as ``self.co_cluster``.
    """
    super(Ward, self).__init__()
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    self.embedding_method = GSVD(10) if embedding_method is None else embedding_method
    self.co_cluster = co_cluster
    self.bipartite = None
def test_undirected(self):
    """Each hierarchical algorithm must return a dendrogram of shape (n - 1, 4)."""
    graph = test_graph()
    expected_shape = (graph.shape[0] - 1, 4)
    algorithms = [Paris(), Ward(GSVD(3)), LouvainHierarchy()]
    for algorithm in algorithms:
        result = algorithm.fit_transform(graph)
        self.assertTupleEqual(result.shape, expected_shape)
def test_options(self):
    """Check GSVD with the 'halko' solver and relative regularization.

    Asking for 5 components must emit a warning on the Star Wars graph and
    yield embeddings with ``min(n_row, n_col) - 1`` columns. A regularized
    one-component model must fit and accept a random vector in ``predict``.
    """
    biadjacency = star_wars(metadata=False)
    n_row, n_col = biadjacency.shape
    expected_dim = min(n_row, n_col) - 1

    # Requested dimension: fit is expected to warn.
    model = GSVD(n_components=5, regularization=0., solver='halko', relative_regularization=True)
    with self.assertWarns(Warning):
        model.fit(biadjacency)
    self.assertEqual(model.embedding_row_.shape, (n_row, expected_dim))
    self.assertEqual(model.embedding_col_.shape, (n_col, expected_dim))

    # Regularized single component: fit and predict must simply run.
    model = GSVD(n_components=1, regularization=0.1, solver='halko', relative_regularization=True)
    model.fit(biadjacency)
    random_row = np.random.rand(n_col)
    model.predict(random_row)
def __init__(self, embedding_method: BaseEmbedding = None, n_neighbors: int = 5, factor_distance: float = 2,
             leaf_size: int = 16, p: float = 2, tol_nn: float = 0.01, n_jobs: int = 1):
    """Forward the configuration to the parent class initializer.

    Parameters
    ----------
    embedding_method :
        Embedding object passed to the parent. When omitted (``None``), a
        new ``GSVD(10)`` is created for this instance.
    n_neighbors, factor_distance, leaf_size, p, tol_nn, n_jobs :
        Passed unchanged, in this order, to the parent initializer.
    """
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    if embedding_method is None:
        embedding_method = GSVD(10)
    super(BiKNN, self).__init__(embedding_method, n_neighbors, factor_distance, leaf_size, p, tol_nn, n_jobs)
def __init__(self, n_clusters: int = 8, embedding_method: BaseEmbedding = None, sort_clusters: bool = True,
             return_membership: bool = True, return_aggregate: bool = True):
    """Store the configuration of the algorithm.

    Parameters
    ----------
    n_clusters :
        Value stored as ``self.n_clusters``.
    embedding_method :
        Embedding object stored as ``self.embedding_method``. When omitted
        (``None``), a new ``GSVD(10)`` is created for this instance.
    sort_clusters, return_membership, return_aggregate :
        Forwarded by keyword to the parent initializer.
    """
    super(KMeans, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                 return_aggregate=return_aggregate)
    self.n_clusters = n_clusters
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    self.embedding_method = GSVD(10) if embedding_method is None else embedding_method
def __init__(self, n_clusters: int = 2, embedding_method: BaseBiEmbedding = None, co_cluster: bool = False,
             sort_clusters: bool = True, return_membership: bool = True, return_aggregate: bool = True):
    """Forward the configuration to the parent class and store ``co_cluster``.

    Parameters
    ----------
    n_clusters :
        Forwarded by keyword to the parent initializer.
    embedding_method :
        Embedding object forwarded to the parent. When omitted (``None``), a
        new ``GSVD(10)`` is created for this instance.
    co_cluster :
        Flag stored as ``self.co_cluster``.
    sort_clusters, return_membership, return_aggregate :
        Forwarded by keyword to the parent initializer.
    """
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    if embedding_method is None:
        embedding_method = GSVD(10)
    super(BiKMeans, self).__init__(sort_clusters=sort_clusters, return_membership=return_membership,
                                   return_aggregate=return_aggregate, n_clusters=n_clusters,
                                   embedding_method=embedding_method)
    self.co_cluster = co_cluster
def test_bipartite(self):
    """Check dendrogram shapes of the bipartite hierarchical algorithms.

    Row and column dendrograms must have one row fewer than the number of
    nodes on their side; the full dendrogram is only checked when set.
    """
    biadjacency = test_bigraph()
    n_row, n_col = biadjacency.shape
    row_shape = (n_row - 1, 4)
    col_shape = (n_col - 1, 4)
    full_shape = (n_row + n_col - 1, 4)

    algorithms = [
        BiParis(),
        BiWard(GSVD(3), cluster_col=True, cluster_both=True),
        BiLouvainHierarchy(),
    ]
    for algorithm in algorithms:
        algorithm.fit_transform(biadjacency)
        self.assertTupleEqual(algorithm.dendrogram_row_.shape, row_shape)
        self.assertTupleEqual(algorithm.dendrogram_col_.shape, col_shape)
        if algorithm.dendrogram_full_ is None:
            continue
        self.assertTupleEqual(algorithm.dendrogram_full_.shape, full_shape)
def _init_vem(self, init):
    """Initialize the variational EM parameters ``taus``, ``alphas`` and ``pis``.

    Parameters
    ----------
    init :
        ``"kmeans"`` builds ``taus`` as the one-hot encoding of the labels
        returned by KMeans on ``self.X`` (with a GSVD embedding of dimension
        ``self.n_clusters``). Any other value draws ``taus`` uniformly at
        random and rescales each row to sum to 1.

    Sets
    ----
    self.taus : array of shape (n, n_clusters), n being ``self.X.shape[0]``.
    self.alphas : array of shape (n_clusters,), every entry 1 / n_clusters.
    self.pis : zero array of shape (n_clusters, n_clusters).
    """
    n = self.X.shape[0]
    n_clusters = self.n_clusters
    if init == "kmeans":
        kmeans = KMeans(n_clusters=n_clusters, embedding_method=GSVD(n_clusters))
        labels_k = kmeans.fit_transform(self.X)
        # Row i of the identity picked by labels_k[i] is its one-hot encoding;
        # such rows already sum to 1, so no rescaling is needed.
        self.taus = np.eye(n_clusters)[labels_k]
    else:
        self.taus = np.random.rand(n, n_clusters)
        # Rescale all rows at once instead of looping over them in Python.
        self.taus /= self.taus.sum(axis=1, keepdims=True)
    self.alphas = np.ones(shape=(n_clusters, )) / n_clusters
    self.pis = np.zeros(shape=(n_clusters, n_clusters))
def init_vem(self, init):
    """Initialize the variational EM parameters ``taus``, ``alphas`` and ``pis``.

    Parameters
    ----------
    init :
        ``"k-means"`` builds ``taus`` as the one-hot encoding of the labels
        returned by ``k_means`` (with ``n_init=5``) on a GSVD embedding of
        ``self.X`` of dimension ``self.Q``. Any other value draws ``taus``
        uniformly at random and rescales each row to sum to 1.

    Sets
    ----
    self.taus : array of shape (n, Q), n being ``self.X.shape[0]``.
    self.alphas : array of shape (Q,), every entry 1 / Q.
    self.pis : zero array of shape (Q, Q).
    """
    n = self.X.shape[0]
    n_blocks = self.Q
    if init == "k-means":
        embedding = GSVD(n_blocks).fit_transform(self.X)
        # Only the labels are used; the other two returned values are ignored.
        _, labels_k, _ = k_means(embedding, n_blocks, n_init=5)
        # Row i of the identity picked by labels_k[i] is its one-hot encoding;
        # such rows already sum to 1, so no rescaling is needed.
        self.taus = np.eye(n_blocks)[labels_k]
    else:
        self.taus = np.random.rand(n, n_blocks)
        # Rescale all rows at once instead of looping over them in Python.
        self.taus /= self.taus.sum(axis=1, keepdims=True)
    self.alphas = np.ones(shape=(n_blocks, )) / n_blocks
    self.pis = np.zeros(shape=(n_blocks, n_blocks))
def test_undirected(self):
    """Check KMeans on connected, disconnected and directed test graphs.

    The default variant must produce exactly ``n_clusters`` distinct labels
    with membership and aggregate of matching shapes; the variant with
    options must produce the same number of distinct labels.
    """
    n_clusters = 3
    default_algo = KMeans(n_clusters, GSVD(2))
    options_algo = KMeans(n_clusters, Spectral(3), co_cluster=True, sort_clusters=False)

    graphs = [test_graph(), test_graph_disconnect(), test_digraph()]
    for graph in graphs:
        n_nodes = graph.shape[0]

        default_labels = default_algo.fit_transform(graph)
        self.assertEqual(len(set(default_labels)), n_clusters)
        self.assertEqual(default_algo.membership_.shape, (n_nodes, n_clusters))
        self.assertEqual(default_algo.aggregate_.shape, (n_clusters, n_clusters))

        options_labels = options_algo.fit_transform(graph)
        self.assertEqual(len(set(options_labels)), n_clusters)
def __init__(self, embedding_method: Optional[BaseEmbedding] = None, n_neighbors: int = 5,
             factor_distance: float = 2, leaf_size: int = 16, p: float = 2, tol_nn: float = 0.01,
             n_jobs: Optional[int] = None):
    """Store the configuration of the classifier.

    Parameters
    ----------
    embedding_method :
        Embedding object stored as ``self.embedding_method``. When omitted
        (``None``), a new ``GSVD(10)`` is created for this instance.
    n_neighbors, factor_distance, leaf_size, p, tol_nn :
        Stored unchanged under attributes of the same name.
    n_jobs :
        Passed through ``check_n_jobs``; if the result is ``None`` it is
        replaced by ``-1`` before being stored as ``self.n_jobs``.
    """
    super(KNN, self).__init__()
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    self.embedding_method = GSVD(10) if embedding_method is None else embedding_method
    self.n_neighbors = n_neighbors
    self.factor_distance = factor_distance
    self.leaf_size = leaf_size
    self.p = p
    self.tol_nn = tol_nn
    self.n_jobs = check_n_jobs(n_jobs)
    # special case of scipy API for tree.query
    if self.n_jobs is None:
        self.n_jobs = -1
def test_bipartite(self):
    """Check KMeans on connected and disconnected bipartite test graphs.

    The 3-cluster default variant and the 4-cluster co-clustering variant
    must expose labels, memberships and aggregates of the expected shapes.
    """
    k_default, k_options = 3, 4
    default_algo = KMeans(k_default, GSVD(2))
    options_algo = KMeans(k_options, Spectral(3), co_cluster=True, sort_clusters=False)

    bigraphs = [test_bigraph(), test_bigraph_disconnect()]
    for bigraph in bigraphs:
        n_row, n_col = bigraph.shape

        # Default variant.
        default_algo.fit(bigraph)
        self.assertEqual(len(default_algo.labels_), n_row)
        self.assertEqual(default_algo.membership_.shape, (n_row, k_default))
        self.assertEqual(default_algo.membership_row_.shape, (n_row, k_default))
        self.assertEqual(default_algo.membership_col_.shape, (n_col, k_default))
        self.assertEqual(default_algo.aggregate_.shape, (k_default, k_default))

        # Variant with options: row and column labels together must use
        # every one of the 4 clusters.
        options_algo.fit(bigraph)
        all_labels = np.hstack((options_algo.labels_row_, options_algo.labels_col_))
        self.assertEqual(len(set(all_labels)), k_options)
        self.assertEqual(options_algo.membership_.shape, (n_row, k_options))
        self.assertEqual(options_algo.membership_row_.shape, (n_row, k_options))
        self.assertEqual(options_algo.membership_col_.shape, (n_col, k_options))
        self.assertEqual(options_algo.aggregate_.shape, (k_options, k_options))
def setUp(self):
    """Build the lists of embedding methods used by the tests, one per input type."""
    # Stored under ``methods``.
    self.methods = [
        Spectral(),
        GSVD(),
        SVD(),
    ]
    # Stored under ``bimethods``.
    self.bimethods = [
        BiSpectral(),
        GSVD(),
        SVD(),
    ]
def test_disconnected(self):
    """Each hierarchical algorithm must return a (9, 4) dendrogram on the disconnected test graph."""
    graph = test_graph_disconnect()
    expected_shape = (9, 4)
    algorithms = [Paris(), Ward(GSVD(3)), LouvainHierarchy()]
    for algorithm in algorithms:
        result = algorithm.fit_transform(graph)
        self.assertEqual(result.shape, expected_shape)
def __init__(self, embedding_method: BaseBiEmbedding = None, cluster_col: bool = False,
             cluster_both: bool = False):
    """Forward the embedding to the parent class and store the clustering flags.

    Parameters
    ----------
    embedding_method :
        Embedding object forwarded by keyword to the parent initializer.
        When omitted (``None``), a new ``GSVD(10)`` is created for this
        instance.
    cluster_col :
        Flag stored as ``self.cluster_col``.
    cluster_both :
        Flag stored as ``self.cluster_both``.
    """
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    if embedding_method is None:
        embedding_method = GSVD(10)
    super(BiWard, self).__init__(embedding_method=embedding_method)
    self.cluster_col = cluster_col
    self.cluster_both = cluster_both
def __init__(self, embedding_method: BaseEmbedding = None):
    """Store the embedding method of the algorithm.

    Parameters
    ----------
    embedding_method :
        Embedding object stored as ``self.embedding_method``. When omitted
        (``None``), a new ``GSVD(10)`` is created for this instance.
    """
    super(Ward, self).__init__()
    # A default written in the signature is evaluated once, at definition
    # time, so every default-constructed instance would share (and re-fit)
    # the same GSVD object. Build a fresh one per instance instead.
    self.embedding_method = GSVD(10) if embedding_method is None else embedding_method