示例#1
0
    def test_cosine(self):
        """Check that cosine modularity equals fit minus diversity.

        The identity is checked on a bipartite graph embedded with GSVD, and
        on an undirected graph embedded with GSVD and with LouvainEmbedding.
        Scoring the bipartite graph with the undirected graph's embedding
        (and no column embedding) is expected to raise ``ValueError``.
        """
        bigraph = test_bigraph()
        gsvd = GSVD(3, solver='lanczos')

        # Bipartite case: both row and column embeddings are supplied.
        rows = gsvd.fit_transform(bigraph)
        cols = gsvd.embedding_col_
        fit, div, _ = cosine_modularity(bigraph, rows, cols, return_all=True)
        score = cosine_modularity(bigraph, rows, cols, return_all=False)
        self.assertAlmostEqual(score, fit - div)

        # Undirected case, reusing the same GSVD instance.
        graph = test_graph()
        rows = gsvd.fit_transform(graph)
        fit, div, score = cosine_modularity(graph, rows, return_all=True)
        self.assertAlmostEqual(score, fit - div)

        with self.assertRaises(ValueError):
            cosine_modularity(bigraph, rows)

        # Undirected case with a Louvain-based embedding.
        rows = LouvainEmbedding().fit_transform(graph)
        fit, div, score = cosine_modularity(graph, rows, return_all=True)
        self.assertAlmostEqual(score, fit - div)
示例#2
0
 def setUp(self):
     """Create the clustering algorithms used by the tests.

     ``kmeans`` / ``bikmeans`` use 3 clusters on ``GSVD(2)``; the
     ``*_options`` variants use 4 clusters with ``sort_clusters=False``
     (and ``co_cluster=True`` for the bipartite one).
     """
     base_clusters, option_clusters = 3, 4
     self.kmeans = KMeans(base_clusters, GSVD(2))
     self.bikmeans = BiKMeans(base_clusters, GSVD(2))
     self.kmeans_options = KMeans(option_clusters, SVD(3), sort_clusters=False)
     self.bikmeans_options = BiKMeans(
         option_clusters, BiSpectral(3), co_cluster=True, sort_clusters=False)
示例#3
0
    def test_undirected(self):
        """Check the classifiers on an undirected graph with two seeds.

        Each classifier must give the same labels for seeds passed as an
        array or as a dict, and for the boolean version of the graph. The
        nodes assigned to the second seed's label must also be the same
        whether that label is 1 or 2.
        """
        graph = test_graph()
        graph_bool = test_graph_bool()
        n_nodes = graph.shape[0]

        # Same seeds in two formats: array (-1 everywhere except the first
        # two nodes) and dict (node -> label).
        seeds_as_array = np.full(n_nodes, -1.0)
        seeds_as_array[:2] = [0, 1]
        seeds_as_dict = {0: 0, 1: 1}

        classifiers = [
            PageRankClassifier(),
            DiffusionClassifier(),
            KNN(embedding_method=GSVD(3), n_neighbors=1),
            CoPageRankClassifier(),
            Propagation(),
        ]

        for classifier in classifiers:
            from_array = classifier.fit_transform(graph, seeds_as_array)
            from_dict = classifier.fit_transform(graph, seeds_as_dict)
            from_bool = classifier.fit_transform(graph_bool, seeds_as_array)
            self.assertTrue((from_array == from_dict).all())
            self.assertTrue((from_array == from_bool).all())
            self.assertEqual(from_dict.shape[0], n_nodes)
            self.assertTupleEqual(classifier.membership_.shape, (n_nodes, 2))

        # Only the value of the second label differs between the two runs.
        seeds_label_1 = {0: 0, 1: 1}
        seeds_label_2 = {0: 0, 1: 2}
        for classifier in classifiers:
            mask_1 = classifier.fit_transform(graph, seeds_label_1) == 1
            mask_2 = classifier.fit_transform(graph, seeds_label_2) == 2
            self.assertTrue((mask_1 == mask_2).all())
示例#4
0
    def test_undirected(self):
        """Check the classifiers and their scores on an undirected graph.

        Calling ``score`` on the first classifier before any fit is expected
        to raise ``ValueError``. After fitting, labels must agree between
        array and dict seeds, and labels and scores must have shape ``(n,)``.
        The nodes assigned to the second seed's label must be the same
        whether that label is 1 or 2.
        """
        graph = test_graph()
        n_nodes = graph.shape[0]

        # Same seeds in two formats: array (-1 everywhere except the first
        # two nodes) and dict (node -> label).
        seeds_as_array = np.full(n_nodes, -1.0)
        seeds_as_array[:2] = [0, 1]
        seeds_as_dict = {0: 0, 1: 1}

        classifiers = [
            PageRankClassifier(),
            DiffusionClassifier(),
            KNN(embedding_method=GSVD(3), n_neighbors=1),
            Propagation(),
            DirichletClassifier(),
        ]

        not_fitted_yet = classifiers[0]
        with self.assertRaises(ValueError):
            not_fitted_yet.score(0)

        for classifier in classifiers:
            from_array = classifier.fit_transform(graph, seeds_as_array)
            from_dict = classifier.fit_transform(graph, seeds_as_dict)
            label_scores = classifier.score(0)
            self.assertTrue((from_array == from_dict).all())
            self.assertEqual(from_dict.shape, (n_nodes, ))
            self.assertTupleEqual(classifier.membership_.shape, (n_nodes, 2))
            self.assertEqual(label_scores.shape, (n_nodes, ))

        # Only the value of the second label differs between the two runs.
        seeds_label_1 = {0: 0, 1: 1}
        seeds_label_2 = {0: 0, 1: 2}
        for classifier in classifiers:
            mask_1 = classifier.fit_transform(graph, seeds_label_1) == 1
            mask_2 = classifier.fit_transform(graph, seeds_label_2) == 2
            self.assertTrue((mask_1 == mask_2).all())
示例#5
0
    def test_bipartite(self):
        """Check the bipartite classifiers on the movie-actor graph.

        Row and column labels must agree between array and dict row seeds,
        and the membership matrices must have two columns. Each classifier
        is finally fitted with both row and column seeds, with no assertion
        on the result.
        """
        biadjacency = movie_actor(metadata=False)
        n_row, n_col = biadjacency.shape

        # Row seeds in two formats: array (-1 everywhere except the first
        # two rows) and dict (row -> label).
        row_seeds_array = np.full(n_row, -1.0)
        row_seeds_array[:2] = [0, 1]
        row_seeds_dict = {0: 0, 1: 1}
        col_seeds_dict = {0: 0}

        def fitted_labels(classifier, row_seeds):
            """Fit on the row seeds and return (row labels, column labels)."""
            classifier.fit(biadjacency, row_seeds)
            return classifier.labels_row_, classifier.labels_col_

        classifiers = [
            BiPageRankClassifier(),
            BiDiffusionClassifier(),
            BiKNN(embedding_method=GSVD(3), n_neighbors=1),
            BiPropagation(),
            BiDirichletClassifier(),
        ]
        for classifier in classifiers:
            rows_a, cols_a = fitted_labels(classifier, row_seeds_array)
            rows_d, cols_d = fitted_labels(classifier, row_seeds_dict)

            self.assertTrue(np.allclose(rows_a, rows_d))
            self.assertTrue(np.allclose(cols_a, cols_d))
            self.assertEqual(cols_d.shape[0], n_col)
            self.assertTupleEqual(classifier.membership_row_.shape, (n_row, 2))
            self.assertTupleEqual(classifier.membership_col_.shape, (n_col, 2))

            classifier.fit(biadjacency, row_seeds_dict, col_seeds_dict)
示例#6
0
 def __init__(self,
              embedding_method: BaseEmbedding = None,
              co_cluster: bool = False):
     """Store the configuration of the Ward algorithm.

     Parameters
     ----------
     embedding_method :
         Embedding algorithm stored as ``self.embedding_method``. ``None``
         (the default) creates a new ``GSVD(10)`` for this instance.
     co_cluster :
         Stored as ``self.co_cluster``.
     """
     super(Ward, self).__init__()
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every Ward created without an
     # explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     self.embedding_method = embedding_method
     self.co_cluster = co_cluster
     self.bipartite = None
示例#7
0
    def test_undirected(self):
        """Each hierarchical algorithm must return an (n - 1, 4) dendrogram."""
        graph = test_graph()
        expected_shape = (graph.shape[0] - 1, 4)

        algorithms = [Paris(), Ward(GSVD(3)), LouvainHierarchy()]
        for algorithm in algorithms:
            result = algorithm.fit_transform(graph)
            self.assertTupleEqual(result.shape, expected_shape)
示例#8
0
    def test_options(self):
        """Exercise GSVD options on the Star Wars bipartite graph.

        Requesting 5 components is expected to emit a warning and to produce
        embeddings with ``min(n_row, n_col) - 1`` columns. A second model
        with one component and regularization is fitted and used for
        prediction, with no assertion on the result.
        """
        biadjacency = star_wars(metadata=False)
        shape = biadjacency.shape
        n_row, n_col = shape
        max_components = min(shape) - 1
        halko_options = {'solver': 'halko', 'relative_regularization': True}

        oversized = GSVD(n_components=5, regularization=0., **halko_options)
        with self.assertWarns(Warning):
            oversized.fit(biadjacency)
        self.assertEqual(oversized.embedding_row_.shape, (n_row, max_components))
        self.assertEqual(oversized.embedding_col_.shape, (n_col, max_components))

        regularized = GSVD(n_components=1, regularization=0.1, **halko_options)
        regularized.fit(biadjacency)
        regularized.predict(np.random.rand(n_col))
示例#9
0
 def __init__(self,
              embedding_method: BaseEmbedding = None,
              n_neighbors: int = 5,
              factor_distance: float = 2,
              leaf_size: int = 16,
              p: float = 2,
              tol_nn: float = 0.01,
              n_jobs: int = 1):
     """Forward the configuration to the parent initializer.

     Parameters
     ----------
     embedding_method :
         Embedding algorithm. ``None`` (the default) creates a new
         ``GSVD(10)`` for this instance.
     n_neighbors, factor_distance, leaf_size, p, tol_nn, n_jobs :
         Passed positionally, in this order, to the parent ``__init__``.
     """
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every BiKNN created without an
     # explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     super(BiKNN, self).__init__(embedding_method, n_neighbors,
                                 factor_distance, leaf_size, p, tol_nn,
                                 n_jobs)
示例#10
0
 def __init__(self,
              n_clusters: int = 8,
              embedding_method: BaseEmbedding = None,
              sort_clusters: bool = True,
              return_membership: bool = True,
              return_aggregate: bool = True):
     """Store the configuration of the k-means clustering.

     Parameters
     ----------
     n_clusters :
         Stored as ``self.n_clusters``.
     embedding_method :
         Embedding algorithm stored as ``self.embedding_method``. ``None``
         (the default) creates a new ``GSVD(10)`` for this instance.
     sort_clusters, return_membership, return_aggregate :
         Forwarded by keyword to the parent ``__init__``.
     """
     super(KMeans, self).__init__(sort_clusters=sort_clusters,
                                  return_membership=return_membership,
                                  return_aggregate=return_aggregate)
     self.n_clusters = n_clusters
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every KMeans created without an
     # explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     self.embedding_method = embedding_method
示例#11
0
 def __init__(self,
              n_clusters: int = 2,
              embedding_method: BaseBiEmbedding = None,
              co_cluster: bool = False,
              sort_clusters: bool = True,
              return_membership: bool = True,
              return_aggregate: bool = True):
     """Store the configuration of the bipartite k-means clustering.

     Parameters
     ----------
     n_clusters :
         Forwarded by keyword to the parent ``__init__``.
     embedding_method :
         Embedding algorithm forwarded to the parent ``__init__``. ``None``
         (the default) creates a new ``GSVD(10)`` for this instance.
     co_cluster :
         Stored as ``self.co_cluster``.
     sort_clusters, return_membership, return_aggregate :
         Forwarded by keyword to the parent ``__init__``.
     """
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every BiKMeans created without
     # an explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     super(BiKMeans, self).__init__(sort_clusters=sort_clusters,
                                    return_membership=return_membership,
                                    return_aggregate=return_aggregate,
                                    n_clusters=n_clusters,
                                    embedding_method=embedding_method)
     self.co_cluster = co_cluster
示例#12
0
    def test_bipartite(self):
        """Check dendrogram shapes of the bipartite hierarchical algorithms.

        Row and column dendrograms must have one row fewer than the number
        of rows / columns of the biadjacency matrix. The full dendrogram is
        only checked when the algorithm provides one.
        """
        bigraph = test_bigraph()
        n_row, n_col = bigraph.shape
        row_shape = (n_row - 1, 4)
        col_shape = (n_col - 1, 4)
        full_shape = (n_row + n_col - 1, 4)

        algorithms = [
            BiParis(),
            BiWard(GSVD(3), cluster_col=True, cluster_both=True),
            BiLouvainHierarchy(),
        ]
        for algorithm in algorithms:
            algorithm.fit_transform(bigraph)
            self.assertTupleEqual(algorithm.dendrogram_row_.shape, row_shape)
            self.assertTupleEqual(algorithm.dendrogram_col_.shape, col_shape)
            if algorithm.dendrogram_full_ is None:
                continue
            self.assertTupleEqual(algorithm.dendrogram_full_.shape, full_shape)
示例#13
0
    def _init_vem(self, init):
        """Initialise ``self.taus``, ``self.alphas`` and ``self.pis``.

        With ``n = self.X.shape[0]`` and ``k = self.n_clusters``:

        * ``self.taus`` is an ``(n, k)`` array whose rows sum to one,
        * ``self.alphas`` is the uniform vector of length ``k``,
        * ``self.pis`` is a ``(k, k)`` array of zeros.

        Parameters
        ----------
        init : str
            ``"kmeans"`` builds ``taus`` as the one-hot encoding of the
            labels returned by ``KMeans`` on ``self.X``. Any other value
            draws uniform random rows and normalises each to sum to one.
        """
        n = self.X.shape[0]

        if init == "kmeans":
            kmeans = KMeans(n_clusters=self.n_clusters,
                            embedding_method=GSVD(self.n_clusters))
            labels_k = kmeans.fit_transform(self.X)

            # Row i of the identity is the one-hot vector of cluster i.
            self.taus = np.eye(self.n_clusters)[labels_k]
        else:
            self.taus = np.random.rand(n, self.n_clusters)
            # Normalise all rows at once instead of looping over them.
            self.taus /= self.taus.sum(axis=1, keepdims=True)

        self.alphas = np.ones(shape=(self.n_clusters, )) / self.n_clusters
        self.pis = np.zeros(shape=(self.n_clusters, self.n_clusters))
示例#14
0
    def init_vem(self, init):
        """Initialise ``self.taus``, ``self.alphas`` and ``self.pis``.

        With ``n = self.X.shape[0]`` and ``Q = self.Q``:

        * ``self.taus`` is an ``(n, Q)`` array whose rows sum to one,
        * ``self.alphas`` is the uniform vector of length ``Q``,
        * ``self.pis`` is a ``(Q, Q)`` array of zeros.

        Parameters
        ----------
        init : str
            ``"k-means"`` builds ``taus`` as the one-hot encoding of the
            labels returned by ``k_means`` on the ``GSVD(Q)`` embedding of
            ``self.X``. Any other value draws uniform random rows and
            normalises each to sum to one.
        """
        n = self.X.shape[0]

        if init == "k-means":
            embedding = GSVD(self.Q).fit_transform(self.X)
            # Only the labels are needed from the three returned values.
            _, labels_k, _ = k_means(embedding, self.Q, n_init=5)

            # Row i of the identity is the one-hot vector of cluster i.
            self.taus = np.eye(self.Q)[labels_k]
        else:
            self.taus = np.random.rand(n, self.Q)
            # Normalise all rows at once instead of looping over them.
            self.taus /= self.taus.sum(axis=1, keepdims=True)

        self.alphas = np.ones(shape=(self.Q, )) / self.Q
        self.pis = np.zeros(shape=(self.Q, self.Q))
示例#15
0
 def test_undirected(self):
     """Check k-means clustering on undirected, disconnected and directed graphs.

     The default algorithm must produce exactly ``n_clusters`` distinct
     labels, an ``(n, n_clusters)`` membership matrix and a square
     aggregate matrix. The variant with options must also produce
     ``n_clusters`` distinct labels.
     """
     n_clusters = 3
     default_algo = KMeans(n_clusters, GSVD(2))
     custom_algo = KMeans(n_clusters,
                          Spectral(3),
                          co_cluster=True,
                          sort_clusters=False)

     graphs = (test_graph(), test_graph_disconnect(), test_digraph())
     for graph in graphs:
         n_nodes = graph.shape[0]

         found = default_algo.fit_transform(graph)
         self.assertEqual(len(set(found)), n_clusters)
         self.assertEqual(default_algo.membership_.shape, (n_nodes, n_clusters))
         self.assertEqual(default_algo.aggregate_.shape, (n_clusters, n_clusters))

         found = custom_algo.fit_transform(graph)
         self.assertEqual(len(set(found)), n_clusters)
示例#16
0
    def __init__(self,
                 embedding_method: Optional[BaseEmbedding] = None,
                 n_neighbors: int = 5,
                 factor_distance: float = 2,
                 leaf_size: int = 16,
                 p: float = 2,
                 tol_nn: float = 0.01,
                 n_jobs: Optional[int] = None):
        """Store the configuration of the nearest-neighbours classifier.

        Parameters
        ----------
        embedding_method :
            Embedding algorithm stored as ``self.embedding_method``. ``None``
            (the default) creates a new ``GSVD(10)`` for this instance.
        n_neighbors, factor_distance, leaf_size, p, tol_nn :
            Stored unchanged on the instance under the same names.
        n_jobs :
            Passed through ``check_n_jobs``; a ``None`` result is stored
            as ``-1``.
        """
        super(KNN, self).__init__()

        # Build the default here, not in the signature: a signature default
        # is evaluated once at definition time, so every KNN created without
        # an explicit method would share the very same GSVD object.
        if embedding_method is None:
            embedding_method = GSVD(10)
        self.embedding_method = embedding_method
        self.n_neighbors = n_neighbors
        self.factor_distance = factor_distance
        self.leaf_size = leaf_size
        self.p = p
        self.tol_nn = tol_nn
        self.n_jobs = check_n_jobs(n_jobs)
        # special case of scipy API for tree.query
        if self.n_jobs is None:
            self.n_jobs = -1
示例#17
0
 def test_bipartite(self):
     """Check k-means clustering on bipartite graphs.

     The default algorithm (3 clusters) must label every row and expose
     membership and aggregate matrices of matching shapes. The variant
     with options (4 clusters, co-clustering) must use 4 distinct labels
     over rows and columns together, with matching matrix shapes.
     """
     default_algo = KMeans(3, GSVD(2))
     custom_algo = KMeans(4,
                          Spectral(3),
                          co_cluster=True,
                          sort_clusters=False)

     bigraphs = (test_bigraph(), test_bigraph_disconnect())
     for bigraph in bigraphs:
         n_row, n_col = bigraph.shape

         default_algo.fit(bigraph)
         self.assertEqual(len(default_algo.labels_), n_row)
         self.assertEqual(default_algo.membership_.shape, (n_row, 3))
         self.assertEqual(default_algo.membership_row_.shape, (n_row, 3))
         self.assertEqual(default_algo.membership_col_.shape, (n_col, 3))
         self.assertEqual(default_algo.aggregate_.shape, (3, 3))

         custom_algo.fit(bigraph)
         all_labels = np.hstack((custom_algo.labels_row_, custom_algo.labels_col_))
         self.assertEqual(len(set(all_labels)), 4)
         self.assertEqual(custom_algo.membership_.shape, (n_row, 4))
         self.assertEqual(custom_algo.membership_row_.shape, (n_row, 4))
         self.assertEqual(custom_algo.membership_col_.shape, (n_col, 4))
         self.assertEqual(custom_algo.aggregate_.shape, (4, 4))
示例#18
0
 def setUp(self):
     """Instantiate the embedding algorithms, grouped by input type.

     ``methods`` and ``bimethods`` each hold new, default-configured
     instances; GSVD and SVD appear in both lists.
     """
     self.methods = [algorithm() for algorithm in (Spectral, GSVD, SVD)]
     self.bimethods = [algorithm() for algorithm in (BiSpectral, GSVD, SVD)]
示例#19
0
 def test_disconnected(self):
     """Each hierarchical algorithm must return a (9, 4) dendrogram.

     NOTE(review): 9 is presumably ``n - 1`` for the disconnected test
     graph; confirm against ``test_graph_disconnect``.
     """
     graph = test_graph_disconnect()
     expected_shape = (9, 4)

     algorithms = [Paris(), Ward(GSVD(3)), LouvainHierarchy()]
     for algorithm in algorithms:
         result = algorithm.fit_transform(graph)
         self.assertEqual(result.shape, expected_shape)
示例#20
0
 def __init__(self, embedding_method: BaseBiEmbedding = None, cluster_col: bool = False,
              cluster_both: bool = False):
     """Store the configuration of the bipartite Ward algorithm.

     Parameters
     ----------
     embedding_method :
         Embedding algorithm forwarded to the parent ``__init__``. ``None``
         (the default) creates a new ``GSVD(10)`` for this instance.
     cluster_col :
         Stored as ``self.cluster_col``.
     cluster_both :
         Stored as ``self.cluster_both``.
     """
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every BiWard created without an
     # explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     super(BiWard, self).__init__(embedding_method=embedding_method)
     self.cluster_col = cluster_col
     self.cluster_both = cluster_both
示例#21
0
 def __init__(self, embedding_method: BaseEmbedding = None):
     """Store the embedding algorithm used by Ward.

     Parameters
     ----------
     embedding_method :
         Embedding algorithm stored as ``self.embedding_method``. ``None``
         (the default) creates a new ``GSVD(10)`` for this instance.
     """
     super(Ward, self).__init__()
     # Build the default here, not in the signature: a signature default is
     # evaluated once at definition time, so every Ward created without an
     # explicit method would share the very same GSVD object.
     if embedding_method is None:
         embedding_method = GSVD(10)
     self.embedding_method = embedding_method