def kmean_poincare_ball():
    """Run K-means on the Poincare ball and plot the result.

    Two groups of points are sampled uniformly in small squares of the
    plane, clustered with ``RiemannianKMeans`` using the metric of the
    hyperbolic manifold, and drawn on the Poincare disk together with
    the centroids returned by ``fit``.

    Returns
    -------
    plt : object
        The ``plt`` plotting object on which figure 1 was created.
    """
    n_samples = 20
    dim = 2
    n_clusters = 2
    manifold = Hyperbolic(dimension=dim, point_type='ball')
    metric = manifold.metric

    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(n_samples, dim))
    # Bounds are passed in increasing order: sampling with low > high is
    # documented as undefined behaviour by NumPy.
    cluster_2 = gs.random.uniform(low=-0.2, high=0, size=(n_samples, dim))
    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(
        riemannian_metric=metric,
        n_clusters=n_clusters,
        init='random',
        mean_method='frechet-poincare-ball',
    )

    centroids = kmeans.fit(X=data, max_iter=100)
    labels = kmeans.predict(X=data)

    plt.figure(1)
    colors = ['red', 'blue']

    # First draw every point in black; this call also creates the axes
    # that the following calls reuse.
    ax = visualization.plot(
        data,
        space='H2_poincare_disk',
        marker='.',
        color='black',
        point_type=manifold.point_type)

    # Overlay each predicted cluster in its own colour.
    for i in range(n_clusters):
        ax = visualization.plot(
            data[labels == i],
            ax=ax,
            space='H2_poincare_disk',
            marker='.',
            color=colors[i],
            point_type=manifold.point_type)

    ax = visualization.plot(
        centroids,
        ax=ax,
        space='H2_poincare_disk',
        marker='*',
        color='green',
        s=100,
        point_type=manifold.point_type)

    ax.set_title('Kmeans on Poincaré Ball Manifold')

    return plt
def kmean_hypersphere():
    """Run K-means on the sphere and plot the result.

    Two groups of points are sampled with ``random_von_mises_fisher``;
    the last coordinate of the second group is negated so that it lies in
    the opposite hemisphere. The points are clustered with
    ``RiemannianKMeans`` and drawn on the sphere together with the
    estimated centroids.

    Returns
    -------
    plt : object
        The ``plt`` plotting object on which figure 2 was created.
    """
    n_samples = 50
    dim = 2
    n_clusters = 2
    manifold = Hypersphere(dim)
    metric = manifold.metric

    # Generate data on north pole
    cluster_1 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)

    # Generate data on south pole
    cluster_2 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)
    # Flip the sign of the third coordinate for every sample at once.
    # Building a new array avoids per-row in-place writes, which do not
    # work on backends whose tensors are immutable.
    cluster_2 = gs.concatenate(
        (cluster_2[:, :2], -cluster_2[:, 2:]), axis=1)

    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(metric, n_clusters, tol=1e-3)
    kmeans.fit(data)
    labels = kmeans.predict(data)
    centroids = kmeans.centroids

    plt.figure(2)
    colors = ['red', 'blue']

    # First draw every point in black; this call also creates the axes
    # that the following calls reuse.
    ax = visualization.plot(
        data,
        space='S2',
        marker='.',
        color='black')

    for i in range(n_clusters):
        # Skip clusters that received no point.
        if len(data[labels == i]) > 0:
            ax = visualization.plot(
                points=data[labels == i],
                ax=ax,
                space='S2',
                marker='.',
                color=colors[i])

    ax = visualization.plot(
        centroids,
        ax=ax,
        space='S2',
        marker='*',
        s=200,
        color='green')

    ax.set_title('Kmeans on Hypersphere Manifold')

    return plt
def kmean_poincare_ball():
    """Run K-means on the Poincare ball.

    Two groups of points are sampled uniformly in small squares of the
    plane, clustered with ``RiemannianKMeans`` using the metric of the
    Poincare ball, and drawn on the Poincare disk together with the
    centroids returned by ``fit``.

    Returns
    -------
    plt : object
        The ``plt`` plotting object on which figure 1 was created.
    """
    n_samples = 20
    dim = 2
    n_clusters = 2
    manifold = PoincareBall(dim=dim)
    metric = manifold.metric

    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(n_samples, dim))
    # Bounds are passed in increasing order: sampling with low > high is
    # documented as undefined behaviour by NumPy.
    cluster_2 = gs.random.uniform(low=-0.2, high=0, size=(n_samples, dim))
    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(metric=metric, n_clusters=n_clusters, init="random")

    centroids = kmeans.fit(X=data)
    labels = kmeans.predict(X=data)

    plt.figure(1)
    colors = ["red", "blue"]

    # First draw every point in black; this call also creates the axes
    # that the following calls reuse.
    ax = visualization.plot(
        data,
        space="H2_poincare_disk",
        marker=".",
        color="black",
        point_type=manifold.point_type,
    )

    # Overlay each predicted cluster in its own colour.
    for i in range(n_clusters):
        ax = visualization.plot(
            data[labels == i],
            ax=ax,
            space="H2_poincare_disk",
            marker=".",
            color=colors[i],
            point_type=manifold.point_type,
        )

    ax = visualization.plot(
        centroids,
        ax=ax,
        space="H2_poincare_disk",
        marker="*",
        color="green",
        s=100,
        point_type=manifold.point_type,
    )

    ax.set_title("Kmeans on Poincaré Ball Manifold")

    return plt
def test_hypersphere_kmeans_predict(self):
    """Check that ``predict`` labels each point with its closest centroid.

    The expected labels are recomputed point by point with
    ``closest_neighbor_index`` against the fitted centroids and compared
    with the output of ``predict``.
    """
    gs.random.seed(1234)
    n_clusters = 5

    sphere = hypersphere.Hypersphere(2)
    sphere_metric = hypersphere.HypersphereMetric(2)
    points = sphere.random_von_mises_fisher(kappa=100, n_samples=200)

    estimator = RiemannianKMeans(sphere_metric, n_clusters, tol=1e-5)
    estimator.fit(points, max_iter=100)
    predicted = estimator.predict(points)
    fitted_centroids = estimator.centroids

    # Reference labels: index of the nearest centroid for every sample.
    closest_indices = []
    for point in points:
        index = sphere_metric.closest_neighbor_index(point, fitted_centroids)
        closest_indices.append(int(index))
    expected = gs.array(closest_indices)

    self.assertAllClose(expected, predicted)
def kmean_hypersphere():
    """Cluster two opposite point clouds on the sphere and plot them.

    Both clouds are sampled with ``random_von_mises_fisher``; the second
    one is negated so that it sits opposite the first. The merged points
    are clustered with ``RiemannianKMeans`` and displayed with the
    estimated centroids.

    Returns
    -------
    plt : object
        The ``plt`` plotting object on which figure 2 was created.
    """
    n_samples = 50
    n_clusters = 2
    sphere = Hypersphere(2)
    sphere_metric = sphere.metric

    # First cloud: samples intended to sit around the north pole.
    north_cloud = sphere.random_von_mises_fisher(kappa=50, n_samples=n_samples)

    # Second cloud: same sampling, then mirrored through the origin so
    # that it sits around the south pole.
    south_cloud = sphere.random_von_mises_fisher(kappa=50, n_samples=n_samples)
    south_cloud = -south_cloud

    points = gs.concatenate((north_cloud, south_cloud), axis=0)

    estimator = RiemannianKMeans(sphere_metric, n_clusters, tol=1e-3)
    estimator.fit(points)
    labels = estimator.predict(points)
    fitted_centroids = estimator.centroids

    plt.figure(2)
    palette = ["red", "blue"]

    # Background layer with every sample; also creates the axes.
    ax = visualization.plot(points, space="S2", marker=".", color="black")

    for label, color in zip(range(n_clusters), palette):
        members = points[labels == label]
        if len(members) == 0:
            # Nothing was assigned to this cluster.
            continue
        ax = visualization.plot(
            points=members, ax=ax, space="S2", marker=".", color=color
        )

    ax = visualization.plot(
        fitted_centroids, ax=ax, space="S2", marker="*", s=200, color="green"
    )

    ax.set_title("Kmeans on the sphere")

    return plt
def main():
    """Cluster two synthetic point groups on the Poincare disk and plot them.

    Two groups of 20 two-dimensional points are sampled uniformly, drawn
    in black on the Poincare disk, clustered with ``RiemannianKMeans``
    (one iteration, random initialisation), and the centroids returned by
    ``fit`` are drawn in red. The predicted labels are printed and the
    figure is shown.
    """
    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(20, 2))
    # Bounds are passed in increasing order: sampling with low > high is
    # documented as undefined behaviour by NumPy.
    cluster_2 = gs.random.uniform(low=-0.2, high=0, size=(20, 2))

    ax = plt.gca()

    merged_clusters = gs.concatenate((cluster_1, cluster_2), axis=0)
    manifold = Hyperbolic(dimension=2, point_type='ball')
    metric = HyperbolicMetric(dimension=2, point_type='ball')

    visualization.plot(
        merged_clusters,
        ax=ax,
        space='H2_poincare_disk',
        marker='.',
        color='black',
        point_type=manifold.point_type)

    kmeans = RiemannianKMeans(
        riemannian_metric=metric,
        n_clusters=2,
        init='random',
    )

    centroids = kmeans.fit(X=merged_clusters, max_iter=1)
    labels = kmeans.predict(X=merged_clusters)

    visualization.plot(
        centroids,
        ax=ax,
        space='H2_poincare_disk',
        marker='.',
        color='red',
        point_type=manifold.point_type)

    print('Data_labels', labels)

    plt.show()
def fit(self, data):
    """Fit a Gaussian mixture model (GMM) given the data.

    Alternates between Expectation and Maximization steps
    for at most ``self.max_iter`` iterations. The loop stops early once
    the mean absolute change of the posterior probabilities drops below
    ``EM_CONV_RATE`` after more than ``MINIMUM_EPOCHS`` epochs.

    Parameters
    ----------
    data : array-like, shape=[n_samples, n_features]
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    means : array-like
        Means of the components of the computed Gaussian mixture model.
    variances : array-like
        Variances of the components.
    mixture_coefficients : array-like
        Mixture coefficients of the components.
    """
    self._dimension = data.shape[-1]

    if self.initialisation_method == 'kmeans':
        # Initialise the means with K-means centroids and the variances
        # with the variance of the points assigned to each centroid.
        kmeans = RiemannianKMeans(
            metric=self.metric,
            n_clusters=self.n_gaussians,
            init='random',
            mean_method='batch',
            lr=self.lr_mean)

        centroids = kmeans.fit(X=data)
        labels = kmeans.predict(X=data)

        self.means = centroids
        self.variances = gs.zeros(self.n_gaussians)

        # Prepend the label as first column so that samples can be
        # selected by label and the remaining columns recovered as data.
        labeled_data = gs.vstack([labels, gs.transpose(data)])
        labeled_data = gs.transpose(labeled_data)
        for label, centroid in enumerate(centroids):
            label_mask = gs.where(labeled_data[:, 0] == label)
            grouped_by_label = labeled_data[label_mask][:, 1:]
            v = variance(grouped_by_label, centroid, self.metric)
            # A cluster with a single sample gets MIN_VAR_INIT added to
            # its variance.
            if grouped_by_label.shape[0] == 1:
                v += MIN_VAR_INIT
            self.variances[label] = v
    else:
        # Random initialisation of the means and variances.
        self.means = (
            gs.random.rand(self.n_gaussians, self._dimension) - 0.5
        ) / self._dimension
        self.variances = gs.random.rand(self.n_gaussians) / 10 + 0.8

    self.mixture_coefficients = \
        gs.ones(self.n_gaussians) / self.n_gaussians
    posterior_probabilities = gs.ones(
        (data.shape[0], self.means.shape[0]))

    self.variances_range, \
        self.normalization_factor_var, \
        self.phi_inv_var = \
        self.normalization_factor_init(
            gs.arange(ZETA_LOWER_BOUND, ZETA_UPPER_BOUND, ZETA_STEP))

    for epoch in range(self.max_iter):
        old_posterior_probabilities = posterior_probabilities

        posterior_probabilities = self._expectation(data)

        condition = gs.mean(
            gs.abs(old_posterior_probabilities - posterior_probabilities))

        if condition < EM_CONV_RATE and epoch > MINIMUM_EPOCHS:
            logging.info('EM converged in %s iterations', epoch)
            return self.means, self.variances, self.mixture_coefficients

        self._maximization(data, posterior_probabilities)

    # Reaching this point means the iteration budget was exhausted, so
    # report it at WARNING level and point at the setting that bounds
    # the loop.
    logging.warning(
        'EM did not converge after %s iterations. '
        'Please increase max_iter.', self.max_iter)

    return self.means, self.variances, self.mixture_coefficients
def main():
    """Learning Poincaré graph embedding.

    Learns Poincaré Ball embedding by using Riemannian
    gradient descent algorithm. Then K-means is applied
    to learn labels of each data sample.

    Two figures are shown: the embedding coloured with the labels stored
    in the karate graph, then the same embedding coloured with the labels
    predicted by K-means together with the cluster centroids.
    """
    gs.random.seed(1234)

    karate_graph = load_karate_graph()

    hyperbolic_embedding = HyperbolicEmbedding()
    embeddings = hyperbolic_embedding.embed(karate_graph)

    # Figure 1: embedding coloured by the labels of the graph.
    colors = {1: 'b', 2: 'r'}
    group_1 = mpatches.Patch(color=colors[1], label='Group 1')
    group_2 = mpatches.Patch(color=colors[2], label='Group 2')

    circle = visualization.PoincareDisk(point_type='ball')

    _, ax = plt.subplots(figsize=(8, 8))
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    circle.set_ax(ax)
    circle.draw(ax=ax)
    for i_embedding, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        pt_id = i_embedding
        plt.scatter(
            x, y,
            c=colors[karate_graph.labels[pt_id][0]],
            s=150
        )
        ax.annotate(pt_id, (x, y))

    plt.tick_params(
        which='both')
    plt.title('Poincare Ball Embedding of the Karate Club Network')
    plt.legend(handles=[group_1, group_2])
    plt.show()

    # Cluster the embedded nodes with K-means.
    n_clusters = 2

    kmeans = RiemannianKMeans(
        riemannian_metric=hyperbolic_embedding.manifold.metric,
        n_clusters=n_clusters,
        init='random',
        mean_method='frechet-poincare-ball')

    centroids = kmeans.fit(X=embeddings, max_iter=100)
    labels = kmeans.predict(X=embeddings)

    # Figure 2: embedding coloured by the predicted labels.
    colors = ['g', 'c', 'm']
    circle = visualization.PoincareDisk(point_type='ball')
    _, ax2 = plt.subplots(figsize=(8, 8))
    circle.set_ax(ax2)
    circle.draw(ax=ax2)
    ax2.axes.xaxis.set_visible(False)
    ax2.axes.yaxis.set_visible(False)
    group_1_predicted = mpatches.Patch(
        color=colors[0], label='Predicted Group 1')
    group_2_predicted = mpatches.Patch(
        color=colors[1], label='Predicted Group 2')
    group_centroids = mpatches.Patch(
        color=colors[2], label='Cluster centroids')

    # Each point is drawn and annotated exactly once; its colour depends
    # only on its own predicted label.
    for i_embedding, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        pt_id = i_embedding
        color = colors[0] if labels[i_embedding] == 0 else colors[1]
        plt.scatter(
            x, y,
            c=color,
            s=150
        )
        ax2.annotate(pt_id, (x, y))

    for centroid in centroids:
        x = centroid[0]
        y = centroid[1]
        plt.scatter(
            x, y,
            c=colors[2],
            marker='*',
            s=150,
        )

    plt.title('K-means applied to Karate club embedding')
    plt.legend(handles=[group_1_predicted, group_2_predicted, group_centroids])
    plt.show()