def test_sample_von_mises_fisher_arbitrary_mean(self):
    """Check the mean estimated from von Mises-Fisher samples.

    For each tested dimension, a point is drawn with `random_uniform`
    and used as `mu` of a von Mises-Fisher distribution with a large
    concentration parameter. The normalized sum of the samples is then
    compared with that point, up to MEAN_ESTIMATION_TOL.
    """
    n_points = 10000
    kappa = 1000.

    for dim in [2, 9]:
        sphere = Hypersphere(dim)
        mean = sphere.random_uniform()
        samples = sphere.random_von_mises_fisher(
            mu=mean, kappa=kappa, n_samples=n_points)

        # The direction of the summed samples is the mean estimate.
        resultant = gs.sum(samples, axis=0)
        estimated_mean = resultant / gs.linalg.norm(resultant)

        self.assertAllClose(
            estimated_mean, mean, atol=MEAN_ESTIMATION_TOL)
def test_optimal_quantization(self):
    """Compare one-center optimal quantization with the metric mean.

    The distance between the single center returned by
    `optimal_quantization` and `metric.mean` of the same samples is
    divided by the diameter of the sample set; this ratio must be zero
    up to OPTIMAL_QUANTIZATION_TOL.
    """
    sphere = Hypersphere(2)
    metric = sphere.metric
    samples = sphere.random_von_mises_fisher(kappa=10, n_samples=1000)

    mean = metric.mean(samples)
    # Only the centers are used; the other three outputs are ignored.
    centers, _weights, _clusters, _n_iterations = \
        metric.optimal_quantization(points=samples, n_centers=1)

    error = metric.dist(mean, centers)
    relative_error = error / metric.diameter(samples)

    self.assertAllClose(
        relative_error, 0.0, atol=OPTIMAL_QUANTIZATION_TOL)
def test_sample_random_von_mises_fisher_kappa(self, dim, kappa, n_points):
    """Check the concentration estimated from von Mises-Fisher samples.

    A closed-form first guess is computed from the norm of the sample
    mean, then refined by a fixed number of Newton-style updates that
    use a ratio of modified Bessel functions. The refined value must
    match `kappa` up to KAPPA_ESTIMATION_TOL.

    Parameters
    ----------
    dim : dimension passed to Hypersphere.
    kappa : concentration used for sampling, and the expected estimate.
    n_points : number of samples drawn.
    """
    sphere = Hypersphere(dim)
    samples = sphere.random_von_mises_fisher(kappa=kappa, n_samples=n_points)

    # Norm of the sample mean.
    resultant_length = gs.linalg.norm(gs.sum(samples, axis=0)) / n_points

    # Closed-form starting value for the iteration.
    squared_length = resultant_length**2
    estimate = (
        resultant_length * (dim + 1.0 - squared_length)
        / (1.0 - squared_length)
    )
    estimate = gs.cast(estimate, gs.float64)
    resultant_length = gs.cast(resultant_length, gs.float64)

    p = dim + 1
    for _ in range(100):
        upper_bessel = scipy.special.iv(p / 2.0, estimate)
        lower_bessel = scipy.special.iv(p / 2.0 - 1.0, estimate)
        ratio = upper_bessel / lower_bessel
        slope = 1.0 - ratio**2 - (p - 1.0) * ratio / estimate
        estimate = estimate - (ratio - resultant_length) / slope

    self.assertAllClose(estimate, kappa, atol=KAPPA_ESTIMATION_TOL)
class TestOnlineKmeans(geomstats.tests.TestCase):
    """Tests of OnlineKMeans on von Mises-Fisher samples of a hypersphere."""

    def setUp(self):
        """Seed the random generator and draw the data shared by the tests."""
        gs.random.seed(1234)

        self.dimension = 2
        self.space = Hypersphere(dim=self.dimension)
        self.metric = self.space.metric
        self.data = self.space.random_von_mises_fisher(
            kappa=100, n_samples=50)

    @geomstats.tests.np_only
    def test_fit(self):
        """Check that a single fitted center lies close to the Frechet mean."""
        samples = self.data

        kmeans = OnlineKMeans(
            metric=self.metric, n_clusters=1, n_repetitions=10)
        kmeans.fit(samples)
        center = kmeans.cluster_centers_

        frechet_mean = FrechetMean(metric=self.metric, lr=1.)
        frechet_mean.fit(samples)

        distance_to_mean = self.metric.dist(center, frechet_mean.estimate_)
        self.assertAllClose(0., distance_to_mean, atol=1e-3)

    @geomstats.tests.np_only
    def test_predict(self):
        """Check that predicting the first sample returns its fitted label."""
        kmeans = OnlineKMeans(
            metric=self.metric, n_clusters=3, n_repetitions=1)
        kmeans.fit(self.data)

        first_sample = self.data[0, :]
        predicted_label = kmeans.predict(first_sample)

        self.assertAllClose(kmeans.labels_[0], predicted_label)
def _init_double_cluster(
    seed=10,
    num_of_samples=20,
    size_of_dim=2,
    kappa_value=20,
    orthogonality_of_sphere=3,
    bandwidth=0.3,
    tol=1e-4,
    num_of_centers=2,
):
    """Build a two-cluster data set and fit a riemannian_mean_shift on it.

    One von Mises-Fisher cluster is sampled on the hypersphere and
    multiplied on the right by two random rotations; the two rotated
    copies are concatenated and used to fit the estimator.

    Parameters
    ----------
    seed : value passed to `gs.random.seed`.
    num_of_samples : number of samples drawn for the base cluster.
    size_of_dim : dimension passed to Hypersphere.
    kappa_value : concentration of the von Mises-Fisher distribution.
    orthogonality_of_sphere : argument passed to SpecialOrthogonal.
    bandwidth, tol : forwarded to riemannian_mean_shift.
    num_of_centers : forwarded to riemannian_mean_shift as `n_centers`.

    Returns
    -------
    tuple of (concatenated data set, fitted riemannian_mean_shift).
    """
    gs.random.seed(seed)

    sphere = Hypersphere(size_of_dim)
    base_cluster = sphere.random_von_mises_fisher(
        kappa=kappa_value, n_samples=num_of_samples
    )

    # Draw both rotations before applying either of them.
    rotation_group = SpecialOrthogonal(orthogonality_of_sphere)
    rotations = [rotation_group.random_uniform() for _ in range(2)]
    combined_cluster = gs.concatenate(
        tuple(base_cluster @ rotation for rotation in rotations)
    )

    rms = riemannian_mean_shift(
        manifold=sphere,
        metric=sphere.metric,
        bandwidth=bandwidth,
        tol=tol,
        n_centers=num_of_centers,
    )
    rms.fit(combined_cluster)

    return combined_cluster, rms
def test_double_cluster_riemannian_mean_shift(self):
    """Check that every point is assigned to one of the two fitted centers.

    Two rotated copies of one von Mises-Fisher cluster are fitted with
    riemannian_mean_shift. The number of predictions matching the first
    center plus the number matching the second center must equal the
    number of points.
    """
    gs.random.seed(10)

    sphere = Hypersphere(dim=2)
    metric = HypersphereMetric(2)
    base_cluster = sphere.random_von_mises_fisher(kappa=20, n_samples=20)

    rotation_group = SpecialOrthogonal(3)
    first_rotation = rotation_group.random_uniform()
    second_rotation = rotation_group.random_uniform()
    points = gs.concatenate(
        (base_cluster @ first_rotation, base_cluster @ second_rotation))

    rms = riemannian_mean_shift(
        manifold=sphere, metric=metric, bandwidth=0.3, tol=1e-4,
        n_centers=2)
    rms.fit(points)
    assigned_centers = rms.predict(points)

    def count_assigned_to(center_index):
        # Number of predictions that coincide with the given center.
        return sum(
            1 for assigned in assigned_centers
            if gs.allclose(assigned, rms.centers[center_index]))

    n_in_first = count_assigned_to(0)
    n_in_second = count_assigned_to(1)

    self.assertEqual(points.shape[0], n_in_first + n_in_second)
def main():
    """Cluster von Mises-Fisher samples with OnlineKMeans and plot them.

    The first figure shows the fitted cluster centers; the second one
    draws a sphere and, cluster by cluster, the samples carrying each
    label.
    """
    n_clusters = 4

    sphere = Hypersphere(dimension=2)
    samples = sphere.random_von_mises_fisher(kappa=10, n_samples=1000)

    kmeans = OnlineKMeans(metric=sphere.metric, n_clusters=n_clusters)
    kmeans = kmeans.fit(samples)

    # Figure 0: cluster centers only.
    plt.figure(0)
    centers_axes = plt.subplot(111, projection="3d")
    visualization.plot(
        points=kmeans.cluster_centers_, ax=centers_axes, space='S2', c='r')
    plt.show()

    # Figure 1: the samples, drawn one cluster at a time.
    plt.figure(1)
    clusters_axes = plt.subplot(111, projection="3d")
    sphere_drawer = visualization.Sphere()
    sphere_drawer.draw(ax=clusters_axes)
    for label in range(n_clusters):
        members = samples[kmeans.labels_ == label, :]
        sphere_drawer.draw_points(ax=clusters_axes, points=members)
    plt.show()
def main():
    """Run a kernel density estimation classification on the sphere and plot.

    Two labelled training sets are sampled from a von Mises-Fisher
    distribution (the second set is negated) and used to fit a
    KernelDensityEstimationClassifier. Uniformly sampled targets are
    then classified. Three figures are drawn: the training set, the
    predicted labels and the predicted probabilities.
    """
    n_labels = 2
    n_samples_per_dataset = 10
    n_targets = 200
    n_training_samples = n_labels * n_samples_per_dataset

    sphere = Hypersphere(dim=2)

    # Training data: one sample set per label, stacked in label order.
    first_dataset = sphere.random_von_mises_fisher(
        kappa=10, n_samples=n_samples_per_dataset)
    second_dataset = - sphere.random_von_mises_fisher(
        kappa=10, n_samples=n_samples_per_dataset)
    training_dataset = gs.concatenate(
        (first_dataset, second_dataset), axis=0)
    labels = gs.concatenate((
        gs.zeros([n_samples_per_dataset], dtype=gs.int64),
        gs.ones([n_samples_per_dataset], dtype=gs.int64),
    ))

    target = sphere.random_uniform(n_samples=n_targets)

    # One colour triple per label.
    labels_colors = gs.zeros([n_labels, 3])
    labels_colors[0, :] = gs.array([0, 0, 1])
    labels_colors[1, :] = gs.array([1, 0, 0])

    kde = KernelDensityEstimationClassifier(
        radius=np.inf,
        distance=sphere.metric.dist,
        kernel=triangular_radial_kernel,
        bandwidth=3,
        outlier_label='most_frequent')
    kde.fit(training_dataset, labels)
    target_labels = kde.predict(target)
    target_labels_proba = kde.predict_proba(target)

    def open_sphere_figure(figure_id, title):
        # Create a titled 3d figure and draw the sphere on it.
        plt.figure(figure_id)
        axes = plt.subplot(111, projection='3d')
        plt.title(title)
        drawer = visualization.Sphere()
        drawer.draw(ax=axes)
        return axes, drawer

    def colors_from_labels(label_ids, n_points):
        # Look up the colour triple of each of the first n_points labels.
        colors = gs.zeros([n_points, 3])
        for index in range(n_points):
            colors[index, :] = labels_colors[label_ids[index], :]
        return colors

    axes, drawer = open_sphere_figure(0, 'Training set')
    training_colors = colors_from_labels(labels, n_training_samples)
    drawer.draw_points(ax=axes, points=training_dataset, c=training_colors)

    axes, drawer = open_sphere_figure(1, 'Classification')
    target_colors = colors_from_labels(target_labels, n_targets)
    drawer.draw_points(ax=axes, points=target, c=target_colors)

    axes, drawer = open_sphere_figure(2, 'Probabilistic classification')
    # Blend the label colours using the predicted probabilities.
    blended_colors = target_labels_proba @ labels_colors
    drawer.draw_points(ax=axes, points=target, c=blended_colors)

    plt.show()