def kmean_poincare_ball():
    """Run K-means on the Poincare ball."""
    n_samples = 20
    dim = 2
    n_clusters = 2
    manifold = Hyperbolic(dimension=dim, point_type='ball')
    metric = manifold.metric

    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(n_samples, dim))
    cluster_2 = gs.random.uniform(low=-0.2, high=0.0, size=(n_samples, dim))
    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(riemannian_metric=metric,
                              n_clusters=n_clusters,
                              init='random',
                              mean_method='frechet-poincare-ball'
                              )

    centroids = kmeans.fit(X=data, max_iter=100)
    labels = kmeans.predict(X=data)

    plt.figure(1)
    colors = ['red', 'blue']

    ax = visualization.plot(
        data,
        space='H2_poincare_disk',
        marker='.',
        color='black',
        point_type=manifold.point_type)

    for i in range(n_clusters):
        ax = visualization.plot(
            data[labels == i],
            ax=ax,
            space='H2_poincare_disk',
            marker='.',
            color=colors[i],
            point_type=manifold.point_type)

    ax = visualization.plot(
        centroids,
        ax=ax,
        space='H2_poincare_disk',
        marker='*',
        color='green',
        s=100,
        point_type=manifold.point_type)

    ax.set_title('Kmeans on Poincaré Ball Manifold')

    return plt
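These snippets are shown without their module-level imports. A header along the following lines is assumed for the K-means examples on this page; the exact module paths are an assumption and depend on the geomstats version in use (the snippets built on Hyperbolic(dimension=..., point_type='ball') target an older API than the PoincareBall / Hypersphere ones).

# Assumed imports for the K-means snippets on this page (a sketch, not the
# original file headers; module paths may differ between geomstats versions).
import matplotlib.pyplot as plt

import geomstats.backend as gs
import geomstats.visualization as visualization
from geomstats.geometry.hyperbolic import Hyperbolic
from geomstats.geometry.hypersphere import Hypersphere
from geomstats.geometry.poincare_ball import PoincareBall
from geomstats.learning.kmeans import RiemannianKMeans

Each example returns the pyplot module, so calling e.g. kmean_poincare_ball().show() displays the figure.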
Example No. 2
def kmean_hypersphere():
    """Run K-means on the sphere."""
    n_samples = 50
    dim = 2
    n_clusters = 2
    manifold = Hypersphere(dim)
    metric = manifold.metric

    # Generate data around the north pole
    cluster_1 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)

    # Generate data around the south pole by flipping the z-coordinate
    cluster_2 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)
    for point in cluster_2:
        point[2] = -point[2]

    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(metric, n_clusters, tol=1e-3)
    kmeans.fit(data)
    labels = kmeans.predict(data)
    centroids = kmeans.centroids

    plt.figure(2)
    colors = ['red', 'blue']

    ax = visualization.plot(
        data,
        space='S2',
        marker='.',
        color='black')

    for i in range(n_clusters):
        if len(data[labels == i]) > 0:
            ax = visualization.plot(
                points=data[labels == i],
                ax=ax,
                space='S2',
                marker='.',
                color=colors[i])

    ax = visualization.plot(
        centroids,
        ax=ax,
        space='S2',
        marker='*',
        s=200,
        color='green')

    ax.set_title('Kmeans on Hypersphere Manifold')

    return plt
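The loop above flips the z-coordinate of each sample in place, which works with the numpy backend but is awkward for backends without item assignment. A minimal vectorized sketch of the same reflection (assuming dim == 2, so the ambient points live in R^3; gs is geomstats.backend as above):

# Move the cluster to the south pole by flipping the z-coordinate,
# without mutating the array in place.
cluster_2 = cluster_2 * gs.array([1.0, 1.0, -1.0])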
Example No. 3
def kmean_poincare_ball():
    """Run K-means on the Poincare ball."""
    n_samples = 20
    dim = 2
    n_clusters = 2
    manifold = PoincareBall(dim=dim)
    metric = manifold.metric

    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(n_samples, dim))
    cluster_2 = gs.random.uniform(low=-0.2, high=0.0, size=(n_samples, dim))
    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(metric=metric, n_clusters=n_clusters, init="random")

    centroids = kmeans.fit(X=data)
    labels = kmeans.predict(X=data)

    plt.figure(1)
    colors = ["red", "blue"]

    ax = visualization.plot(
        data,
        space="H2_poincare_disk",
        marker=".",
        color="black",
        point_type=manifold.point_type,
    )

    for i in range(n_clusters):
        ax = visualization.plot(
            data[labels == i],
            ax=ax,
            space="H2_poincare_disk",
            marker=".",
            color=colors[i],
            point_type=manifold.point_type,
        )

    ax = visualization.plot(
        centroids,
        ax=ax,
        space="H2_poincare_disk",
        marker="*",
        color="green",
        s=100,
        point_type=manifold.point_type,
    )

    ax.set_title("Kmeans on Poincaré Ball Manifold")

    return plt
Example No. 4
    def test_hypersphere_kmeans_predict(self):
        gs.random.seed(1234)

        manifold = hypersphere.Hypersphere(2)
        metric = hypersphere.HypersphereMetric(2)

        x = manifold.random_von_mises_fisher(kappa=100, n_samples=200)

        kmeans = RiemannianKMeans(metric, 5, tol=1e-5)
        kmeans.fit(x, max_iter=100)
        result = kmeans.predict(x)

        centroids = kmeans.centroids
        expected = gs.array([int(metric.closest_neighbor_index(x_i, centroids))
                             for x_i in x])
        self.assertAllClose(expected, result)
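The expected labels here spell out what predict computes: each sample is assigned to the centroid closest to it under the Riemannian metric. A minimal standalone sketch of that assignment (closest_centroid_index is a hypothetical helper, not part of the geomstats API):

import geomstats.backend as gs

def closest_centroid_index(x_i, centroids, metric):
    # Distance from the sample to every centroid under the given metric,
    # then the index of the smallest one.
    distances = gs.array([metric.dist(x_i, centroid) for centroid in centroids])
    return gs.argmin(distances)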
Example No. 5
def kmean_hypersphere():
    """Run K-means on the sphere."""
    n_samples = 50
    dim = 2
    n_clusters = 2
    manifold = Hypersphere(dim)
    metric = manifold.metric

    # Generate data around the north pole
    cluster_1 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)

    # Generate data around the south pole by taking antipodal points
    cluster_2 = manifold.random_von_mises_fisher(kappa=50, n_samples=n_samples)
    cluster_2 = -cluster_2

    data = gs.concatenate((cluster_1, cluster_2), axis=0)

    kmeans = RiemannianKMeans(metric, n_clusters, tol=1e-3)
    kmeans.fit(data)
    labels = kmeans.predict(data)
    centroids = kmeans.centroids

    plt.figure(2)
    colors = ["red", "blue"]

    ax = visualization.plot(data, space="S2", marker=".", color="black")

    for i in range(n_clusters):
        if len(data[labels == i]) > 0:
            ax = visualization.plot(
                points=data[labels == i], ax=ax, space="S2", marker=".", color=colors[i]
            )

    ax = visualization.plot(
        centroids, ax=ax, space="S2", marker="*", s=200, color="green"
    )

    ax.set_title("Kmeans on the sphere")

    return plt
Example No. 6
def main():
    """Run Riemannian K-means on two clusters in the Poincare ball."""
    cluster_1 = gs.random.uniform(low=0.5, high=0.6, size=(20, 2))
    cluster_2 = gs.random.uniform(low=-0.2, high=0.0, size=(20, 2))

    ax = plt.gca()

    merged_clusters = gs.concatenate((cluster_1, cluster_2), axis=0)
    manifold = Hyperbolic(dimension=2, point_type='ball')
    metric = HyperbolicMetric(dimension=2, point_type='ball')

    visualization.plot(
        merged_clusters,
        ax=ax,
        space='H2_poincare_disk',
        marker='.',
        color='black',
        point_type=manifold.point_type)

    kmeans = RiemannianKMeans(
        riemannian_metric=metric,
        n_clusters=2,
        init='random',
    )

    centroids = kmeans.fit(X=merged_clusters, max_iter=1)

    labels = kmeans.predict(X=merged_clusters)

    visualization.plot(
        centroids,
        ax=ax,
        space='H2_poincare_disk',
        marker='.',
        color='red',
        point_type=manifold.point_type)

    print('Data labels:', labels)

    plt.show()
Example No. 7
    def fit(self, data):
        """Fit a Gaussian mixture model (GMM) given the data.

        Alternates between Expectation and Maximization steps
        for some number of iterations.

        Parameters
        ----------
        data : array-like, shape=[n_samples, n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Return the components of the computed
            Gaussian mixture model: means, variances and mixture_coefficients.
        """
        self._dimension = data.shape[-1]
        if self.initialisation_method == 'kmeans':
            kmeans = RiemannianKMeans(metric=self.metric,
                                      n_clusters=self.n_gaussians,
                                      init='random',
                                      mean_method='batch',
                                      lr=self.lr_mean)

            centroids = kmeans.fit(X=data)
            labels = kmeans.predict(X=data)

            self.means = centroids
            self.variances = gs.zeros(self.n_gaussians)

            labeled_data = gs.vstack([labels, gs.transpose(data)])
            labeled_data = gs.transpose(labeled_data)
            for label, centroid in enumerate(centroids):
                label_mask = gs.where(labeled_data[:, 0] == label)
                grouped_by_label = labeled_data[label_mask][:, 1:]
                v = variance(grouped_by_label, centroid, self.metric)
                if grouped_by_label.shape[0] == 1:
                    v += MIN_VAR_INIT
                self.variances[label] = v
        else:
            self.means = (gs.random.rand(self.n_gaussians, self._dimension) -
                          0.5) / self._dimension
            self.variances = gs.random.rand(self.n_gaussians) / 10 + 0.8

        self.mixture_coefficients = (
            gs.ones(self.n_gaussians) / self.n_gaussians)
        posterior_probabilities = gs.ones((data.shape[0], self.means.shape[0]))

        (self.variances_range,
         self.normalization_factor_var,
         self.phi_inv_var) = self.normalization_factor_init(
            gs.arange(ZETA_LOWER_BOUND, ZETA_UPPER_BOUND, ZETA_STEP))

        for epoch in range(self.max_iter):
            old_posterior_probabilities = posterior_probabilities

            posterior_probabilities = self._expectation(data)

            condition = gs.mean(
                gs.abs(old_posterior_probabilities - posterior_probabilities))

            if condition < EM_CONV_RATE and epoch > MINIMUM_EPOCHS:
                logging.info('EM converged in %s iterations', epoch)
                return self.means, self.variances, self.mixture_coefficients

            self._maximization(data, posterior_probabilities)

        logging.warning('EM did not converge after %s iterations. '
                        'Consider increasing max_iter.', self.max_iter)

        return self.means, self.variances, self.mixture_coefficients
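For context, this fit method reads like part of a Riemannian Gaussian mixture estimator trained by EM on the Poincare ball. The sketch below shows how such an estimator would be driven; the class name RiemannianEM, its import path and its constructor arguments are assumptions, not taken from the snippet.

# Minimal usage sketch; RiemannianEM, its import path and the constructor
# arguments shown here are assumptions about the surrounding estimator.
import geomstats.backend as gs
from geomstats.geometry.poincare_ball import PoincareBall
from geomstats.learning.expectation_maximization import RiemannianEM

manifold = PoincareBall(dim=2)
data = gs.random.uniform(low=-0.5, high=0.5, size=(100, 2))  # points inside the unit ball

gmm = RiemannianEM(metric=manifold.metric,
                   n_gaussians=2,
                   initialisation_method='kmeans')
means, variances, mixture_coefficients = gmm.fit(data)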
Example No. 8
def main():
    """Learning Poincaré graph embedding.

    Learns Poincaré Ball embedding by using Riemannian
    gradient descent algorithm. Then K-means is applied
    to learn labels of each data sample.
    """
    gs.random.seed(1234)

    karate_graph = load_karate_graph()

    hyperbolic_embedding = HyperbolicEmbedding()

    embeddings = hyperbolic_embedding.embed(karate_graph)

    colors = {1: 'b', 2: 'r'}
    group_1 = mpatches.Patch(color=colors[1], label='Group 1')
    group_2 = mpatches.Patch(color=colors[2], label='Group 2')

    circle = visualization.PoincareDisk(point_type='ball')

    _, ax = plt.subplots(figsize=(8, 8))
    ax.axes.xaxis.set_visible(False)
    ax.axes.yaxis.set_visible(False)
    circle.set_ax(ax)
    circle.draw(ax=ax)
    for i_embedding, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        pt_id = i_embedding
        plt.scatter(
            x, y,
            c=colors[karate_graph.labels[pt_id][0]],
            s=150
        )
        ax.annotate(pt_id, (x, y))

    plt.tick_params(which='both')
    plt.title('Poincare Ball Embedding of the Karate Club Network')
    plt.legend(handles=[group_1, group_2])
    plt.show()

    n_clusters = 2

    kmeans = RiemannianKMeans(
        riemannian_metric=hyperbolic_embedding.manifold.metric,
        n_clusters=n_clusters,
        init='random',
        mean_method='frechet-poincare-ball')

    centroids = kmeans.fit(X=embeddings, max_iter=100)
    labels = kmeans.predict(X=embeddings)

    colors = ['g', 'c', 'm']
    circle = visualization.PoincareDisk(point_type='ball')
    _, ax2 = plt.subplots(figsize=(8, 8))
    circle.set_ax(ax2)
    circle.draw(ax=ax2)
    ax2.axes.xaxis.set_visible(False)
    ax2.axes.yaxis.set_visible(False)
    group_1_predicted = mpatches.Patch(
        color=colors[0], label='Predicted Group 1')
    group_2_predicted = mpatches.Patch(
        color=colors[1], label='Predicted Group 2')
    group_centroids = mpatches.Patch(
        color=colors[2], label='Cluster centroids')

    # Plot each embedded node once, colored by its predicted cluster.
    for i_embedding, embedding in enumerate(embeddings):
        x = embedding[0]
        y = embedding[1]
        pt_id = i_embedding
        color = colors[0] if labels[i_embedding] == 0 else colors[1]
        plt.scatter(x, y, c=color, s=150)
        ax2.annotate(pt_id, (x, y))

    for centroid in centroids:
        x = centroid[0]
        y = centroid[1]
        plt.scatter(x, y, c=colors[2], marker='*', s=150)

    plt.title('K-means applied to Karate club embedding')
    plt.legend(handles=[group_1_predicted, group_2_predicted, group_centroids])
    plt.show()
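This last example relies on helpers that are not defined in the snippet. The import block below is a sketch of where they are assumed to live in geomstats (dataset loader, embedding helper and matplotlib patches); the paths may differ between versions.

# Assumed imports for the karate-club embedding example (a sketch; the
# module paths are assumptions and may differ between geomstats versions).
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt

import geomstats.backend as gs
import geomstats.visualization as visualization
from geomstats.datasets.prepare_graph_data import HyperbolicEmbedding
from geomstats.datasets.utils import load_karate_graph
from geomstats.learning.kmeans import RiemannianKMeans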