def regularize(self, point):
        """
        Regularize rotation vectors to the axis-angle convention.

        In 3D, the norm of the rotation vector (i.e. the rotation angle)
        is brought back between 0 and pi, following the axis-angle
        representation's convention.

        If the angle is between pi and 2pi, the function computes its
        complementary in 2pi and inverts the direction of the rotation
        axis.

        For any other vector dimension no regularization is performed and
        the input, as converted by ``gs.to_ndarray``, is returned.

        :param point: rotation vector(s); converted to a 2D array with
            one vector per row
        :returns: 2D array of regularized rotation vectors (float64 when
            the vectors are 3D)
        """
        point = gs.to_ndarray(point, to_ndim=2)
        assert self.belongs(point)
        _, vec_dim = point.shape

        if vec_dim == 3:
            angle = gs.linalg.norm(point, axis=1)
            mask_not_0 = angle != 0

            # Nearest whole number of turns: angle - 2 * pi * k is the
            # signed angle brought back into [-pi, pi).
            k = gs.floor(angle / (2 * gs.pi) + .5)

            # Ratio between the signed regularized angle and the original
            # one; a negative ratio flips the rotation axis.
            norms_ratio = gs.zeros_like(angle).astype('float64')
            norms_ratio[mask_not_0] = (
                1. - 2. * gs.pi * k[mask_not_0] / angle[mask_not_0])
            # Null rotations are left untouched (and 0 / 0 is avoided).
            norms_ratio[angle == 0] = 1

            # Scale every row at once instead of looping over the points.
            regularized_point = (
                norms_ratio[:, None] * point.astype('float64'))
        else:
            # TODO(nina): regularization needed in nD?
            regularized_point = point

        assert regularized_point.ndim == 2
        return regularized_point
    def regularize(self, rot_vec):
        """
        Regularize rotation vectors to the axis-angle convention.

        In 3D, regularize the norm of the rotation vector,
        to be between 0 and pi, following the axis-angle
        representation's convention.

        If the angle is between pi and 2pi,
        the function computes its complementary in 2pi and
        inverts the direction of the rotation axis.

        For any other vector dimension the input, as converted by
        ``gs.to_ndarray``, is returned without regularization.

        :param rot_vec: 3d vector, or several of them (one per row)
        :returns: 2D array of regularized rotation vectors, one per row,
            with: 0 <= norm <= pi in the 3d case
        """
        rot_vec = gs.to_ndarray(rot_vec, to_ndim=2)
        assert self.belongs(rot_vec)
        _, vec_dim = rot_vec.shape

        if vec_dim == 3:
            angle = gs.linalg.norm(rot_vec, axis=1)
            mask_not_0 = angle != 0

            # k is the nearest whole number of turns, so that
            # angle - 2 * pi * k lies in [-pi, pi).
            k = gs.floor(angle / (2 * gs.pi) + .5)

            # Signed ratio new_angle / old_angle; it is negative when the
            # rotation axis has to be inverted.
            norms_ratio = gs.zeros_like(angle).astype('float64')
            norms_ratio[mask_not_0] = (
                1. - 2. * gs.pi * k[mask_not_0] / angle[mask_not_0])
            # The null rotation is kept as is, which also avoids 0 / 0.
            norms_ratio[angle == 0] = 1

            # Broadcast the per-vector ratio over the 3 coordinates rather
            # than rescaling the rows one by one in a Python loop.
            regularized_rot_vec = (
                norms_ratio[:, None] * rot_vec.astype('float64'))
        else:
            # TODO(nina): regularization needed in nD?
            regularized_rot_vec = rot_vec

        assert regularized_rot_vec.ndim == 2
        return regularized_rot_vec
示例#3
0
    def regularize(self, point):
        """Bring a rotation vector back to the axis-angle convention.

        In 3D, the norm of the rotation vector (the rotation angle) is
        regularized to be between 0 and pi, following the axis-angle
        representation's convention.

        If the angle is between pi and 2pi, the function computes its
        complementary in 2pi and inverts the direction of the rotation
        axis.

        The three cases (null angle, angle close to pi, any other angle)
        are blended through float masks offset by ``self.epsilon`` instead
        of boolean indexing, so the result is only accurate up to terms of
        the order of ``self.epsilon``.

        Parameters
        ----------
        point : array-like, shape=[...,3]

        Returns
        -------
        regularized_point : array-like, shape=[..., 3]
        """
        theta = gs.linalg.norm(point, axis=-1)

        is_null = gs.isclose(theta, 0.)
        is_half_turn = gs.isclose(theta, gs.pi)

        # Float versions of the masks, each shifted by epsilon.
        null_weight = gs.cast(is_null, gs.float32) + self.epsilon
        regular_weight = gs.cast(~is_null, gs.float32) + self.epsilon
        half_turn_weight = gs.cast(is_half_turn, gs.float32) + self.epsilon

        # Nearest whole number of turns, computed from the unshifted angle.
        n_turns = gs.floor(theta / (2 * gs.pi) + .5)

        # Shift the angle so that the divisions below never hit 0.
        theta += null_weight

        # Ratio that wraps the angle back by n_turns full turns.
        wrapped_ratio = 1. - 2. * gs.pi * n_turns / theta

        scale = gs.zeros_like(theta)
        scale += regular_weight * wrapped_ratio
        scale += null_weight
        # Where the angle is close to pi, swap the wrapped ratio for
        # pi / angle.
        scale += half_turn_weight * (gs.pi / theta - wrapped_ratio)

        return gs.einsum('...,...i->...i', scale, point)
示例#4
0
    def regularize(self, point, point_type=None):
        """
        Regularize a point according to its representation.

        For point_type 'vector' in 3D, regularize the norm of the
        rotation vector, to be between 0 and pi, following the axis-angle
        representation's convention.

        If the angle is between pi and 2pi,
        the function computes its complementary in 2pi and
        inverts the direction of the rotation axis.

        Vectors in other dimensions, and points of type 'matrix', are
        returned as copies without regularization.

        :param point: point(s) to regularize; converted to a 2D array for
            'vector' and to a 3D array for 'matrix'
        :param point_type: 'vector' or 'matrix'; defaults to
            self.default_point_type when None
        :returns: array of regularized points
        :raises ValueError: if point_type is neither 'vector' nor 'matrix'
        """
        if point_type is None:
            point_type = self.default_point_type

        if point_type == 'vector':
            point = gs.to_ndarray(point, to_ndim=2)
            assert self.belongs(point, point_type)

            regularized_point = gs.copy(point)
            if self.n == 3:
                angle = gs.linalg.norm(regularized_point, axis=1)
                mask_0 = gs.isclose(angle, 0)
                mask_not_0 = ~mask_0

                mask_pi = gs.isclose(angle, gs.pi)

                # Nearest whole number of turns: angle - 2 * pi * k is the
                # signed angle brought back into [-pi, pi).
                k = gs.floor(angle / (2 * gs.pi) + .5)
                norms_ratio = gs.zeros_like(angle)
                norms_ratio[mask_not_0] = (
                    1. - 2. * gs.pi * k[mask_not_0] / angle[mask_not_0])
                # Null rotations are kept as they are.
                norms_ratio[mask_0] = 1
                # Angles close to pi keep their axis and get norm pi; the
                # generic formula would give a ratio close to -1 instead.
                norms_ratio[mask_pi] = gs.pi / angle[mask_pi]

                # Multiply out of place: writing the rows back into the
                # copy would cast the result to the input dtype, which
                # truncates it for integer input.
                regularized_point = (
                    norms_ratio[:, None] * regularized_point)
            # TODO(nina): regularization needed in nD?

            assert gs.ndim(regularized_point) == 2

        elif point_type == 'matrix':
            point = gs.to_ndarray(point, to_ndim=3)
            # TODO(nina): regularization for matrices?
            regularized_point = gs.copy(point)

        else:
            # Fail explicitly instead of hitting an unbound local variable
            # on the return statement.
            raise ValueError(
                "point_type must be 'vector' or 'matrix', got {!r}".format(
                    point_type))

        return regularized_point
示例#5
0
def online_kmeans(X,
                  metric,
                  n_clusters,
                  n_repetitions=20,
                  atol=1e-5,
                  max_iter=5e4):
    """Perform online K-means clustering.

    Perform online version of k-means algorithm on data contained in X.
    The data points are treated sequentially and the cluster centers are
    updated one at a time. This version of k-means avoids computing the
    mean of each cluster at each iteration and is therefore less
    computationally intensive than the offline version.

    In the setting of quantization of probability distributions, this
    algorithm is also known as Competitive Learning Riemannian Quantization.
    It computes the closest approximation of the empirical distribution of
    data by a discrete distribution supported by a smaller number of points
    with respect to the Wasserstein distance. This smaller number of points
    is n_clusters.

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Input data. It is treated sequentially by the algorithm, i.e.
        one datum is chosen randomly at each iteration.
    metric : object
        Metric of the space in which the data lives. At each iteration,
        one of the cluster centers is moved in the direction of the new
        datum, according the exponential map of the underlying space, which
        is a method of metric. The methods used here are
        closest_neighbor_index, log, exp and dist.
    n_clusters : int
        Number of clusters of the k-means clustering, or number of desired
        atoms of the quantized distribution.
    n_repetitions : int, default=20
        The cluster centers are updated using decreasing step sizes, each
        of which stays constant for n_repetitions iterations to allow a better
        exploration of the data points.
    atol : float, default=1e-5
        Absolute tolerance of the stopping test: the iterations stop as
        soon as the distance between a center and its updated position
        is close to 0 within atol.
    max_iter : int, default=5e4
        Maximum number of iterations. If it is reached, the
        quantization may be inaccurate.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.
    labels : array, shape=[n_samples]
        Cluster labels for each point.
    """
    n_samples = X.shape[0]

    # The initial centers are data points drawn at random (with
    # replacement, so duplicates are possible).
    random_indices = gs.random.randint(low=0,
                                       high=n_samples,
                                       size=(n_clusters, ))
    cluster_centers = gs.get_slice(X, gs.cast(random_indices, gs.int32))

    iteration = 0
    converged = False

    while iteration < max_iter:
        iteration += 1
        step_size = gs.floor(gs.array(iteration / n_repetitions)) + 1

        random_index = gs.random.randint(low=0, high=n_samples, size=(1, ))
        point = gs.get_slice(X, gs.cast(random_index, gs.int32))

        index_to_update = metric.closest_neighbor_index(point, cluster_centers)
        center_to_update = gs.copy(
            gs.get_slice(cluster_centers, index_to_update))

        # Move the closest center towards the sampled point by a fraction
        # of the geodesic that shrinks as the step size grows.
        tangent_vec_update = metric.log(
            point=point, base_point=center_to_update) / (step_size + 1)
        new_center = metric.exp(tangent_vec=tangent_vec_update,
                                base_point=center_to_update)
        gap = metric.dist(center_to_update, new_center)
        # A zero gap at the very first iteration must not be mistaken
        # for convergence.
        if gap == 0 and iteration == 1:
            gap = gs.array(1.0)

        cluster_centers[index_to_update, :] = new_center

        if gs.isclose(gap, 0, atol=atol):
            converged = True
            break

    # Warn only when the loop ran out of iterations. Comparing the counter
    # with max_iter - 1 would miss that case, since the loop exits with
    # iteration == max_iter.
    if not converged:
        logging.warning(
            "Maximum number of iterations %s reached. The "
            "clustering may be inaccurate", max_iter)

    labels = gs.zeros(n_samples)
    for i in range(n_samples):
        labels[i] = int(metric.closest_neighbor_index(X[i], cluster_centers))

    return cluster_centers, labels
示例#6
0
    def optimal_quantization(self,
                             points,
                             n_centers=N_CENTERS,
                             n_repetitions=N_REPETITIONS,
                             tolerance=TOLERANCE,
                             n_max_iterations=N_MAX_ITERATIONS):
        """Compute the optimal quantization of a set of points.

        Compute the optimal approximation of points by a smaller number
        of weighted centers using the Competitive Learning Riemannian
        Quantization algorithm. The centers are updated using decreasing
        step sizes, each of which stays constant for n_repetitions iterations
        to allow a better exploration of the data points.
        See https://arxiv.org/abs/1806.07605.

        Parameters
        ----------
        points : array-like, shape=[n_points, dimension]
            Points to quantize; one is drawn at random at each iteration.
        n_centers : int
            Number of centers of the quantization.
        n_repetitions : int
            Number of iterations during which the step size stays constant.
        tolerance : float
            Absolute tolerance of the stopping test on the distance
            between a center and its updated position.
        n_max_iterations : int
            Maximum number of iterations. If it is reached, the
            quantization may be inaccurate.

        Returns
        -------
        centers : array-like, shape=[n_centers, dimension]
            The centers.
        weights : array-like, shape=[n_centers,]
            Weights between 0 and 1: the fraction of the points whose
            closest center is the corresponding center.
        clusters : dict
            The clusters, where each key is the cluster index, and its
            value is the list of points belonging to the cluster.
        iteration : int
            Number of iterations that were run.
        """
        n_points = points.shape[0]
        dimension = points.shape[-1]

        # The initial centers are data points drawn at random (with
        # replacement, so duplicates are possible).
        random_indices = gs.random.randint(low=0,
                                           high=n_points,
                                           size=(n_centers, ))
        centers = points[gs.ix_(random_indices, gs.arange(dimension))]

        iteration = 0
        converged = False

        while iteration < n_max_iterations:
            iteration += 1
            step_size = gs.floor(iteration / n_repetitions) + 1

            random_index = gs.random.randint(low=0, high=n_points, size=(1, ))
            point = points[gs.ix_(random_index, gs.arange(dimension))]

            index_to_update = self.closest_neighbor_index(point, centers)
            center_to_update = centers[index_to_update, :]

            # Move the closest center towards the sampled point by a
            # fraction of the geodesic that shrinks as the step size grows.
            tangent_vec_update = self.log(
                point=point, base_point=center_to_update) / (step_size + 1)
            new_center = self.exp(tangent_vec=tangent_vec_update,
                                  base_point=center_to_update)
            gap = self.dist(center_to_update, new_center)
            # An exactly null gap is replaced by 1, so that it never
            # triggers the stopping test below.
            gap = (gap != 0) * gap + (gap == 0)

            centers[index_to_update, :] = new_center

            if gs.isclose(gap, 0, atol=tolerance):
                converged = True
                break

        # Warn only when the loop ran out of iterations. Comparing the
        # counter with n_max_iterations - 1 would miss that case, since
        # the loop exits with iteration == n_max_iterations.
        if not converged:
            print('Maximum number of iterations {} reached. The '
                  'quantization may be inaccurate'.format(n_max_iterations))

        clusters = dict()
        weights = gs.zeros((n_centers, ))

        # Assign every point to its closest center and count the
        # assignments per center.
        for point in points:
            index = self.closest_neighbor_index(point, centers)
            clusters.setdefault(index, list()).append(point)
            weights[index] += 1

        weights = weights / n_points

        return centers, weights, clusters, iteration