Example #1
    def parallel_transport(self,
                           tangent_vector_a,
                           tangent_vector_b,
                           base_point,
                           n_points=1):
        """
        Parallel transport of tangent vector a integrating the connection
        along the (affine connection) geodesic starting at the initial point
        base_point with initial tangent vector the tangent vector b.

        Returns a tangent vector at the point
        exp_(base_point)(tangent_vector_b).
        """
        current_point = gs.copy(base_point)
        transported_tangent_vector = gs.copy(tangent_vector_a)
        for i_point in range(n_points):
            # Waypoint at fraction (i_point + 1) / n_points along the geodesic.
            frac_tangent_vector_b = (i_point + 1) / n_points * tangent_vector_b
            next_point = self.exp(base_point=base_point,
                                  tangent_vector=frac_tangent_vector_b)
            geodesic_tangent_vector = self.log(base_point=current_point,
                                               point=next_point)
            # Transport along the segment from current_point to next_point,
            # so that all n_points segments (including the last) are covered.
            transported_tangent_vector = self.pole_ladder_transport(
                tangent_vector_a=transported_tangent_vector,
                tangent_vector_b=geodesic_tangent_vector,
                base_point=current_point)
            current_point = next_point

        return transported_tangent_vector
Example #2
    def pole_ladder_parallel_transport(self,
                                       tangent_vec_a,
                                       tangent_vec_b,
                                       base_point,
                                       n_steps=1):
        """Approximate parallel transport using the pole ladder scheme.

        Approximate parallel transport using the pole ladder scheme [LP2013b]_
        [GJSP2019]_. `tangent_vec_a` is transported along the geodesic starting
        at `base_point` with initial tangent vector `tangent_vec_b`.

        Returns a tangent vector at the point
        exp_(`base_point`)(`tangent_vec_b`).

        Parameters
        ----------
        tangent_vec_a : array-like, shape=[n_samples, dimension]
                                   or shape=[1, dimension]
        tangent_vec_b : array-like, shape=[n_samples, dimension]
                                   or shape=[1, dimension]
        base_point : array-like, shape=[n_samples, dimension]
                                or shape=[1, dimension]
        n_steps: int
            the number of pole ladder steps

        Returns
        -------
        transported_tangent_vector : array-like, shape=[n_samples, dimension]
                                                or shape=[1, dimension]

        References
        ----------
        .. [LP2013b] Marco Lorenzi, Xavier Pennec. Efficient Parallel Transport
          of Deformations in Time Series of Images: From Schild's to
          Pole Ladder. Journal of Mathematical Imaging and Vision, Springer
          Verlag, 2013, 50 (1-2), pp. 5-17. ⟨10.1007/s10851-013-0470-3⟩

        .. [GJSP2019] N. Guigui, Shuman Jia, Maxime Sermesant, Xavier Pennec.
          Symmetric Algorithmic Components for Shape Analysis with
          Diffeomorphisms. GSI 2019, Aug 2019, Toulouse, France. pp.10.
          ⟨hal-02148832⟩
        """
        current_point = gs.copy(base_point)
        transported_tangent_vector = gs.copy(tangent_vec_a)
        base_shoot = self.exp(base_point=current_point,
                              tangent_vec=transported_tangent_vector)
        for i_point in range(0, n_steps):
            frac_tangent_vector_b = (i_point + 1) / n_steps * tangent_vec_b
            next_point = self.exp(base_point=base_point,
                                  tangent_vec=frac_tangent_vector_b)
            transported_tangent_vector, base_shoot = self.pole_ladder_step(
                base_point=current_point,
                next_point=next_point,
                base_shoot=base_shoot)
            current_point = next_point

        return transported_tangent_vector
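To make the construction in Examples #1 and #2 concrete, here is a minimal numpy sketch of the pole ladder on the unit sphere, where exp and log have closed forms. It is purely illustrative, not the geomstats implementation; all helper names below are made up for the sketch.

import numpy as np

def sphere_exp(base, vec):
    """Riemannian exponential on S^2: follow the geodesic from base along vec."""
    norm = np.linalg.norm(vec)
    if norm < 1e-12:
        return base
    return np.cos(norm) * base + np.sin(norm) * vec / norm

def sphere_log(base, point):
    """Riemannian logarithm on S^2: tangent vector at base pointing to point."""
    cos_angle = np.clip(base @ point, -1.0, 1.0)
    angle = np.arccos(cos_angle)
    if angle < 1e-12:
        return np.zeros_like(base)
    proj = point - cos_angle * base  # component of point orthogonal to base
    return angle * proj / np.linalg.norm(proj)

def pole_ladder_step(base_point, next_point, base_shoot):
    """One rung: reflect base_shoot through the midpoint of the rung geodesic."""
    mid_point = sphere_exp(base_point, 0.5 * sphere_log(base_point, next_point))
    end_shoot = sphere_exp(mid_point, -sphere_log(mid_point, base_shoot))
    # The geodesic symmetry reverses vectors, hence the minus sign below.
    transported = -sphere_log(next_point, end_shoot)
    next_shoot = sphere_exp(next_point, transported)
    return transported, next_shoot

base = np.array([0.0, 0.0, 1.0])
tan_a = np.array([0.1, 0.0, 0.0])   # vector to transport
tan_b = np.array([0.0, 0.5, 0.0])   # initial speed of the transport geodesic

n_steps = 10
current, shoot = base, sphere_exp(base, tan_a)
for i in range(n_steps):
    next_point = sphere_exp(base, (i + 1) / n_steps * tan_b)
    transported, shoot = pole_ladder_step(current, next_point, shoot)
    current = next_point
print(transported)  # approximately parallel-transported tan_a at exp_base(tan_b)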
Example #3
    def fit(self, X, max_iter=100):
        """Predict for each data point the closest center in terms of
            riemannian_metric distance

        Parameters
        ----------
        X : array-like, shape=[n_samples, n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        max_iter : Maximum number of iterations

        Returns
        -------
        self : object
            Return centroids array
        """
        n_samples = X.shape[0]
        belongs = gs.zeros(n_samples)
        self.centroids = [
            gs.expand_dims(X[randint(0, n_samples - 1)], 0)
            for i in range(self.n_clusters)
        ]
        self.centroids = gs.concatenate(self.centroids)
        index = 0
        while index < max_iter:
            index += 1

            dists = [
                gs.to_ndarray(
                    self.riemannian_metric.dist(self.centroids[i], X), 2, 1)
                for i in range(self.n_clusters)
            ]
            dists = gs.hstack(dists)
            belongs = gs.argmin(dists, 1)
            old_centroids = gs.copy(self.centroids)
            for i in range(self.n_clusters):
                fold = gs.squeeze(X[belongs == i])
                if len(fold) > 0:
                    self.centroids[i] = self.riemannian_metric.mean(fold)

                else:
                    self.centroids[i] = X[randint(0, n_samples - 1)]

            centroids_distances = self.riemannian_metric.dist(
                old_centroids, self.centroids)

            if gs.mean(centroids_distances) < self.tol:
                if self.verbose > 0:
                    print("Convergence Reached after ", index, " iterations")

                return gs.copy(self.centroids)

        return gs.copy(self.centroids)
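For orientation, the two alternating steps of the loop above reduce to ordinary k-means in flat space. A minimal numpy sketch follows (variable names are illustrative); the Riemannian version replaces np.linalg.norm with riemannian_metric.dist and the arithmetic mean with the Frechet mean.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
n_clusters, tol = 3, 1e-6
centroids = X[rng.integers(0, len(X), size=n_clusters)]

for _ in range(100):
    # Assignment step: label each sample with its closest centroid.
    dists = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=-1)
    belongs = np.argmin(dists, axis=1)
    old_centroids = centroids.copy()
    # Update step: move each centroid to the mean of its cluster, or
    # re-seed it from the data if the cluster is empty.
    for i in range(n_clusters):
        fold = X[belongs == i]
        centroids[i] = fold.mean(axis=0) if len(fold) else X[rng.integers(len(X))]
    if np.linalg.norm(centroids - old_centroids, axis=1).mean() < tol:
        break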
Example #4
    def regularize(self, point, point_type=None):
        """
        Regularize a point to the canonical representation
        chosen for SE(n).
        """
        if point_type is None:
            point_type = self.default_point_type

        if point_type == 'vector':
            point = gs.to_ndarray(point, to_ndim=2)
            assert self.belongs(point, point_type=point_type)

            rotations = self.rotations
            dim_rotations = rotations.dimension

            regularized_point = gs.zeros_like(point)
            rot_vec = point[:, :dim_rotations]
            regularized_point[:, :dim_rotations] = rotations.regularize(
                rot_vec, point_type=point_type)
            regularized_point[:, dim_rotations:] = point[:, dim_rotations:]

        elif point_type == 'matrix':
            point = gs.to_ndarray(point, to_ndim=3)
            # TODO(nina): regularization for matrices?
            regularized_point = gs.copy(point)

        return regularized_point
Example #5
def _circle_mean(points):
    """Determine the mean on a circle.

    Data are expected in radians in the range [-pi, pi). The mean is returned
    in the same range. If the mean is unique, this algorithm is guaranteed to
    find it. It is not vulnerable to local minima of the Frechet function. If
    the mean is not unique, the algorithm only returns one of the means. Which
    mean is returned depends on numerical rounding errors.

    References
    ----------
    .. [HH15] Hotz, T. and S. F. Huckemann (2015), "Intrinsic means on the
              circle: Uniqueness, locus and asymptotics", Annals of the
              Institute of Statistical Mathematics 67 (1), 177-193.
              https://arxiv.org/abs/1108.2141
    """
    if points.ndim > 1:
        points_ = Hypersphere.extrinsic_to_angle(points)
    else:
        points_ = gs.copy(points)
    sample_size = points_.shape[0]
    mean0 = gs.mean(points_)
    var0 = gs.sum((points_ - mean0) ** 2)
    sorted_points = gs.sort(points_)
    means = _circle_variances(mean0, var0, sample_size, sorted_points)
    return means[gs.argmin(means[:, 1]), 0]
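Since `_circle_variances` is not shown above, here is a hedged numpy sketch of the effect it stands for, following [HH15]: the intrinsic mean lies among the n candidate angles mean0 + 2*pi*j/n, so it suffices to evaluate the Frechet function at those candidates and keep the best one.

import numpy as np

def frechet_function(mean, points):
    """Sum of squared geodesic (i.e. wrapped) distances on the circle."""
    diff = (points - mean + np.pi) % (2 * np.pi) - np.pi
    return np.sum(diff ** 2)

def circle_mean(points):
    n = len(points)
    mean0 = np.mean(points)
    # Candidate means: the Euclidean mean shifted by multiples of 2*pi/n.
    candidates = (mean0 + 2 * np.pi * np.arange(n) / n + np.pi) % (2 * np.pi) - np.pi
    values = [frechet_function(c, points) for c in candidates]
    return candidates[np.argmin(values)]

angles = np.array([-3.0, 3.0, 3.1])   # points clustered around +-pi
print(circle_mean(angles))            # close to pi, unlike the naive mean ~1.03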
Example #6
    def _procrustes_preprocessing(p, matrix_v, matrix_m, matrix_n):
        """Procrustes preprocessing.

        Parameters
        ----------
        p : int
            Size of the upper-left block at which the matrices are split.
        matrix_v : array-like
        matrix_m : array-like
        matrix_n : array-like

        Returns
        -------
        matrix_v : array-like
        """
        [matrix_d, _, matrix_r] = gs.linalg.svd(matrix_v[..., p:, p:])
        matrix_v_final = gs.copy(matrix_v)
        for i in range(1, p + 1):
            matrix_rd = Matrices.mul(matrix_r, Matrices.transpose(matrix_d))
            sub_matrix_v = gs.matmul(matrix_v[..., :, p:], matrix_rd)
            matrix_v_final = gs.concatenate(
                [gs.concatenate([matrix_m, matrix_n], axis=-2), sub_matrix_v],
                axis=-1)
            det = gs.linalg.det(matrix_v_final)
            if gs.all(det > 0):
                break
            ones = gs.ones(p)
            reflection_vec = gs.concatenate(
                [ones[:-i], gs.array([-1.0] * i)], axis=0)
            mask = gs.cast(det < 0, matrix_v.dtype)
            sign = mask[..., None] * reflection_vec + (1.0 - mask)[...,
                                                                   None] * ones
            matrix_d = gs.einsum("...ij,...i->...ij",
                                 Matrices.transpose(matrix_d), sign)
        return matrix_v_final
Example #7
    def fit(self, X, y, weights=None, compute_training_score=False):
        """Estimate the parameters of the geodesic regression.

        Estimate the intercept and the coefficient defining the
        geodesic regression model.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape=[...,]
            Training input samples.
        y : array-like, shape=[..., {dim, [n,n]}]
            Training target values.
        weights : array-like, shape=[...,]
            Weights associated to the points.
            Optional, default: None.
        compute_training_score : bool
            Whether to compute R^2.
            Optional, default: False.

        Returns
        -------
        self : object
            Returns self.
        """
        times = gs.copy(X)
        if self.center_X:
            self.mean_ = gs.mean(X)
            times -= self.mean_

        if self.method == "extrinsic":
            return self._fit_extrinsic(times, y, weights,
                                       compute_training_score)
        if self.method == "riemannian":
            return self._fit_riemannian(times, y, weights,
                                        compute_training_score)
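The model being fit sends a scalar input t to exp_intercept(t * coef), a geodesic through the intercept with initial speed coef. A small illustrative sketch on the unit sphere follows; the helper and the values are assumptions for the sketch, not taken from the estimator.

import numpy as np

def sphere_exp(base, vec):
    """Riemannian exponential on S^2."""
    norm = np.linalg.norm(vec)
    return base if norm < 1e-12 else np.cos(norm) * base + np.sin(norm) * vec / norm

intercept = np.array([0.0, 0.0, 1.0])   # point on the sphere
coef = np.array([0.3, 0.1, 0.0])        # tangent vector at the intercept
times = np.linspace(0.0, 1.0, 5)
predictions = np.stack([sphere_exp(intercept, t * coef) for t in times])
print(predictions)                       # points along the regression geodesic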
Example #8
    def regularize(self, point, point_type=None):
        """
        Regularize a point to the canonical representation
        chosen for SE(n).
        """
        if point_type is None:
            point_type = self.default_point_type

        if point_type == 'vector':
            point = gs.to_ndarray(point, to_ndim=2)

            rotations = self.rotations
            dim_rotations = rotations.dimension

            rot_vec = point[:, :dim_rotations]
            regularized_rot_vec = rotations.regularize(rot_vec,
                                                       point_type=point_type)

            translation = point[:, dim_rotations:]

            regularized_point = gs.concatenate(
                [regularized_rot_vec, translation], axis=1)

        elif point_type == 'matrix':
            point = gs.to_ndarray(point, to_ndim=3)
            regularized_point = gs.copy(point)

        return regularized_point
Example #9
    def regularize(self, point, point_type=None):
        """
        In 3D, regularize the norm of the rotation vector,
        to be between 0 and pi, following the axis-angle
        representation's convention.

        If the angle angle is between pi and 2pi,
        the function computes its complementary in 2pi and
        inverts the direction of the rotation axis.
        """
        if point_type is None:
            point_type = self.default_point_type

        if point_type == 'vector':
            point = gs.to_ndarray(point, to_ndim=2)
            assert self.belongs(point, point_type)
            n_points, _ = point.shape

            regularized_point = gs.copy(point)
            if self.n == 3:
                angle = gs.linalg.norm(regularized_point, axis=1)
                mask_0 = gs.isclose(angle, 0)
                mask_not_0 = ~mask_0

                mask_pi = gs.isclose(angle, gs.pi)

                k = gs.floor(angle / (2 * gs.pi) + .5)
                norms_ratio = gs.zeros_like(angle)
                norms_ratio[mask_not_0] = (
                    1. - 2. * gs.pi * k[mask_not_0] / angle[mask_not_0])
                norms_ratio[mask_0] = 1
                norms_ratio[mask_pi] = gs.pi / angle[mask_pi]
                for i in range(n_points):
                    regularized_point[i, :] = (norms_ratio[i] *
                                               regularized_point[i, :])
            else:
                # TODO(nina): regularization needed in nD?
                regularized_point = gs.copy(point)

            assert gs.ndim(regularized_point) == 2

        elif point_type == 'matrix':
            point = gs.to_ndarray(point, to_ndim=3)
            # TODO(nina): regularization for matrices?
            regularized_point = gs.copy(point)

        return regularized_point
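A quick numeric check of the formula above in plain numpy: a rotation vector of angle 3*pi/2 regularizes to the equivalent rotation of angle pi/2 around the inverted axis.

import numpy as np

rot_vec = np.array([0.0, 0.0, 1.5 * np.pi])
angle = np.linalg.norm(rot_vec)
k = np.floor(angle / (2 * np.pi) + 0.5)            # nearest multiple of 2*pi
regularized = (1.0 - 2.0 * np.pi * k / angle) * rot_vec
print(regularized, np.linalg.norm(regularized))    # [0, 0, -pi/2], norm pi/2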
Example #10
    def pole_ladder_parallel_transport(self,
                                       tangent_vec_a,
                                       tangent_vec_b,
                                       base_point,
                                       n_steps=1):
        """Approximate parallel transport using the pole ladder scheme.

        Approximation of Parallel transport using the pole ladder scheme of
        tangent vector a along the geodesic starting at the initial point
        base_point with initial tangent vector the tangent vector b.

        Returns a tangent vector at the point
        exp_(base_point)(tangent_vector_b).

        Parameters
        ----------
        tangent_vec_a : array-like, shape=[n_samples, dimension]
                                    or shape=[1, dimension]
        tangent_vec_b : array-like, shape=[n_samples, dimension]
                                    or shape=[1, dimension]
        base_point : array-like, shape=[n_samples, dimension]
                                 or shape=[1, dimension]
        n_steps : int
            The number of pole ladder steps.

        Returns
        -------
        transported_tangent_vector : array-like, shape=[n_samples, dimension]
                                                 or shape=[1, dimension]
        """
        current_point = gs.copy(base_point)
        transported_tangent_vector = gs.copy(tangent_vec_a)
        base_shoot = self.exp(base_point=current_point,
                              tangent_vec=transported_tangent_vector)
        for i_point in range(0, n_steps):
            frac_tangent_vector_b = (i_point + 1) / n_steps * tangent_vec_b
            next_point = self.exp(base_point=base_point,
                                  tangent_vec=frac_tangent_vector_b)
            transported_tangent_vector, base_shoot = self.pole_ladder_step(
                base_point=current_point,
                next_point=next_point,
                base_shoot=base_shoot)
            current_point = next_point

        return transported_tangent_vector
Example #11
    def set_to_array(self, points):
        r"""Sample in Graph Space.

        Parameters
        ----------
        points : list of Graph or array-like, shape=[..., n, n].
                Points to be turned into an array
        Returns
        -------
        graph_array : array-like, shape=[..., nodes, nodes]
                An array containing all the Graphs.
        """
        return gs.copy(points)
Example #12
    def fit(self, data, max_iter=100):
        """Provide clusters centroids and data labels.

        Labels data by minimizing the distance between data points
        and cluster centroids chosen from the data points.
        Minimization is performed by swapping the centroids and data points.

        Parameters
        ----------
        data : array-like, shape=[n_samples, dim]
            Training data, where n_samples is the number of samples and
            dim is the number of dimensions.
        max_iter : int
            Maximum number of iterations.
            Optional, default: 100.

        Returns
        -------
        cluster_centers_ : array-like, shape=[n_clusters, dim]
            Centroids.
        """
        distances = self.metric.dist_pairwise(data)

        medoids_indices = self._initialize_medoids(distances)

        for iteration in range(max_iter):

            old_medoids_indices = gs.copy(medoids_indices)

            labels = gs.argmin(distances[medoids_indices, :], axis=0)

            self._update_medoid_indexes(distances, labels, medoids_indices)

            if gs.all(old_medoids_indices == medoids_indices):
                break
            if iteration == max_iter - 1:
                logging.warning('Maximum number of iterations reached before '
                                'convergence. Consider increasing max_iter to '
                                'improve the fit.')

        self.cluster_centers_ = data[medoids_indices]
        self.labels_ = labels
        self.medoid_indices_ = medoids_indices

        return self.cluster_centers_
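`_update_medoid_indexes` is not shown above; the standalone numpy sketch below reproduces the usual k-medoids update it stands for: relabel the points, then move each medoid to the cluster member with the smallest summed in-cluster distance. Names and data are illustrative.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(30, 2))
distances = np.linalg.norm(X[:, None] - X[None, :], axis=-1)   # pairwise matrix
medoids = rng.choice(len(X), size=3, replace=False)

for _ in range(100):
    old_medoids = medoids.copy()
    labels = np.argmin(distances[medoids, :], axis=0)
    for i in range(len(medoids)):
        cluster = np.flatnonzero(labels == i)
        if len(cluster):
            # Best medoid: the member minimizing total distance to the rest.
            within = distances[np.ix_(cluster, cluster)].sum(axis=1)
            medoids[i] = cluster[np.argmin(within)]
    if np.array_equal(old_medoids, medoids):
        break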
Example #13
    def predict(self, X, y=None):
        """Predict the manifold value for each input.

        Parameters
        ----------
        X : array-like, shape=[...,
            Input data.

        Returns
        -------
        self : array-like, shape=[...,]
            Array of predicted cluster indices for each sample.
        """
        if self.coef_ is None:
            raise RuntimeError("Fit method must be called before predict.")

        times = gs.copy(X)

        if self.center_X:
            times = times - self.mean_

        return self._model(times, self.coef_, self.intercept_)
Example #14
    def regularize(self, point):
        """Regularize a point to the default representation for SE(n).

        Parameters
        ----------
        point : array-like, shape=[..., dim]
            Point to regularize.

        Returns
        -------
        point : array-like, shape=[..., dim]
            Regularized point.
        """
        rotations = self.rotations
        dim_rotations = rotations.dim

        regularized_point = gs.copy(point)
        rot_vec = regularized_point[..., :dim_rotations]
        regularized_rot_vec = rotations.regularize(rot_vec)

        translation = regularized_point[..., dim_rotations:]

        return gs.concatenate([regularized_rot_vec, translation], axis=-1)
Example #15
    def regularize(self, point, point_type=None):
        """Regularize a point to the default representation for SE(n).

        Parameters
        ----------
        point : array-like, shape=[n_samples, {dimension, [n + 1, n + 1]}]
            Point to regularize.
        point_type : str, {'vector', 'matrix'}, optional
            default: self.default_point_type

        Returns
        -------
        point : array-like, shape=[n_samples, {dimension, [n + 1, n + 1]}]
            Regularized point.
        """
        if point_type is None:
            point_type = self.default_point_type

        if point_type == 'vector':
            point = gs.to_ndarray(point, to_ndim=2)

            rotations = self.rotations
            dim_rotations = rotations.dimension

            rot_vec = point[:, :dim_rotations]
            regularized_rot_vec = rotations.regularize(rot_vec,
                                                       point_type=point_type)

            translation = point[:, dim_rotations:]

            regularized_point = gs.concatenate(
                [regularized_rot_vec, translation], axis=1)

        elif point_type == 'matrix':
            point = gs.to_ndarray(point, to_ndim=3)
            regularized_point = gs.copy(point)

        return regularized_point
Example #16
    def fit(self, X, max_iter=100):
        """Provide clusters centroids and data labels.

        Alternate between computing the mean of each cluster
        and labelling data according to the new positions of the centroids.

        Parameters
        ----------
        X : array-like, shape=[..., n_features]
            Training data, where n_samples is the number of samples and
            n_features is the number of features.
        max_iter : int
            Maximum number of iterations.
            Optional, default: 100.

        Returns
        -------
        centroids : array-like, shape=[n_clusters, n_features]
            Centroids.
        """
        n_samples = X.shape[0]
        self.centroids = [gs.expand_dims(X[randint(0, n_samples - 1)], 0)
                          for i in range(self.n_clusters)]
        self.centroids = gs.concatenate(self.centroids, axis=0)
        index = 0
        while index < max_iter:
            index += 1

            dists = [gs.to_ndarray(
                     self.metric.dist(self.centroids[i], X), 2, 1)**2
                     for i in range(self.n_clusters)]
            dists = gs.hstack(dists)

            if self.fuzzy:
                dists[np.where(dists == 0)] = 0.00001
                weights = 1 / (dists * np.sum(1 / dists, axis=1)[:, None])
            else:
                belongs = gs.argmin(dists, 1)

            old_centroids = gs.copy(self.centroids)
            for i in range(self.n_clusters):

                if self.fuzzy:

                    mean = FrechetMean(
                        metric=self.metric,
                        method=self.mean_method,
                        max_iter=150,
                        lr=self.lr,
                        point_type=self.point_type,
                        )

                    mean.fit(X, weights=weights[:, i])
                    self.centroids[i] = mean.estimate_

                else:
                    fold = gs.squeeze(X[belongs == i])

                    if len(fold) > 0:

                        mean = FrechetMean(
                            metric=self.metric,
                            method=self.mean_method,
                            max_iter=150,
                            lr=self.lr,
                            point_type=self.point_type)
                        mean.fit(fold)

                        self.centroids[i] = mean.estimate_
                    else:
                        self.centroids[i] = X[randint(0, n_samples - 1)]

            centroids_distances = self.metric.dist(
                old_centroids, self.centroids)

            if gs.mean(centroids_distances) < self.tol:
                if self.verbose > 0:
                    logging.info('Convergence reached after {} '
                                 'iterations'.format(index))

                if self.n_clusters == 1:
                    self.centroids = gs.squeeze(self.centroids, axis=0)

                return gs.copy(self.centroids)

        if index == max_iter:
            logging.warning('K-means maximum number of iterations {} reached. '
                            'The mean may be inaccurate'.format(max_iter))

        if self.n_clusters == 1:
            self.centroids = gs.squeeze(self.centroids, axis=0)
        return gs.copy(self.centroids)
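In the fuzzy branch above, `dists` holds squared distances, so the weights are fuzzy c-means memberships with fuzzifier m=2: each weight is proportional to the inverse squared distance to a centroid, normalized across clusters. A standalone numpy check that each row of weights sums to one:

import numpy as np

sq_dists = np.array([[1.0, 4.0],
                     [0.25, 1.0]])                 # 2 samples x 2 clusters
sq_dists[sq_dists == 0] = 1e-5                     # guard against division by zero
weights = 1.0 / (sq_dists * np.sum(1.0 / sq_dists, axis=1)[:, None])
print(weights)                                     # [[0.8, 0.2], [0.8, 0.2]]
print(weights.sum(axis=1))                         # [1.0, 1.0]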
Example #17
def online_kmeans(X,
                  metric,
                  n_clusters,
                  n_repetitions=20,
                  atol=1e-5,
                  max_iter=5e4):
    """Perform online K-means clustering.

    Perform online version of k-means algorithm on data contained in X.
    The data points are treated sequentially and the cluster centers are
    updated one at a time. This version of k-means avoids computing the
    mean of each cluster at each iteration and is therefore less
    computationally intensive than the offline version.

    In the setting of quantization of probability distributions, this
    algorithm is also known as Competitive Learning Riemannian Quantization.
    It computes the closest approximation of the empirical distribution of
    data by a discrete distribution supported by a smaller number of points
    with respect to the Wasserstein distance. This smaller number of points
    is n_clusters.

    Parameters
    ----------
    X : array-like, shape=[..., n_features]
        Input data. It is treated sequentially by the algorithm, i.e.
        one datum is chosen randomly at each iteration.
    metric : object
        Metric of the space in which the data lives. At each iteration,
        one of the cluster centers is moved in the direction of the new
        datum, according to the exponential map of the underlying space,
        which is a method of metric.
    n_clusters : int
        Number of clusters of the k-means clustering, or number of desired
        atoms of the quantized distribution.
    n_repetitions : int, default=20
        The cluster centers are updated using decreasing step sizes, each
        of which stays constant for n_repetitions iterations to allow a better
        exploration of the data points.
    atol : float, default=1e-5
        Convergence is reached when the gap between successive positions
        of an updated center falls below atol.
    max_iter : int, default=5e4
        Maximum number of iterations. If it is reached, the
        quantization may be inaccurate.

    Returns
    -------
    cluster_centers : array, shape=[n_clusters, n_features]
        Coordinates of cluster centers.
    labels : array, shape=[n_samples]
        Cluster labels for each point.
    """
    n_samples = X.shape[0]

    random_indices = gs.random.randint(low=0,
                                       high=n_samples,
                                       size=(n_clusters, ))
    cluster_centers = gs.get_slice(X, gs.cast(random_indices, gs.int32))

    gap = 1.0
    iteration = 0

    while iteration < max_iter:
        iteration += 1
        step_size = gs.floor(gs.array(iteration / n_repetitions)) + 1

        random_index = gs.random.randint(low=0, high=n_samples, size=(1, ))
        point = gs.get_slice(X, gs.cast(random_index, gs.int32))

        index_to_update = metric.closest_neighbor_index(point, cluster_centers)
        center_to_update = gs.copy(
            gs.get_slice(cluster_centers, index_to_update))

        tangent_vec_update = metric.log(
            point=point, base_point=center_to_update) / (step_size + 1)
        new_center = metric.exp(tangent_vec=tangent_vec_update,
                                base_point=center_to_update)
        gap = metric.dist(center_to_update, new_center)
        if gap == 0 and iteration == 1:
            gap = gs.array(1.0)

        cluster_centers[index_to_update, :] = new_center

        if gs.isclose(gap, 0, atol=atol):
            break

    if iteration == max_iter:
        logging.warning("Maximum number of iterations {} reached. The "
                        "clustering may be inaccurate".format(max_iter))

    labels = gs.zeros(n_samples)
    for i in range(n_samples):
        labels[i] = int(metric.closest_neighbor_index(X[i], cluster_centers))

    return cluster_centers, labels
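The core update above is center <- exp_center(log_center(point) / (step_size + 1)). In flat space, where exp and log reduce to addition and subtraction, the update behaves like a running average, as this minimal sketch shows (data and names are illustrative):

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(loc=3.0, size=(2000, 2))
n_repetitions = 20

center = X[0].copy()
for iteration in range(1, 2000):
    step_size = iteration // n_repetitions + 1      # constant over n_repetitions draws
    point = X[rng.integers(len(X))]
    tangent = (point - center) / (step_size + 1)    # log map, shrunk by the step size
    center = center + tangent                       # exp map
print(center)                                       # drifts toward the mean [3, 3]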
Example #18
File: kmeans.py  Project: nkoep/geomstats
    def fit(self, X, max_iter=100):
        """Provide clusters centroids and data labels.

        Alternate between computing the mean of each cluster
        and labelling data according to the new positions of the centroids.

        Parameters
        ----------
        X : array-like, shape=[n_samples, n_features]
            Training data, where n_samples is the number of samples and
            n_features is the number of features.

        max_iter : int
            Maximum number of iterations

        Returns
        -------
        centroids : array-like, shape=[n_clusters, n_features]
            Centroids array.
        """
        n_samples = X.shape[0]
        belongs = gs.zeros(n_samples)
        self.centroids = [
            gs.expand_dims(X[randint(0, n_samples - 1)], 0)
            for i in range(self.n_clusters)
        ]
        self.centroids = gs.concatenate(self.centroids)
        index = 0
        while index < max_iter:
            index += 1

            dists = [
                gs.to_ndarray(
                    self.riemannian_metric.dist(self.centroids[i], X), 2, 1)
                for i in range(self.n_clusters)
            ]
            dists = gs.hstack(dists)
            belongs = gs.argmin(dists, 1)
            old_centroids = gs.copy(self.centroids)
            for i in range(self.n_clusters):
                fold = gs.squeeze(X[belongs == i])

                if len(fold) > 0:

                    mean = FrechetMean(metric=self.riemannian_metric,
                                       method=self.mean_method,
                                       max_iter=150)
                    mean.fit(fold)

                    self.centroids[i] = mean.estimate_
                else:
                    self.centroids[i] = X[randint(0, n_samples - 1)]

            centroids_distances = self.riemannian_metric.dist(
                old_centroids, self.centroids)

            if gs.mean(centroids_distances) < self.tol:
                if self.verbose > 0:
                    logging.info('Convergence reached after {} '
                                 'iterations'.format(index))

                return gs.copy(self.centroids)

        if index == max_iter:
            logging.warning('K-means maximum number of iterations {} reached. '
                            'The mean may be inaccurate'.format(max_iter))

        return gs.copy(self.centroids)
Example #19
    def _fit(self, X, base_point=None):
        """Fit the model by computing full SVD on X.

        Parameters
        ----------
        X : array-like, shape=[..., n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        base_point : array-like, shape=[..., n_features]
            Point at which to perform the tangent PCA.
            Optional, defaults to the Frechet mean if None.

        Returns
        -------
        U, S, V : array-like
            Matrices of the SVD decomposition
        """
        if base_point is None:
            mean = FrechetMean(metric=self.metric, point_type=self.point_type)
            mean.fit(X)
            base_point = mean.estimate_

        tangent_vecs = self.metric.log(X, base_point=base_point)

        if self.point_type == 'matrix':
            if Matrices.is_symmetric(tangent_vecs).all():
                X = SymmetricMatrices.to_vector(tangent_vecs)
            else:
                X = gs.reshape(tangent_vecs, (len(X), -1))
        else:
            X = tangent_vecs

        if self.n_components is None:
            n_components = min(X.shape)
        else:
            n_components = self.n_components
        n_samples, n_features = X.shape

        if n_components == 'mle':
            if n_samples < n_features:
                raise ValueError("n_components='mle' is only supported "
                                 "if n_samples >= n_features")
        elif not 0 <= n_components <= min(n_samples, n_features):
            raise ValueError("n_components=%r must be between 0 and "
                             "min(n_samples, n_features)=%r with "
                             "svd_solver='full'" %
                             (n_components, min(n_samples, n_features)))
        elif n_components >= 1:
            if not isinstance(n_components, numbers.Integral):
                raise ValueError("n_components=%r must be of type int "
                                 "when greater than or equal to 1, "
                                 "was of type=%r" %
                                 (n_components, type(n_components)))

        # Center data - the mean should be 0 if base_point is the Frechet mean
        self.mean_ = gs.mean(X, axis=0)
        X -= self.mean_

        U, S, V = gs.linalg.svd(X, full_matrices=False)
        # flip eigenvectors' sign to enforce deterministic output
        U, V = svd_flip(U, V)

        components_ = V

        # Get variance explained by singular values
        explained_variance_ = (S**2) / (n_samples - 1)
        total_var = explained_variance_.sum()
        explained_variance_ratio_ = explained_variance_ / total_var
        singular_values_ = gs.copy(S)  # Store the singular values.

        # Postprocess the number of components required
        if n_components == 'mle':
            n_components = \
                _infer_dimension_(explained_variance_, n_samples, n_features)
        elif 0 < n_components < 1.0:
            # number of components for which the cumulated explained
            # variance percentage is superior to the desired threshold
            ratio_cumsum = stable_cumsum(explained_variance_ratio_)
            n_components = gs.searchsorted(ratio_cumsum, n_components) + 1

        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = explained_variance_[n_components:].mean()
        else:
            self.noise_variance_ = 0.

        self.base_point_fit = base_point
        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = components_[:n_components]
        self.n_components_ = int(n_components)
        self.explained_variance_ = explained_variance_[:n_components]
        self.explained_variance_ratio_ = \
            explained_variance_ratio_[:n_components]
        self.singular_values_ = singular_values_[:n_components]

        return U, S, V
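In essence, the method log-maps the data at the base point and then runs ordinary linear PCA (via SVD) in that tangent space. A self-contained numpy sketch on the sphere follows; the sphere log map is written out so the block runs on its own, and the data are illustrative.

import numpy as np

def sphere_log(base, point):
    """Riemannian logarithm on S^2: tangent vector at base pointing to point."""
    cos_angle = np.clip(base @ point, -1.0, 1.0)
    angle = np.arccos(cos_angle)
    if angle < 1e-12:
        return np.zeros_like(base)
    proj = point - cos_angle * base
    return angle * proj / np.linalg.norm(proj)

rng = np.random.default_rng(0)
base_point = np.array([0.0, 0.0, 1.0])
points = rng.normal(size=(50, 3))
points /= np.linalg.norm(points, axis=1, keepdims=True)    # project onto S^2
points[:, 2] = np.abs(points[:, 2])                        # keep one hemisphere

tangent_vecs = np.stack([sphere_log(base_point, p) for p in points])
tangent_vecs -= tangent_vecs.mean(axis=0)                  # center in tangent space
U, S, Vt = np.linalg.svd(tangent_vecs, full_matrices=False)
explained_variance = S ** 2 / (len(points) - 1)
print(Vt[:2])                   # top-2 principal directions in the tangent space
print(explained_variance[:2])   # their explained variances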
Example #20
    def to_array(self):
        """Return a copy of the adjacency matrix as an array."""
        return gs.copy(self.adj)
Example #21
    def pole_ladder_parallel_transport(self,
                                       tangent_vec_a,
                                       tangent_vec_b,
                                       base_point,
                                       n_steps=1,
                                       **single_step_kwargs):
        """Approximate parallel transport using the pole ladder scheme.

        Approximate parallel transport using the pole ladder scheme [LP2013b]_
        [GJSP2019]_. `tangent_vec_a` is transported along the geodesic starting
        at `base_point` with initial tangent vector `tangent_vec_b`.

        Parameters
        ----------
        tangent_vec_a : array-like, shape=[n_samples, dimension]
            Tangent vector at base point to transport.
        tangent_vec_b : array-like, shape=[n_samples, dimension]
            Tangent vector at base point, initial speed of the geodesic along
            which to transport.
        base_point : array-like, shape=[n_samples, dimension]
            Point on the manifold, initial position of the geodesic along
            which to transport.
        n_steps : int
            The number of pole ladder steps.
        **single_step_kwargs : keyword arguments
            Optional arguments passed to the single-step function.

        Returns
        -------
        ladder : dict of array-like and callable with following keys
            transported_tangent_vec : array-like, shape=[n_samples, dim]
                Approximation of the parallel transport of tangent vector a.
            trajectory : list of list of callable, len=n_steps
                List of lists containing the geodesics of the
                construction, only if `return_geodesics=True` in the step
                function. The geodesics are methods of the class connection.

        References
        ----------
        .. [LP2013b] Marco Lorenzi, Xavier Pennec. Efficient Parallel Transport
          of Deformations in Time Series of Images: From Schild's to
          Pole Ladder. Journal of Mathematical Imaging and Vision, Springer
          Verlag, 2013, 50 (1-2), pp. 5-17. ⟨10.1007/s10851-013-0470-3⟩

        .. [GJSP2019] N. Guigui, Shuman Jia, Maxime Sermesant, Xavier Pennec.
          Symmetric Algorithmic Components for Shape Analysis with
          Diffeomorphisms. GSI 2019, Aug 2019, Toulouse, France. pp.10.
          ⟨hal-02148832⟩
        """
        current_point = gs.copy(base_point)
        next_tangent_vec = gs.copy(tangent_vec_a)
        base_shoot = self.exp(base_point=current_point,
                              tangent_vec=next_tangent_vec)
        trajectory = []
        for i_point in range(0, n_steps):
            frac_tangent_vector_b = (i_point + 1) / n_steps * tangent_vec_b
            next_point = self.exp(base_point=base_point,
                                  tangent_vec=frac_tangent_vector_b)
            next_step = self._pole_ladder_step(base_point=current_point,
                                               next_point=next_point,
                                               base_shoot=base_shoot,
                                               **single_step_kwargs)
            current_point = next_point
            base_shoot = next_step['end_point']
            trajectory.append(next_step['geodesics'])

        return {
            'transported_tangent_vec': next_step['next_tangent_vec'],
            'trajectory': trajectory
        }
Example #22
    def fit(self, X):
        """Provide clusters centroids and data labels.

        Alternate between computing the mean of each cluster
        and labelling data according to the new positions of the centroids.

        Parameters
        ----------
        X : array-like, shape=[..., n_features]
            Training data, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        centroids : array-like, shape=[n_clusters, n_features]
            Centroids.
        """
        n_samples = X.shape[0]
        if self.verbose > 0:
            logging.info("Initializing...")
        if self.init == "kmeans++":
            centroids = [gs.expand_dims(X[randint(0, n_samples - 1)], 0)]
            for i in range(self.n_clusters - 1):
                dists = [
                    gs.to_ndarray(self.metric.dist(centroids[j], X), 2, 1)
                    for j in range(i + 1)
                ]
                dists = gs.hstack(dists)
                dists_to_closest_centroid = gs.amin(dists, 1)
                indices = gs.arange(n_samples)
                weights = dists_to_closest_centroid / gs.sum(
                    dists_to_closest_centroid)
                index = rv_discrete(values=(indices, weights)).rvs()
                centroids.append(gs.expand_dims(X[index], 0))
        else:
            centroids = [
                gs.expand_dims(X[randint(0, n_samples - 1)], 0)
                for i in range(self.n_clusters)
            ]
        self.centroids = gs.concatenate(centroids, axis=0)
        self.init_centroids = gs.concatenate(centroids, axis=0)

        dists = [
            gs.to_ndarray(self.metric.dist(self.centroids[i], X), 2, 1)
            for i in range(self.n_clusters)
        ]
        dists = gs.hstack(dists)
        self.labels = gs.argmin(dists, 1)
        index = 0
        while index < self.max_iter:
            index += 1
            if self.verbose > 0:
                logging.info(f"Iteration {index}...")

            old_centroids = gs.copy(self.centroids)
            for i in range(self.n_clusters):
                fold = gs.squeeze(X[self.labels == i])

                if len(fold) > 0:

                    mean = FrechetMean(
                        metric=self.metric,
                        max_iter=self.max_iter_mean,
                        point_type=self.point_type,
                        method=self.mean_method,
                        init_step_size=self.init_step_size,
                    )
                    mean.fit(fold)

                    self.centroids[i] = mean.estimate_
                else:
                    self.centroids[i] = X[randint(0, n_samples - 1)]

            dists = [
                gs.to_ndarray(self.metric.dist(self.centroids[i], X), 2, 1)
                for i in range(self.n_clusters)
            ]
            dists = gs.hstack(dists)
            self.labels = gs.argmin(dists, 1)
            dists_to_closest_centroid = gs.amin(dists, 1)
            self.inertia = gs.sum(dists_to_closest_centroid**2)
            centroids_distances = self.metric.dist(old_centroids,
                                                   self.centroids)
            if self.verbose > 0:
                logging.info(
                    f"Convergence criterion at the end of iteration {index} "
                    f"is {gs.mean(centroids_distances)}.")

            if gs.mean(centroids_distances) < self.tol:
                if self.verbose > 0:
                    logging.info(
                        f"Convergence reached after {index} iterations.")

                if self.n_clusters == 1:
                    self.centroids = gs.squeeze(self.centroids, axis=0)

                return gs.copy(self.centroids)

        if index == self.max_iter:
            logging.warning(
                f"K-means maximum number of iterations {self.max_iter} reached. "
                "The mean may be inaccurate.")

        if self.n_clusters == 1:
            self.centroids = gs.squeeze(self.centroids, axis=0)
        return gs.copy(self.centroids)
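The seeding loop above can be sketched standalone. Note that classical k-means++ samples each new seed with probability proportional to the squared distance to the closest existing seed, while the snippet above weights by plain distance; the hedged numpy sketch below uses the squared-distance variant.

import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))
n_clusters = 4

centroids = [X[rng.integers(len(X))]]
for _ in range(n_clusters - 1):
    dists = np.min(
        np.linalg.norm(X[:, None] - np.stack(centroids)[None, :], axis=-1),
        axis=1)
    probs = dists ** 2 / np.sum(dists ** 2)        # squared-distance weighting
    centroids.append(X[rng.choice(len(X), p=probs)])
centroids = np.stack(centroids)
print(centroids)                                   # spread-out initial seeds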