def func_scalar_output(tangent_vec_a, tangent_vec_b):
    """Contract two batches of vectors along their last axis.

    The contracted values are passed through ``helper.to_scalar`` before
    being returned.
    """
    contracted = gs.einsum(
        '...i,...i->...', tangent_vec_a, tangent_vec_b)
    return helper.to_scalar(contracted)
def log(self, point, base_point):
    """Compute the Riemannian logarithm of a point.

    Parameters
    ----------
    point : array-like, shape=[..., dim + 1]
        Point on the hypersphere.
    base_point : array-like, shape=[..., dim + 1]
        Point on the hypersphere.

    Returns
    -------
    log : array-like, shape=[..., dim + 1]
        Tangent vector at the base point equal to the Riemannian logarithm
        of point at the base point.
    """
    ambient_metric = self.embedding_metric
    base_norm = ambient_metric.norm(base_point)
    point_norm = ambient_metric.norm(point)
    cos_angle = (ambient_metric.inner_product(base_point, point)
                 / (base_norm * point_norm))
    cos_angle = gs.clip(cos_angle, -1., 1.)

    angle = gs.to_ndarray(gs.arccos(cos_angle), to_ndim=1)
    angle = gs.to_ndarray(angle, to_ndim=2, axis=1)

    is_zero_angle = gs.isclose(angle, 0.)
    is_nonzero_angle = gs.equal(is_zero_angle, gs.array(False))
    zero_weight = gs.cast(is_zero_angle, gs.float32)
    nonzero_weight = gs.cast(is_nonzero_angle, gs.float32)

    def even_taylor(coeffs):
        """Evaluate 1 + c1 a^2 + c3 a^4 + c5 a^6 + c7 a^8 at the angle."""
        return (1.
                + coeffs[1] * angle**2
                + coeffs[3] * angle**4
                + coeffs[5] * angle**6
                + coeffs[7] * angle**8)

    coef_point = gs.zeros_like(angle)
    coef_base = gs.zeros_like(angle)

    # Entries flagged as close to zero take the polynomial value; this has
    # to be evaluated before the angle is shifted below.
    coef_point += zero_weight * even_taylor(INV_SIN_TAYLOR_COEFFS)
    coef_base += zero_weight * even_taylor(INV_TAN_TAYLOR_COEFFS)

    # Shift the flagged angles by one so that the divisions by sin and tan
    # below never hit zero; those entries are multiplied by a zero weight.
    angle += zero_weight * 1.

    coef_point += nonzero_weight * angle / gs.sin(angle)
    coef_base += nonzero_weight * angle / gs.tan(angle)

    log_vec = (gs.einsum('...i,...j->...j', coef_point, point)
               - gs.einsum('...i,...j->...j', coef_base, base_point))

    # Count, per row, the coordinates in which point and base_point differ.
    differs = gs.equal(gs.isclose(point, base_point), gs.array(False))
    differs = gs.cast(differs, gs.float32)
    differs = gs.to_ndarray(differs, to_ndim=1)
    differs = gs.to_ndarray(differs, to_ndim=2)
    n_differing_coords = gs.sum(differs, axis=1)

    same_point = gs.isclose(n_differing_coords, 0.)
    same_point = gs.cast(same_point, gs.float32)
    same_point = gs.to_ndarray(same_point, to_ndim=2, axis=1)
    same_point = gs.cast(same_point, gs.float32)

    # Rows where both inputs coincide are set to zero.
    log_vec -= same_point * log_vec

    return log_vec
def iterated_integrability_tensor_derivative_parallel(
        self, horizontal_vec_x, horizontal_vec_y, base_point):
    r"""Compute iterated derivatives of the integrability tensor A.

    The iterated horizontal covariant derivative
    :math:`\nabla_X (A_Y A_X Y)` (where :math:`X` and :math:`Y` are
    horizontal vector fields) is a key ingredient in the computation of
    the covariant derivative of the directional curvature in a submersion.

    The components :math:`\nabla_X (A_Y A_X Y)`, :math:`A_X A_Y A_X Y`,
    :math:`\nabla_X (A_X Y)`, and intermediate computations
    :math:`A_Y A_X Y` and :math:`A_X Y` are computed here for the
    Kendall shape space in the special case of quotient-parallel vector
    fields :math:`X, Y` extending the values horizontal_vec_x and
    horizontal_vec_y by parallel transport in a neighborhood.
    Such vector fields verify :math:`\nabla_X^X = A_X X` and
    :math:`\nabla_X^Y = A_X Y`.

    Parameters
    ----------
    horizontal_vec_x : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`.
    horizontal_vec_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`.
    base_point : array-like, shape=[..., k_landmarks, m_ambient]
        Point of the total space.

    Returns
    -------
    nabla_x_a_y_a_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of
        :math:`\nabla_X^S (A_Y A_X Y)` with `X = horizontal_vec_x` and
        `Y = horizontal_vec_y`.
    a_x_a_y_a_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of
        :math:`A_X A_Y A_X Y` with `X = horizontal_vec_x` and
        `Y = horizontal_vec_y`.
    nabla_x_a_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of
        :math:`\nabla_X^S (A_X Y)` with `X = horizontal_vec_x` and
        `Y = horizontal_vec_y`.
    a_y_a_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of :math:`A_Y A_X Y` with
        `X = horizontal_vec_x` and `Y = horizontal_vec_y`.
    a_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of :math:`A_X Y` with
        `X = horizontal_vec_x` and `Y = horizontal_vec_y`.

    Raises
    ------
    ValueError
        If `self.is_centered(base_point)` is not true everywhere, or if
        either input vector fails `self.is_horizontal` at `base_point`.

    References
    ----------
    .. [Pennec] Pennec, Xavier. Computing the curvature and its gradient
        in Kendall shape spaces. Unpublished.
    """
    # Input validation: all three checks must hold for every batch entry.
    if not gs.all(self.is_centered(base_point)):
        raise ValueError("The base_point does not belong to the pre-shape"
                         " space")
    if not gs.all(self.is_horizontal(horizontal_vec_x, base_point)):
        raise ValueError("Tangent vector x is not horizontal")
    if not gs.all(self.is_horizontal(horizontal_vec_y, base_point)):
        raise ValueError("Tangent vector y is not horizontal")

    p_top = Matrices.transpose(base_point)
    p_top_p = gs.matmul(p_top, base_point)

    def sylv_p(mat_b):
        """Solves Sylvester equation for vertical component."""
        # The right-hand side is mat_b minus its transpose; both
        # coefficient matrices are p_top_p.
        return gs.linalg.solve_sylvester(
            p_top_p, p_top_p, mat_b - Matrices.transpose(mat_b))

    y_top = Matrices.transpose(horizontal_vec_y)
    x_top = Matrices.transpose(horizontal_vec_x)
    x_y_top = gs.matmul(y_top, horizontal_vec_x)
    omega_xy = sylv_p(x_y_top)
    # Returned last; corresponds to the `a_x_y` entry of the docstring.
    vertical_vec_v = gs.matmul(base_point, omega_xy)
    omega_xy_x = gs.matmul(horizontal_vec_x, omega_xy)
    omega_xy_y = gs.matmul(horizontal_vec_y, omega_xy)

    v_top = Matrices.transpose(vertical_vec_v)
    x_v_top = gs.matmul(v_top, horizontal_vec_x)
    omega_xv = sylv_p(x_v_top)
    omega_xv_p = gs.matmul(base_point, omega_xv)

    y_v_top = gs.matmul(v_top, horizontal_vec_y)
    omega_yv = sylv_p(y_v_top)
    omega_yv_p = gs.matmul(base_point, omega_yv)

    # Returned third; corresponds to the `nabla_x_a_x_y` docstring entry.
    nabla_x_v = 3.0 * omega_xv_p + omega_xy_x
    a_y_a_x_y = omega_yv_p + omega_xy_y
    tmp_mat = gs.matmul(x_top, a_y_a_x_y)
    a_x_a_y_a_x_y = -gs.matmul(base_point, sylv_p(tmp_mat))

    omega_xv_y = gs.matmul(horizontal_vec_y, omega_xv)
    omega_yv_x = gs.matmul(horizontal_vec_x, omega_yv)
    omega_xy_v = gs.matmul(vertical_vec_v, omega_xy)
    # Frobenius product of v with itself, broadcast onto base_point.
    norms = Matrices.frobenius_product(vertical_vec_v, vertical_vec_v)
    sq_norm_v_p = gs.einsum("...,...ij->...ij", norms, base_point)

    # tmp_mat is reused here for a second, unrelated Sylvester solve.
    tmp_mat = gs.matmul(p_top, 3.0 * omega_xv_y + 2.0 *
                        omega_yv_x) + gs.matmul(y_top, omega_xy_x)

    # Returned first; corresponds to the `nabla_x_a_y_a_x_y` entry.
    nabla_x_a_y_v = (3.0 * omega_xv_y + omega_yv_x + omega_xy_v
                     - gs.matmul(base_point, sylv_p(tmp_mat))
                     + sq_norm_v_p)

    return nabla_x_a_y_v, a_x_a_y_a_x_y, nabla_x_v, a_y_a_x_y, vertical_vec_v
def foo(tangent_vec_a, tangent_vec_b):
    """Multiply two 2-D arrays entry by entry.

    The 'ni,ni->ni' product is passed through ``helper.to_vector`` before
    being returned.
    """
    elementwise = gs.einsum('ni,ni->ni', tangent_vec_a, tangent_vec_b)
    return helper.to_vector(elementwise)
def foo_scalar_input_output(tangent_vec_a, tangent_vec_b, in_scalar):
    """Combine a row-wise dot product with a 2-D `in_scalar` array.

    The rows of the two vector arrays are contracted ('ni,ni->n'), the
    result is contracted with `in_scalar` ('n,nk->n'), and the outcome is
    passed through ``helper.to_scalar``.
    """
    row_dots = gs.einsum('ni,ni->n', tangent_vec_a, tangent_vec_b)
    weighted = gs.einsum('n,nk->n', row_dots, in_scalar)
    return helper.to_scalar(weighted)
def random_von_mises_fisher(
        self, mu=None, kappa=10, n_samples=1, max_iter=100):
    """Sample with the von Mises-Fisher distribution.

    This distribution corresponds to the maximum entropy distribution
    given a mean. In dimension 2, a closed form expression is available.
    In larger dimension, rejection sampling is used according to [Wood94]_

    References
    ----------
    https://en.wikipedia.org/wiki/Von_Mises-Fisher_distribution

    .. [Wood94] Wood, Andrew T. A. “Simulation of the von Mises Fisher
      Distribution.” Communications in Statistics - Simulation and
      Computation, June 27, 2007.
      https://doi.org/10.1080/03610919408813161.

    Parameters
    ----------
    mu : array-like, shape=[dim]
        Mean parameter of the distribution. When given, the samples are
        passed through `utils.rotate_points(sample, mu)`.
        Optional, default: None.
    kappa : float
        Kappa parameter of the von Mises distribution.
        Optional, default: 10.
    n_samples : int
        Number of samples.
        Optional, default: 1.
    max_iter : int
        Maximum number of trials in the rejection algorithm. In case it
        is reached, the current number of samples < n_samples is returned.
        Optional, default: 100.

    Returns
    -------
    point : array-like, shape=[n_samples, dim + 1]
        Points sampled on the sphere in extrinsic coordinates
        in Euclidean space of dimension dim + 1. When `n_samples` is 1,
        only the first row (`sample[0]`) is returned.
    """
    # NOTE(review): the docstring gives mu shape [dim] while the samples
    # built below have dim + 1 columns -- confirm the expected shape.
    dim = self.dim

    if dim == 2:
        # Closed form: a uniform angle for the last two coordinates and a
        # log-transformed uniform draw for the first coordinate.
        angle = 2. * gs.pi * gs.random.rand(n_samples)
        angle = gs.to_ndarray(angle, to_ndim=2, axis=1)
        unit_vector = gs.hstack((gs.cos(angle), gs.sin(angle)))
        scalar = gs.random.rand(n_samples)

        coord_x = 1. + 1. / kappa * gs.log(
            scalar + (1. - scalar) * gs.exp(gs.array(-2. * kappa)))
        coord_x = gs.to_ndarray(coord_x, to_ndim=2, axis=1)
        coord_yz = gs.sqrt(1. - coord_x ** 2) * unit_vector
        sample = gs.hstack((coord_x, coord_yz))

    else:
        # rejection sampling in the general case
        sqrt = gs.sqrt(4 * kappa ** 2. + dim ** 2)
        envelop_param = (-2 * kappa + sqrt) / dim
        node = (1. - envelop_param) / (1. + envelop_param)
        correction = kappa * node + dim * gs.log(1. - node ** 2)

        n_accepted, n_iter = 0, 0
        result = []
        # Each round only proposes as many candidates as are still missing.
        while (n_accepted < n_samples) and (n_iter < max_iter):
            sym_beta = beta.rvs(
                dim / 2, dim / 2, size=n_samples - n_accepted)
            sym_beta = gs.cast(sym_beta, node.dtype)
            coord_x = (1 - (1 + envelop_param) * sym_beta) / (
                1 - (1 - envelop_param) * sym_beta)
            accept_tol = gs.random.rand(n_samples - n_accepted)
            criterion = (
                kappa * coord_x
                + dim * gs.log(1 - node * coord_x)
                - correction) > gs.log(accept_tol)
            result.append(coord_x[criterion])
            n_accepted += gs.sum(criterion)
            n_iter += 1
        if n_accepted < n_samples:
            logging.warning(
                'Maximum number of iteration reached in rejection '
                'sampling before n_samples were accepted.')
        # The accepted first coordinates are completed with uniform draws
        # from a hypersphere of dimension dim - 1, scaled by
        # sqrt(1 - coord_x ** 2).
        coord_x = gs.concatenate(result)
        coord_rest = _Hypersphere(dim - 1).random_uniform(n_accepted)
        coord_rest = gs.einsum(
            '...,...i->...i', gs.sqrt(1 - coord_x ** 2), coord_rest)
        sample = gs.concatenate([coord_x[..., None], coord_rest], axis=1)

    if mu is not None:
        sample = utils.rotate_points(sample, mu)

    return sample if (n_samples > 1) else sample[0]
def log(self, point, base_point):
    """Riemannian logarithm of a point wrt a base point.

    Two representations are supported, selected by `self.point_type`:
    'extrinsic' and 'ball'. With 'ball', the computation is based on
    `self.mobius_add(-base_point, point)`.

    Parameters
    ----------
    point : array-like, shape=[n_samples, dimension + 1]
                            or shape=[1, dimension + 1]
    base_point : array-like, shape=[n_samples, dimension + 1]
                             or shape=[1, dimension + 1]

    Returns
    -------
    log : array-like, shape=[n_samples, dimension + 1]
                      or shape=[1, dimension + 1]

    Raises
    ------
    NotImplementedError
        If `self.point_type` is neither 'extrinsic' nor 'ball'.
    """
    if self.point_type == 'extrinsic':
        point = gs.to_ndarray(point, to_ndim=2)
        base_point = gs.to_ndarray(base_point, to_ndim=2)

        angle = self.dist(base_point, point)
        angle = gs.to_ndarray(angle, to_ndim=1)
        angle = gs.to_ndarray(angle, to_ndim=2)

        mask_0 = gs.isclose(angle, 0.)
        mask_else = ~mask_0

        mask_0_float = gs.cast(mask_0, gs.float32)
        mask_else_float = gs.cast(mask_else, gs.float32)

        coef_1 = gs.zeros_like(angle)
        coef_2 = gs.zeros_like(angle)

        # Entries flagged as close to zero use the polynomial values.
        coef_1 += mask_0_float * (
            1. + INV_SINH_TAYLOR_COEFFS[1] * angle**2
            + INV_SINH_TAYLOR_COEFFS[3] * angle**4
            + INV_SINH_TAYLOR_COEFFS[5] * angle**6
            + INV_SINH_TAYLOR_COEFFS[7] * angle**8)
        coef_2 += mask_0_float * (
            1. + INV_TANH_TAYLOR_COEFFS[1] * angle**2
            + INV_TANH_TAYLOR_COEFFS[3] * angle**4
            + INV_TANH_TAYLOR_COEFFS[5] * angle**6
            + INV_TANH_TAYLOR_COEFFS[7] * angle**8)

        # This avoids dividing by 0.
        angle += mask_0_float * 1.

        coef_1 += mask_else_float * (angle / gs.sinh(angle))
        coef_2 += mask_else_float * (angle / gs.tanh(angle))

        log = (gs.einsum('ni,nj->nj', coef_1, point)
               - gs.einsum('ni,nj->nj', coef_2, base_point))
        return log

    elif self.point_type == 'ball':
        add_base_point = self.mobius_add(-base_point, point)
        norm_add = gs.to_ndarray(
            gs.linalg.norm(add_base_point, axis=-1), 2, -1)
        norm_add = gs.repeat(norm_add, base_point.shape[-1], -1)
        norm_base_point = gs.to_ndarray(
            gs.linalg.norm(base_point, axis=-1), 2, -1)
        norm_base_point = gs.repeat(
            norm_base_point, base_point.shape[-1], -1)

        log = (1 - norm_base_point**2) * gs.arctanh(norm_add)\
            * (add_base_point / norm_add)

        # Entries with a zero norm produce 0 / 0 above. The mask is kept
        # element-wise (same shape as norm_add) so that only those entries
        # are reset; reducing it to a single scalar would leave NaNs in
        # batches that mix coinciding and distinct points.
        mask_0 = gs.isclose(norm_add, 0.)
        log[mask_0] = 0

        return log
    else:
        raise NotImplementedError(
            'log is only implemented for ball and extrinsic')
def hvsplit(path_of_curves, a=1, b=1 / 2):
    """Split the speed field of a path of curves in two parts.

    The speed vector field cs of a discretized path of curves is split
    into a part cs_ver = M * v directed along the unit vectors v between
    consecutive curve points, and the remainder cs_hor = cs - cs_ver.
    For every time step, the interior entries of M solve a tridiagonal
    linear system whose coefficients depend on `a` and `b` through
    (a / b) ** 2; the first and last entries of each row of M stay zero.

    Parameters
    ----------
    path_of_curves : array, shape=[n_times + 1, n_points + 1, dim]
        Path of discretized curves.
    a : float
        First parameter, used only through (a / b) ** 2.
        Optional, default: 1.
    b : float
        Second parameter, used only through (a / b) ** 2.
        Optional, default: 1 / 2.

    Returns
    -------
    M : array, shape=[n_times, n_points + 1]
        Coefficients of the first part, cs_ver = M * v.
    K : array, shape=[n_times, n_points + 1]
        Norms of the differences between consecutive curve points.
    cs_ver : array, shape=[n_times, n_points + 1, dim]
        Part of the speed directed along v.
    cs_hor : array, shape=[n_times, n_points + 1, dim]
        Remainder cs - cs_ver.
    """
    n_times = path_of_curves.shape[0] - 1
    n_points = path_of_curves.shape[1] - 1
    dim = path_of_curves.shape[2]
    ratio_sq = (a / b)**2

    # tau(j, k): difference between consecutive points of curve j; the
    # last entry repeats the previous one.
    tau = np.zeros((n_times, n_points + 1, dim))
    tau[:, :-1, :] = (path_of_curves[:-1, 1:, :]
                      - path_of_curves[:-1, :-1, :])
    tau[:, -1, :] = tau[:, -2, :]

    # K(j, k) = |tau(j, k)| and v(j, k) = tau(j, k) / K(j, k)
    K = np.linalg.norm(tau, axis=-1)
    v = gs.einsum('ijk,ij->ijk', tau, 1 / K)

    # lambd(j, k) = <v(j, k + 1), v(j, k)>
    lambd = np.sum(v[:, 1:, :] * v[:, :-1, :], axis=-1)

    # Finite-difference speed in time and its difference along the curve.
    cs = n_times * (path_of_curves[1:, :, :] - path_of_curves[:-1, :, :])
    Nstau = cs[:, 1:, :] - cs[:, 0:-1, :]

    # Coefficients of the tridiagonal systems, one row per time step.
    k_ratio = K[:, 1:-1] / K[:, :-2]
    A = k_ratio * lambd[:, :-1]
    B = -(1 + k_ratio * (ratio_sq + (1 - ratio_sq) * lambd[:, :-1]**2))
    C = lambd[:, 1:]
    d = ratio_sq * v[:, 1:-1, :] + (1 - ratio_sq) * gs.einsum(
        'ij,ijk->ijk', lambd[:, :-1], v[:, :-2, :])
    Nstau_vk = np.sum(Nstau[:, 1:, :] * v[:, 1:-1, :], axis=-1)
    Nstau_d = np.sum(Nstau[:, :-1, :] * d, axis=-1)
    D = Nstau_vk - k_ratio * Nstau_d

    # Solve one tridiagonal system per time step for the interior of M.
    M = np.zeros((n_times, n_points + 1))
    for j, (lower, main, upper, rhs) in enumerate(zip(A, B, C, D)):
        LL = (np.diag(lower[1:], -1)
              + np.diag(main[:])
              + np.diag(upper[:-1], 1))
        M[j, 1:-1] = np.linalg.solve(LL, rhs[:])

    cs_ver = np.einsum('ij,ijk->ijk', M, v)
    cs_hor = cs - cs_ver

    return M, K, cs_ver, cs_hor
def lift(self, point):
    """Find a representer in top space.

    The last `self.k` eigenvectors returned by `gs.linalg.eigh` are kept
    and each of them is scaled by the square root of its eigenvalue.
    """
    spectrum, basis = gs.linalg.eigh(point)
    rank = self.k
    kept_vectors = basis[..., -rank:]
    kept_sqrt_values = spectrum[..., -rank:] ** 0.5
    return gs.einsum("...ij,...j->...ij", kept_vectors, kept_sqrt_values)
def compose(self, point_1, point_2, point_type=None):
    r"""Compose two elements of SE(n).

    With rotation parts :math:`R_i` and translation parts :math:`t_i`,
    the composition is
    :math:`(R_1, t_1) \cdot (R_2, t_2) = (R_1 R_2, R_1 t_2 + t_1)`.

    Parameters
    ----------
    point_1 : array-like, shape=[n_samples, {dimension, [n + 1, n + 1]}]
    point_2 : array-like, shape=[n_samples, {dimension, [n + 1, n + 1]}]
    point_type : str, {'vector', 'matrix'}, optional
        default: self.default_point_type

    Returns
    -------
    composition : the composition of point_1 and point_2

    Raises
    ------
    NotImplementedError
        If `point_type` is 'matrix'.
    """
    if point_type is None:
        point_type = self.default_point_type

    rotations = self.rotations
    dim_rotations = rotations.dimension

    point_1 = self.regularize(point_1, point_type=point_type)
    point_2 = self.regularize(point_2, point_type=point_type)

    if point_type == 'matrix':
        raise NotImplementedError()

    if point_type == 'vector':
        n_points_1, _ = point_1.shape
        n_points_2, _ = point_2.shape

        assert (point_1.shape == point_2.shape
                or n_points_1 == 1
                or n_points_2 == 1)

        # A single point on either side is repeated to match the other.
        if n_points_1 == 1:
            point_1 = gs.stack([point_1[0]] * n_points_2)
        if n_points_2 == 1:
            point_2 = gs.stack([point_2[0]] * n_points_1)

        # Rotation parts, converted to matrices.
        rot_mat_1 = rotations.matrix_from_rotation_vector(
            point_1[:, :dim_rotations])
        rot_mat_2 = rotations.matrix_from_rotation_vector(
            point_2[:, :dim_rotations])

        translation_1 = point_1[:, dim_rotations:]
        translation_2 = point_2[:, dim_rotations:]

        composed_rot_vec = rotations.rotation_vector_from_matrix(
            gs.matmul(rot_mat_1, rot_mat_2))
        # R_1 t_2 + t_1, row by row.
        composed_translation = gs.einsum(
            'ij,ikj->ik', translation_2, rot_mat_1) + translation_1

        composition = gs.concatenate(
            (composed_rot_vec, composed_translation), axis=1)

    return self.regularize(composition, point_type=point_type)
def _batch_gradient_descent(points, metric, weights=None, max_iter=32,
                            lr=1e-3, epsilon=5e-3, point_type='vector',
                            verbose=False):
    """Perform batch gradient descent.

    `points` is read as [n_points, n_batch, ...]; one estimate per batch
    entry is updated simultaneously. Inputs with too few dimensions to
    carry a batch axis (fewer than 3 for 'vector', fewer than 4 otherwise)
    are forwarded to `_default_gradient_descent`. The step size `lr` is
    halved whenever the convergence criterion increases.
    """
    if point_type == 'vector':
        min_batched_ndim, einsum_str, ndim = 3, 'ni,nij->ij', 1
    else:
        min_batched_ndim, einsum_str, ndim = 4, 'nk,nkij->kij', 2

    if points.ndim < min_batched_ndim:
        return _default_gradient_descent(
            points, metric, weights, max_iter, point_type, epsilon, lr,
            verbose)

    shape = points.shape
    n_points, n_batch = shape[0], shape[1]

    if n_points == 1:
        return points[0]
    if weights is None:
        weights = gs.ones((n_points, n_batch))

    # Points and batch axes are merged so that log is called only once.
    flat_shape = (n_batch * n_points, ) + shape[-ndim:]
    points_flattened = gs.reshape(points, flat_shape)
    estimates = points[0]

    convergence = convergence_old = math.inf
    iteration = 0

    while convergence > epsilon and max_iter > iteration:
        iteration += 1

        estimates_broadcast, _ = gs.broadcast_arrays(estimates, points)
        tangent_grad = metric.log(
            points_flattened, gs.reshape(estimates_broadcast, flat_shape))
        tangent_grad = gs.reshape(tangent_grad, shape)
        tangent_mean = gs.einsum(
            einsum_str, weights, tangent_grad) / n_points

        next_estimates = metric.exp(lr * tangent_mean, estimates)
        convergence = gs.sum(metric.squared_norm(tangent_mean, estimates))
        estimates = next_estimates

        if convergence < convergence_old:
            convergence_old = convergence
        elif convergence > convergence_old:
            lr = lr / 2.

    if iteration == max_iter:
        logging.warning(
            'Maximum number of iterations {} reached. The '
            'mean may be inaccurate'.format(max_iter))

    if verbose:
        logging.info(
            'n_iter: {}, final dist: {},'
            'final step size: {}'.format(iteration, convergence, lr))

    return estimates
def adaptive_gradientdescent_mean(self, points, weights=None,
                                  n_max_iterations=32, epsilon=1e-12,
                                  init_points=None):
    """Frechet mean of (weighted) points using adaptive time-steps.

    The loss function optimized is ||M_1(x)||_x (where M_1(x) is the
    tangent mean at x) rather than the mean-square-distance (MSD)
    because this saves computation time.

    Parameters
    ----------
    points : array-like, shape=[n_samples, dimension]
    weights : array-like, shape=[n_samples, 1], optional
        Defaults to a column of ones.
    n_max_iterations : int, optional
        Maximum number of iterations, default: 32.
    epsilon : float, optional
        Tolerance on the norm of the tangent mean for stopping the
        gradient descent.
    init_points : array-like, shape=[n_init, dimension], optional
        Only the first entry is used, as the starting estimate. When
        None or empty, `points[0]` is used instead.

    Returns
    -------
    mean : array-like
        Current estimate of the mean, passed through
        `gs.to_ndarray(..., to_ndim=2)`.
    """
    # TODO(Xavier): This function assumes that all points are lists
    # of vectors and not of matrices
    n_points = gs.shape(points)[0]

    if weights is None:
        weights = gs.ones((n_points, 1))

    weights = gs.array(weights)
    weights = gs.to_ndarray(weights, to_ndim=2, axis=1)

    sum_weights = gs.sum(weights)

    # None replaces the former mutable `[]` default; an explicitly passed
    # empty sequence is still treated as "no initial point".
    if init_points is None or len(init_points) == 0:
        current_mean = points[0]
    else:
        current_mean = init_points[0]

    if n_points == 1:
        return gs.to_ndarray(current_mean, to_ndim=2)

    tau = 1.0
    iteration = 0

    logs = self.log(point=points, base_point=current_mean)
    current_tangent_mean = gs.einsum('nk,nj->j', weights, logs)
    current_tangent_mean /= sum_weights
    norm_current_tangent_mean = gs.linalg.norm(current_tangent_mean)

    while (norm_current_tangent_mean > epsilon
           and iteration < n_max_iterations):
        iteration += 1
        shooting_vector = gs.to_ndarray(
            tau * current_tangent_mean, to_ndim=2)
        next_mean = self.exp(
            tangent_vec=shooting_vector, base_point=current_mean)
        logs = self.log(point=points, base_point=next_mean)
        next_tangent_mean = gs.einsum('nk,nj->j', weights, logs)
        next_tangent_mean /= sum_weights
        norm_next_tangent_mean = gs.linalg.norm(next_tangent_mean)

        if norm_next_tangent_mean < norm_current_tangent_mean:
            # The step reduced the criterion: accept it and let the step
            # size grow (it is never left below 1 after an accepted step).
            current_mean = next_mean
            current_tangent_mean = next_tangent_mean
            norm_current_tangent_mean = norm_next_tangent_mean
            tau = max(1.0, 1.0511111 * tau)
        else:
            # The step is rejected: retry from the same point with a
            # shorter step.
            tau = tau * 0.8

    if iteration == n_max_iterations:
        print('Maximum number of iterations {} reached. '
              'The mean may be inaccurate'.format(n_max_iterations))

    return gs.to_ndarray(current_mean, to_ndim=2)
def path(time):
    """Evaluate the exponential of the tangent vector scaled by each time.

    `tangent_vec`, `base_point` and `cls` are free variables resolved
    outside of this function.
    """
    scaled_vecs = gs.einsum("t,...ij->...tij", time, tangent_vec)
    return cls.exp(scaled_vecs, base_point)
def func_else(else_a, tangent_vec_a, else_b, tangent_vec_b):
    """Scale a row-wise dot product by the sum of two extra arguments.

    The product ``(else_a + else_b) * <a, b>`` is passed through
    ``helper.to_scalar`` before being returned.
    """
    scale = else_a + else_b
    scaled_dots = scale * gs.einsum(
        'ni,ni->n', tangent_vec_a, tangent_vec_b)
    return helper.to_scalar(scaled_dots)
def jacobian_translation(self, point, left_or_right='left'):
    """Compute the jacobian matrix corresponding to translation.

    Compute the jacobian matrix of the differential of the left/right
    translations from the identity to point in SO(3).

    Three regimes are combined through float masks: rotation angle close
    to 0, close to pi, and everything else.

    Parameters
    ----------
    point : array-like, shape=[..., 3]
    left_or_right : str, {'left', 'right'}, optional
        default: 'left'

    Returns
    -------
    jacobian : array-like, shape=[..., 3, 3]
    """
    geomstats.error.check_parameter_accepted_values(
        left_or_right, 'left_or_right', ['left', 'right'])

    point = self.regularize(point)
    # The unpacking requires `point` to be 2-D after regularization.
    n_points, _ = point.shape

    # Rotation angle: the norm of each row of `point`, kept as a column.
    angle = gs.linalg.norm(point, axis=-1)
    angle = gs.expand_dims(angle, axis=-1)

    coef_1 = gs.zeros([n_points, 1])
    coef_2 = gs.zeros([n_points, 1])

    # This avoids dividing by 0.
    # Note that every float mask below has self.epsilon added, so masks
    # are not exactly 0 / 1.
    mask_0 = gs.isclose(angle, 0.)
    mask_0_float = gs.cast(mask_0, gs.float32) + self.epsilon

    # Angles close to 0: even polynomials in the angle.
    coef_1 += mask_0_float * (
        TAYLOR_COEFFS_1_AT_0[0]
        + TAYLOR_COEFFS_1_AT_0[2] * angle**2
        + TAYLOR_COEFFS_1_AT_0[4] * angle**4
        + TAYLOR_COEFFS_1_AT_0[6] * angle**6)

    coef_2 += mask_0_float * (
        TAYLOR_COEFFS_2_AT_0[0]
        + TAYLOR_COEFFS_2_AT_0[2] * angle**2
        + TAYLOR_COEFFS_2_AT_0[4] * angle**4
        + TAYLOR_COEFFS_2_AT_0[6] * angle**6)

    # This avoids dividing by 0.
    mask_pi = gs.isclose(angle, gs.pi)
    mask_pi_float = gs.cast(mask_pi, gs.float32) + self.epsilon

    # Angles close to pi: polynomial in (angle - pi) for coef_1.
    delta_angle = angle - gs.pi
    coef_1 += mask_pi_float * (
        TAYLOR_COEFFS_1_AT_PI[1] * delta_angle
        + TAYLOR_COEFFS_1_AT_PI[2] * delta_angle**2
        + TAYLOR_COEFFS_1_AT_PI[3] * delta_angle**3
        + TAYLOR_COEFFS_1_AT_PI[4] * delta_angle**4
        + TAYLOR_COEFFS_1_AT_PI[5] * delta_angle**5
        + TAYLOR_COEFFS_1_AT_PI[6] * delta_angle**6)

    # The angle is shifted in place where it is close to 0, so that the
    # division by angle**2 is defined for those entries.
    angle += mask_0_float
    coef_2 += mask_pi_float * (
        (1 - coef_1) / angle**2)

    # This avoids dividing by 0.
    mask_else = ~mask_0 & ~mask_pi
    mask_else_float = gs.cast(mask_else, gs.float32) + self.epsilon

    # This avoids division by 0.
    # Second in-place shift, this time where the angle is close to pi;
    # the statements below therefore see the shifted angle.
    angle += mask_pi_float
    coef_1 += mask_else_float * (
        (angle / 2) / gs.tan(angle / 2))
    coef_2 += mask_else_float * (
        (1 - coef_1) / angle**2)

    jacobian = gs.zeros((n_points, self.dim, self.dim))
    n_points_tensor = gs.array(n_points)
    # The result is accumulated point by point: each per-point matrix is
    # added through a mask obtained from gs.get_mask_i_float.
    for i in range(n_points):
        # This avoids dividing by 0.
        mask_i_float = (gs.get_mask_i_float(i, n_points_tensor)
                        + self.epsilon)

        # The skew-symmetric term enters with +1/2 for 'left' and -1/2
        # for 'right'.
        sign = -1.
        if left_or_right == 'left':
            sign = +1.

        jacobian_i = (
            coef_1[i] * gs.eye(self.dim)
            + coef_2[i] * gs.outer(point[i], point[i])
            + sign * self.skew_matrix_from_vector(point[i]) / 2.)

        jacobian += gs.einsum('n,ij->nij', mask_i_float, jacobian_i)

    return jacobian
def _adaptive_gradient_descent(points, metric, weights=None, max_iter=32,
                               epsilon=1e-12, initial_tau=1.,
                               init_point=None, point_type='vector',
                               verbose=False):
    """Perform adaptive gradient descent.

    Frechet mean of (weighted) points using adaptive time-steps
    The loss function optimized is :math:`||M_1(x)||_x`
    (where :math:`M_1(x)` is the tangent mean at x) rather than
    the mean-square-distance (MSD) because this simplifies computations.
    Adaptivity is done in a Levenberg-Marquardt style weighting variable tau
    between the first order and the second order Gauss-Newton gradient
    descent.

    Parameters
    ----------
    points : array-like, shape=[..., dim]
        Points to be averaged.
    metric : object
        Object providing the `log`, `exp` and `squared_norm` methods used
        by the descent.
    weights : array-like, shape=[..., 1], optional
        Weights associated to the points. Defaults to ones.
    max_iter : int, optional
        Maximum number of iterations for the gradient descent.
    epsilon : float, optional
        Tolerance for stopping the gradient descent; compared (squared)
        with the squared norm of the tangent mean.
    initial_tau : float, optional
        Initial value of the step size tau. Default: 1.
    init_point : array-like, shape=[n_init, dimension], optional
        Initial point. Defaults to the first point.
    point_type : str, optional
        With 'vector' the points are reshaped to 2-D, otherwise to 3-D.
    verbose : bool, optional
        If True, log the iteration count, final variance, final squared
        norm of the tangent mean and final step size.

    Returns
    -------
    current_mean: array-like, shape=[..., dim]
        Weighted Frechet mean of the points.
    """
    if point_type == 'vector':
        points = gs.to_ndarray(points, to_ndim=2)
        einsum_str = 'n,nj->j'
    else:
        points = gs.to_ndarray(points, to_ndim=3)
        einsum_str = 'n,nij->ij'
    n_points = gs.shape(points)[0]

    # Bounds and multiplicative updates of the step size tau.
    tau_max = 1e6
    tau_mul_up = 1.6511111
    tau_min = 1e-6
    tau_mul_down = 0.1

    if n_points == 1:
        return points[0]

    current_mean = points[0] if init_point is None else init_point

    if weights is None:
        weights = gs.ones((n_points, ))
    sum_weights = gs.sum(weights)

    tau = initial_tau
    iteration = 0

    logs = metric.log(point=points, base_point=current_mean)
    var = gs.sum(
        metric.squared_norm(logs, current_mean) * weights) / sum_weights

    current_tangent_mean = gs.einsum(einsum_str, weights, logs)
    current_tangent_mean /= sum_weights
    sq_norm_current_tangent_mean = metric.squared_norm(
        current_tangent_mean, base_point=current_mean)

    while (sq_norm_current_tangent_mean > epsilon**2
           and iteration < max_iter):
        iteration += 1

        shooting_vector = tau * current_tangent_mean
        next_mean = metric.exp(
            tangent_vec=shooting_vector, base_point=current_mean)

        logs = metric.log(point=points, base_point=next_mean)
        next_tangent_mean = gs.einsum(einsum_str, weights, logs)
        next_tangent_mean /= sum_weights
        sq_norm_next_tangent_mean = metric.squared_norm(
            next_tangent_mean, base_point=next_mean)

        if sq_norm_next_tangent_mean < sq_norm_current_tangent_mean:
            # `logs` are tangent vectors at next_mean, so their norms are
            # measured there. The variance is only refreshed on accepted
            # steps so that it always describes the returned mean.
            var = gs.sum(
                metric.squared_norm(logs, next_mean) * weights
            ) / sum_weights
            current_mean = next_mean
            current_tangent_mean = next_tangent_mean
            sq_norm_current_tangent_mean = sq_norm_next_tangent_mean
            tau = min(tau_max, tau_mul_up * tau)
        else:
            # Rejected step: keep the current mean and shrink tau.
            tau = max(tau_min, tau_mul_down * tau)

    if iteration == max_iter:
        logging.warning(
            'Maximum number of iterations {} reached. '
            'The mean may be inaccurate'.format(max_iter))

    if verbose:
        logging.info(
            'n_iter: {}, final variance: {}, final dist: {},'
            ' final_step_size: {}'.format(
                iteration, var, sq_norm_current_tangent_mean, tau))

    return current_mean
def skew_matrix_from_vector(self, vec):
    """Get the skew-symmetric matrix derived from the vector.

    In 3D, compute the skew-symmetric matrix, known as the cross-product
    of a vector, associated to the vector `vec`.

    In nD, fill a skew-symmetric matrix with the values of the vector.

    Parameters
    ----------
    vec : array-like, shape=[..., dim]
        The implementation unpacks `gs.shape(vec)` into two values, so a
        2-D input is required.

    Returns
    -------
    skew_mat : array-like, shape=[..., n, n]
    """
    n_vecs, vec_dim = gs.shape(vec)

    if self.n == 2:
        # The vector is tiled so that it can scale both rows of the
        # elementary skew matrix [[0, 1], [-1, 0]].
        vec = gs.tile(vec, [1, 2])
        vec = gs.reshape(vec, (n_vecs, 2))

        id_skew = gs.array(
            gs.tile([[[0., 1.], [-1., 0.]]], (n_vecs, 1, 1)))
        skew_mat = gs.einsum(
            '...ij,...i->...ij', gs.cast(id_skew, gs.float32), vec)

    elif self.n == 3:
        # One copy per input vector of a 3 x 3 x 3 table with entries in
        # {-1, 0, 1}.
        levi_civita_symbol = gs.tile([[
            [[0., 0., 0.],
             [0., 0., 1.],
             [0., -1., 0.]],
            [[0., 0., -1.],
             [0., 0., 0.],
             [1., 0., 0.]],
            [[0., 1., 0.],
             [-1., 0., 0.],
             [0., 0., 0.]]
        ]], (n_vecs, 1, 1, 1))
        levi_civita_symbol = gs.array(levi_civita_symbol)
        # self.epsilon is added to the table and to each basis vector, so
        # the entries below are not exactly 0 / 1.
        levi_civita_symbol += self.epsilon

        # This avoids dividing by 0.
        basis_vec_1 = gs.array(
            gs.tile([[1., 0., 0.]], (n_vecs, 1))) + self.epsilon
        basis_vec_2 = gs.array(
            gs.tile([[0., 1., 0.]], (n_vecs, 1))) + self.epsilon
        basis_vec_3 = gs.array(
            gs.tile([[0., 0., 1.]], (n_vecs, 1))) + self.epsilon

        # Contract the table with each basis vector and with vec; the
        # three results become the rows of the output matrix.
        cross_prod_1 = gs.einsum(
            'nijk,ni,nj->nk', levi_civita_symbol, basis_vec_1, vec)
        cross_prod_2 = gs.einsum(
            'nijk,ni,nj->nk', levi_civita_symbol, basis_vec_2, vec)
        cross_prod_3 = gs.einsum(
            'nijk,ni,nj->nk', levi_civita_symbol, basis_vec_3, vec)

        cross_prod_1 = gs.to_ndarray(cross_prod_1, to_ndim=3, axis=1)
        cross_prod_2 = gs.to_ndarray(cross_prod_2, to_ndim=3, axis=1)
        cross_prod_3 = gs.to_ndarray(cross_prod_3, to_ndim=3, axis=1)
        skew_mat = gs.concatenate(
            [cross_prod_1, cross_prod_2, cross_prod_3], axis=1)

    else:  # SO(n)
        # Matrix size recovered from the vector length:
        # mat_dim = (1 + sqrt(1 + 8 * vec_dim)) / 2, cast to an integer.
        mat_dim = gs.cast(
            ((1. + gs.sqrt(1. + 8. * vec_dim)) / 2.), gs.int32)
        skew_mat = gs.zeros((n_vecs, ) + (self.n, ) * 2)
        upper_triangle_indices = gs.triu_indices(mat_dim, k=1)
        for i in range(n_vecs):
            # Fill the strict upper triangle, then subtract the transpose
            # to obtain the lower triangle with opposite sign.
            skew_mat[i][upper_triangle_indices] = vec[i]
            skew_mat[i] = skew_mat[i] - gs.transpose(skew_mat[i])

    return skew_mat
def _default_gradient_descent(points, metric, weights, max_iter, point_type,
                              epsilon, initial_step_size, verbose):
    """Perform default gradient descent.

    Starting from the first point, the estimate is moved along the
    weighted tangent mean of the points. The step size is halved whenever
    the norm of the tangent mean grows compared with the smallest norm
    seen so far.

    Parameters
    ----------
    points : array-like
        Points to be averaged; reshaped to 2-D for 'vector' and to 3-D
        otherwise.
    metric : object
        Object providing `log`, `exp`, `squared_norm` and `dim`.
    weights : array-like or None
        Weights associated to the points. None means ones.
    max_iter : int
        Maximum number of iterations.
    point_type : str
        'vector' or anything else (treated as matrices).
    epsilon : float
        Tolerance; the descent stops once the squared norm of the tangent
        mean is at most `epsilon * metric.dim`, or once the variance is
        close to 0.
    initial_step_size : float
        Initial step size.
    verbose : bool
        If True, log the iteration count, final variance and final
        squared norm of the tangent mean.

    Returns
    -------
    mean : array-like
        Estimate of the weighted Frechet mean.
    """
    if point_type == 'vector':
        points = gs.to_ndarray(points, to_ndim=2)
        einsum_str = 'n,nj->j'
    else:
        points = gs.to_ndarray(points, to_ndim=3)
        einsum_str = 'n,nij->ij'
    n_points = gs.shape(points)[0]

    if weights is None:
        weights = gs.ones((n_points, ))

    mean = points[0]

    if n_points == 1:
        return mean

    sum_weights = gs.sum(weights)

    iteration = 0
    sq_dist = 0.
    var = 0.

    norm_old = gs.linalg.norm(points)
    step = initial_step_size

    while iteration < max_iter:
        logs = metric.log(point=points, base_point=mean)

        var = gs.sum(
            metric.squared_norm(logs, mean) * weights) / sum_weights

        tangent_mean = gs.einsum(einsum_str, weights, logs)
        tangent_mean /= sum_weights
        norm = gs.linalg.norm(tangent_mean)

        sq_dist = metric.squared_norm(tangent_mean, mean)

        var_is_0 = gs.isclose(var, 0.)
        sq_dist_is_small = gs.less_equal(sq_dist, epsilon * metric.dim)

        # The stopping test is skipped on the very first pass, so at
        # least one update is always performed.
        condition = ~gs.logical_or(var_is_0, sq_dist_is_small)
        if not (condition or iteration == 0):
            break

        mean = metric.exp(step * tangent_mean, mean)
        iteration += 1

        if norm < norm_old:
            norm_old = norm
        elif norm > norm_old:
            step = step / 2.

    if iteration == max_iter:
        logging.warning(
            'Maximum number of iterations {} reached. '
            'The mean may be inaccurate'.format(max_iter))

    if verbose:
        logging.info(
            'n_iter: {}, final variance: {}, final dist: {}'.format(
                iteration, var, sq_dist))

    return mean
def exp(self, tangent_vec, base_point):
    """Riemannian exponential of a tangent vector wrt to a base point.

    Two representations are supported, selected by `self.point_type`:
    'extrinsic' and 'ball'.

    Parameters
    ----------
    tangent_vec : array-like, shape=[n_samples, dimension + 1]
                              or shape=[1, dimension + 1]
    base_point : array-like, shape=[n_samples, dimension + 1]
                             or shape=[1, dimension + 1]

    Returns
    -------
    exp : array-like, shape=[n_samples, dimension + 1]
                      or shape=[1, dimension + 1]

    Raises
    ------
    NotImplementedError
        If `self.point_type` is neither 'extrinsic' nor 'ball'.
    """
    if self.point_type == 'extrinsic':
        tangent_vec = gs.to_ndarray(tangent_vec, to_ndim=2)
        base_point = gs.to_ndarray(base_point, to_ndim=2)

        sq_norm_tangent_vec = self.embedding_metric.squared_norm(
            tangent_vec)
        norm_tangent_vec = gs.sqrt(sq_norm_tangent_vec)

        mask_0 = gs.isclose(sq_norm_tangent_vec, 0.)
        mask_0 = gs.to_ndarray(mask_0, to_ndim=1)
        mask_else = ~mask_0
        mask_else = gs.to_ndarray(mask_else, to_ndim=1)
        mask_0_float = gs.cast(mask_0, gs.float32)
        mask_else_float = gs.cast(mask_else, gs.float32)

        coef_1 = gs.zeros_like(norm_tangent_vec)
        coef_2 = gs.zeros_like(norm_tangent_vec)

        # Entries flagged as close to zero use the polynomial values.
        coef_1 += mask_0_float * (
            1. + COSH_TAYLOR_COEFFS[2] * norm_tangent_vec**2
            + COSH_TAYLOR_COEFFS[4] * norm_tangent_vec**4
            + COSH_TAYLOR_COEFFS[6] * norm_tangent_vec**6
            + COSH_TAYLOR_COEFFS[8] * norm_tangent_vec**8)
        coef_2 += mask_0_float * (
            1. + SINH_TAYLOR_COEFFS[3] * norm_tangent_vec**2
            + SINH_TAYLOR_COEFFS[5] * norm_tangent_vec**4
            + SINH_TAYLOR_COEFFS[7] * norm_tangent_vec**6
            + SINH_TAYLOR_COEFFS[9] * norm_tangent_vec**8)

        # This avoids dividing by 0.
        norm_tangent_vec += mask_0_float * 1.0
        coef_1 += mask_else_float * (gs.cosh(norm_tangent_vec))
        coef_2 += mask_else_float * (
            (gs.sinh(norm_tangent_vec) / (norm_tangent_vec)))

        exp = (gs.einsum('ni,nj->nj', coef_1, base_point)
               + gs.einsum('ni,nj->nj', coef_2, tangent_vec))

        hyperbolic_space = Hyperbolic(dimension=self.dimension)
        exp = hyperbolic_space.regularize(exp)
        return exp

    elif self.point_type == 'ball':
        # The norm is taken along the last axis. It must be requested with
        # the `axis` keyword: the second positional argument of
        # `linalg.norm` is `ord`, not `axis`.
        norm_base_point = gs.to_ndarray(
            gs.linalg.norm(base_point, axis=-1), 2, -1)
        norm_base_point = gs.repeat(
            norm_base_point, base_point.shape[-1], -1)
        den = 1 - norm_base_point**2

        norm_tan = gs.to_ndarray(
            gs.linalg.norm(tangent_vec, axis=-1), 2, -1)
        norm_tan = gs.repeat(norm_tan, base_point.shape[-1], -1)

        lambda_base_point = 1 / den

        # NOTE(review): a zero tangent vector makes this a 0 / 0 division;
        # unlike the 'extrinsic' branch, no guard is applied here.
        direction = tangent_vec / norm_tan
        factor = gs.tanh(lambda_base_point * norm_tan)

        exp = self.mobius_add(base_point, direction * factor)
        return exp
    else:
        raise NotImplementedError(
            'exp is only implemented for ball and extrinsic')
def _adaptive_gradient_descent(points, metric, weights=None, max_iter=32,
                               epsilon=1e-12, init_point=None,
                               point_type='vector'):
    """Estimate a (weighted) Frechet mean with adaptive step sizes.

    The quantity driven to zero is the squared norm of the tangent mean
    at the current estimate, rather than the mean-square-distance.
    The step size tau is adapted in a Levenberg-Marquardt style: it is
    multiplied up after a step that decreases that norm, and multiplied
    down (the step being discarded) otherwise.

    Parameters
    ----------
    points : array-like, shape=[..., dim]
        Points to be averaged.
    metric : object
        Metric providing `log`, `exp` and `squared_norm`.
    weights : array-like, shape=[..., 1], optional
        Weights associated to the points. Uniform weights if None.
    max_iter : int, optional
        Maximum number of iterations for the gradient descent.
    epsilon : float, optional
        Tolerance for stopping the gradient descent.
    init_point : array-like, shape=[n_init, dimension], optional
        Initial point. The first point of `points` is used if None.
    point_type : str, optional
        Only lists of vectors are supported; 'matrix' is rejected.

    Returns
    -------
    current_mean : array-like, shape=[..., dim]
        Weighted Frechet mean of the points.

    Raises
    ------
    NotImplementedError
        If `point_type` is 'matrix'.
    """
    if point_type == 'matrix':
        raise NotImplementedError(
            'The Frechet mean with adaptive gradient descent is only'
            ' implemented for lists of vectors, and not matrices.')

    tau_min, tau_max = 1e-6, 1e6
    tau_mul_down, tau_mul_up = 0.1, 1.6511111

    n_points = geomstats.vectorization.get_n_points(points, point_type)
    points = gs.to_ndarray(points, to_ndim=2)

    current_mean = init_point if init_point is not None else points[0]
    if n_points == 1:
        return current_mean

    if weights is None:
        weights = gs.ones((n_points, ))
    sum_weights = gs.sum(weights)

    def tangent_mean_at(base):
        """Return the weighted tangent mean at `base` and its sq. norm."""
        logs = metric.log(point=points, base_point=base)
        mean_vec = gs.einsum('n,nj->j', weights, logs)
        mean_vec /= sum_weights
        return mean_vec, metric.squared_norm(mean_vec, base_point=base)

    tau = 1.0
    iteration = 0
    current_tangent_mean, current_sq_norm = tangent_mean_at(current_mean)

    while current_sq_norm > epsilon**2 and iteration < max_iter:
        iteration += 1
        candidate_mean = metric.exp(
            tangent_vec=tau * current_tangent_mean,
            base_point=current_mean)
        candidate_tangent_mean, candidate_sq_norm = tangent_mean_at(
            candidate_mean)

        if candidate_sq_norm < current_sq_norm:
            # Accept the step and become more aggressive.
            current_mean = candidate_mean
            current_tangent_mean = candidate_tangent_mean
            current_sq_norm = candidate_sq_norm
            tau = min(tau_max, tau_mul_up * tau)
        else:
            # Reject the step and retry with a smaller one.
            tau = max(tau_min, tau_mul_down * tau)

    if iteration == max_iter:
        logging.warning('Maximum number of iterations {} reached. '
                        'The mean may be inaccurate'.format(max_iter))

    return current_mean
def _default_gradient_descent(group, points, weights=None, max_iter=32,
                              step=1., epsilon=EPSILON, verbose=False):
    """Compute the (weighted) group exponential barycenter of `points`.

    Starting from the first point, the estimate is repeatedly composed
    with the group exponential of the (scaled) weighted mean of the group
    logarithms of the points re-centered at the current estimate.

    Parameters
    ----------
    group : LieGroup
        Instance of the class LieGroup.
    points : array-like, shape=[n_samples, [n,n]]
        Input points lying in the Lie Group.
    weights : array-like, shape=[n_samples,]
        Weights of each point. Optional, default: 1 for each point.
    max_iter : int, optional (defaults to 32)
        The maximum number of iterations to perform in the gradient
        descent.
    step : float, optional (defaults to 1.)
        The learning rate in the gradient descent.
    epsilon : float, optional (defaults to the module constant EPSILON)
        The tolerance to reach convergence. The extrinsic norm of the
        gradient is used as criterion.
    verbose : bool
        Level of verbosity to inform about convergence.

    Returns
    -------
    exp_bar : array-like, shape=[n,n]
        The exponential_barycenter of the input points.
    """
    ndim = 2 if group.default_point_type == 'vector' else 3
    # A single point (batched or not) is its own barycenter.
    if gs.ndim(gs.array(points)) < ndim or len(points) == 1:
        return points[0] if len(points) == 1 else points

    n_points = points.shape[0]
    if weights is None:
        weights = gs.ones((n_points, ))
    weights = gs.cast(weights, gs.float32)
    # The normalised weights do not change across iterations.
    normalized_weights = weights / gs.sum(weights)

    mean = points[0]
    iteration = 0
    grad_norm = 0.

    # The first iteration always runs; afterwards iterate until the
    # gradient norm drops to the tolerance or the budget is exhausted.
    while iteration < max_iter and (grad_norm > epsilon or iteration == 0):
        inv_mean = group.inverse(mean)
        centered_points = group.compose(inv_mean, points)
        logs = group.log(point=centered_points)
        tangent_mean = step * gs.einsum(
            'n, nk...->k...', normalized_weights, logs)
        grad_norm = gs.linalg.norm(tangent_mean)
        mean = group.compose(mean, group.exp(tangent_vec=tangent_mean))
        iteration += 1

    if iteration == max_iter:
        logging.warning('Maximum number of iterations {} reached. '
                        'The mean may be inaccurate'.format(max_iter))

    if verbose:
        logging.info('n_iter: {}, final gradient norm: {}'.format(
            iteration, grad_norm))
    return mean
def tangent_extrinsic_to_spherical(self, tangent_vec, base_point=None,
                                   base_point_spherical=None):
    """Convert tangent vector from extrinsic to spherical coordinates.

    Convert a tangent vector from the extrinsic coordinates in Euclidean
    space to the spherical coordinates in the hypersphere.
    Spherical coordinates are considered from the north pole [0., 0., 1.].
    This method is only implemented in dimension 2.

    Parameters
    ----------
    tangent_vec : array-like, shape=[..., dim + 1]
        Tangent vector to the sphere, in extrinsic coordinates.
    base_point : array-like
        Point on the sphere, in extrinsic coordinates. It is converted
        with `self.extrinsic_to_spherical`. Unused if
        `base_point_spherical` is given.
        Optional, default : None.
    base_point_spherical : array-like, shape=[..., dim]
        Point on the sphere, in spherical coordinates (theta, phi).
        Either `base_point` or `base_point_spherical` must be given.
        Optional, default : None.

    Returns
    -------
    tangent_vec_spherical : array-like, shape=[..., dim]
        Tangent vector to the sphere, at base point,
        in spherical coordinates relative to the north pole [0., 0., 1.].

    Raises
    ------
    NotImplementedError
        If ``self.dim`` is not 2.
    ValueError
        If neither `base_point` nor `base_point_spherical` is given.
    """
    if self.dim != 2:
        raise NotImplementedError(
            "The conversion from extrinsic coordinates to "
            "spherical coordinates is implemented"
            " only in dimension 2.")
    if base_point is None and base_point_spherical is None:
        raise ValueError("A base point must be given, either in "
                         "extrinsic or in spherical coordinates.")
    if base_point_spherical is None:
        base_point_spherical = self.extrinsic_to_spherical(base_point)

    # gs.array stacks the Jacobian entries on the two leading axes; for a
    # batch of points they must be moved behind the batch axis.
    axes = (2, 0, 1) if base_point_spherical.ndim == 2 else (0, 1)
    theta = base_point_spherical[..., 0]
    phi = base_point_spherical[..., 1]

    # Keep sin(theta) away from 0 so the second Jacobian row stays finite.
    theta_safe = gs.where(gs.abs(theta) < gs.atol, gs.atol, theta)
    zeros = gs.zeros_like(theta)

    # Replacement Jacobian used where theta is within gs.atol of 0.
    jac_close_0 = gs.array([[gs.ones_like(theta), zeros, zeros],
                            [zeros, gs.ones_like(theta), zeros]])

    jac = gs.array([
        [
            gs.cos(theta) * gs.cos(phi),
            gs.cos(theta) * gs.sin(phi),
            -gs.sin(theta),
        ],
        [
            -gs.sin(phi) / gs.sin(theta_safe),
            gs.cos(phi) / gs.sin(theta_safe),
            zeros,
        ],
    ])

    jac = gs.transpose(jac, axes)
    jac_close_0 = gs.transpose(jac_close_0, axes)

    # Broadcast theta to the Jacobian shape to select entries point-wise.
    theta_criterion = gs.einsum("...,...ij->...ij", theta,
                                gs.ones_like(jac))
    jac = gs.where(gs.abs(theta_criterion) < gs.atol, jac_close_0, jac)

    tangent_vec_spherical = gs.einsum("...ij,...j->...i", jac, tangent_vec)

    return tangent_vec_spherical
def foo_scalar_output(tangent_vec_a, tangent_vec_b):
    """Contract two 2-D arrays row by row and convert with to_scalar.

    For each row index n, the products of matching entries of
    `tangent_vec_a` and `tangent_vec_b` are summed; the resulting 1-D
    array is then passed through `helper.to_scalar`.
    """
    rowwise_dot = gs.einsum('ni,ni->n', tangent_vec_a, tangent_vec_b)
    return helper.to_scalar(rowwise_dot)
def test_einsum(self):
    """Check gs.einsum against numpy's einsum on ellipsis subscripts.

    Each case is (subscripts, first operand, second operand); the same
    data is fed to both implementations and the results are compared.
    """
    row_dot = '...i,...i->...'
    scale = '...,...i->...i'
    cases = (
        (row_dot, [[1, 4]], [[2, 3]]),
        (row_dot, [[1, 4], [-1, 5]], [[2, 3]]),
        (row_dot, [[1, 4]], [[2, 3], [5, 6]]),
        (scale, [5], [[1, 2, 3]]),
        (scale, 5, [[1, 2, 3]]),
        (scale, [5], [1, 2, 3]),
        (scale, 5, [1, 2, 3]),
    )

    for subscripts, first, second in cases:
        np_result = _np.einsum(
            subscripts, _np.array(first), _np.array(second))
        gs_result = gs.einsum(
            subscripts, gs.array(first), gs.array(second))
        self.assertAllCloseToNp(gs_result, np_result)
def random_von_mises_fisher(
        self, mu=None, kappa=10, n_samples=1, max_iter=100):
    """Sample with the von Mises-Fisher distribution.

    This distribution corresponds to the maximum entropy distribution
    given a mean. In dimension 2, a closed form expression is available.
    In larger dimension, rejection sampling is used according to [Wood94]_

    References
    ----------
    https://en.wikipedia.org/wiki/Von_Mises-Fisher_distribution

    .. [Wood94]   Wood, Andrew T. A. "Simulation of the von Mises Fisher
                  Distribution." Communications in Statistics - Simulation
                  and Computation, June 27, 2007.
                  https://doi.org/10.1080/03610919408813161.

    Parameters
    ----------
    mu : array-like, shape=[dim + 1]
        Mean parameter of the distribution, in extrinsic coordinates.
        Optional, default: None. In that case samples are generated
        around the point [0., ..., 0., 1.].
    kappa : float
        Kappa parameter of the von Mises distribution.
        Optional, default: 10.
    n_samples : int
        Number of samples.
        Optional, default: 1.
    max_iter : int
        Maximum number of rounds of rejection sampling. Only used when
        the dimension is not 2.
        Optional, default: 100.

    Returns
    -------
    point : array-like, shape=[..., dim + 1]
        Points sampled on the sphere in extrinsic coordinates
        in Euclidean space of dimension dim + 1.
        A single point (no leading axis) is returned unless
        `n_samples` > 1.
    """
    dim = self.dim

    if dim == 2:
        # Closed form: a uniform direction in the xy-plane and a height
        # coord_z drawn by transforming a uniform variable.
        angle = 2. * gs.pi * gs.random.rand(n_samples)
        angle = gs.to_ndarray(angle, to_ndim=2, axis=1)
        unit_vector = gs.hstack((gs.cos(angle), gs.sin(angle)))
        scalar = gs.random.rand(n_samples)

        coord_z = 1. + 1. / kappa * gs.log(
            scalar + (1. - scalar) * gs.exp(gs.array(-2. * kappa)))
        coord_z = gs.to_ndarray(coord_z, to_ndim=2, axis=1)
        coord_xy = gs.sqrt(1. - coord_z ** 2) * unit_vector
        sample = gs.hstack((coord_xy, coord_z))

        if mu is not None:
            # Rotate the samples with the rotation whose axis is
            # [0, 0, 1] x mu and whose angle is arccos(mu[-1]).
            # NOTE(review): if mu is parallel or anti-parallel to
            # [0., 0., 1.] the cross product is zero and the line below
            # divides by zero -- confirm callers never pass a pole.
            rot_vec = gs.cross(
                gs.array([0., 0., 1.]), mu)
            rot_vec *= gs.arccos(mu[-1]) / gs.linalg.norm(rot_vec)
            rot = SpecialOrthogonal(
                3, 'vector').matrix_from_rotation_vector(rot_vec)
            sample = gs.matmul(sample, gs.transpose(rot))
    else:
        if mu is None:
            mu = gs.array([0.] * dim + [1.])
        # rejection sampling in the general case
        sqrt = gs.sqrt(4 * kappa ** 2. + dim ** 2)
        envelop_param = (-2 * kappa + sqrt) / dim
        node = (1. - envelop_param) / (1. + envelop_param)
        correction = kappa * node + dim * gs.log(1. - node ** 2)

        n_accepted, n_iter = 0, 0
        result = []
        # Each round proposes as many candidates as are still missing
        # and keeps those passing the acceptance test.
        while (n_accepted < n_samples) and (n_iter < max_iter):
            sym_beta = beta.rvs(
                dim / 2, dim / 2, size=n_samples - n_accepted)
            coord_z = (1 - (1 + envelop_param) * sym_beta) / (
                1 - (1 - envelop_param) * sym_beta)
            accept_tol = gs.random.rand(n_samples - n_accepted)
            criterion = (
                kappa * coord_z + dim * gs.log(1 - node * coord_z)
                - correction) > gs.log(accept_tol)
            result.append(coord_z[criterion])
            n_accepted += gs.sum(criterion)
            n_iter += 1
        if n_accepted < n_samples:
            # Fewer than n_samples points are returned in this case.
            logging.warning(
                'Maximum number of iteration reached in rejection '
                'sampling before n_samples were accepted.')
        # NOTE(review): `result` is empty when the loop body never runs
        # (e.g. max_iter == 0) -- confirm gs.concatenate accepts that.
        coord_z = gs.concatenate(result)
        # Draw the component orthogonal to mu: uniform points made
        # tangent at mu, then passed through self.projection.
        coord_rest = self.random_uniform(n_accepted)
        coord_rest = self.to_tangent(coord_rest, mu)
        coord_rest = self.projection(coord_rest)
        coord_rest = gs.einsum(
            '...,...i->...i', gs.sqrt(1 - coord_z ** 2), coord_rest)
        sample = coord_rest + coord_z[:, None] * mu[None, :]

    return sample if n_samples > 1 else sample[0]
def log(self, point, base_point):
    """Compute the Riemannian logarithm of a point wrt a base point.

    The logarithm is assembled as ``coef_1 * point - coef_2 * base_point``
    where the coefficients depend on the angle between the two points.
    Rows where `point` and `base_point` coincide are set to zero.

    Parameters
    ----------
    point : array-like, shape=[n_samples, dimension + 1]
                        or shape=[1, dimension + 1]
        Point whose logarithm is computed.
    base_point : array-like, shape=[n_samples, dimension + 1]
                             or shape=[1, dimension + 1]
        Point at which the logarithm is taken.

    Returns
    -------
    log : array-like, shape=[n_samples, dimension + 1]
                      or shape=[1, dimension + 1]
        Tangent vector at `base_point`.
    """
    point = gs.to_ndarray(point, to_ndim=2)
    base_point = gs.to_ndarray(base_point, to_ndim=2)

    metric = self.embedding_metric
    norm_base_point = metric.norm(base_point)
    norm_point = metric.norm(point)
    inner_prod = metric.inner_product(base_point, point)

    cos_angle = gs.clip(
        inner_prod / (norm_base_point * norm_point), -1., 1.)
    angle = gs.to_ndarray(gs.arccos(cos_angle), to_ndim=1)
    angle = gs.to_ndarray(angle, to_ndim=2, axis=1)

    # Split the samples into (almost) null angles and the rest.
    is_null = gs.isclose(angle, 0.)
    is_regular = gs.equal(is_null, gs.array(False))
    null_weight = gs.cast(is_null, gs.float32)
    regular_weight = gs.cast(is_regular, gs.float32)

    point_coef = gs.zeros_like(angle)
    base_coef = gs.zeros_like(angle)

    # Truncated Taylor series are used where the angle is close to zero.
    point_coef += null_weight * (
        1. + INV_SIN_TAYLOR_COEFFS[1] * angle**2
        + INV_SIN_TAYLOR_COEFFS[3] * angle**4
        + INV_SIN_TAYLOR_COEFFS[5] * angle**6
        + INV_SIN_TAYLOR_COEFFS[7] * angle**8)
    base_coef += null_weight * (
        1. + INV_TAN_TAYLOR_COEFFS[1] * angle**2
        + INV_TAN_TAYLOR_COEFFS[3] * angle**4
        + INV_TAN_TAYLOR_COEFFS[5] * angle**6
        + INV_TAN_TAYLOR_COEFFS[7] * angle**8)

    # Shift the null angles to 1 so the closed forms below never divide
    # by 0; those entries are masked out by `regular_weight` anyway.
    angle += null_weight * 1.

    point_coef += regular_weight * angle / gs.sin(angle)
    base_coef += regular_weight * angle / gs.tan(angle)

    log = (gs.einsum('ni,nj->nj', point_coef, point)
           - gs.einsum('ni,nj->nj', base_coef, base_point))

    # Count, per row, the coordinates where the two points differ; a row
    # with no differing coordinate gets a zero logarithm.
    differs = gs.equal(gs.isclose(point, base_point), gs.array(False))
    differs_float = gs.to_ndarray(
        gs.cast(differs, gs.float32), to_ndim=1)
    differs_float = gs.to_ndarray(differs_float, to_ndim=2)
    n_differing = gs.sum(differs_float, axis=1)

    same_point = gs.cast(gs.isclose(n_differing, 0.), gs.float32)
    same_point = gs.to_ndarray(same_point, to_ndim=2, axis=1)

    log -= same_point * log

    return log
def integrability_tensor_derivative(
    self,
    horizontal_vec_x,
    horizontal_vec_y,
    nabla_x_y,
    tangent_vec_e,
    nabla_x_e,
    base_point,
):
    r"""Compute the covariant derivative of the integrability tensor A.

    The horizontal covariant derivative :math:`\nabla_X (A_Y E)` is
    necessary to compute the covariant derivative of the curvature in a
    submersion.
    The components :math:`\nabla_X (A_Y E)` and :math:`A_Y E` are
    computed here for the Kendall shape space at base-point
    :math:`P = base\_point` for horizontal vector fields
    :math:`X, Y` extending the values :math:`X|_P = horizontal\_vec\_x`,
    :math:`Y|_P = horizontal\_vec\_y` and a general vector field
    :math:`E` extending :math:`E|_P = tangent\_vec\_e` in a neighborhood
    of the base-point P with covariant derivatives
    :math:`\nabla_X Y |_P = nabla\_x\_y` and
    :math:`\nabla_X E |_P = nabla\_x\_e`.

    Parameters
    ----------
    horizontal_vec_x : array-like, shape=[..., k_landmarks, m_ambient]
        Horizontal tangent vector at `base_point`.
    horizontal_vec_y : array-like, shape=[..., k_landmarks, m_ambient]
        Horizontal tangent vector at `base_point`.
    nabla_x_y : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`.
    tangent_vec_e : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`.
    nabla_x_e : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`.
    base_point : array-like, shape=[..., k_landmarks, m_ambient]
        Point of the total space.

    Returns
    -------
    nabla_x_a_y_e : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of
        :math:`\nabla_X^S (A_Y E)`.
    a_y_e : array-like, shape=[..., k_landmarks, m_ambient]
        Tangent vector at `base_point`, result of :math:`A_Y E`.

    Raises
    ------
    ValueError
        If `base_point` does not pass `self.belongs`, if
        `horizontal_vec_x` or `horizontal_vec_y` is not horizontal, if
        `nabla_x_y`, `tangent_vec_e` or `nabla_x_e` is not tangent, or
        if `nabla_x_y` minus the integrability tensor of
        (`horizontal_vec_x`, `horizontal_vec_y`) is not horizontal.

    References
    ----------
    .. [Pennec] Pennec, Xavier. Computing the curvature and its gradient
    in Kendall shape spaces. Unpublished.
    """
    # Input validation: every check must hold for all points of a batch.
    if not gs.all(self.belongs(base_point)):
        raise ValueError("The base_point does not belong to the pre-shape"
                         " space")
    if not gs.all(self.is_horizontal(horizontal_vec_x, base_point)):
        raise ValueError("Tangent vector x is not horizontal")
    if not gs.all(self.is_horizontal(horizontal_vec_y, base_point)):
        raise ValueError("Tangent vector y is not horizontal")
    if not gs.all(self.is_tangent(nabla_x_y, base_point)):
        raise ValueError("Vector nabla_x_y is not tangent")
    a_x_y = self.integrability_tensor(horizontal_vec_x, horizontal_vec_y,
                                      base_point)
    # nabla_x_y is only accepted if removing A_X Y leaves a horizontal
    # vector.
    if not gs.all(self.is_horizontal(nabla_x_y - a_x_y, base_point)):
        raise ValueError("Tangent vector nabla_x_y is not the gradient "
                         "of a horizontal distrinbution")
    if not gs.all(self.is_tangent(tangent_vec_e, base_point)):
        raise ValueError("Tangent vector e is not tangent")
    if not gs.all(self.is_tangent(nabla_x_e, base_point)):
        raise ValueError("Vector nabla_x_e is not tangent")

    # Transposes reused throughout the computation.
    p_top = Matrices.transpose(base_point)
    p_top_p = gs.matmul(p_top, base_point)
    e_top = Matrices.transpose(tangent_vec_e)
    x_top = Matrices.transpose(horizontal_vec_x)
    y_top = Matrices.transpose(horizontal_vec_y)

    def sylv_p(mat_b):
        """Solve the Sylvester equation for the vertical component.

        The right-hand side is `mat_b` minus its transpose and both
        coefficient matrices are `p_top_p`.
        """
        return gs.linalg.solve_sylvester(p_top_p, p_top_p,
                                         mat_b - Matrices.transpose(mat_b))

    omega_ep = sylv_p(gs.matmul(p_top, tangent_vec_e))
    omega_ye = sylv_p(gs.matmul(e_top, horizontal_vec_y))
    tangent_vec_b = gs.matmul(horizontal_vec_x, omega_ye)
    tangent_vec_e_sym = tangent_vec_e - 2.0 * gs.matmul(
        base_point, omega_ep)

    # Second returned value: A_Y E.
    a_y_e = gs.matmul(base_point, omega_ye) + gs.matmul(
        horizontal_vec_y, omega_ep)

    # Right-hand sides of the two Sylvester solves used below.
    tmp_tangent_vec_p = (gs.matmul(e_top, nabla_x_y)
                         - gs.matmul(y_top, nabla_x_e)
                         - 2.0 * gs.matmul(p_top, tangent_vec_b))

    tmp_tangent_vec_y = gs.matmul(p_top, nabla_x_e) + gs.matmul(
        x_top, tangent_vec_e_sym)

    scal_x_a_y_e = self.ambient_metric.inner_product(
        horizontal_vec_x, a_y_e, base_point)

    # First returned value: sum of the five contributions.
    nabla_x_a_y_e = (
        gs.matmul(base_point, sylv_p(tmp_tangent_vec_p))
        + gs.matmul(horizontal_vec_y, sylv_p(tmp_tangent_vec_y))
        + gs.matmul(nabla_x_y, omega_ep)
        + tangent_vec_b
        + gs.einsum("...,...ij->...ij", scal_x_a_y_e, base_point))

    return nabla_x_a_y_e, a_y_e
def exp(self, tangent_vec, base_point):
    """Compute the Riemannian exponential of a tangent vector.

    The tangent vector is split into its component along `base_point`
    (`matrix_a`) and the remainder, which is QR-factorised; the result
    is read off the matrix exponential of a 2p x 2p block matrix.

    Parameters
    ----------
    tangent_vec : array-like, shape=[..., n, p]
        Tangent vector at a base point.
    base_point : array-like, shape=[..., n, p]
        Point in the Stiefel manifold.

    Returns
    -------
    exp : array-like, shape=[..., n, p]
        Point in the Stiefel manifold equal to the Riemannian exponential
        of tangent_vec at the base point. The leading axis is dropped
        only when both inputs are single matrices.

    Raises
    ------
    NotImplementedError
        If the numbers of tangent vectors and base points differ and
        neither of them is 1.
    """
    # Promote single matrices to batches of one so that the batched
    # einsum / concatenate calls below apply; remember to undo it.
    is_single = (len(tangent_vec.shape) == 2
                 and len(base_point.shape) == 2)
    tangent_vec = gs.to_ndarray(tangent_vec, to_ndim=3)
    base_point = gs.to_ndarray(base_point, to_ndim=3)

    n_tangent_vecs = tangent_vec.shape[0]
    n_base_points, _, p = base_point.shape

    if not (n_tangent_vecs == n_base_points
            or n_tangent_vecs == 1
            or n_base_points == 1):
        raise NotImplementedError(
            'exp needs as many tangent vectors as base points, or a '
            'single tangent vector, or a single base point; got {} '
            'tangent vectors and {} base points.'.format(
                n_tangent_vecs, n_base_points))

    # Repeat the single operand so both batches have the same length.
    n_samples = max(n_base_points, n_tangent_vecs)
    if n_tangent_vecs == 1:
        tangent_vec = gs.tile(tangent_vec, (n_base_points, 1, 1))
    if n_base_points == 1:
        base_point = gs.tile(base_point, (n_tangent_vecs, 1, 1))

    matrix_a = gs.einsum(
        'nij, njk->nik',
        gs.transpose(base_point, axes=(0, 2, 1)), tangent_vec)
    matrix_k = (tangent_vec
                - gs.einsum('nij,njk->nik', base_point, matrix_a))

    matrix_q, matrix_r = gs.linalg.qr(matrix_k)

    # Assemble the block matrix [[A, -R^T], [R, 0]].
    matrix_ar = gs.concatenate(
        [matrix_a, -gs.transpose(matrix_r, axes=(0, 2, 1))], axis=2)
    zeros = gs.zeros((n_samples, p, p))
    matrix_rz = gs.concatenate([matrix_r, zeros], axis=2)
    block = gs.concatenate([matrix_ar, matrix_rz], axis=1)
    matrix_mn_e = gs.linalg.expm(block)

    # Only the first p columns of the exponential are needed.
    exp = gs.einsum(
        'nij,njk->nik',
        gs.concatenate([base_point, matrix_q], axis=2),
        matrix_mn_e[:, :, 0:p])

    return exp[0] if is_single else exp
def jacobian_christoffels(self, base_point):
    """Compute the Jacobian of the Christoffel symbols.

    Compute the Jacobian of the Christoffel symbols of the
    Fisher information metric.

    The result is the half-sum of seven contributions (`jac_1_mat` to
    `jac_7_mat`), each built from polygamma functions of the parameters
    and of their sum, then transposed to the documented index order.

    Parameters
    ----------
    base_point : array-like, shape=[..., dim]
        Base point.

    Returns
    -------
    jac : array-like, shape=[..., dim, dim, dim, dim]
        Jacobian of the Christoffel symbols.
        :math:`jac[..., i, j, k, l] = dGamma^i_{jk} / dx_l`
    """
    # n_dim distinguishes a single point (1) from a batch of points.
    n_dim = base_point.ndim
    # Parameters are transposed before the reductions along axis 0 below.
    param = gs.transpose(base_point)
    sum_param = gs.sum(param, 0)

    # Scalar building blocks: polygamma functions of order 1 to 3
    # evaluated at each parameter (odd-numbered terms up to term_7) and
    # at the sum of the parameters (even-numbered terms up to term_8).
    term_1 = 1 / gs.polygamma(1, param)
    term_2 = 1 / gs.polygamma(1, sum_param)
    term_3 = - gs.polygamma(2, param) / gs.polygamma(1, param)**2
    term_4 = - gs.polygamma(2, sum_param) / gs.polygamma(1, sum_param)**2
    term_5 = term_3 / term_1
    term_6 = term_4 / term_2
    term_7 = (gs.polygamma(2, param)**2 - gs.polygamma(1, param) *
              gs.polygamma(3, param)) / gs.polygamma(1, param)**2
    term_8 = (gs.polygamma(2, sum_param)**2 - gs.polygamma(1, sum_param) *
              gs.polygamma(3, sum_param)) / gs.polygamma(1, sum_param)**2
    term_9 = term_2 - gs.sum(term_1, 0)

    # Each jac_k is tiled and/or embedded on diagonals so that all seven
    # contributions can be added together.
    jac_1 = term_1 * term_8 / term_9
    jac_1_mat = gs.squeeze(
        gs.tile(jac_1, (self.dim, self.dim, self.dim, 1, 1)))
    jac_2 = - term_6 / term_9**2 * gs.einsum(
        'j...,i...->ji...', term_4 - term_3, term_1)
    jac_2_mat = gs.squeeze(
        gs.tile(jac_2, (self.dim, self.dim, 1, 1, 1)))
    jac_3 = term_3 * term_6 / term_9
    jac_3_mat = gs.transpose(
        from_vector_to_diagonal_matrix(gs.transpose(jac_3)))
    jac_3_mat = gs.squeeze(
        gs.tile(jac_3_mat, (self.dim, self.dim, 1, 1, 1)))
    jac_4 = 1 / term_9**2 * gs.einsum(
        'k...,j...,i...->kji...', term_5, term_4 - term_3, term_1)
    jac_4_mat = gs.transpose(
        from_vector_to_diagonal_matrix(gs.transpose(jac_4)))
    jac_5 = - gs.einsum('j...,i...->ji...', term_7, term_1) / term_9
    jac_5_mat = from_vector_to_diagonal_matrix(
        gs.transpose(jac_5))
    jac_5_mat = gs.transpose(from_vector_to_diagonal_matrix(
        jac_5_mat))
    jac_6 = - gs.einsum('k...,j...->kj...', term_5, term_3) / term_9
    jac_6_mat = gs.transpose(from_vector_to_diagonal_matrix(
        gs.transpose(jac_6)))
    # The second diagonal embedding needs explicit axis permutations
    # when a batch axis is present.
    jac_6_mat = gs.transpose(from_vector_to_diagonal_matrix(
        gs.transpose(jac_6_mat, [0, 1, 3, 2])), [0, 1, 3, 4, 2]) \
        if n_dim > 1 else from_vector_to_diagonal_matrix(
        jac_6_mat)
    jac_7 = - from_vector_to_diagonal_matrix(gs.transpose(term_7))
    jac_7_mat = from_vector_to_diagonal_matrix(jac_7)
    jac_7_mat = gs.transpose(
        from_vector_to_diagonal_matrix(jac_7_mat))

    jac = 1 / 2 * (
        jac_1_mat + jac_2_mat + jac_3_mat + jac_4_mat + jac_5_mat +
        jac_6_mat + jac_7_mat)

    # Final axis permutation; the batched case has one extra axis.
    return gs.transpose(jac, [3, 1, 0, 2]) if n_dim == 1 else \
        gs.transpose(jac, [4, 3, 1, 0, 2])
def func(tangent_vec_a, tangent_vec_b):
    """Multiply two arrays entry-wise and convert with to_vector.

    The einsum keeps the last index, so matching entries of
    `tangent_vec_a` and `tangent_vec_b` are multiplied without any
    summation; the product is then passed through `helper.to_vector`.
    """
    elementwise_product = gs.einsum(
        '...i,...i->...i', tangent_vec_a, tangent_vec_b)
    return helper.to_vector(elementwise_product)