def metric_matrix(self, base_point=None):
    """Compute the inner-product matrix.

    Compute the inner-product matrix of the Fisher information metric
    at the tangent space at base point. The result is the diagonal matrix
    of ``polygamma(1, x_i)`` minus a constant block whose entries all
    equal ``polygamma(1, sum_i x_i)``.

    Parameters
    ----------
    base_point : array-like, shape=[..., dim]
        Base point.

    Returns
    -------
    mat : array-like, shape=[..., dim, dim]
        Inner-product matrix (the result is passed through ``gs.squeeze``).

    Raises
    ------
    ValueError
        If ``base_point`` is None.
    """
    if base_point is None:
        raise ValueError(
            "A base point must be given to compute the " "metric matrix"
        )

    points = gs.to_ndarray(base_point, to_ndim=2)
    batch_size = points.shape[0]

    diagonal_part = from_vector_to_diagonal_matrix(gs.polygamma(1, points))
    polygamma_of_sum = gs.polygamma(1, gs.sum(points, -1))
    # Spread the per-point scalar over a full (dim, dim) block.
    constant_part = gs.einsum(
        "i,ijk->ijk",
        polygamma_of_sum,
        gs.ones((batch_size, self.dim, self.dim)),
    )
    return gs.squeeze(diagonal_part - constant_part)
def metric_matrix(self, base_point=None):
    """Compute inner-product matrix at the tangent space at base point.

    Three values are stacked along the last axis and handed to
    ``SymmetricMatrices.from_vector``:
    ``polygamma(1, a) - polygamma(1, a + b)``, ``-polygamma(1, a + b)``
    and ``polygamma(1, b) - polygamma(1, a + b)``.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point.

    Returns
    -------
    mat : array-like, shape=[..., 2, 2]
        Inner-product matrix.

    Raises
    ------
    ValueError
        If ``base_point`` is None.
    """
    if base_point is None:
        raise ValueError(
            'A base point must be given to compute the '
            'metric matrix')

    first, second = base_point[..., 0], base_point[..., 1]
    pg1_sum = gs.polygamma(1, first + second)
    entries = [
        gs.polygamma(1, first) - pg1_sum,
        -pg1_sum,
        gs.polygamma(1, second) - pg1_sum,
    ]
    return SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))
def coefficients(ind_k):
    """Christoffel symbols for contravariant index ind_k.

    Reads ``base_point``, ``n_points`` and ``self`` from the enclosing
    scope.

    Parameters
    ----------
    ind_k : int
        Contravariant index.

    Returns
    -------
    mat : array-like, shape=[n_points, dim, dim]
        Half the sum of a constant block, a scaled diagonal block and a
        diagonal block that is non-zero only at entry (ind_k, ind_k).
    """
    coord_k = base_point[..., ind_k]
    coord_sum = gs.sum(base_point, -1)
    pg1_all = gs.polygamma(1, base_point)
    pg1_sum = gs.polygamma(1, coord_sum)
    pg1_k = gs.polygamma(1, coord_k)

    scale_diag = 1 / pg1_k / (1 / pg1_sum - gs.sum(1 / pg1_all, -1))
    scale_const = -scale_diag * gs.polygamma(2, coord_sum) / pg1_sum

    const_block = gs.einsum(
        "i,ijk->ijk", scale_const, gs.ones((n_points, self.dim, self.dim))
    )
    diag_block = gs.einsum(
        "i,ijk->ijk",
        scale_diag,
        from_vector_to_diagonal_matrix(-gs.polygamma(2, base_point) / pg1_all),
    )

    # One-hot row selecting coordinate ind_k, repeated for every point.
    one_hot_row = gs.hstack(
        [
            gs.zeros((1, ind_k)),
            gs.ones((1, 1)),
            gs.zeros((1, self.dim - ind_k - 1)),
        ]
    )
    ratio_k = gs.polygamma(2, coord_k) / pg1_k
    single_entry_block = from_vector_to_diagonal_matrix(
        gs.einsum("i,ij->ij", ratio_k, gs.tile(one_hot_row, (n_points, 1)))
    )

    return 0.5 * (const_block - diag_block + single_entry_block)
def coefficients(ind_k):
    """Build the (dim, dim) coefficient matrix for index ind_k per point.

    Uses ``base_point``, ``n_points`` and ``self`` from the enclosing
    scope.

    Parameters
    ----------
    ind_k : int
        Index of the coordinate the matrix is computed for.

    Returns
    -------
    mat : array-like, shape=[n_points, dim, dim]
        ``0.5 * (c2 * ones - c1 * diag(-pg2 / pg1) + diag_k)`` where
        ``diag_k`` only has a non-zero entry at position (ind_k, ind_k).
    """
    x_k = base_point[..., ind_k]
    x_total = gs.sum(base_point, -1)
    pg1_total = gs.polygamma(1, x_total)
    pg1_x_k = gs.polygamma(1, x_k)
    pg1_x = gs.polygamma(1, base_point)

    weight_diag = 1 / pg1_x_k / (1 / pg1_total - gs.sum(1 / pg1_x, -1))
    weight_ones = -weight_diag * gs.polygamma(2, x_total) / pg1_total

    all_ones = gs.ones((n_points, self.dim, self.dim))
    log_derivative_diag = from_vector_to_diagonal_matrix(
        -gs.polygamma(2, base_point) / pg1_x)

    # Indicator of coordinate ind_k, tiled over the batch and scaled.
    indicator = gs.tile(
        gs.hstack([
            gs.zeros((1, ind_k)),
            gs.ones((1, 1)),
            gs.zeros((1, self.dim - ind_k - 1)),
        ]),
        (n_points, 1))
    scaled_indicator = gs.einsum(
        'i,ij->ij', gs.polygamma(2, x_k) / pg1_x_k, indicator)
    diag_k = from_vector_to_diagonal_matrix(scaled_indicator)

    total = (
        gs.einsum('i,ijk->ijk', weight_ones, all_ones)
        - gs.einsum('i,ijk->ijk', weight_diag, log_derivative_diag)
        + diag_k
    )
    return 0.5 * total
def coefficients(param_a, param_b):
    """Return the three coefficients c1, c2, c3 for (param_a, param_b).

    All three share the denominator ``2 * self.metric_det(param_a,
    param_b)``; ``self`` is read from the enclosing scope.

    Parameters
    ----------
    param_a : array-like, shape=[...,]
    param_b : array-like, shape=[...,]

    Returns
    -------
    c1, c2, c3 : tuple of array-like, shape=[...,]
    """
    denominator = 2 * self.metric_det(param_a, param_b)

    param_sum = param_a + param_b
    pg1_sum = gs.polygamma(1, param_sum)
    pg2_sum = gs.polygamma(2, param_sum)
    pg1_b = gs.polygamma(1, param_b)

    # pg1_b * pg2_sum appears (negated) in all three numerators.
    shared = pg1_b * pg2_sum

    c1 = (gs.polygamma(2, param_a) * (pg1_b - pg1_sum) - shared) / denominator
    c2 = -shared / denominator
    c3 = (gs.polygamma(2, param_b) * pg1_sum - shared) / denominator
    return c1, c2, c3
def test_metric_matrix_dim_2(self, point):
    """Compare ``self.metric(2).metric_matrix`` with a closed form.

    The expected matrix is built by ``SymmetricMatrices.from_vector``
    from polygamma(1, .) values of the two coordinates and their sum.
    """
    first, second = point[..., 0], point[..., 1]
    pg1_sum = gs.polygamma(1, first + second)
    stacked = gs.stack(
        [
            gs.polygamma(1, first) - pg1_sum,
            -pg1_sum,
            gs.polygamma(1, second) - pg1_sum,
        ],
        axis=-1,
    )
    expected = SymmetricMatrices.from_vector(stacked)
    result = self.metric(2).metric_matrix(point)
    return self.assertAllClose(result, expected)
def metric_matrix(self, base_point=None):
    """Compute the inner-product matrix.

    Compute the inner-product matrix of the Fisher information metric
    at the tangent space at base point. The matrix is diagonal with
    entries ``polygamma(1, kappa) - 1 / kappa`` and ``kappa / gamma**2``.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point; column 0 is read as kappa and column 1 as gamma.

    Returns
    -------
    mat : array-like, shape=[..., 2, 2]
        Inner-product matrix (the result is passed through ``gs.squeeze``).

    Raises
    ------
    ValueError
        If ``base_point`` is None.
    """
    if base_point is None:
        raise ValueError(
            "A base point must be given to compute the "
            "metric matrix")

    points = gs.to_ndarray(base_point, to_ndim=2)
    kappa = points[:, 0]
    gamma = points[:, 1]

    kappa_entry = gs.polygamma(1, kappa) - 1 / kappa
    gamma_entry = kappa / gamma**2
    # Transposed so that each row holds the diagonal of one point.
    diagonals = gs.transpose(gs.array([kappa_entry, gamma_entry]))
    return gs.squeeze(from_vector_to_diagonal_matrix(diagonals))
def metric_det(param_a, param_b):
    """Compute the determinant of the metric.

    Evaluates ``pg1(a) * pg1(b) - pg1(a + b) * (pg1(a) + pg1(b))`` where
    ``pg1`` is ``gs.polygamma(1, .)``.

    Parameters
    ----------
    param_a : array-like, shape=[...,]
        First parameter.
    param_b : array-like, shape=[...,]
        Second parameter.

    Returns
    -------
    metric_det : array-like, shape=[...,]
        Determinant of the metric.
    """
    pg1_a = gs.polygamma(1, param_a)
    pg1_b = gs.polygamma(1, param_b)
    pg1_sum = gs.polygamma(1, param_a + param_b)
    return pg1_a * pg1_b - pg1_sum * (pg1_a + pg1_b)
def cost_jacobian(param):
    """Compute the jacobian of the cost function at polynomial curve.

    Reads ``initial_point``, ``end_point``, ``n_times`` and ``degree``
    from the enclosing scope. The curve is sampled at ``n_times`` evenly
    spaced times in [0, 1]; column 0 of the sampled curve is treated as
    kappa and column 1 as gamma.

    Parameters
    ----------
    param : array-like, shape=(degree - 1, dim)
        Parameters of the curve coordinates' polynomial functions of time.

    Returns
    -------
    jac : array-like, shape=(dim * (degree - 1),)
        Jacobian of the cost function at polynomial curve; the kappa
        rows come first, then the gamma rows.
    """
    # The highest-degree coefficient is fixed by the end-point constraint.
    closing_coef = end_point - initial_point - gs.sum(param, axis=0)
    coef = gs.vstack((initial_point, param, closing_coef))

    t = gs.linspace(0.0, 1.0, n_times)
    monomials = gs.stack([t**power for power in range(degree + 1)])
    monomial_derivs = gs.stack(
        [power * t**(power - 1) for power in range(1, degree + 1)])
    position = gs.einsum("ij,ik->kj", coef, monomials)
    velocity = gs.einsum("ij,ik->kj", coef[1:], monomial_derivs)

    kappa, gamma = position[:, 0], position[:, 1]
    kappa_dot, gamma_dot = velocity[:, 0], velocity[:, 1]

    # NOTE(review): closing_coef depends on param, yet the weights below
    # are only t**k and k * t**(k - 1) for k = 1..degree - 1 -- confirm
    # against the cost function this is the jacobian of.
    position_weights = monomials[1:-1]
    velocity_weights = monomial_derivs[:-1]

    kappa_rows = (
        (gs.polygamma(2, kappa) + 1 / kappa**2) * kappa_dot
        + gamma_dot**2 / gamma
    ) * position_weights + (
        2 * gs.polygamma(1, kappa) * kappa_dot
    ) * velocity_weights

    gamma_rows = (
        -kappa * gamma_dot**2 / gamma**2
    ) * position_weights + (
        2 * kappa * gamma_dot / gamma
    ) * velocity_weights

    # Sum over the sampled times.
    return gs.sum(gs.vstack([kappa_rows, gamma_rows]), axis=1)
def cost_jacobian(param):
    """Compute the jacobian of the cost function at polynomial curve.

    Reads ``initial_point``, ``end_point``, ``n_times``, ``degree`` and
    ``dim`` from the enclosing scope. The curve is sampled at ``n_times``
    evenly spaced times in [0, 1].

    Parameters
    ----------
    param : array-like, shape=(degree - 1, dim)
        Parameters of the curve coordinates' polynomial functions of time.

    Returns
    -------
    jac : array-like, shape=(dim * (degree - 1),)
        Jacobian of the cost function at polynomial curve.
    """
    # The highest-degree coefficient is fixed by the end-point constraint.
    closing_coef = end_point - initial_point - gs.sum(param, 0)
    coef = gs.vstack((initial_point, param, closing_coef))

    t = gs.linspace(0.0, 1.0, n_times)
    monomials = gs.stack([t**power for power in range(degree + 1)])
    monomial_derivs = gs.stack(
        [power * t ** (power - 1) for power in range(1, degree + 1)]
    )
    position = gs.einsum("ij,ik->kj", coef, monomials)
    velocity = gs.einsum("ij,ik->kj", coef[1:], monomial_derivs)

    # Each weight subtracts the matching degree-`degree` term.
    top_power = t**degree
    top_power_deriv = degree * t ** (degree - 1)
    velocity_weights = gs.stack(
        [k * t ** (k - 1) - top_power_deriv for k in range(1, degree)]
    )
    position_weights = gs.stack([t**k - top_power for k in range(1, degree)])

    position_sum = gs.sum(position, 1)
    velocity_sum = gs.sum(velocity, 1)
    first_order = (
        velocity * gs.polygamma(1, position)
    ).T - velocity_sum * gs.polygamma(1, position_sum)
    second_order = (
        velocity**2 * gs.polygamma(2, position)
    ).T - velocity_sum**2 * gs.polygamma(2, position_sum)

    averaged = (
        2 * gs.einsum("ij,kj->ik", velocity_weights, first_order)
        + gs.einsum("ij,kj->ik", position_weights, second_order)
    ) / n_times
    return averaged.T.reshape(dim * (degree - 1))
def metric_det(param_a, param_b):
    """Compute the determinant of the metric.

    Parameters
    ----------
    param_a : array-like, shape=[...,]
        First parameter of the beta distribution.
    param_b : array-like, shape=[...,]
        Second parameter of the beta distribution.

    Returns
    -------
    metric_det : array-like, shape=[...,]
        Determinant of the metric, computed as
        ``pg1(a) * pg1(b) - pg1(a + b) * (pg1(a) + pg1(b))`` with
        ``pg1 = gs.polygamma(1, .)``.
    """
    poly_a = gs.polygamma(1, param_a)
    poly_b = gs.polygamma(1, param_b)
    poly_ab = gs.polygamma(1, param_a + param_b)

    product_term = poly_a * poly_b
    coupling_term = poly_ab * (poly_a + poly_b)
    return product_term - coupling_term
def test_metric_matrix_dim2(self):
    """Test metric matrix in dimension 2.

    Check the metric matrix of ``DirichletDistributions(2)`` at random
    points against the matrix that ``SymmetricMatrices.from_vector``
    builds from polygamma(1, .) values of the two coordinates and of
    their sum.
    """
    dirichlet2 = DirichletDistributions(2)
    points = dirichlet2.random_point(self.n_points)
    result = dirichlet2.metric.metric_matrix(points)

    # Ellipsis indexing gives the same columns as ``points[:, i]`` on a
    # 2-D batch and also works should ``random_point`` hand back a
    # single 1-D point, where ``points[:, i]`` would raise.
    param_a = points[..., 0]
    param_b = points[..., 1]

    polygamma_ab = gs.polygamma(1, param_a + param_b)
    polygamma_a = gs.polygamma(1, param_a)
    polygamma_b = gs.polygamma(1, param_b)
    vector = gs.stack(
        [
            polygamma_a - polygamma_ab,
            -polygamma_ab,
            polygamma_b - polygamma_ab,
        ],
        axis=-1,
    )
    expected = SymmetricMatrices.from_vector(vector)
    self.assertAllClose(result, expected)
def inner_product_matrix(self, base_point):
    """Compute inner-product matrix at the tangent space at base point.

    Stacks ``pg1(a) - pg1(a + b)``, ``-pg1(a + b)`` and
    ``pg1(b) - pg1(a + b)`` (``pg1 = gs.polygamma(1, .)``) along the last
    axis and passes them to ``SymmetricMatrices.from_vector``.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point.

    Returns
    -------
    mat : array-like, shape=[..., 2, 2]
        Inner-product matrix.
    """
    coord_a, coord_b = base_point[..., 0], base_point[..., 1]
    pg1_sum = gs.polygamma(1, coord_a + coord_b)
    components = (
        gs.polygamma(1, coord_a) - pg1_sum,
        -pg1_sum,
        gs.polygamma(1, coord_b) - pg1_sum,
    )
    return SymmetricMatrices.from_vector(
        gs.stack(list(components), axis=-1))
def inner_product_matrix(self, base_point=None):
    """Compute inner product matrix at the tangent space at base point.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point.

    Returns
    -------
    mat : array-like, shape=[..., 2, 2]
        Result of ``SymmetricMatrices.from_vector`` applied to the
        stacked values ``pg1(a) - pg1(a + b)``, ``-pg1(a + b)`` and
        ``pg1(b) - pg1(a + b)``, with ``pg1 = gs.polygamma(1, .)``.

    Raises
    ------
    ValueError
        If ``base_point`` is None.
    """
    if base_point is None:
        raise ValueError('The metric depends on the base point.')

    a_values = base_point[..., 0]
    b_values = base_point[..., 1]
    shared_term = gs.polygamma(1, a_values + b_values)

    first_entry = gs.polygamma(1, a_values) - shared_term
    middle_entry = -shared_term
    last_entry = gs.polygamma(1, b_values) - shared_term

    stacked = gs.stack([first_entry, middle_entry, last_entry], axis=-1)
    return SymmetricMatrices.from_vector(stacked)
def coefficients(param_a, param_b):
    """Return the coefficients c1, c2, c3 for (param_a, param_b).

    Every coefficient is a combination of ``gs.polygamma`` values of
    order 1 and 2 divided by the common denominator
    ``2 * (pg1a * pg1b - pg1ab * (pg1a + pg1b))``.

    Parameters
    ----------
    param_a : array-like, shape=[...,]
    param_b : array-like, shape=[...,]

    Returns
    -------
    c1, c2, c3 : tuple of array-like, shape=[...,]
    """
    param_sum = param_a + param_b
    pg1_a, pg2_a = gs.polygamma(1, param_a), gs.polygamma(2, param_a)
    pg1_b, pg2_b = gs.polygamma(1, param_b), gs.polygamma(2, param_b)
    pg1_sum, pg2_sum = gs.polygamma(1, param_sum), gs.polygamma(2, param_sum)

    denominator = 2 * (pg1_a * pg1_b - pg1_sum * (pg1_a + pg1_b))
    # pg1_b * pg2_sum enters every numerator with a minus sign.
    shared = pg1_b * pg2_sum

    return (
        (pg2_a * (pg1_b - pg1_sum) - shared) / denominator,
        -shared / denominator,
        (pg2_b * pg1_sum - shared) / denominator,
    )
def coefficients(param_a, param_b):
    """Christoffel coefficients for the beta distributions.

    Parameters
    ----------
    param_a : array-like, shape=[...,]
    param_b : array-like, shape=[...,]

    Returns
    -------
    c1, c2, c3 : tuple of array-like, shape=[...,]
        Ratios of ``gs.polygamma`` expressions of order 1 and 2 over the
        shared denominator
        ``2 * (pg1(a) * pg1(b) - pg1(a + b) * (pg1(a) + pg1(b)))``.
    """
    total = param_a + param_b

    first_a = gs.polygamma(1, param_a)
    first_b = gs.polygamma(1, param_b)
    first_total = gs.polygamma(1, total)
    second_a = gs.polygamma(2, param_a)
    second_b = gs.polygamma(2, param_b)
    second_total = gs.polygamma(2, total)

    scale = 2 * (first_a * first_b - first_total * (first_a + first_b))
    common = first_b * second_total

    numerator_1 = second_a * (first_b - first_total) - common
    numerator_2 = -common
    numerator_3 = second_b * first_total - common

    c1 = numerator_1 / scale
    c2 = numerator_2 / scale
    c3 = numerator_3 / scale
    return c1, c2, c3
def jacobian_christoffels(self, base_point):
    """Compute the Jacobian of the Christoffel symbols.

    Compute the Jacobian of the Christoffel symbols of the
    Fisher information metric.

    The result is assembled as one half of the sum of seven arrays
    (``jac_1_mat`` ... ``jac_7_mat``), each built from ``gs.polygamma``
    values of order 1 to 3 evaluated at the coordinates and at their sum,
    then embedded with ``gs.tile`` and/or
    ``from_vector_to_diagonal_matrix``.

    Parameters
    ----------
    base_point : array-like, shape=[..., dim]
        Base point.

    Returns
    -------
    jac : array-like, shape=[..., dim, dim, dim, dim]
        Jacobian of the Christoffel symbols.
        :math:`jac[..., i, j, k, l] = dGamma^i_{jk} / dx_l`
    """
    # Remembered because the final axis permutation differs between a
    # 1-D input and a higher-dimensional one.
    n_dim = base_point.ndim
    # NOTE(review): assumes gs.transpose reverses the axes (numpy-like),
    # which would put the coordinate axis first -- confirm.
    param = gs.transpose(base_point)
    sum_param = gs.sum(param, 0)

    # term_1 / term_2: reciprocals of polygamma(1, .) at each coordinate
    # and at the coordinate sum.
    term_1 = 1 / gs.polygamma(1, param)
    term_2 = 1 / gs.polygamma(1, sum_param)
    # term_3 / term_4: -polygamma(2, .) / polygamma(1, .)**2, which is the
    # derivative of term_1 / term_2 when polygamma(n + 1, .) is the
    # derivative of polygamma(n, .).
    term_3 = -gs.polygamma(2, param) / gs.polygamma(1, param) ** 2
    term_4 = -gs.polygamma(2, sum_param) / gs.polygamma(1, sum_param) ** 2
    # term_5 / term_6 simplify to -polygamma(2, .) / polygamma(1, .).
    term_5 = term_3 / term_1
    term_6 = term_4 / term_2
    # term_7 / term_8:
    # (polygamma(2)**2 - polygamma(1) * polygamma(3)) / polygamma(1)**2.
    term_7 = (
        gs.polygamma(2, param) ** 2
        - gs.polygamma(1, param) * gs.polygamma(3, param)
    ) / gs.polygamma(1, param) ** 2
    term_8 = (
        gs.polygamma(2, sum_param) ** 2
        - gs.polygamma(1, sum_param) * gs.polygamma(3, sum_param)
    ) / gs.polygamma(1, sum_param) ** 2
    # Common denominator of most contributions below.
    term_9 = term_2 - gs.sum(term_1, 0)

    # Contribution 1: tiled over three leading axes of size dim.
    jac_1 = term_1 * term_8 / term_9
    jac_1_mat = gs.squeeze(gs.tile(jac_1, (self.dim, self.dim, self.dim, 1, 1)))

    # Contribution 2: outer product over two indices, tiled over two
    # leading axes of size dim.
    jac_2 = (
        -term_6
        / term_9**2
        * gs.einsum("j...,i...->ji...", term_4 - term_3, term_1)
    )
    jac_2_mat = gs.squeeze(gs.tile(jac_2, (self.dim, self.dim, 1, 1, 1)))

    # Contribution 3: diagonal embedding, then tiled over two leading axes.
    jac_3 = term_3 * term_6 / term_9
    jac_3_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_3)))
    jac_3_mat = gs.squeeze(gs.tile(jac_3_mat, (self.dim, self.dim, 1, 1, 1)))

    # Contribution 4: triple outer product with one diagonal embedding.
    jac_4 = (
        1
        / term_9**2
        * gs.einsum("k...,j...,i...->kji...", term_5, term_4 - term_3, term_1)
    )
    jac_4_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_4)))

    # Contribution 5: outer product with two successive diagonal embeddings.
    jac_5 = -gs.einsum("j...,i...->ji...", term_7, term_1) / term_9
    jac_5_mat = from_vector_to_diagonal_matrix(gs.transpose(jac_5))
    jac_5_mat = gs.transpose(from_vector_to_diagonal_matrix(jac_5_mat))

    # Contribution 6: outer product with two diagonal embeddings; the
    # second one needs explicit axis permutations when the input is
    # not 1-D.
    jac_6 = -gs.einsum("k...,j...->kj...", term_5, term_3) / term_9
    jac_6_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_6)))
    jac_6_mat = (
        gs.transpose(
            from_vector_to_diagonal_matrix(gs.transpose(jac_6_mat, [0, 1, 3, 2])),
            [0, 1, 3, 4, 2],
        )
        if n_dim > 1
        else from_vector_to_diagonal_matrix(jac_6_mat)
    )

    # Contribution 7: three successive diagonal embeddings of -term_7.
    jac_7 = -from_vector_to_diagonal_matrix(gs.transpose(term_7))
    jac_7_mat = from_vector_to_diagonal_matrix(jac_7)
    jac_7_mat = gs.transpose(from_vector_to_diagonal_matrix(jac_7_mat))

    jac = (
        1
        / 2
        * (
            jac_1_mat
            + jac_2_mat
            + jac_3_mat
            + jac_4_mat
            + jac_5_mat
            + jac_6_mat
            + jac_7_mat
        )
    )

    # Final axis permutation; the non-1-D branch carries one extra axis.
    return (
        gs.transpose(jac, [3, 1, 0, 2])
        if n_dim == 1
        else gs.transpose(jac, [4, 3, 1, 0, 2])
    )
def jacobian_christoffels(self, base_point):
    """Compute the Jacobian of the Christoffel symbols.

    Compute the Jacobian of the Christoffel symbols of the
    Fisher information metric.

    For computation purposes, we replace the value of
    (gs.polygamma(1, x) - 1/x) and (gs.polygamma(2,x) + 1/x**2) by an
    equivalent (close bounds) when they become too difficult to compute:
    wherever ``gs.polygamma(1, kappa) - 1 / kappa`` is not above
    ``gs.atol``, the second expression of each ``gs.where`` is used.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point; column 0 is read as kappa and column 1 as gamma.

    Returns
    -------
    jac : array-like, shape=[..., 2, 2, 2, 2]
        Jacobian of the Christoffel symbols.
        :math:`jac[..., i, j, k, l] = dGamma^i_{jk} / dx_l`

    References
    ----------
    .. [GQ2015] Guo, B. N., Qi, F., Zhao, J. L., & Luo, Q. M. (2015).
        Sharp inequalities for polygamma functions.
        Mathematica Slovaca, 65(1), 103-120.
    """
    points = gs.to_ndarray(base_point, 2)
    n_points = points.shape[0]
    kappa, gamma = points[:, 0], points[:, 1]

    pg1 = gs.polygamma(1, kappa)
    pg2 = gs.polygamma(2, kappa)
    pg3 = gs.polygamma(3, kappa)
    # True where the direct formulas are used, False for the fallbacks.
    direct = pg1 - 1 / kappa > gs.atol

    # Local names give the [i][j][k][l] slot each value fills below.
    zero = gs.zeros((n_points))
    jac_1111 = 1 / gamma**2
    jac_0111 = gs.where(
        direct,
        kappa / (gamma**3 * (kappa * pg1 - 1)),
        kappa**2 / gamma**3,
    )
    # Also fills slot [1][1][0][0].
    jac_1010 = -1 / (2 * kappa**2)
    jac_0110 = gs.where(
        direct,
        (kappa**2 * pg2 + 1) / (2 * gamma**2 * (kappa * pg1 - 1)**2),
        (kappa**4 * pg2 + kappa**2) / (2 * gamma**2),
    )
    # Both branches of the [0][0][0][0] entry share this numerator.
    numerator_0000 = (
        kappa**4 * (pg1 * pg3 - pg2**2)
        - kappa**3 * pg3
        - 2 * kappa**2 * pg2
        - 2 * kappa * pg1
        + 1
    )
    jac_0000 = gs.where(
        direct,
        numerator_0000 / (2 * (kappa**2 * pg1 - kappa)**2),
        0.5 * numerator_0000,
    )

    block_00 = [[jac_0000, zero], [zero, zero]]
    block_01 = [[zero, zero], [jac_0110, jac_0111]]
    block_10 = [[zero, zero], [jac_1010, zero]]
    block_11 = [[jac_1010, zero], [zero, jac_1111]]
    jac = gs.array([[block_00, block_01], [block_10, block_11]])

    # The point axis is created last by gs.array; bring it to the front.
    if n_points > 1:
        jac = gs.transpose(jac, [4, 0, 1, 2, 3])
    return gs.squeeze(jac)
def christoffels(self, base_point):
    """Compute the Christoffel symbols.

    Compute the Christoffel symbols of the Fisher information metric.
    For computation purposes, we replace the value of
    (gs.polygamma(1, x) - 1/x) by an equivalent (close lower-bound) when
    it becomes too difficult to compute, as per in the second reference:
    wherever that quantity is not above ``gs.atol``, the second
    expression of each ``gs.where`` is used.

    Parameters
    ----------
    base_point : array-like, shape=[..., 2]
        Base point; column 0 is read as kappa and column 1 as gamma.

    Returns
    -------
    christoffels : array-like, shape=[..., 2, 2, 2]
        Christoffel symbols, with the contravariant index on
        the first dimension.
        :math:`christoffels[..., i, j, k] = Gamma^i_{jk}`

    Raises
    ------
    ValueError
        If any kappa exceeds 4e15.

    References
    ----------
    .. [AD2008] Arwini, K. A., & Dodson, C. T. (2008).
        Information geometry (pp. 31-54). Springer Berlin Heidelberg.
    .. [GQ2015] Guo, B. N., Qi, F., Zhao, J. L., & Luo, Q. M. (2015).
        Sharp inequalities for polygamma functions.
        Mathematica Slovaca, 65(1), 103-120.
    """
    points = gs.to_ndarray(base_point, to_ndim=2)
    kappa, gamma = points[:, 0], points[:, 1]

    if gs.any(kappa > 4e15):
        raise ValueError(
            "Christoffels computation overflows with values of kappa. "
            "All values of kappa < 4e15 work.")

    pg1 = gs.polygamma(1, kappa)
    pg2 = gs.polygamma(2, kappa)
    pg1_minus_inverse = pg1 - 1 / kappa
    # True where the direct formulas are used, False for the fallbacks.
    direct = pg1_minus_inverse > gs.atol

    # NOTE(review): kappa is cast to float32 before the negative power,
    # presumably so integer input is accepted; this lowers precision --
    # confirm it is intended.
    kappa_inverse_squared = gs.array(kappa, dtype=gs.float32)**-2
    symbol_111 = gs.where(
        direct,
        (pg2 + kappa_inverse_squared) / (2 * pg1_minus_inverse),
        0.25 * (kappa**2 * pg2 + 1),
    )
    symbol_122 = gs.where(
        direct,
        -1 / (2 * gamma**2 * pg1_minus_inverse),
        -(kappa**2) / (4 * gamma**2),
    )

    # First contravariant index: diagonal matrix (symbol_111, symbol_122).
    first_block = gs.squeeze(
        from_vector_to_diagonal_matrix(
            gs.transpose(gs.array([symbol_111, symbol_122]))))

    # Second contravariant index: symmetric matrix with a zero top-left.
    off_diagonal = 1 / (2 * kappa)
    second_rows = [
        [gs.zeros(kappa.shape), 1 / (2 * kappa)],
        [off_diagonal, -1 / gamma],
    ]
    second_block = gs.squeeze(gs.transpose(gs.array(second_rows)))

    stacked = gs.array([first_block, second_block])
    # With several points, move the point axis in front of the index axis.
    if len(stacked.shape) == 4:
        stacked = gs.transpose(stacked, [1, 0, 2, 3])
    return gs.squeeze(stacked)