示例#1
0
    def metric_matrix(self, base_point=None):
        """Return the Fisher information matrix at a base point.

        For every point ``x`` the matrix is
        ``diag(polygamma(1, x)) - polygamma(1, sum(x))``, where the second
        term is subtracted from every entry of the matrix.

        Parameters
        ----------
        base_point : array-like, shape=[..., dim]
            Base point.

        Returns
        -------
        mat : array-like, shape=[..., dim, dim]
            Inner-product matrix. Singleton axes are squeezed out.

        Raises
        ------
        ValueError
            If no base point is given.
        """
        if base_point is None:
            message = "A base point must be given to compute the metric matrix"
            raise ValueError(message)

        points = gs.to_ndarray(base_point, to_ndim=2)
        full_shape = (points.shape[0], self.dim, self.dim)

        # Diagonal part: trigamma of each individual parameter.
        diagonal_part = from_vector_to_diagonal_matrix(gs.polygamma(1, points))

        # Constant part: trigamma of the parameter sum, broadcast over a
        # matrix of ones so that it reaches every entry.
        trigamma_of_sum = gs.polygamma(1, gs.sum(points, -1))
        constant_part = gs.einsum(
            "i,ijk->ijk", trigamma_of_sum, gs.ones(full_shape)
        )

        return gs.squeeze(diagonal_part - constant_part)
示例#2
0
    def metric_matrix(self, base_point=None):
        """Return the inner-product matrix at the tangent space of a point.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        mat : array-like, shape=[..., 2, 2]
            Inner-product matrix.

        Raises
        ------
        ValueError
            If no base point is given.
        """
        if base_point is None:
            message = 'A base point must be given to compute the metric matrix'
            raise ValueError(message)

        first, second = base_point[..., 0], base_point[..., 1]
        trigamma_sum = gs.polygamma(1, first + second)

        # Three independent entries handed to SymmetricMatrices.from_vector,
        # which presumably fills the upper triangle row by row -- confirm.
        entries = [
            gs.polygamma(1, first) - trigamma_sum,
            - trigamma_sum,
            gs.polygamma(1, second) - trigamma_sum,
        ]
        return SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))
示例#3
0
        def coefficients(ind_k):
            """Return the Christoffel symbols sharing contravariant index ind_k.

            Reads ``base_point``, ``n_points`` and ``self.dim`` from the
            enclosing scope.

            Parameters
            ----------
            ind_k : int
                Contravariant (upper) index of the symbols to compute.

            Returns
            -------
            array-like, shape=[n_points, dim, dim]
                One matrix of symbols per base point.
            """
            param_k = base_point[..., ind_k]
            param_sum = gs.sum(base_point, -1)
            trigamma_k = gs.polygamma(1, param_k)
            trigamma_sum = gs.polygamma(1, param_sum)

            inverse_gap = 1 / trigamma_sum - gs.sum(
                1 / gs.polygamma(1, base_point), -1
            )
            c1 = 1 / trigamma_k / inverse_gap
            c2 = -c1 * gs.polygamma(2, param_sum) / trigamma_sum

            # One-hot row selecting position ind_k, repeated for every point
            # and scaled by polygamma(2, x_k) / polygamma(1, x_k).
            one_hot = gs.hstack(
                [
                    gs.zeros((1, ind_k)),
                    gs.ones((1, 1)),
                    gs.zeros((1, self.dim - ind_k - 1)),
                ]
            )
            ratio_k = gs.polygamma(2, param_k) / trigamma_k
            scaled_one_hot = gs.einsum(
                "i,ij->ij", ratio_k, gs.tile(one_hot, (n_points, 1))
            )
            mat_k = from_vector_to_diagonal_matrix(scaled_one_hot)

            # c2 multiplies a matrix of ones, c1 multiplies a diagonal matrix.
            full_term = gs.einsum(
                "i,ijk->ijk", c2, gs.ones((n_points, self.dim, self.dim))
            )
            diagonal_term = gs.einsum(
                "i,ijk->ijk",
                c1,
                from_vector_to_diagonal_matrix(
                    -gs.polygamma(2, base_point) / gs.polygamma(1, base_point)
                ),
            )

            return 0.5 * (full_term - diagonal_term + mat_k)
示例#4
0
        def coefficients(ind_k):
            """Compute the Christoffel symbols with upper index ind_k.

            ``base_point``, ``n_points`` and ``self.dim`` come from the
            enclosing scope. The result has one ``dim x dim`` matrix per
            base point.
            """
            x_k = base_point[..., ind_k]
            x_sum = gs.sum(base_point, -1)
            trigamma_k = gs.polygamma(1, x_k)
            trigamma_sum = gs.polygamma(1, x_sum)
            inverse_trigammas = 1 / gs.polygamma(1, base_point)

            c1 = 1 / trigamma_k / (
                1 / trigamma_sum - gs.sum(inverse_trigammas, -1))
            c2 = -c1 * gs.polygamma(2, x_sum) / trigamma_sum

            # Term shared by all entries, weighted by c2.
            ones_term = gs.einsum(
                'i,ijk->ijk', c2, gs.ones((n_points, self.dim, self.dim)))

            # Diagonal term built from -polygamma(2, x) / polygamma(1, x),
            # weighted by c1.
            log_derivative = from_vector_to_diagonal_matrix(
                -gs.polygamma(2, base_point) / gs.polygamma(1, base_point))
            diag_term = gs.einsum('i,ijk->ijk', c1, log_derivative)

            # Single diagonal entry at position (ind_k, ind_k).
            selector = gs.tile(
                gs.hstack([
                    gs.zeros((1, ind_k)),
                    gs.ones((1, 1)),
                    gs.zeros((1, self.dim - ind_k - 1)),
                ]),
                (n_points, 1))
            weight_k = gs.polygamma(2, x_k) / trigamma_k
            single_term = from_vector_to_diagonal_matrix(
                gs.einsum('i,ij->ij', weight_k, selector))

            return 0.5 * (ones_term - diag_term + single_term)
示例#5
0
 def coefficients(param_a, param_b):
     """Return the three Christoffel coefficients (c1, c2, c3).

     All three share the denominator ``2 * self.metric_det(param_a, param_b)``
     and the cross term ``polygamma(1, b) * polygamma(2, a + b)``.
     """
     param_sum = param_a + param_b
     trigamma_b = gs.polygamma(1, param_b)
     trigamma_sum = gs.polygamma(1, param_sum)
     tetragamma_sum = gs.polygamma(2, param_sum)

     denominator = 2 * self.metric_det(param_a, param_b)
     cross_term = trigamma_b * tetragamma_sum

     numerator_1 = (
         gs.polygamma(2, param_a) * (trigamma_b - trigamma_sum) - cross_term
     )
     numerator_3 = gs.polygamma(2, param_b) * trigamma_sum - cross_term

     c1 = numerator_1 / denominator
     c2 = -cross_term / denominator
     c3 = numerator_3 / denominator
     return c1, c2, c3
示例#6
0
 def test_metric_matrix_dim_2(self, point):
     """Check the 2-dimensional metric matrix against its closed form."""
     first, second = point[..., 0], point[..., 1]
     trigamma_sum = gs.polygamma(1, first + second)

     # Independent entries of the expected symmetric 2 x 2 matrix.
     entries = [
         gs.polygamma(1, first) - trigamma_sum,
         -trigamma_sum,
         gs.polygamma(1, second) - trigamma_sum,
     ]
     expected = SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))

     result = self.metric(2).metric_matrix(point)
     return self.assertAllClose(result, expected)
示例#7
0
    def metric_matrix(self, base_point=None):
        """Return the Fisher information matrix at a base point.

        The matrix is diagonal, with entries
        ``polygamma(1, kappa) - 1 / kappa`` and ``kappa / gamma**2``, where
        ``kappa`` and ``gamma`` are the two coordinates of the base point.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        mat : array-like, shape=[..., 2, 2]
            Inner-product matrix. Singleton axes are squeezed out.

        Raises
        ------
        ValueError
            If no base point is given.
        """
        if base_point is None:
            message = "A base point must be given to compute the metric matrix"
            raise ValueError(message)

        points = gs.to_ndarray(base_point, to_ndim=2)
        kappa = points[:, 0]
        gamma = points[:, 1]

        kappa_entry = gs.polygamma(1, kappa) - 1 / kappa
        gamma_entry = kappa / gamma**2

        # One row of diagonal entries per point.
        diagonals = gs.transpose(gs.array([kappa_entry, gamma_entry]))
        return gs.squeeze(from_vector_to_diagonal_matrix(diagonals))
示例#8
0
    def metric_det(param_a, param_b):
        """Return the determinant of the metric matrix.

        With ``t(x) = polygamma(1, x)`` the value returned is
        ``t(a) * t(b) - t(a + b) * (t(a) + t(b))``.

        Parameters
        ----------
        param_a : array-like, shape=[...,]
            First parameter.
        param_b : array-like, shape=[...,]
            Second parameter.

        Returns
        -------
        metric_det : array-like, shape=[...,]
            Determinant of the metric.
        """
        trigamma_a = gs.polygamma(1, param_a)
        trigamma_b = gs.polygamma(1, param_b)
        trigamma_sum = gs.polygamma(1, param_a + param_b)

        product_term = trigamma_a * trigamma_b
        coupling_term = trigamma_sum * (trigamma_a + trigamma_b)
        return product_term - coupling_term
示例#9
0
        def cost_jacobian(param):
            """Compute the jacobian of the cost function at polynomial curve.

            Reads ``initial_point``, ``end_point``, ``n_times`` and ``degree``
            from the enclosing scope.

            Parameters
            ----------
            param : array-like, shape=(degree - 1, dim)
                Parameters of the curve coordinates' polynomial functions of time.

            Returns
            -------
            jac : array-like, shape=(dim * (degree - 1),)
                Jacobian of the cost function at polynomial curve.
            """
            # The last coefficient is fixed so that all coefficients sum to
            # end_point, i.e. the polynomial curve reaches end_point at t = 1
            # (and equals initial_point at t = 0).
            last_coef = end_point - initial_point - gs.sum(param, axis=0)
            coef = gs.vstack((initial_point, param, last_coef))

            # Row i of t_position holds t**i sampled on [0, 1].
            t = gs.linspace(0.0, 1.0, n_times)
            t_position = [t**i for i in range(degree + 1)]
            t_position = gs.stack(t_position)
            position = gs.einsum("ij,ik->kj", coef, t_position)

            # Rows of t_velocity are the time derivatives of t**i, i >= 1.
            t_velocity = [i * t**(i - 1) for i in range(1, degree + 1)]
            t_velocity = gs.stack(t_velocity)
            velocity = gs.einsum("ij,ik->kj", coef[1:], t_velocity)

            kappa, gamma = position[:, 0], position[:, 1]
            kappa_dot, gamma_dot = velocity[:, 0], velocity[:, 1]

            # The slices [1:-1] and [:-1] keep only the rows that belong to
            # the free coefficients in ``param``.
            # NOTE(review): kappa_dot enters to the first power and the gamma
            # term is divided by gamma (not gamma**2); the dependence of
            # last_coef on param is also not differentiated here. Confirm
            # these expressions against the analytic derivative of the
            # intended cost.
            jac_kappa_0 = (
                (gs.polygamma(2, kappa) + 1 / kappa**2) * kappa_dot +
                gamma_dot**2 / gamma) * t_position[1:-1]
            jac_kappa_1 = (2 * gs.polygamma(1, kappa) *
                           kappa_dot) * t_velocity[:-1]

            jac_kappa = jac_kappa_0 + jac_kappa_1

            jac_gamma_0 = (-kappa * gamma_dot**2 / gamma**2) * t_position[1:-1]
            jac_gamma_1 = (2 * kappa * gamma_dot / gamma) * t_velocity[:-1]

            jac_gamma = jac_gamma_0 + jac_gamma_1

            # Stack the kappa rows above the gamma rows, then sum over time.
            jac = gs.vstack([jac_kappa, jac_gamma])

            cost_jac = gs.sum(jac, axis=1)
            return cost_jac
示例#10
0
        def cost_jacobian(param):
            """Compute the jacobian of the cost function at polynomial curve.

            Reads ``initial_point``, ``end_point``, ``n_times``, ``degree``
            and ``dim`` from the enclosing scope.

            Parameters
            ----------
            param : array-like, shape=(degree - 1, dim)
                Parameters of the curve coordinates' polynomial functions of time.

            Returns
            -------
            jac : array-like, shape=(dim * (degree - 1),)
                Jacobian of the cost function at polynomial curve.
            """
            # The last coefficient is fixed so that all coefficients sum to
            # end_point, i.e. the polynomial curve reaches end_point at t = 1.
            last_coef = end_point - initial_point - gs.sum(param, 0)
            coef = gs.vstack((initial_point, param, last_coef))

            # Row i of t_position holds t**i sampled on [0, 1].
            t = gs.linspace(0.0, 1.0, n_times)
            t_position = [t**i for i in range(degree + 1)]
            t_position = gs.stack(t_position)
            position = gs.einsum("ij,ik->kj", coef, t_position)

            # Rows of t_velocity are the time derivatives of t**i, i >= 1.
            t_velocity = [i * t ** (i - 1) for i in range(1, degree + 1)]
            t_velocity = gs.stack(t_velocity)
            velocity = gs.einsum("ij,ik->kj", coef[1:], t_velocity)

            # Because last_coef contains -sum(param), the position depends on
            # the k-th free coefficient through t**k - t**degree (fac2) and
            # the velocity through the time derivative of that (fac1).
            fac1 = gs.stack(
                [
                    k * t ** (k - 1) - degree * t ** (degree - 1)
                    for k in range(1, degree)
                ]
            )
            fac2 = gs.stack([t**k - t**degree for k in range(1, degree)])
            # fac3 and fac4 combine a per-coordinate polygamma term with the
            # matching term evaluated at the coordinate sums.
            fac3 = (velocity * gs.polygamma(1, position)).T - gs.sum(
                velocity, 1
            ) * gs.polygamma(1, gs.sum(position, 1))
            fac4 = (velocity**2 * gs.polygamma(2, position)).T - gs.sum(
                velocity, 1
            ) ** 2 * gs.polygamma(2, gs.sum(position, 1))

            # Contract over the time samples and average over n_times.
            cost_jac = (
                2 * gs.einsum("ij,kj->ik", fac1, fac3)
                + gs.einsum("ij,kj->ik", fac2, fac4)
            ) / n_times
            # Flatten to a vector of length dim * (degree - 1).
            return cost_jac.T.reshape(dim * (degree - 1))
示例#11
0
    def metric_det(param_a, param_b):
        """Return the determinant of the metric matrix.

        Parameters
        ----------
        param_a : array-like, shape=[...,]
            First parameter of the beta distribution.
        param_b : array-like, shape=[...,]
            Second parameter of the beta distribution.

        Returns
        -------
        metric_det : array-like, shape=[...,]
            Determinant of the metric, computed as
            ``t(a) * t(b) - t(a + b) * (t(a) + t(b))`` with
            ``t(x) = polygamma(1, x)``.
        """
        poly_a = gs.polygamma(1, param_a)
        poly_b = gs.polygamma(1, param_b)
        poly_ab = gs.polygamma(1, param_a + param_b)
        return poly_a * poly_b - poly_ab * (poly_a + poly_b)
示例#12
0
    def test_metric_matrix_dim2(self):
        """Test metric matrix in dimension 2.

        Compare the metric matrix of random points of the 2-dimensional
        Dirichlet space with the closed-form symmetric matrix.
        """
        space = DirichletDistributions(2)
        samples = space.random_point(self.n_points)
        result = space.metric.metric_matrix(samples)

        first, second = samples[:, 0], samples[:, 1]
        trigamma_sum = gs.polygamma(1, first + second)
        trigamma_first = gs.polygamma(1, first)
        trigamma_second = gs.polygamma(1, second)

        # Independent entries of the expected symmetric 2 x 2 matrix.
        entries = [
            trigamma_first - trigamma_sum,
            -trigamma_sum,
            trigamma_second - trigamma_sum,
        ]
        expected = SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))
        self.assertAllClose(result, expected)
示例#13
0
    def inner_product_matrix(self, base_point):
        """Return the inner-product matrix at the tangent space of a point.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        mat : array-like, shape=[..., 2, 2]
            Inner-product matrix.
        """
        first, second = base_point[..., 0], base_point[..., 1]
        trigamma_sum = gs.polygamma(1, first + second)
        trigamma_first = gs.polygamma(1, first)
        trigamma_second = gs.polygamma(1, second)

        # Three independent entries of the symmetric 2 x 2 matrix.
        entries = [
            trigamma_first - trigamma_sum,
            - trigamma_sum,
            trigamma_second - trigamma_sum,
        ]
        return SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))
示例#14
0
    def inner_product_matrix(self, base_point=None):
        """Return the inner-product matrix at the tangent space of a point.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        mat : array-like, shape=[..., 2, 2]
            Inner-product matrix.

        Raises
        ------
        ValueError
            If no base point is given.
        """
        if base_point is None:
            raise ValueError('The metric depends on the base point.')

        first, second = base_point[..., 0], base_point[..., 1]
        trigamma_sum = gs.polygamma(1, first + second)
        trigamma_first = gs.polygamma(1, first)
        trigamma_second = gs.polygamma(1, second)

        # Three independent entries of the symmetric 2 x 2 matrix.
        entries = [
            trigamma_first - trigamma_sum,
            -trigamma_sum,
            trigamma_second - trigamma_sum,
        ]
        return SymmetricMatrices.from_vector(gs.stack(entries, axis=-1))
        def coefficients(param_a, param_b):
            """Return the three Christoffel coefficients (c1, c2, c3).

            All three share the denominator
            ``2 * (t(a) * t(b) - t(a + b) * (t(a) + t(b)))`` with
            ``t(x) = polygamma(1, x)``.
            """
            param_sum = param_a + param_b
            trigamma_a = gs.polygamma(1, param_a)
            trigamma_b = gs.polygamma(1, param_b)
            trigamma_sum = gs.polygamma(1, param_sum)
            tetragamma_a = gs.polygamma(2, param_a)
            tetragamma_b = gs.polygamma(2, param_b)
            tetragamma_sum = gs.polygamma(2, param_sum)

            denominator = 2 * (
                trigamma_a * trigamma_b
                - trigamma_sum * (trigamma_a + trigamma_b)
            )
            # Term common to the three numerators.
            cross_term = trigamma_b * tetragamma_sum

            c1 = (
                tetragamma_a * (trigamma_b - trigamma_sum) - cross_term
            ) / denominator
            c2 = -cross_term / denominator
            c3 = (tetragamma_b * trigamma_sum - cross_term) / denominator
            return c1, c2, c3
示例#16
0
        def coefficients(param_a, param_b):
            """Compute the Christoffel coefficients for the beta distributions.

            Returns the tuple ``(c1, c2, c3)``; each entry is a ratio whose
            denominator is twice the determinant-like expression
            ``t(a) * t(b) - t(a + b) * (t(a) + t(b))``, ``t`` being
            ``polygamma(1, .)``.
            """
            total = param_a + param_b
            first_order = {
                "a": gs.polygamma(1, param_a),
                "b": gs.polygamma(1, param_b),
                "ab": gs.polygamma(1, total),
            }
            second_order = {
                "a": gs.polygamma(2, param_a),
                "b": gs.polygamma(2, param_b),
                "ab": gs.polygamma(2, total),
            }

            twice_det = 2 * (
                first_order["a"] * first_order["b"]
                - first_order["ab"] * (first_order["a"] + first_order["b"])
            )
            # Product appearing in all three numerators.
            shared = first_order["b"] * second_order["ab"]

            numerator_1 = (
                second_order["a"] * (first_order["b"] - first_order["ab"])
                - shared
            )
            numerator_3 = second_order["b"] * first_order["ab"] - shared

            return (
                numerator_1 / twice_det,
                -shared / twice_det,
                numerator_3 / twice_det,
            )
示例#17
0
    def jacobian_christoffels(self, base_point):
        """Compute the Jacobian of the Christoffel symbols.

        Compute the Jacobian of the Christoffel symbols of the
        Fisher information metric.

        The result is assembled as half the sum of seven tensors
        (``jac_1_mat`` ... ``jac_7_mat``), each built from polygamma
        functions of the parameters and of their sum, then transposed so
        that the point axis (if any) comes first.

        Parameters
        ----------
        base_point : array-like, shape=[..., dim]
            Base point.

        Returns
        -------
        jac : array-like, shape=[..., dim, dim, dim, dim]
            Jacobian of the Christoffel symbols.
            :math: 'jac[..., i, j, k, l] = dGamma^i_{jk} / dx_l'
        """
        # Number of axes of the input; selects the axis permutations below.
        n_dim = base_point.ndim
        # Assuming gs.transpose reverses the axes as numpy does, this puts the
        # parameter axis first (no-op for a single 1-D point).
        param = gs.transpose(base_point)
        sum_param = gs.sum(param, 0)
        # term_1, term_2: reciprocals of polygamma(1, .) at each parameter
        # and at the parameter sum.
        term_1 = 1 / gs.polygamma(1, param)
        term_2 = 1 / gs.polygamma(1, sum_param)
        # term_3, term_4: derivatives of term_1 and term_2 with respect to
        # their argument.
        term_3 = -gs.polygamma(2, param) / gs.polygamma(1, param) ** 2
        term_4 = -gs.polygamma(2, sum_param) / gs.polygamma(1, sum_param) ** 2
        # term_5, term_6: -polygamma(2, .) / polygamma(1, .).
        term_5 = term_3 / term_1
        term_6 = term_4 / term_2
        # term_7, term_8: derivatives of term_5 and term_6 with respect to
        # their argument.
        term_7 = (
            gs.polygamma(2, param) ** 2
            - gs.polygamma(1, param) * gs.polygamma(3, param)
        ) / gs.polygamma(1, param) ** 2
        term_8 = (
            gs.polygamma(2, sum_param) ** 2
            - gs.polygamma(1, sum_param) * gs.polygamma(3, sum_param)
        ) / gs.polygamma(1, sum_param) ** 2
        # Common denominator of most contributions.
        term_9 = term_2 - gs.sum(term_1, 0)

        # Each jac_n below is tiled and/or embedded on diagonals so that all
        # seven tensors end up with a common shape before being summed.
        jac_1 = term_1 * term_8 / term_9
        jac_1_mat = gs.squeeze(gs.tile(jac_1, (self.dim, self.dim, self.dim, 1, 1)))
        jac_2 = (
            -term_6
            / term_9**2
            * gs.einsum("j...,i...->ji...", term_4 - term_3, term_1)
        )
        jac_2_mat = gs.squeeze(gs.tile(jac_2, (self.dim, self.dim, 1, 1, 1)))
        jac_3 = term_3 * term_6 / term_9
        jac_3_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_3)))
        jac_3_mat = gs.squeeze(gs.tile(jac_3_mat, (self.dim, self.dim, 1, 1, 1)))
        jac_4 = (
            1
            / term_9**2
            * gs.einsum("k...,j...,i...->kji...", term_5, term_4 - term_3, term_1)
        )
        jac_4_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_4)))
        jac_5 = -gs.einsum("j...,i...->ji...", term_7, term_1) / term_9
        jac_5_mat = from_vector_to_diagonal_matrix(gs.transpose(jac_5))
        jac_5_mat = gs.transpose(from_vector_to_diagonal_matrix(jac_5_mat))
        jac_6 = -gs.einsum("k...,j...->kj...", term_5, term_3) / term_9
        jac_6_mat = gs.transpose(from_vector_to_diagonal_matrix(gs.transpose(jac_6)))
        # The diagonal embedding of jac_6 needs explicit axis permutations
        # when the input carries a point axis (n_dim > 1).
        jac_6_mat = (
            gs.transpose(
                from_vector_to_diagonal_matrix(gs.transpose(jac_6_mat, [0, 1, 3, 2])),
                [0, 1, 3, 4, 2],
            )
            if n_dim > 1
            else from_vector_to_diagonal_matrix(jac_6_mat)
        )
        jac_7 = -from_vector_to_diagonal_matrix(gs.transpose(term_7))
        jac_7_mat = from_vector_to_diagonal_matrix(jac_7)
        jac_7_mat = gs.transpose(from_vector_to_diagonal_matrix(jac_7_mat))

        jac = (
            1
            / 2
            * (
                jac_1_mat
                + jac_2_mat
                + jac_3_mat
                + jac_4_mat
                + jac_5_mat
                + jac_6_mat
                + jac_7_mat
            )
        )

        # Reorder the axes to the documented layout; with several points the
        # last axis of ``jac`` is moved to the front.
        return (
            gs.transpose(jac, [3, 1, 0, 2])
            if n_dim == 1
            else gs.transpose(jac, [4, 3, 1, 0, 2])
        )
示例#18
0
    def jacobian_christoffels(self, base_point):
        """Compute the Jacobian of the Christoffel symbols.

        Compute the Jacobian of the Christoffel symbols of the
        Fisher information metric.

        For computation purposes, we replace the value of
        (gs.polygamma(1, x) - 1/x) and (gs.polygamma(2,x) + 1/x**2) by an equivalent
        (close bounds) when they become too difficult to compute.

        References
        ----------
        .. [GQ2015] Guo, B. N., Qi, F., Zhao, J. L., & Luo, Q. M. (2015).
            Sharp inequalities for polygamma functions.
            Mathematica Slovaca, 65(1), 103-120.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        jac : array-like, shape=[..., 2, 2, 2, 2]
            Jacobian of the Christoffel symbols.
            :math: 'jac[..., i, j, k, l] = dGamma^i_{jk} / dx_l'
        """
        base_point = gs.to_ndarray(base_point, 2)

        n_points = base_point.shape[0]

        kappa, gamma = base_point[:, 0], base_point[:, 1]

        # term_0 fills the entries of the Jacobian that are identically zero.
        term_0 = gs.zeros((n_points))
        # Derivative of -1 / gamma with respect to gamma.
        term_1 = 1 / gamma**2
        # Each gs.where below uses the exact expression while
        # polygamma(1, kappa) - 1 / kappa exceeds gs.atol and switches to a
        # replacement expression otherwise (see the docstring).
        term_2 = gs.where(
            gs.polygamma(1, kappa) - 1 / kappa > gs.atol,
            kappa / (gamma**3 * (kappa * gs.polygamma(1, kappa) - 1)),
            kappa**2 / gamma**3,
        )
        # Derivative of 1 / (2 * kappa) with respect to kappa.
        term_3 = -1 / (2 * kappa**2)
        term_4 = gs.where(
            gs.polygamma(1, kappa) - 1 / kappa > gs.atol,
            (kappa**2 * gs.polygamma(2, kappa) + 1) /
            (2 * gamma**2 * (kappa * gs.polygamma(1, kappa) - 1)**2),
            (kappa**4 * gs.polygamma(2, kappa) + kappa**2) / (2 * gamma**2),
        )
        # NOTE(review): the fallback branch of term_5 is the numerator of the
        # exact branch multiplied by 0.5 -- confirm this is the intended
        # replacement.
        term_5 = gs.where(
            gs.polygamma(1, kappa) - 1 / kappa > gs.atol,
            (kappa**4 *
             (gs.polygamma(1, kappa) * gs.polygamma(3, kappa) -
              gs.polygamma(2, kappa)**2) - kappa**3 * gs.polygamma(3, kappa) -
             2 * kappa**2 * gs.polygamma(2, kappa) -
             2 * kappa * gs.polygamma(1, kappa) + 1) /
            (2 * (kappa**2 * gs.polygamma(1, kappa) - kappa)**2),
            0.5 *
            (kappa**4 *
             (gs.polygamma(1, kappa) * gs.polygamma(3, kappa) -
              gs.polygamma(2, kappa)**2) - kappa**3 * gs.polygamma(3, kappa) -
             2 * kappa**2 * gs.polygamma(2, kappa) -
             2 * kappa * gs.polygamma(1, kappa) + 1),
        )

        # Nested as jac[i][j][k][l]; the innermost entries are arrays of
        # length n_points, so the point axis is last at this stage.
        jac = gs.array([
            [
                [[term_5, term_0], [term_0, term_0]],
                [[term_0, term_0], [term_4, term_2]],
            ],
            [
                [[term_0, term_0], [term_3, term_0]],
                [[term_3, term_0], [term_0, term_1]],
            ],
        ])

        # Move the point axis to the front when there are several points;
        # for a single point it is removed by the squeeze below.
        if n_points > 1:
            jac = gs.transpose(jac, [4, 0, 1, 2, 3])

        return gs.squeeze(jac)
示例#19
0
    def christoffels(self, base_point):
        """Compute the Christoffel symbols.

        Compute the Christoffel symbols of the Fisher information metric.
        For computation purposes, we replace the value of
        (gs.polygamma(1, x) - 1/x) by an equivalent (close lower-bound) when it becomes
        too difficult to compute, as per in the second reference.

        References
        ----------
        .. [AD2008] Arwini, K. A., & Dodson, C. T. (2008).
            Information geometry (pp. 31-54). Springer Berlin Heidelberg.

        .. [GQ2015] Guo, B. N., Qi, F., Zhao, J. L., & Luo, Q. M. (2015).
            Sharp inequalities for polygamma functions.
            Mathematica Slovaca, 65(1), 103-120.

        Parameters
        ----------
        base_point : array-like, shape=[..., 2]
            Base point.

        Returns
        -------
        christoffels : array-like, shape=[..., 2, 2, 2]
            Christoffel symbols, with the contravariant index on
            the first dimension.
            :math: 'christoffels[..., i, j, k] = Gamma^i_{jk}'

        Raises
        ------
        ValueError
            If any value of kappa (first coordinate) is greater than 4e15.
        """
        base_point = gs.to_ndarray(base_point, to_ndim=2)

        kappa, gamma = base_point[:, 0], base_point[:, 1]

        # Reject inputs for which the computation is known to overflow.
        if gs.any(kappa > 4e15):
            raise ValueError(
                "Christoffels computation overflows with values of kappa. "
                "All values of kappa < 4e15 work.")

        shape = kappa.shape

        # Both gs.where calls use the exact expression while
        # polygamma(1, kappa) - 1 / kappa exceeds gs.atol and switch to a
        # replacement expression otherwise (see the docstring).
        # NOTE(review): the float32 cast presumably permits the negative
        # power on integer input, but it also lowers precision -- confirm.
        c111 = gs.where(
            gs.polygamma(1, kappa) - 1 / kappa > gs.atol,
            (gs.polygamma(2, kappa) + gs.array(kappa, dtype=gs.float32)**-2) /
            (2 * (gs.polygamma(1, kappa) - 1 / kappa)),
            0.25 * (kappa**2 * gs.polygamma(2, kappa) + 1),
        )

        c122 = gs.where(
            gs.polygamma(1, kappa) - 1 / kappa > gs.atol,
            -1 / (2 * gamma**2 * (gs.polygamma(1, kappa) - 1 / kappa)),
            -(kappa**2) / (4 * gamma**2),
        )

        # Symbols with first contravariant index: diagonal matrix
        # diag(c111, c122) for each point.
        c1 = gs.squeeze(
            from_vector_to_diagonal_matrix(gs.transpose(gs.array([c111,
                                                                  c122]))))

        # Symbols with second contravariant index:
        # [[0, 1 / (2 kappa)], [1 / (2 kappa), -1 / gamma]] for each point.
        c2 = gs.squeeze(
            gs.transpose(
                gs.array([[gs.zeros(shape), 1 / (2 * kappa)],
                          [1 / (2 * kappa), -1 / gamma]])))

        christoffels = gs.array([c1, c2])

        # With several points the stacked array has four axes; move the
        # point axis in front of the contravariant index.
        if len(christoffels.shape) == 4:
            christoffels = gs.transpose(christoffels, [1, 0, 2, 3])

        return gs.squeeze(christoffels)