示例#1
0
 def moment_match_unstable(self,
                           y,
                           cav_mean,
                           cav_cov,
                           hyp=None,
                           power=1.0,
                           cubature_func=None):
     """
     Moment matching that keeps the full second-derivative matrix of logZₙ.

     TODO: this attempts to compute the full site covariance, including
     cross terms. However, doing so makes things unstable.

     :param y: observed data (yₙ)
     :param cav_mean: cavity mean (mₙ); squeezed before being passed on
     :param cav_cov: cavity covariance (cₙ); only its diagonal is passed to
         the likelihood terms, the full matrix enters the site covariance
     :param hyp: likelihood hyperparameter (not used by this method)
     :param power: EP power / fraction (a)
     :param cubature_func: callable mapping a dimension to (sigma points,
         weights); when None, gauss_hermite(1, 20) is used
     :return:
         lZ: value returned by self.log_expected_likelihood
         site_mean: approx. likelihood (site) mean
         site_cov: approx. likelihood (site) covariance
     """
     # Gauss-Hermite sigma points and weights unless the caller supplies a rule
     x, w = gauss_hermite(1, 20) if cubature_func is None else cubature_func(1)

     # the likelihood terms work on the flattened mean and marginal variances
     mean_vec = np.squeeze(cav_mean)
     var_vec = np.squeeze(np.diag(cav_cov))

     lZ = self.log_expected_likelihood(y, x, w, mean_vec, var_vec, power)
     dlZ = self.dlZ_dm(y, x, w, mean_vec, var_vec, power)[:, None]
     # Jacobian of dlZ_dm w.r.t. its 4th positional argument (the mean),
     # i.e. the matrix of second derivatives, cross terms included.
     # (Replacing it by np.diag(np.diag(d2lZ)) would discard the cross terms.)
     d2lZ = jacrev(self.dlZ_dm, argnums=3)(y, x, w, mean_vec, var_vec, power)

     regulariser = 1e-10 * np.eye(d2lZ.shape[0])
     id2lZ = inv(ensure_positive_precision(-d2lZ) - regulariser)
     # approx. likelihood (site) mean (see Rasmussen & Williams p75)
     site_mean = cav_mean + id2lZ @ dlZ
     # approx. likelihood (site) variance
     site_cov = power * (-cav_cov + id2lZ)
     return lZ, site_mean, site_cov
示例#2
0
    def moment_match(self,
                     y,
                     cav_mean,
                     cav_cov,
                     hyp=None,
                     power=1.0,
                     cubature_func=None):
        """
        Moment matching for a state split into subband and modulator halves.

        The first half of cav_mean / cav_cov is treated as the subbands and
        the second half as the modulators. Cubature runs over the modulators
        only; at every sigma point a Gaussian density in y is evaluated with
            mean     = link_fn(fsigᵢ)ᵀ · subband mean
            variance = hyp / power + (link_fn(fsigᵢ)²)ᵀ · diag(subband cov)
        Only the diagonal of the second derivative of logZₙ is formed.

        :param y: observed data (yₙ)
        :param cav_mean: cavity mean, subbands stacked on top of modulators
        :param cav_cov: cavity covariance; only the two diagonal blocks are read
        :param hyp: likelihood hyperparameter, used as the observation noise variance
        :param power: EP power / fraction (a)
        :param cubature_func: callable mapping a dimension to (sigma points,
            weights); when None, gauss_hermite(num_components, 20) is used
        :return:
            lZ: log of the (1e-8 offset) partition function
            site_mean: approx. likelihood (site) mean
            site_cov: approx. likelihood (site) covariance
        """
        half = int(cav_mean.shape[0] / 2)
        # Gauss-Hermite sigma points and weights unless a rule is supplied
        x, w = gauss_hermite(half, 20) if cubature_func is None else cubature_func(half)

        subband_mean = cav_mean[:half]
        # NOTE: the link function is applied to the modulator mean here and
        # again to the sigma points below
        modulator_mean = self.link_fn(cav_mean[half:])
        subband_cov = cav_cov[:half, :half]
        modulator_cov = cav_cov[half:, half:]

        sigma_points = cholesky(modulator_cov) @ x + modulator_mean
        link_vals = self.link_fn(sigma_points)
        deviation = sigma_points - modulator_mean
        inv_modulator_var = np.diag(modulator_cov)[..., None]**-1

        # Gaussian density of y at every sigma point
        const = power**-0.5 * (2 * pi * hyp)**(0.5 - 0.5 * power)
        mu = (link_vals.T @ subband_mean)[:, 0]
        var = hyp / power + (link_vals.T**2 @ np.diag(subband_cov)[..., None])[:, 0]
        resid = y - mu
        normpdf = const * (2 * pi * var)**-0.5 * np.exp(-0.5 * resid**2 / var)

        # partition function, offset by 1e-8 so the log / reciprocal stay finite
        Z = np.sum(w * normpdf)
        Z_offset = Z + 1e-8
        Zinv = 1. / Z_offset
        lZ = np.log(Z_offset)

        # first derivatives of Z w.r.t. the subband and modulator means
        dZ1 = np.sum(w * link_vals * resid / var * normpdf, axis=-1)
        dZ2 = np.sum(w * deviation * inv_modulator_var * normpdf, axis=-1)
        dlZ = Zinv * np.block([dZ1, dZ2])

        # second derivatives (diagonal entries only)
        d2Z1 = np.sum(w * link_vals**2 * ((resid / var)**2 - var**-1) * normpdf,
                      axis=-1)
        d2Z2 = np.sum(w * ((deviation * inv_modulator_var)**2 - inv_modulator_var)
                      * normpdf,
                      axis=-1)
        d2lZ = np.diag(-dlZ**2 + Zinv * np.block([d2Z1, d2Z2]))

        regulariser = 1e-10 * np.eye(d2lZ.shape[0])
        id2lZ = inv(ensure_positive_precision(-d2lZ) - regulariser)
        # approx. likelihood (site) mean (see Rasmussen & Williams p75)
        site_mean = cav_mean + id2lZ @ dlZ[..., None]
        # approx. likelihood (site) variance
        site_cov = power * (-cav_cov + id2lZ)
        return lZ, site_mean, site_cov
示例#3
0
 def __init__(self, site_params=None, intmethod='GH', num_cub_pts=20):
     """
     Store the site parameters and choose the cubature rule.

     :param site_params: stored unchanged on the instance
     :param intmethod: 'GH' (Gauss-Hermite), 'UT3' (3rd-order symmetric
         cubature) or 'UT5' / 'UT' (5th-order symmetric cubature)
     :param num_cub_pts: second argument handed to gauss_hermite when
         intmethod is 'GH'; ignored by the other rules
     :raises NotImplementedError: if intmethod is none of the values above
     """
     self.site_params = site_params
     # every rule is exposed with the same signature: dimension -> rule output
     if intmethod in ('UT5', 'UT'):
         # Unscented transform (5th order)
         rule = lambda dim: symmetric_cubature_fifth_order(dim)
     elif intmethod == 'UT3':
         # Unscented transform (3rd order)
         rule = lambda dim: symmetric_cubature_third_order(dim)
     elif intmethod == 'GH':
         # Gauss-Hermite
         rule = lambda dim: gauss_hermite(dim, num_cub_pts)
     else:
         raise NotImplementedError('integration method not recognised')
     self.cubature_func = rule
示例#4
0
    def moment_match(self,
                     y,
                     cav_mean,
                     cav_cov,
                     hyp=None,
                     power=1.0,
                     cubature_func=None):
        """
        Moment matching for a two-component state.

        Cubature is carried out over the second component only. At each sigma
        point a Gaussian density in y is evaluated, centred on cav_mean[0, 0]
        with variance link_fn(fsigᵢ)² / power + cav_cov[0, 0]. Only the two
        diagonal second derivatives of logZₙ are formed.

        :param y: observed data (yₙ)
        :param cav_mean: cavity mean, indexed as [0, 0] and [1, 0]
        :param cav_cov: cavity covariance; only entries [0, 0] and [1, 1] are read
        :param hyp: likelihood hyperparameter (not used by this method)
        :param power: EP power / fraction (a)
        :param cubature_func: callable mapping a dimension to (sigma points,
            weights); when None, gauss_hermite(1, 20) is used
        :return:
            lZ: log of the partition function (floored at 1e-8)
            site_mean: approx. likelihood (site) mean
            site_cov: approx. likelihood (site) covariance
        """
        # Gauss-Hermite sigma points and weights unless a rule is supplied
        x, w = gauss_hermite(1, 20) if cubature_func is None else cubature_func(1)

        second_var = cav_cov[1, 1]
        # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity
        sigma_points = np.sqrt(second_var) * x + cav_mean[1]

        link_sq = self.link_fn(sigma_points)**2.
        f2 = link_sq / power
        obs_var = f2 + cav_cov[0, 0]
        resid = y - cav_mean[0, 0]
        const = power**-0.5 * (2 * pi * link_sq)**(0.5 - 0.5 * power)
        normpdf = const * (2 * pi * obs_var)**-0.5 * np.exp(
            -0.5 * resid**2 / obs_var)

        # partition function, floored so the log / reciprocal stay finite
        Z = np.sum(w * normpdf)
        Z_floored = np.maximum(Z, 1e-8)
        Zinv = 1. / Z_floored
        lZ = np.log(Z_floored)

        # first derivatives of logZ w.r.t. each mean component
        dlZ1 = Zinv * np.sum(w * (resid / obs_var * normpdf))
        deviation = sigma_points - cav_mean[1, 0]
        dlZ2 = Zinv * np.sum(w * (deviation / second_var * normpdf))

        # diagonal second derivatives of logZ
        d2Z_integrand1 = (-obs_var**-1 + (resid / obs_var)**2) * normpdf
        d2lZ1 = -dlZ1**2 + Zinv * np.sum(w * d2Z_integrand1)
        d2Z_integrand2 = (-second_var**-1 + (deviation / second_var)**2) * normpdf
        d2lZ2 = -dlZ2**2 + Zinv * np.sum(w * d2Z_integrand2)

        dlZ = np.block([[dlZ1], [dlZ2]])
        d2lZ = np.block([[d2lZ1, 0], [0., d2lZ2]])
        regulariser = 1e-10 * np.eye(d2lZ.shape[0])
        id2lZ = inv(ensure_positive_precision(-d2lZ) - regulariser)
        # approx. likelihood (site) mean (see Rasmussen & Williams p75)
        site_mean = cav_mean + id2lZ @ dlZ
        # approx. likelihood (site) variance
        site_cov = power * (-cav_cov + id2lZ)
        return lZ, site_mean, site_cov
示例#5
0
 def variational_expectation_cubature(self,
                                      y,
                                      post_mean,
                                      post_cov,
                                      hyp=None,
                                      cubature_func=None):
     """
     Cubature estimate of the expected log-likelihood ("variational
     expectation") and of its derivatives w.r.t. the posterior moments:
         E[log p(yₙ|fₙ)] = ∫ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     :param y: observed data (yₙ)
     :param post_mean: posterior mean (mₙ), a column vector
     :param post_cov: posterior covariance (vₙ)
     :param hyp: likelihood hyperparameter, forwarded to evaluate_log_likelihood
     :param cubature_func: callable mapping a dimension to (sigma points,
         weights); when None, gauss_hermite(dim, 20) is used
     :return:
         exp_log_lik: the expected log likelihood, E[log p(yₙ|fₙ)]
         dE_dm: derivative of E[log p(yₙ|fₙ)] w.r.t. mₙ (column vector)
         dE_dv: derivative of E[log p(yₙ|fₙ)] w.r.t. the marginal
             variances, returned as a diagonal matrix
     """
     dim = post_mean.shape[0]
     # Gauss-Hermite sigma points and weights unless a rule is supplied
     x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

     # fsigᵢ = chol(vₙ) xᵢ + mₙ: place the unit sigma points under the posterior
     sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean
     centred = sigma_points - post_mean
     # wᵢ log p(yₙ|fsigᵢ), shared by all three integrals below
     weighted_ll = w * self.evaluate_log_likelihood(y, sigma_points, hyp)

     # E[log p(yₙ|fₙ)] ≈ ∑ᵢ wᵢ log p(yₙ|fsigᵢ)
     exp_log_lik = np.sum(weighted_ll)

     # dE/dmₙ ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) vₙ⁻¹ log p(yₙ|fsigᵢ), using marginal variances
     inv_var = np.diag(post_cov)[:, None]**-1
     dE_dm = np.sum(inv_var * centred * weighted_ll, axis=-1)[:, None]

     # dE/dvₙ ≈ ∑ᵢ wᵢ [(fsigᵢ-mₙ)² vₙ⁻² - vₙ⁻¹]/2 log p(yₙ|fsigᵢ)
     # (deriv. w.r.t. var = 0.5 * 2nd deriv. w.r.t. mean)
     variance_kernel = 0.5 * (inv_var**2 * centred**2) - 0.5 * inv_var
     dE_dv = np.diag(np.sum(variance_kernel * weighted_ll, axis=-1))
     return exp_log_lik, dE_dm, dE_dv
示例#6
0
    def statistical_linear_regression(self,
                                      cav_mean,
                                      cav_cov,
                                      hyp=None,
                                      cubature_func=None):
        """
        Statistical linear regression for a state split into subband and
        modulator halves, with cubature over the modulators only.

        (Original note: "This gives the same result as above - delete".)

        :param cav_mean: cavity mean, subbands stacked on top of modulators
        :param cav_cov: cavity covariance; only the two diagonal blocks are read
        :param hyp: likelihood hyperparameter, used as Cov[yₙ|fₙ]
        :param cubature_func: callable mapping a dimension to (sigma points,
            weights); when None, gauss_hermite(num_components, 20) is used
        :return:
            mu: cubature estimate of E[yₙ]
            S: cubature estimate of the variance of yₙ
            C: cross term between the state and yₙ (column vector)
            omega: derivative-of-mu term (row vector)
        """
        half = int(cav_mean.shape[0] / 2)
        # Gauss-Hermite sigma points and weights unless a rule is supplied
        x, w = gauss_hermite(half, 20) if cubature_func is None else cubature_func(half)

        subband_mean = cav_mean[:half]
        # NOTE: the link function is applied to the modulator mean here and
        # again to the sigma points below
        modulator_mean = self.link_fn(cav_mean[half:])
        subband_cov = cav_cov[:half, :half]
        modulator_cov = cav_cov[half:, half:]

        sigma_points = cholesky(modulator_cov) @ x + modulator_mean
        link_vals = self.link_fn(sigma_points)
        deviation = sigma_points - modulator_mean

        # conditional moments of yₙ at each sigma point
        lik_expectation = (link_vals.T @ subband_mean).T
        lik_covariance = hyp

        # muₙ ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
        mu = np.sum(w * lik_expectation, axis=-1)[:, None]
        resid = lik_expectation - mu

        # S ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-muₙ)² + Cov[yₙ|fₙ]]
        S = np.sum(w * (resid * resid + lik_covariance), axis=-1)[:, None]

        # C: subband rows use link_fn(fsigᵢ)·diag(subband cov), modulator rows
        # use (fsigᵢ - mₙ); both are weighted by (E[yₙ|fsigᵢ]-muₙ)
        state_terms = np.block([[link_vals * np.diag(subband_cov)[..., None]],
                                [deviation]])
        C = np.sum(w * state_terms * resid, axis=-1)[:, None]

        # omega: subband entries average link_fn(fsigᵢ), modulator entries
        # average vₙ⁻¹ (fsigᵢ - mₙ) E[yₙ|fsigᵢ]
        slope_terms = np.block([[link_vals],
                                [np.diag(modulator_cov)[..., None]**-1 *
                                 deviation * lik_expectation]])
        omega = np.sum(w * slope_terms, axis=-1)[None, :]
        return mu, S, C, omega
def rollout_utility_archive(x,
                            bounds,
                            func_policy,
                            depth_h,
                            _queries,
                            _values,
                            N_q,
                            n_sample=None,
                            decay_rate=0.9,
                            ARD_Flag=False,
                            length_scale=None):
    """Return the rollout utility of querying ``x`` with ``depth_h`` look-ahead steps.

    The utility is ``ei(x, bounds, gp)`` for a GP fitted to
    ``(_queries, _values)``. When ``depth_h`` is non-zero, ``x`` is appended to
    the queries and, for each of the ``N_q`` nodes returned by
    ``gauss_hermite(x, gp, N_q)``, the node value is appended as the outcome at
    ``x``, a GP is refitted, ``func_policy`` picks the next query, and the
    utility of that query one level deeper is added, scaled by the node weight
    and ``decay_rate``.

    Differences from the previous implementation:

    * The running total is a local variable. The old module-level global
      ``U`` was not reset before recursing, so every child call returned the
      parent's partial sum on top of its own value and that sum was counted
      again. The global ``U`` is no longer read or written.
    * ``decay_rate`` (and ``n_sample``) are forwarded by keyword. The old
      recursive call passed ``decay_rate`` positionally into the ``n_sample``
      slot, so deeper levels silently fell back to the default 0.9.

    :param x: candidate query; a 1-D array is promoted to a single row
    :param bounds: search bounds; ``len(bounds)`` is used as the kernel input dimension
    :param func_policy: callable ``(gp_model, depth_h, bounds) -> next query``
    :param depth_h: remaining look-ahead depth; 0 stops the recursion
    :param _queries: queries observed so far (not modified)
    :param _values: values observed so far (not modified)
    :param N_q: number of quadrature nodes used per look-ahead step
    :param n_sample: unused here; forwarded unchanged to recursive calls
    :param decay_rate: discount applied per look-ahead level
    :param ARD_Flag: forwarded to ``GPy.kern.RBF`` as ``ARD``
    :param length_scale: forwarded to ``GPy.kern.RBF`` as ``lengthscale``
    :return: the accumulated utility
    """
    if len(x.shape) == 1:
        x = np.array([x])

    kernel = GPy.kern.RBF(len(bounds), ARD=ARD_Flag, lengthscale=length_scale)
    gp_model = fit(_queries, _values, kernel)  # todo: memo
    utility = ei(x, bounds, gp_model)
    if depth_h == 0:
        return utility

    # pretend x has been queried; the outcome is filled in per quadrature node
    queries = np.concatenate([_queries, x])
    points, weights = gauss_hermite(x, gp_model, N_q)
    for i in range(N_q):
        outcome = np.array([[points[0][i]]])
        values = np.concatenate([_values, outcome])
        kernel = GPy.kern.RBF(len(bounds),
                              ARD=ARD_Flag,
                              lengthscale=length_scale)
        lookahead_model = fit(queries, values, kernel)  # todo: memo
        x_next = func_policy(lookahead_model, depth_h, bounds)
        # non in-place addition so the object returned by ei() is never mutated
        utility = utility + weights[i] * decay_rate * rollout_utility_archive(
            x_next,
            bounds,
            func_policy,
            depth_h - 1,
            queries,
            values,
            N_q,
            n_sample=n_sample,
            decay_rate=decay_rate,
            ARD_Flag=ARD_Flag,
            length_scale=length_scale)
    return utility
示例#8
0
    def variational_expectation(self,
                                y,
                                post_mean,
                                post_cov,
                                hyp=None,
                                cubature_func=None):
        """
        Expected log-likelihood and its derivatives for a state split into
        subband and modulator halves, with cubature over the modulators only.

        At each sigma point the integrand is the Gaussian log-density of y with
        mean link_fn(fsigᵢ)ᵀ · subband mean and variance hyp, plus the
        correction -0.5/hyp · (link_fn(fsigᵢ)²)ᵀ · diag(subband cov).

        Fix: the modulator-mean and modulator-variance derivatives now scale by
        diag(modulator cov). They previously used diag(subband cov), stored
        under the name ``modulator_var``, which disagrees with the distribution
        the sigma points are drawn from.

        :param y: observed data (yₙ)
        :param post_mean: posterior mean, subbands stacked on top of modulators
        :param post_cov: posterior covariance; only the two diagonal blocks are read
        :param hyp: likelihood hyperparameter, used as the observation noise variance
        :param cubature_func: callable mapping a dimension to (sigma points,
            weights); when None, gauss_hermite(num_components, 20) is used
        :return:
            exp_log_lik: the expected log likelihood
            dE_dm: derivative w.r.t. the posterior mean (column vector)
            dE_dv: derivative w.r.t. the marginal variances (diagonal matrix)
        """
        num_components = int(post_mean.shape[0] / 2)
        if cubature_func is None:
            # Gauss-Hermite sigma points and weights
            x, w = gauss_hermite(num_components, 20)
        else:
            x, w = cubature_func(num_components)

        subband_mean = post_mean[:num_components]
        # NOTE: the link function is applied to the modulator mean here and
        # again to the sigma points below
        modulator_mean = self.link_fn(post_mean[num_components:])
        subband_cov = post_cov[:num_components, :num_components]
        modulator_cov = post_cov[num_components:, num_components:]
        sigma_points = cholesky(modulator_cov) @ x + modulator_mean

        subband_var = np.diag(subband_cov)[..., None]
        modulator_var = np.diag(modulator_cov)[..., None]
        link_vals = self.link_fn(sigma_points)
        deviation = sigma_points - modulator_mean

        mu = (link_vals.T @ subband_mean)[:, 0]
        lognormpdf = -0.5 * np.log(2 * pi * hyp) - 0.5 * (y - mu)**2 / hyp
        # contribution of the subband uncertainty to E[(y - link·subband)²]
        const = -0.5 / hyp * (link_vals.T**2 @ subband_var)[:, 0]
        integrand = lognormpdf + const
        exp_log_lik = np.sum(w * integrand)

        # derivative w.r.t. the subband mean (analytic inside the integral)
        dE1 = np.sum(w * link_vals * (y - mu) / hyp, axis=-1)
        # derivative w.r.t. the modulator mean: E[(f - m) v⁻¹ g(f)]
        dE2 = np.sum(w * deviation * modulator_var**-1 * integrand, axis=-1)
        dE_dm = np.block([dE1, dE2])[..., None]

        # derivative w.r.t. the subband variance
        d2E1 = np.sum(w * -0.5 * link_vals**2 / hyp, axis=-1)
        # derivative w.r.t. the modulator variance: E[((f-m)² v⁻² - v⁻¹)/2 g(f)]
        d2E2 = np.sum(
            w * 0.5 *
            ((deviation * modulator_var**-1)**2 - modulator_var**-1) * integrand,
            axis=-1)
        dE_dv = np.diag(np.block([d2E1, d2E2]))
        return exp_log_lik, dE_dm, dE_dv
示例#9
0
 def statistical_linear_regression_cubature(self,
                                            cav_mean,
                                            cav_cov,
                                            hyp=None,
                                            cubature_func=None):
     """
     Perform statistical linear regression (SLR) using cubature.
     We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).
     TODO: this currently assumes an additive noise model (ok for our current applications), make more general
     TODO: allow for multi-dim cubature (products below are element-wise)

     :param cav_mean: cavity mean (mₙ), a column vector
     :param cav_cov: cavity covariance (vₙ)
     :param hyp: likelihood hyperparameter, forwarded to conditional_moments
     :param cubature_func: callable mapping a dimension to (sigma points,
         weights); when None, gauss_hermite(dim, 20) is used
     :return:
         mu: cubature estimate of zₙ = E[yₙ]
         S: cubature estimate of the variance of yₙ
         C: cross covariance between fₙ and yₙ
         omega: derivative of zₙ w.r.t. the cavity mean
     """
     dim = cav_mean.shape[0]
     # Gauss-Hermite sigma points and weights unless a rule is supplied
     x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

     # fsigᵢ = chol(vₙ) xᵢ + mₙ: scale locations according to cavity dist.
     sigma_points = cholesky(cav_cov) @ np.atleast_2d(x) + cav_mean
     centred = sigma_points - cav_mean
     lik_expectation, lik_covariance = self.conditional_moments(sigma_points, hyp)

     # zₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
     mu = np.sum(w * lik_expectation, axis=-1)[:, None]
     resid = lik_expectation - mu

     # S ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
     S = np.sum(w * (resid * resid + lik_covariance), axis=-1)[:, None]

     # C ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) (E[yₙ|fsigᵢ]-zₙ)'
     C = np.sum(w * centred * resid, axis=-1)[:, None]

     # omega ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ] vₙ⁻¹ (fsigᵢ-mₙ)
     whitened = inv(cav_cov) @ centred
     omega = np.sum(w * lik_expectation * whitened, axis=-1)[None, :]
     return mu, S, C, omega
示例#10
0
 def statistical_linear_regression(self,
                                   cav_mean,
                                   cav_cov,
                                   hyp=None,
                                   cubature_func=None):
     """
     Perform statistical linear regression (SLR) using cubature.
     We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).

     The state has two components: the first enters the mean of yₙ directly,
     the second sets the conditional variance through link_fn(·)². Because the
     mean is linear in the first component, mu and omega are written down
     directly; only S and C use the sigma points.

     :param cav_mean: cavity mean (mₙ), indexed as [0, 0] and [1, 0]
     :param cav_cov: cavity covariance (vₙ)
     :param hyp: likelihood hyperparameter (not used by this method)
     :param cubature_func: callable mapping a dimension to (sigma points,
         weights); when None, gauss_hermite(dim, 20) is used
     :return:
         mu: E[yₙ], equal to the first cavity mean, as a 1x1 array
         S: variance of yₙ, as a 1x1 array
         C: cross covariance between fₙ and yₙ, as a 2x1 array
         omega: derivative of mu w.r.t. the cavity mean, the row [1, 0]
     """
     dim = cav_mean.shape[0]
     # Gauss-Hermite sigma points and weights unless a rule is supplied
     x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

     first_mean = cav_mean[0, 0]
     first_var = cav_cov[0, 0]

     # fsigᵢ = chol(vₙ) xᵢ + mₙ: scale locations according to cavity dist.
     sigma_points = cholesky(cav_cov) @ x + cav_mean
     # conditional variance of yₙ at each sigma point
     var = self.link_fn(sigma_points[1])**2

     # zₙ = E[yₙ] is just the first cavity mean
     mu = first_mean.reshape(1, 1)

     # S = first-component variance + ∑ᵢ wᵢ Cov[yₙ|fsigᵢ]
     S = (first_var + np.sum(w * var)).reshape(1, 1)

     # C ≈ ∑ᵢ wᵢ (fsigᵢ-mₙ) (E[yₙ|fsigᵢ]-zₙ)' with E[yₙ|fsigᵢ] = fsigᵢ[0]
     C = np.sum(w * (sigma_points - cav_mean) * (sigma_points[0] - first_mean),
                axis=-1).reshape(2, 1)

     # derivative of zₙ w.r.t. the cavity mean
     omega = np.block([[1., 0.]])
     return mu, S, C, omega
示例#11
0
 def variational_expectation(self, y, m, v, hyp=None, cubature_func=None):
     """
     Expected log-likelihood and its derivatives for a two-component state.

     The first component (m0, v0) is integrated analytically, the second
     (m1, v1) by cubature. With var(f) = link_fn(f)² the integrand is
         -0.5 log(2π) - 0.5 [ log var(f) + ((y - m0)² + v0) / var(f) ]

     Fix: the derivative w.r.t. m0 is now ∑ᵢ wᵢ (y - m0) / varᵢ. It previously
     used (y - m0 + v0), but v0 does not depend on m0 and so drops out when
     the expression above is differentiated.

     :param y: observed data (yₙ)
     :param m: posterior mean, indexed as [0, 0] and [1, 0]
     :param v: posterior covariance; only entries [0, 0] and [1, 1] are read
     :param hyp: likelihood hyperparameter (not used by this method)
     :param cubature_func: callable mapping a dimension to (sigma points,
         weights); when None, gauss_hermite(1, 20) is used
     :return:
         exp_log_lik: the expected log likelihood
         dE_dm: derivatives w.r.t. (m0, m1), a 2x1 array
         dE_dv: derivatives w.r.t. (v0, v1), a diagonal 2x2 array
     """
     if cubature_func is None:
         # Gauss-Hermite sigma points and weights
         x, w = gauss_hermite(1, 20)
     else:
         x, w = cubature_func(1)
     m0, m1, v0, v1 = m[0, 0], m[1, 0], v[0, 0], v[1, 1]
     # fsigᵢ = xᵢ√v1 + m1: scale locations according to the second component
     sigma_points = np.sqrt(v1) * x + m1
     deviation = sigma_points - m1

     # bracketed part of the integrand, weighted: wᵢ [log varᵢ + ((y-m0)² + v0)/varᵢ]
     var = self.link_fn(sigma_points)**2
     log_lik = np.log(var) + var**-1 * ((y - m0)**2 + v0)
     weighted_log_likelihood_eval = w * log_lik

     # E[log p(yₙ|fₙ)] ≈ -0.5 log(2π) - 0.5 ∑ᵢ wᵢ [ ... ]
     exp_log_lik = -0.5 * np.log(
         2 * pi) - 0.5 * np.sum(weighted_log_likelihood_eval)

     # dE/dm0 = ∑ᵢ wᵢ (y - m0) / varᵢ (analytic in the first component)
     dE_dm1 = np.sum((var**-1 * (y - m0)) * w)
     # dE/dm1 ≈ -0.5 ∑ᵢ wᵢ (fsigᵢ-m1) v1⁻¹ [ ... ]
     dE_dm2 = -0.5 * np.sum(weighted_log_likelihood_eval * v1**-1 * deviation)

     # dE/dv0 = -0.5 ∑ᵢ wᵢ / varᵢ
     dE_dv1 = -0.5 * np.sum(var**-1 * w)
     # dE/dv1 ≈ -0.5 ∑ᵢ wᵢ [(fsigᵢ-m1)² v1⁻² - v1⁻¹]/2 [ ... ]
     dE_dv2 = -0.25 * np.sum(
         (v1**-2 * deviation**2 - v1**-1) * weighted_log_likelihood_eval)

     dE_dm = np.block([[dE_dm1], [dE_dm2]])
     dE_dv = np.block([[dE_dv1, 0], [0., dE_dv2]])
     return exp_log_lik, dE_dm, dE_dv
示例#12
0
    def moment_match_cubature(self,
                              y,
                              cav_mean,
                              cav_cov,
                              hyp=None,
                              power=1.0,
                              cubature_func=None):
        """
        TODO: N.B. THIS VERSION IS SUPERSEDED BY THE MULTI-DIMENSIONAL VARIANT. HOWEVER THIS ONE MAY BE MORE STABLE.
        Perform moment matching via cubature.
        Moment matching involves computing the log partition function, logZₙ, and its derivatives w.r.t. the cavity mean
            logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
        with EP power a. The derivatives are then turned into the mean and covariance of the approximate
        likelihood (site).
        :param y: observed data (yₙ) [scalar]
        :param cav_mean: cavity mean (mₙ) [scalar]
        :param cav_cov: cavity covariance (cₙ) [scalar]
        :param hyp: likelihood hyperparameter [scalar]
        :param power: EP power / fraction (a) [scalar]
        :param cubature_func: the function to compute sigma points and weights to use during cubature
        :return:
            lZ: the log partition function, logZₙ
            site_mean: approx. likelihood (site) mean
            site_cov: approx. likelihood (site) variance
        """
        dim = cav_mean.shape[0]
        # Gauss-Hermite sigma points and weights unless a rule is supplied
        x, w = gauss_hermite(dim, 20) if cubature_func is None else cubature_func(dim)

        cav_cho, low = cho_factor(cav_cov)
        # fsigᵢ=xᵢ√cₙ + mₙ: scale locations according to cavity dist.
        sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean
        deviation = sigma_points - cav_mean

        # wᵢ pᵃ(yₙ|fsigᵢ), shared by all the integrals below.
        # NOTE: an alternative based on the log-likelihood (subtract the max
        # log-likelihood before exponentiating) can be more stable.
        weighted_likelihood_eval = w * self.evaluate_likelihood(
            y, sigma_points, hyp)**power

        # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
        Z = np.sum(weighted_likelihood_eval, axis=-1)
        lZ = np.log(Z)
        Zinv = 1.0 / Z

        # dZₙ/dmₙ ≈ ∑ᵢ wᵢ vₙ⁻¹ (fsigᵢ-mₙ) pᵃ(yₙ|fsigᵢ), with vₙ⁻¹(·) solved
        # through the Cholesky factor
        covinv_f_m = cho_solve((cav_cho, low), deviation)
        dZ = np.sum(covinv_f_m * weighted_likelihood_eval, axis=-1)
        # dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
        dlZ = Zinv * dZ

        # d²Zₙ/dmₙ² ≈ ∑ᵢ wᵢ [(fsigᵢ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fsigᵢ)
        # (element-wise in cav_cov, hence the scalar restriction)
        curvature_kernel = deviation**2 / cav_cov**2 - 1.0 / cav_cov
        d2Z = np.sum(curvature_kernel * weighted_likelihood_eval)

        # d²logZₙ/dmₙ² = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
        d2lZ = -dlZ @ dlZ.T + Zinv * d2Z
        regulariser = 1e-10 * np.eye(d2lZ.shape[0])
        id2lZ = inv(ensure_positive_precision(-d2lZ) - regulariser)
        # approx. likelihood (site) mean (see Rasmussen & Williams p75)
        site_mean = cav_mean + id2lZ @ dlZ
        # approx. likelihood (site) variance
        site_cov = power * (-cav_cov + id2lZ)
        return lZ, site_mean, site_cov
示例#13
0
    def moment_match_cubature(self,
                              y,
                              cav_mean,
                              cav_cov,
                              hyp=None,
                              power=1.0,
                              cubature_func=None):
        """
        TODO: N.B. THIS VERSION ALLOWS MULTI-DIMENSIONAL MOMENT MATCHING, BUT CAN BE UNSTABLE
        Perform moment matching via cubature.
        Moment matching involves computing the log partition function, logZₙ, and its derivatives w.r.t. the cavity mean
            logZₙ = log ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
        with EP power a. The derivatives are then turned into the mean and covariance of the approximate
        likelihood (site).

        Fix: the sigma points are now scaled with the LOWER Cholesky factor L (cav_cov = L Lᵀ), so that their
        covariance is cav_cov. cho_factor defaults to the upper factor U (cav_cov = Uᵀ U); U @ x has covariance
        U Uᵀ, which differs from cav_cov whenever the cavity components are correlated.

        :param y: observed data (yₙ)
        :param cav_mean: cavity mean (mₙ), a column vector
        :param cav_cov: cavity covariance (cₙ)
        :param hyp: likelihood hyperparameter
        :param power: EP power / fraction (a) [scalar]
        :param cubature_func: the function to compute sigma points and weights to use during cubature
        :return:
            lZ: the log partition function, logZₙ (Zₙ floored at 1e-8)
            site_mean: approx. likelihood (site) mean
            site_cov: approx. likelihood (site) covariance
        """
        if cubature_func is None:
            # Gauss-Hermite sigma points and weights
            x, w = gauss_hermite(cav_mean.shape[0], 20)
        else:
            x, w = cubature_func(cav_mean.shape[0])
        # Lower factor; np.tril masks the unused triangle, which some
        # cho_factor implementations leave filled with arbitrary data.
        cav_cho, _ = cho_factor(cav_cov, lower=True)
        cav_cho = np.tril(cav_cho)
        # fsigᵢ = L xᵢ + mₙ: scale locations according to cavity dist.
        sigma_points = cav_cho @ np.atleast_2d(x) + cav_mean
        # pre-compute wᵢ pᵃ(yₙ|fsigᵢ)
        weighted_likelihood_eval = w * self.evaluate_likelihood(
            y, sigma_points, hyp)**power

        # Compute partition function via cubature:
        # Zₙ = ∫ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
        #    ≈ ∑ᵢ wᵢ pᵃ(yₙ|fsigᵢ)
        Z = np.sum(weighted_likelihood_eval, axis=-1)
        # floor Zₙ so the log / reciprocal stay finite
        lZ = np.log(np.maximum(Z, 1e-8))
        Zinv = 1.0 / np.maximum(Z, 1e-8)

        # Compute derivative of partition function via cubature:
        # dZₙ/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
        #         ≈ ∑ᵢ wᵢ (fₙ-mₙ) vₙ⁻¹ pᵃ(yₙ|fsigᵢ)
        # (evaluated per sigma point by the vmapped helper, then summed)
        d1 = vmap(gaussian_first_derivative_wrt_mean,
                  (1, None, None, 1))(sigma_points[..., None], cav_mean,
                                      cav_cov, weighted_likelihood_eval)
        dZ = np.sum(d1, axis=0)
        # dlogZₙ/dmₙ = (dZₙ/dmₙ) / Zₙ
        dlZ = Zinv * dZ

        # Compute second derivative of partition function via cubature:
        # d²Zₙ/dmₙ² = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
        #           ≈ ∑ᵢ wᵢ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹] pᵃ(yₙ|fsigᵢ)
        d2 = vmap(gaussian_second_derivative_wrt_mean,
                  (1, None, None, 1))(sigma_points[..., None], cav_mean,
                                      cav_cov, weighted_likelihood_eval)
        d2Z = np.sum(d2, axis=0)

        # d²logZₙ/dmₙ² = d[(dZₙ/dmₙ) / Zₙ]/dmₙ
        #              = (d²Zₙ/dmₙ² * Zₙ - (dZₙ/dmₙ)²) / Zₙ²
        #              = d²Zₙ/dmₙ² / Zₙ - (dlogZₙ/dmₙ)²
        d2lZ = -dlZ @ dlZ.T + Zinv * d2Z
        id2lZ = inv(
            ensure_positive_precision(-d2lZ) - 1e-10 * np.eye(d2lZ.shape[0]))
        # approx. likelihood (site) mean (see Rasmussen & Williams p75)
        site_mean = cav_mean + id2lZ @ dlZ
        # approx. likelihood (site) variance
        site_cov = power * (-cav_cov + id2lZ)
        return lZ, site_mean, site_cov