Example #1
def get_cond_params(learned_params: dict,
                    x: Array,
                    y: Array,
                    jitter: float = 1e-5) -> dict:

    params = deepcopy(learned_params)
    n_samples = x.shape[0]

    # calculate the cholesky factorization
    Kuu = rbf_kernel(params["x_u"], params["x_u"], params["variance"],
                     params["length_scale"])
    Kuu = add_to_diagonal(Kuu, jitter)
    Luu = cholesky(Kuu, lower=True)

    Kuf = rbf_kernel(params["x_u"], x, params["variance"],
                     params["length_scale"])

    W = solve_triangular(Luu, Kuf, lower=True)
    D = np.ones(n_samples) * params["obs_noise"]

    W_Dinv = W / D
    K = W_Dinv @ W.T
    K = add_to_diagonal(K, 1.0)

    L = cholesky(K, lower=True)

    # mean function
    y_residual = y  # mean function
    y_2D = y_residual.reshape(-1, n_samples).T
    W_Dinv_y = W_Dinv @ y_2D

    return {"Luu": Luu, "W_Dinv_y": W_Dinv_y, "L": L}
Example #2
 def model(self, batch):
     XL, XH = batch['XL'], batch['XH']
     y = batch['y']
     NL, NH = XL.shape[0], XH.shape[0]
     D = XH.shape[1]
     # set uninformative log-normal priors for low-fidelity kernel
     var_L = sample('kernel_var_L', dist.LogNormal(0.0, 1.0), sample_shape = (1,))
     length_L = sample('kernel_length_L', dist.LogNormal(0.0, 1.0), sample_shape = (D,))
     theta_L = np.concatenate([var_L, length_L])
     # set uninformative log-normal priors for high-fidelity kernel
     var_H = sample('kernel_var_H', dist.LogNormal(0.0, 1.0), sample_shape = (1,))
     length_H = sample('kernel_length_H', dist.LogNormal(0.0, 1.0), sample_shape = (D,))
     theta_H = np.concatenate([var_H, length_H])
     # prior for rho
     rho = sample('rho', dist.Normal(0.0, 10.0), sample_shape = (1,))
     # Compute kernels
     K_LL = self.kernel(XL, XL, theta_L) + np.eye(NL)*1e-8
     K_LH = rho*self.kernel(XL, XH, theta_L)
     K_HH = rho**2 * self.kernel(XH, XH, theta_L) + \
                     self.kernel(XH, XH, theta_H) + np.eye(NH)*1e-8
     K = np.vstack((np.hstack((K_LL,K_LH)),
                    np.hstack((K_LH.T,K_HH))))
     L = cholesky(K, lower=True)
     # Generate latent function
     beta_L = sample('beta_L', dist.Normal(0.0, 1.0))
     beta_H = sample('beta_H', dist.Normal(0.0, 1.0))
     eta_L = sample('eta_L', dist.Normal(0.0, 1.0), sample_shape=(NL,))
     eta_H = sample('eta_H', dist.Normal(0.0, 1.0), sample_shape=(NH,))
     beta = np.concatenate([beta_L*np.ones(NL), beta_H*np.ones(NH)])
     eta = np.concatenate([eta_L, eta_H])
     f = np.matmul(L, eta) + beta
     # Bernoulli likelihood
     sample('y', dist.Bernoulli(logits=f), obs=y)
Example #3
 def posterior_sample(self, key, sample, X_star, **kwargs):
     # Fetch training data
     batch = kwargs['batch']
     X = batch['X']
     # Fetch params
     var = sample['kernel_var']
     length = sample['kernel_length']
     beta = sample['beta']
     eta = sample['eta']
     theta = np.concatenate([var, length])
     # Compute kernels
     K_xx = self.kernel(X, X, theta) + np.eye(X.shape[0]) * 1e-8
     k_pp = self.kernel(X_star, X_star,
                        theta) + np.eye(X_star.shape[0]) * 1e-8
     k_pX = self.kernel(X_star, X, theta)
     L = cholesky(K_xx, lower=True)
     f = np.matmul(L, eta) + beta
     tmp_1 = solve_triangular(L.T, solve_triangular(L, f, lower=True))
     tmp_2 = solve_triangular(L.T, solve_triangular(L, k_pX.T, lower=True))
     # Compute predictive mean
     mu = np.matmul(k_pX, tmp_1)
     cov = k_pp - np.matmul(k_pX, tmp_2)
     std = np.sqrt(np.clip(np.diag(cov), a_min=0.))
     sample = mu + std * random.normal(key, mu.shape)
     return mu, sample
Example #4
def get_cond_params(
    kernel, params: dict, x: Array, y: Array, jitter: float = 1e-5
) -> dict:

    params = deepcopy(params)
    x_u = params.pop("x_u")
    obs_noise = params.pop("obs_noise")
    kernel = kernel(**params)
    n_samples = x.shape[0]

    # calculate the cholesky factorization
    Luu, W, D = vfe_precompute(x, x_u, obs_noise, kernel, jitter=jitter)

    W_Dinv = W.T / D
    K = W_Dinv @ W
    K = add_to_diagonal(K, 1.0)
    L = cholesky(K, lower=True)

    # mean function
    y_residual = y  # mean function
    y_2D = y_residual.reshape(-1, n_samples).T
    W_Dinv_y = W_Dinv @ y_2D

    return {
        "X": x,
        "y": y,
        "Luu": Luu,
        "L": L,
        "W_Dinv_y": W_Dinv_y,
        "x_u": x_u,
        "kernel_params": params,
        "obs_noise": obs_noise,
        "kernel": kernel,
    }
Example #5
File: kde.py Project: romanngg/jax
def _gaussian_kernel_eval(in_log, points, values, xi, precision):
    points, values, xi, precision = _promote_dtypes_inexact(
        points, values, xi, precision)
    d = points.shape[1]

    if xi.shape[1] != d:
        raise ValueError("points and xi must have same trailing dim")
    if precision.shape != (d, d):
        raise ValueError("precision matrix must match data dims")

    whitening = linalg.cholesky(precision, lower=True)
    points = jnp.dot(points, whitening)
    xi = jnp.dot(xi, whitening)
    log_norm = jnp.sum(jnp.log(
        jnp.diag(whitening))) - 0.5 * d * jnp.log(2 * np.pi)

    def kernel(x_test, x_train, y_train):
        arg = log_norm - 0.5 * jnp.sum(jnp.square(x_train - x_test))
        if in_log:
            return jnp.log(y_train) + arg
        else:
            return y_train * jnp.exp(arg)

    reduce = special.logsumexp if in_log else jnp.sum
    reduced_kernel = lambda x: reduce(vmap(kernel, in_axes=(None, 0, 0))
                                      (x, points, values),
                                      axis=0)
    mapped_kernel = vmap(reduced_kernel)

    return mapped_kernel(xi)
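This helper backs JAX's public gaussian_kde interface; a hedged usage sketch of that API (which mirrors scipy.stats.gaussian_kde):

import jax.numpy as jnp
from jax import random
from jax.scipy.stats import gaussian_kde

key = random.PRNGKey(0)
data = random.normal(key, (1, 200))        # (d, n) layout, as in scipy
kde = gaussian_kde(data)
grid = jnp.linspace(-3.0, 3.0, 50)[None, :]
density = kde.evaluate(grid)               # kernel-weighted sum over the data
log_density = kde.logpdf(grid)             # same evaluation in log space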
Example #6
    def variational_expectation(self, y, post_mean, post_cov, cubature=None):
        """
        """
        num_components = int(post_mean.shape[0] / 2)
        if cubature is None:
            x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
        else:
            x, w = cubature(num_components)

        # subband_mean, modulator_mean = post_mean[:num_components], self.link_fn(post_mean[num_components:])
        subband_mean, modulator_mean = post_mean[:num_components], post_mean[num_components:]  # TODO: CHECK
        subband_cov, modulator_cov = post_cov[:num_components, :num_components], post_cov[num_components:,
                                                                                 num_components:]
        sigma_points = cholesky(modulator_cov) @ x + modulator_mean

        modulator_var = np.diag(subband_cov)[..., None]
        mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
        lognormpdf = -0.5 * np.log(2 * np.pi * self.variance) - 0.5 * (y - mu) ** 2 / self.variance
        const = -0.5 / self.variance * (self.link_fn(sigma_points).T ** 2 @ modulator_var)[:, 0]
        exp_log_lik = np.sum(w * (lognormpdf + const))

        dE1 = np.sum(w * self.link_fn(sigma_points) * (y - mu) / self.variance, axis=-1)
        dE2 = np.sum(w * (sigma_points - modulator_mean) * modulator_var ** -1
                     * (lognormpdf + const), axis=-1)
        dE_dm = np.block([dE1, dE2])[..., None]

        d2E1 = np.sum(w * - 0.5 * self.link_fn(sigma_points) ** 2 / self.variance, axis=-1)
        d2E2 = np.sum(w * 0.5 * (
                ((sigma_points - modulator_mean) * modulator_var ** -1) ** 2
                - modulator_var ** -1
        ) * (lognormpdf + const), axis=-1)
        dE_dv = np.diag(np.block([d2E1, d2E2]))
        return exp_log_lik, dE_dm, dE_dv
Example #7
    def mll(ds: Dataset):

        x, y = ds.X, ds.y

        params = {}

        for iname, iparam in numpyro_params.items():
            if iparam["param_type"] == "prior":
                params[iname] = numpyro.sample(name=iname, fn=iparam["prior"])
            else:
                params[iname] = numpyro.param(
                    name=iname,
                    init_value=iparam["init_value"],
                    constraint=iparam["constraint"],
                )
        # get mean function
        mu = gp.prior.mean_function(x)

        # covariance function
        gram_matrix = gram(gp.prior.kernel, x, params)
        gram_matrix += params["obs_noise"] * I(x.shape[0])

        # scale triangular matrix
        L = cholesky(gram_matrix, lower=True)
        return numpyro.sample(
            "y",
            dist.MultivariateNormal(loc=mu, scale_tril=L),
            obs=y.squeeze(),
        )
Example #8
def GP(X, y):

    X = numpyro.deterministic("X", X)

    # Set informative priors on kernel hyperparameters.
    η = numpyro.sample("variance", dist.HalfCauchy(scale=5.0))
    ℓ = numpyro.sample("length_scale", dist.Gamma(2.0, 1.0))
    σ = numpyro.sample("obs_noise", dist.HalfCauchy(scale=5.0))

    # Compute kernel
    K = rbf_kernel(X, X, η, ℓ)
    K = add_to_diagonal(K, σ)
    K = add_to_diagonal(K, wandb.config.jitter)
    # cholesky decomposition
    Lff = numpyro.deterministic("Lff", cholesky(K, lower=True))

    # Sample y according to the standard gaussian process formula
    return numpyro.sample(
        "y",
        dist.MultivariateNormal(loc=jnp.zeros(X.shape[0]),
                                scale_tril=Lff).expand_by(
                                    y.shape[:-1])  # for multioutput scenarios
        .to_event(y.ndim - 1),
        obs=y,
    )
Example #9
def _multivariate_normal(key, mean, cov, shape, dtype):
    """Sample multivariate normal random values with given shape, mean, and covariance.

  Args:
    key: a PRNGKey used as the random key.
    mean: optional, a scalar or array of mean values along each dimension
    cov: optional, a scalar (isotropic), vector (diagonal covariance matrix), or full covariance matrix
    shape: optional, a tuple of nonnegative integers representing the shape.

  Returns:
    A random array of the requested shape, plus a trailing latent dimension
    taken from cov (or mean) when either is non-scalar.
  """
    _check_shape("multivariate_normal", shape)
    if hasattr(mean, "shape") and mean.ndim > 1:
        raise ValueError("Mean cannot have more than 1 dimension.")
    if hasattr(cov, "shape") and cov.ndim > 0:
        if cov.ndim > 2:
            raise ValueError(
                "Covariance matrix cannot have more than 2 dimensions.")
        shape = shape + cov.shape[:1]
        normal_samples = normal(key, shape, dtype)
        if cov.ndim == 2:
            samples = np.tensordot(normal_samples, cholesky(cov), axes=1)
        else:
            samples = normal_samples * np.sqrt(cov)
    else:
        if hasattr(mean, "shape") and mean.ndim > 0:
            shape = shape + mean.shape[:1]
        normal_samples = normal(key, shape, dtype)
        samples = np.sqrt(cov) * normal_samples
    return samples + mean
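For reference, the public entry point that wraps this kind of helper is jax.random.multivariate_normal; a brief usage sketch:

import jax.numpy as jnp
from jax import random

key = random.PRNGKey(0)
mean = jnp.zeros(3)
cov = jnp.array([[1.0, 0.2, 0.0],
                 [0.2, 1.0, 0.1],
                 [0.0, 0.1, 1.0]])
# draws are generated by transforming standard normals with cholesky(cov)
samples = random.multivariate_normal(key, mean, cov, shape=(1000,))
print(samples.shape)  # (1000, 3)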
Example #10
    def evaluate(self):
        K = self.model.kernel.function(self.model.X,
                                       self.model.parameters)\
            + jnp.eye(self.N) * (self.model.parameters["noise"] + 1e-8)

        self.L = cholesky(K, lower=True)
        self.alpha = solve_triangular(
            self.L.T, solve_triangular(self.L, self.model.y, lower=True))
Example #11
 def observation_model(self, f, sigma):
     """
     The implicit observation model is:
         h(fₙ,rₙ) = E[yₙ|fₙ] + √Cov[yₙ|fₙ] σₙ
     """
     conditional_expectation, conditional_covariance = self.conditional_moments(f)
     obs_model = conditional_expectation + cholesky(conditional_covariance) @ sigma
     return np.squeeze(obs_model)
Example #12
def precompute(X, obs_noise, kernel, jitter):

    # Kernel
    Kff = kernel.gram(X)
    Kff = add_to_diagonal(Kff, obs_noise)
    Kff = add_to_diagonal(Kff, jitter)
    Lff = cholesky(Kff, lower=True)

    return Lff
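Once Lff is available, the standard GP weights alpha = Kff⁻¹ y follow from two triangular solves; a minimal sketch (cho_solve is from jax.scipy.linalg, and y is assumed to be the training targets):

from jax.scipy.linalg import cho_solve

def posterior_weights(Lff, y):
    # solve (Lff @ Lff.T) alpha = y using the lower-triangular factor
    return cho_solve((Lff, True), y)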
Example #13
 def observation_model(self, f, sigma):
     """
     TODO: sort out broadcasting so we don't need this additional function (only difference is the transpose)
     The implicit observation model is:
         h(fₙ,rₙ) = E[yₙ|fₙ] + √Cov[yₙ|fₙ] σₙ
     """
     conditional_expectation, conditional_covariance = self.conditional_moments(f)
     obs_model = conditional_expectation + cholesky(conditional_covariance.T) @ sigma
     return np.squeeze(obs_model)
Example #14
    def moment_match(self,
                     y,
                     cav_mean,
                     cav_cov,
                     hyp=None,
                     power=1.0,
                     cubature_func=None):
        """
        """
        num_components = int(cav_mean.shape[0] / 2)
        if cubature_func is None:
            x, w = gauss_hermite(num_components,
                                 20)  # Gauss-Hermite sigma points and weights
        else:
            x, w = cubature_func(num_components)

        subband_mean, modulator_mean = cav_mean[:num_components], self.link_fn(
            cav_mean[num_components:])
        subband_cov, modulator_cov = cav_cov[:num_components, :
                                             num_components], cav_cov[
                                                 num_components:,
                                                 num_components:]
        sigma_points = cholesky(modulator_cov) @ x + modulator_mean
        const = power**-0.5 * (2 * pi * hyp)**(0.5 - 0.5 * power)
        mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
        var = hyp / power + (self.link_fn(sigma_points).T**2
                             @ np.diag(subband_cov)[..., None])[:, 0]
        normpdf = const * (2 * pi * var)**-0.5 * np.exp(-0.5 *
                                                        (y - mu)**2 / var)
        Z = np.sum(w * normpdf)
        Zinv = 1. / (Z + 1e-8)
        lZ = np.log(Z + 1e-8)

        dZ1 = np.sum(w * self.link_fn(sigma_points) * (y - mu) / var * normpdf,
                     axis=-1)
        dZ2 = np.sum(w * (sigma_points - modulator_mean) *
                     np.diag(modulator_cov)[..., None]**-1 * normpdf,
                     axis=-1)
        dlZ = Zinv * np.block([dZ1, dZ2])

        d2Z1 = np.sum(w * self.link_fn(sigma_points)**2 *
                      (((y - mu) / var)**2 - var**-1) * normpdf,
                      axis=-1)
        d2Z2 = np.sum(w * (((sigma_points - modulator_mean) *
                            np.diag(modulator_cov)[..., None]**-1)**2 -
                           np.diag(modulator_cov)[..., None]**-1) * normpdf,
                      axis=-1)
        d2lZ = np.diag(-dlZ**2 + Zinv * np.block([d2Z1, d2Z2]))
        id2lZ = inv(
            ensure_positive_precision(-d2lZ) - 1e-10 * np.eye(d2lZ.shape[0]))
        site_mean = cav_mean + id2lZ @ dlZ[
            ...,
            None]  # approx. likelihood (site) mean (see Rasmussen & Williams p75)
        site_cov = power * (-cav_cov + id2lZ
                            )  # approx. likelihood (site) variance
        return lZ, site_mean, site_cov
Example #15
 def compute_cholesky(self, params, batch):
     X = batch['X']
     N, D = X.shape
     # Fetch params
     sigma_n = params[-1]
     theta = params[:-1]
     # Compute kernel
     K = self.kernel(X, X, theta) + np.eye(N) * (sigma_n + 1e-8)
     L = cholesky(K, lower=True)
     return L
Example #16
def vfe_precompute(X, X_u, obs_noise, kernel, jitter: float = 1e-5):

    # Kernel
    Kuu = kernel.gram(X_u)
    Kuu = add_to_diagonal(Kuu, jitter)
    Luu = cholesky(Kuu, lower=True)

    Kuf = kernel.cross_covariance(X_u, X)

    # compute W
    W = solve_triangular(Luu, Kuf, lower=True).T

    # compute D
    D = jnp.ones(Kuf.shape[1]) * obs_noise

    return Luu, W, D
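Here W satisfies Qff = Kfu Kuu⁻¹ Kuf = W @ W.T, so the approximate marginal covariance used downstream is low-rank plus diagonal; a quick self-contained sketch under those assumed shapes:

import jax.numpy as jnp

def approx_marginal_cov(W, D):
    # low-rank-plus-diagonal covariance used by the VFE/FITC approximations
    return W @ W.T + jnp.diag(D)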
Example #17
def fitc_precompute(X, X_u, obs_noise, kernel, jitter: float = 1e-5):

    # Kernel
    Kuu = kernel.gram(X_u)
    Kuu = add_to_diagonal(Kuu, jitter)
    Luu = cholesky(Kuu, lower=True)

    Kuf = kernel.cross_covariance(X_u, X)

    # compute W
    W = solve_triangular(Luu, Kuf, lower=True).T

    Kffdiag = kernel.diag(X)
    Qffdiag = jnp.power(W, 2).sum(axis=1)
    D = Kffdiag - Qffdiag + obs_noise

    return Luu, W, D
Example #18
 def variational_expectation_cubature(self,
                                      y,
                                      post_mean,
                                      post_cov,
                                      hyp=None,
                                      cubature_func=None):
     """
     Computes the "variational expectation" via cubature, i.e. the
     expected log-likelihood, and its derivatives w.r.t. the posterior mean
         E[log p(yₙ|fₙ)] = ∫ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     with EP power a.
     :param y: observed data (yₙ) [scalar]
     :param post_mean: posterior mean (mₙ) [scalar]
     :param post_cov: posterior variance (vₙ) [scalar]
     :param hyp: likelihood hyperparameter [scalar]
     :param cubature_func: the function to compute sigma points and weights to use during cubature
     :return:
         exp_log_lik: the expected log likelihood, E[log p(yₙ|fₙ)]  [scalar]
         dE_dm: derivative of E[log p(yₙ|fₙ)] w.r.t. mₙ  [scalar]
         dE_dv: derivative of E[log p(yₙ|fₙ)] w.r.t. vₙ  [scalar]
     """
     if cubature_func is None:
         x, w = gauss_hermite(post_mean.shape[0],
                              20)  # Gauss-Hermite sigma points and weights
     else:
         x, w = cubature_func(post_mean.shape[0])
     # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to cavity dist.
     sigma_points = cholesky(post_cov) @ np.atleast_2d(x) + post_mean
     # pre-compute wᵢ log p(yₙ|fsigᵢ)
     weighted_log_likelihood_eval = w * self.evaluate_log_likelihood(
         y, sigma_points, hyp)
     # Compute expected log likelihood via cubature:
     # E[log p(yₙ|fₙ)] = ∫ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     #                 ≈ ∑ᵢ wᵢ log p(yₙ|fsigᵢ)
     exp_log_lik = np.sum(weighted_log_likelihood_eval)
     # Compute first derivative via cubature:
     # dE[log p(yₙ|fₙ)]/dmₙ = ∫ (fₙ-mₙ) vₙ⁻¹ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     #                      ≈ ∑ᵢ wᵢ (fₙ-mₙ) vₙ⁻¹ log p(yₙ|fsigᵢ)
     invv = np.diag(post_cov)[:, None]**-1
     dE_dm = np.sum(invv * (sigma_points - post_mean) *
                    weighted_log_likelihood_eval,
                    axis=-1)[:, None]
     # Compute second derivative via cubature (deriv. w.r.t. var = 0.5 * 2nd deriv. w.r.t. mean):
     # dE[log p(yₙ|fₙ)]/dvₙ = ∫ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹]/2 log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     #                        ≈ ∑ᵢ wᵢ [(fₙ-mₙ)² vₙ⁻² - vₙ⁻¹]/2 log p(yₙ|fsigᵢ)
     dE_dv = np.sum(
         (0.5 * (invv**2 * (sigma_points - post_mean)**2) - 0.5 * invv) *
         weighted_log_likelihood_eval,
         axis=-1)
     dE_dv = np.diag(dE_dv)
     return exp_log_lik, dE_dm, dE_dv
Example #19
    def log_likelihood(self, params):
        self.model.set_parameters(params)
        kx = self.model.kernel.function(
            self.model.X, params) + jnp.eye(self.N) * (params["noise"] + 1e-8)
        L = cholesky(kx, lower=True)

        alpha = solve_triangular(L.T,
                                 solve_triangular(L, self.model.y, lower=True))
        W_logdet = 2. * jnp.sum(jnp.log(jnp.diag(L)))
        log_marginal = 0.5 * (-self.model.y.size * log_2_pi -
                              self.model.y.shape[1] * W_logdet -
                              jnp.sum(alpha * self.model.y))

        return log_marginal
Example #20
    def statistical_linear_regression(self,
                                      cav_mean,
                                      cav_cov,
                                      hyp=None,
                                      cubature_func=None):
        """
        This gives the same result as above - delete
        """
        num_components = int(cav_mean.shape[0] / 2)
        if cubature_func is None:
            x, w = gauss_hermite(num_components,
                                 20)  # Gauss-Hermite sigma points and weights
        else:
            x, w = cubature_func(num_components)

        subband_mean, modulator_mean = cav_mean[:num_components], self.link_fn(
            cav_mean[num_components:])
        subband_cov, modulator_cov = cav_cov[:num_components, :
                                             num_components], cav_cov[
                                                 num_components:,
                                                 num_components:]
        sigma_points = cholesky(modulator_cov) @ x + modulator_mean
        lik_expectation, lik_covariance = (
            self.link_fn(sigma_points).T @ subband_mean).T, hyp
        # Compute zₙ via cubature:
        # muₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ
        #    ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
        mu = np.sum(w * lik_expectation, axis=-1)[:, None]
        # Compute variance S via cubature:
        # S = ∫ [(E[yₙ|fₙ]-zₙ) (E[yₙ|fₙ]-zₙ)' + Cov[yₙ|fₙ]] 𝓝(fₙ|mₙ,vₙ) dfₙ
        #   ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
        S = np.sum(w * ((lik_expectation - mu) *
                        (lik_expectation - mu) + lik_covariance),
                   axis=-1)[:, None]
        # Compute cross covariance C via cubature:
        # C = ∫ (fₙ-mₙ) (E[yₙ|fₙ]-zₙ)' 𝓝(fₙ|mₙ,vₙ) dfₙ
        #   ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-zₙ)'
        C = np.sum(w * np.block([[
            self.link_fn(sigma_points) * np.diag(subband_cov)[..., None]
        ], [sigma_points - modulator_mean]]) * (lik_expectation - mu),
                   axis=-1)[:, None]
        # Compute derivative of mu via cubature:
        omega = np.sum(
            w * np.block([[self.link_fn(sigma_points)],
                          [
                              np.diag(modulator_cov)[..., None]**-1 *
                              (sigma_points - modulator_mean) * lik_expectation
                          ]]),
            axis=-1)[None, :]
        return mu, S, C, omega
Example #21
 def expected_log_likelihood(self, y, m, v, cubature=None):
     """
     """
     if cubature is None:
         x, w = gauss_hermite(2, 20)  # Gauss-Hermite sigma points and weights
     else:
         x, w = cubature(2)
     v = (v + v.T) / 2
     sigma_points = cholesky(v) @ x + m  # fsigᵢ=xᵢ√(2vₙ) + mₙ: scale locations according to cavity dist.
     # Compute expected log likelihood via cubature:
     # E[log p(yₙ|fₙ)] = ∫ log p(yₙ|fₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     #                 ≈ ∑ᵢ wᵢ log p(yₙ|fsigᵢ)
     exp_log_lik = np.sum(w * self.evaluate_log_likelihood(y, sigma_points))
     return exp_log_lik
Example #22
 def model(self, batch):
     X = batch['X']
     y = batch['y']
     N, D = X.shape
     # set uninformative log-normal priors
     var = sample('kernel_var', dist.LogNormal(0.0, 1.0), sample_shape = (1,))
     length = sample('kernel_length', dist.LogNormal(0.0, 1.0), sample_shape = (D,))
     theta = np.concatenate([var, length])
     # compute kernel
     K = self.kernel(X, X, theta) + np.eye(N)*1e-8
     L = cholesky(K, lower=True)
     # Generate latent function
     beta = sample('beta', dist.Normal(0.0, 1.0))
     eta = sample('eta', dist.Normal(0.0, 1.0), sample_shape=(N,))
     f = np.matmul(L, eta) + beta
     # Bernoulli likelihood
     sample('y', dist.Bernoulli(logits=f), obs=y)
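A hedged sketch of how such a NumPyro model is typically fit with the NUTS sampler (gp stands for an instance of the class defining model above, and batch is the dict of training data it expects):

from jax import random
from numpyro.infer import MCMC, NUTS

def run_inference(gp, batch, rng_key=random.PRNGKey(0)):
    # sample the kernel hyperparameters and latent function with HMC/NUTS
    mcmc = MCMC(NUTS(gp.model), num_warmup=500, num_samples=1000)
    mcmc.run(rng_key, batch)
    return mcmc.get_samples()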
Example #23
 def posterior_sample(self, key, sample, X_star, **kwargs):
     # Fetch training data
     batch = kwargs['batch']
     XL, XH = batch['XL'], batch['XH']
     NL, NH = XL.shape[0], XH.shape[0]
     # Fetch params
     var_L = sample['kernel_var_L']
     var_H = sample['kernel_var_H']
     length_L = sample['kernel_length_L']
     length_H = sample['kernel_length_H']
     beta_L = sample['beta_L']
     beta_H = sample['beta_H']
     eta_L = sample['eta_L']
     eta_H = sample['eta_H']
     rho = sample['rho']
     theta_L = np.concatenate([var_L, length_L])
     theta_H = np.concatenate([var_H, length_H])
     beta = np.concatenate([beta_L*np.ones(NL), beta_H*np.ones(NH)])
     eta = np.concatenate([eta_L, eta_H])
     # Compute kernels
     k_pp = rho**2 * self.kernel(X_star, X_star, theta_L) + \
                     self.kernel(X_star, X_star, theta_H) + \
                     np.eye(X_star.shape[0])*1e-8
     psi1 = rho*self.kernel(X_star, XL, theta_L)
     psi2 = rho**2 * self.kernel(X_star, XH, theta_L) + \
                     self.kernel(X_star, XH, theta_H)
     k_pX = np.hstack((psi1,psi2))
     # Compute K_xx
     K_LL = self.kernel(XL, XL, theta_L) + np.eye(NL)*1e-8
     K_LH = rho*self.kernel(XL, XH, theta_L)
     K_HH = rho**2 * self.kernel(XH, XH, theta_L) + \
                     self.kernel(XH, XH, theta_H) + np.eye(NH)*1e-8
     K_xx = np.vstack((np.hstack((K_LL,K_LH)),
                    np.hstack((K_LH.T,K_HH))))
     L = cholesky(K_xx, lower=True)
     # Sample latent function
     f = np.matmul(L, eta) + beta
     tmp_1 = solve_triangular(L.T,solve_triangular(L, f, lower=True))
     tmp_2  = solve_triangular(L.T,solve_triangular(L, k_pX.T, lower=True))
     # Compute predictive mean
     mu = np.matmul(k_pX, tmp_1)
     cov = k_pp - np.matmul(k_pX, tmp_2)
     std = np.sqrt(np.clip(np.diag(cov), a_min=0.))
     sample = mu + std * random.normal(key, mu.shape)
     return mu, sample
Example #24
File: gp.py Project: sagar87/numgp
    def _build_prior(self, Xs, **kwargs):
        self.N = np.prod([len(X) for X in Xs])
        mu = self.mean_func(cartesian(Xs))
        chols = []
        for i, (cov, X) in enumerate(zip(self.cov_funcs, Xs)):
            Kxx = npy.deterministic(f"{self.name}_Kxx_{i}", cov(X))
            chol = cholesky(stabilize(Kxx), lower=True)
            chols.append(chol)

        # remove reparameterization option
        v = npy.sample(
            f"{self.name}_rotated",
            dist.Normal(loc=jnp.zeros(self.N),
                        scale=jnp.ones(self.N),
                        **kwargs),
        )
        f = npy.deterministic(self.name, mu + (kron_dot(chols, v)).reshape(-1))
        return f
Example #25
 def statistical_linear_regression_cubature(self,
                                            cav_mean,
                                            cav_cov,
                                            hyp=None,
                                            cubature_func=None):
     """
     Perform statistical linear regression (SLR) using cubature.
     We aim to find a likelihood approximation p(yₙ|fₙ) ≈ 𝓝(yₙ|Afₙ+b,Ω+Var[yₙ|fₙ]).
     TODO: this currently assumes an additive noise model (ok for our current applications), make more general
     """
     if cubature_func is None:
         x, w = gauss_hermite(cav_mean.shape[0],
                              20)  # Gauss-Hermite sigma points and weights
     else:
         x, w = cubature_func(cav_mean.shape[0])
     # fsigᵢ=xᵢ√(vₙ) + mₙ: scale locations according to cavity dist.
     sigma_points = cholesky(cav_cov) @ np.atleast_2d(x) + cav_mean
     lik_expectation, lik_covariance = self.conditional_moments(
         sigma_points, hyp)
     # Compute zₙ via cubature:
     # zₙ = ∫ E[yₙ|fₙ] 𝓝(fₙ|mₙ,vₙ) dfₙ
     #    ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ]
     mu = np.sum(w * lik_expectation, axis=-1)[:, None]
     # Compute variance S via cubature:
     # S = ∫ [(E[yₙ|fₙ]-zₙ) (E[yₙ|fₙ]-zₙ)' + Cov[yₙ|fₙ]] 𝓝(fₙ|mₙ,vₙ) dfₙ
     #   ≈ ∑ᵢ wᵢ [(E[yₙ|fsigᵢ]-zₙ) (E[yₙ|fsigᵢ]-zₙ)' + Cov[yₙ|fₙ]]
     # TODO: allow for multi-dim cubature
     S = np.sum(w * ((lik_expectation - mu) *
                     (lik_expectation - mu) + lik_covariance),
                axis=-1)[:, None]
     # Compute cross covariance C via cubature:
     # C = ∫ (fₙ-mₙ) (E[yₙ|fₙ]-zₙ)' 𝓝(fₙ|mₙ,vₙ) dfₙ
     #   ≈ ∑ᵢ wᵢ (fsigᵢ -mₙ) (E[yₙ|fsigᵢ]-zₙ)'
     C = np.sum(w * (sigma_points - cav_mean) * (lik_expectation - mu),
                axis=-1)[:, None]
     # Compute derivative of z via cubature:
     # omega = ∫ E[yₙ|fₙ] vₙ⁻¹ (fₙ-mₙ) 𝓝(fₙ|mₙ,vₙ) dfₙ
     #       ≈ ∑ᵢ wᵢ E[yₙ|fsigᵢ] vₙ⁻¹ (fsigᵢ-mₙ)
     omega = np.sum(w * lik_expectation *
                    (inv(cav_cov) @ (sigma_points - cav_mean)),
                    axis=-1)[None, :]
     return mu, S, C, omega
Example #26
    def log_density(self, y, mean, cov, cubature=None):
        """
        """
        num_components = int(mean.shape[0] / 2)
        if cubature is None:
            x, w = gauss_hermite(num_components, 20)  # Gauss-Hermite sigma points and weights
        else:
            x, w = cubature(num_components)

        # subband_mean, modulator_mean = mean[:num_components], self.link_fn(mean[num_components:])
        subband_mean, modulator_mean = mean[:num_components], mean[num_components:]  # TODO: CHECK
        subband_cov, modulator_cov = cov[:num_components, :num_components], cov[num_components:,
                                                                                num_components:]
        sigma_points = cholesky(modulator_cov) @ x + modulator_mean
        mu = (self.link_fn(sigma_points).T @ subband_mean)[:, 0]
        var = self.variance + (self.link_fn(sigma_points).T ** 2 @ np.diag(subband_cov)[..., None])[:, 0]
        normpdf = (2 * np.pi * var) ** -0.5 * np.exp(-0.5 * (y - mu) ** 2 / var)
        Z = np.sum(w * normpdf)
        lZ = np.log(Z + 1e-8)
        return lZ
Example #27
def _multivariate_normal(key, mean, cov, shape, dtype):
  if not onp.ndim(mean) >= 1:
    msg = "multivariate_normal requires mean.ndim >= 1, got mean.ndim == {}"
    raise ValueError(msg.format(onp.ndim(mean)))
  if not onp.ndim(cov) >= 2:
    msg = "multivariate_normal requires cov.ndim >= 2, got cov.ndim == {}"
    raise ValueError(msg.format(onp.ndim(cov)))
  n = mean.shape[-1]
  if onp.shape(cov)[-2:] != (n, n):
    msg = ("multivariate_normal requires cov.shape == (..., n, n) for n={n}, "
           "but got cov.shape == {shape}.")
    raise ValueError(msg.format(n=n, shape=onp.shape(cov)))

  if shape is None:
    shape = lax.broadcast_shapes(mean.shape[:-1], cov.shape[:-2])
  else:
    _check_shape("normal", shape, mean.shape[:-1], mean.shape[:-2])

  chol_factor = cholesky(cov)
  normal_samples = normal(key, shape + mean.shape[-1:], dtype)
  return mean + np.tensordot(normal_samples, chol_factor, [-1, 1])
Example #28
def _newton_iteration(y_train, K, f):
    pi = expit(f)
    W = pi * (1 - pi)
    # Line 5
    W_sr = np.sqrt(W)
    W_sr_K = W_sr[:, np.newaxis] * K
    B = np.eye(W.shape[0]) + W_sr_K * W_sr
    L = cholesky(B, lower=True)
    # Line 6
    b = W * f + (y_train - pi)
    # Line 7
    a = b - W_sr * cho_solve((L, True), W_sr_K.dot(b))
    # Line 8
    f = K.dot(a)

    # Line 10: Compute log marginal likelihood in loop and use as
    #          convergence criterion
    lml = -0.5 * a.T.dot(f) \
        - np.log1p(np.exp(-(y_train * 2 - 1) * f)).sum() \
        - np.log(np.diag(L)).sum()
    return lml, f, (pi, W_sr, L, b, a)
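A hedged sketch of the outer loop that would drive this Newton step: iterate the mode-finding until the log marginal likelihood stops improving (as in GPML Algorithm 3.1); it only needs numpy and the _newton_iteration defined above.

import numpy as np

def laplace_mode(y_train, K, tol=1e-10, max_iter=100):
    # find the Laplace-approximation mode of the latent GP classifier
    f = np.zeros_like(y_train, dtype=float)
    lml_prev = -np.inf
    for _ in range(max_iter):
        lml, f, _ = _newton_iteration(y_train, K, f)
        if lml - lml_prev < tol:
            break
        lml_prev = lml
    return f, lml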
Example #29
    def _conditional(self, params, *args, **kwargs):
        delta = params[self.gp].squeeze() - params[self.mean].squeeze()

        chols = [
            cholesky(stabilize(params[Kxx].squeeze()), lower=True)
            for _, Kxx in sorted(self.Kxx.items())
        ]
        cholTs = [chol.T for chol in chols]

        Kss = params[self.Kss].squeeze()
        Ksx = params[self.Ksx].squeeze()
        Kxs = Ksx.T

        alpha = kron_solve_lower(chols, delta)
        alpha = kron_solve_upper(cholTs, alpha)

        mu = jnp.dot(Ksx, alpha).ravel() + params[self.cond].squeeze()
        A = kron_solve_lower(chols, Kxs)
        cov = stabilize(Kss - jnp.dot(A.T, A))

        return mu, cov
Example #30
 def _build_conditional(self, params, pred_noise=False, diag=False):
     Kxx = params[self.Kxx].squeeze() + params[self.Knx].squeeze()
     Kxs = params[self.Ksx].T.squeeze()
     Knx = params[self.Knx].squeeze()
     rxx = params[self.y].squeeze() - params[self.mean].squeeze()
     L = cholesky(stabilize(Kxx) + Knx, lower=True)
     A = solve_lower(L, Kxs)
     v = solve_lower(L, rxx)
     mu = params[self.cond].squeeze() + A.T @ v
     if diag:
         Kss = jnp.diag(jnp.diag(params[self.Kss].squeeze()))
         var = Kss - jnp.sum(jnp.square(A), 0)
         if pred_noise:
             var += jnp.diag(jnp.diag(params[self.Kns].squeeze()))
         return mu, var
     else:
         Kss = params[self.Kss].squeeze()
         cov = Kss - A.T @ A
         if pred_noise:
             cov += params[self.Kns].squeeze()
         return mu, cov if pred_noise else stabilize(cov)