Example #1
def reparameterize(mean, var, z, full_cov=False):
    """
    Implements the 'reparameterization trick' for the Gaussian, either full rank or diagonal

    If z is a sample from N(0, 1), the output is a sample from N(mean, var)

    If full_cov=True then var must be of shape S,N,N,D and the full covariance is used. Otherwise
    var must be S,N,D and the operation is elementwise

    :param mean: mean of shape S,N,D
    :param var: covariance of shape S,N,D or S,N,N,D
    :param z: samples from a unit Gaussian, of shape S,N,D
    :param full_cov: bool to indicate whether var is of shape S,N,N,D or S,N,D
    :return: sample from N(mean, var) of shape S,N,D
    """
    if var is None:
        return mean

    if full_cov is False:
        return mean + z * (var + gpflow.default_jitter())**0.5
    else:
        S, N, D = tf.shape(mean)[0], tf.shape(mean)[1], tf.shape(mean)[2]  # var is SNND
        mean = tf.transpose(mean, (0, 2, 1))  # SND -> SDN
        var = tf.transpose(var, (0, 3, 1, 2))  # SNND -> SDNN
        I = gpflow.default_jitter() * tf.eye(
            N, dtype=gpflow.default_float())[None, None, :, :]  # 11NN
        chol = tf.linalg.cholesky(var + I)  # SDNN
        z_SDN1 = tf.transpose(z, [0, 2, 1])[:, :, :, None]  # SND->SDN1
        f = mean + tf.matmul(chol, z_SDN1)[:, :, :, 0]  # SDN(1)
        return tf.transpose(f, (0, 2, 1))  # SND
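A minimal usage sketch for the diagonal path (not from the original listing; assumes TensorFlow and GPflow are installed):
import tensorflow as tf
import gpflow

S, N, D = 3, 10, 2
mean = tf.zeros((S, N, D), dtype=gpflow.default_float())
var = tf.ones((S, N, D), dtype=gpflow.default_float())
z = tf.random.normal((S, N, D), dtype=gpflow.default_float())
samples = reparameterize(mean, var, z)  # [S, N, D]; elementwise mean + z * sqrt(var + jitter)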
Example #2
def get_pred_Y_approx(m, by_K=False):
    pred_Y = np.zeros((m.N, m.D))

    if by_K:
        pred_Y_k = np.zeros((m.N, m.D, m.K))

    # fs(xk)
    Kmm_s = gpflow.covariances.Kuu(m.Zs, m.kernel_s, jitter=gpflow.default_jitter())
    Kmn_s = gpflow.covariances.Kuf(m.Zs, m.kernel_s, m.Xs_mean) 
    pred_s = (tf.transpose(Kmn_s) @ tf.linalg.inv(Kmm_s) @ m.q_mu_s).numpy()

    # fk(xk)
    for k in range(m.K):
        kernel = m.kernel_K[k]
        Kmm = gpflow.covariances.Kuu(m.Zp, kernel, jitter=gpflow.default_jitter())
        Kmn = gpflow.covariances.Kuf(m.Zp, kernel, m.Xp_mean)
        pred = tf.transpose(Kmn) @ tf.linalg.inv(Kmm) @ m.q_mu[k] # [N, D]
        if by_K:
            pred_Y_k[..., k] = pred.numpy()
        assignment = m.pi.numpy()[:, k]
        pred_Y += pred.numpy() * np.stack([assignment for _ in range(m.D)], axis=1)
    pred_Y += pred_s

    if by_K:
        return pred_Y, pred_Y_k, pred_s
    else:
        return pred_Y
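A side note on the Kmm^{-1} products above: tf.linalg.inv works, but a Cholesky solve is the numerically safer way to form Kmn^T Kmm^{-1} q_mu. A hedged equivalent sketch (project_mean is a hypothetical helper, not part of the original code):
import tensorflow as tf
import gpflow

def project_mean(Kmm, Kmn, q_mu):
    # Kmn^T Kmm^{-1} q_mu via Cholesky solves, avoiding an explicit inverse
    M = tf.shape(Kmm)[0]
    L = tf.linalg.cholesky(Kmm + gpflow.default_jitter() * tf.eye(M, dtype=Kmm.dtype))
    A = tf.linalg.triangular_solve(L, Kmn, lower=True)   # L^{-1} Kmn
    B = tf.linalg.triangular_solve(L, q_mu, lower=True)  # L^{-1} q_mu
    return tf.linalg.matmul(A, B, transpose_a=True)      # [N, D]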
Example #3
def klu(m):
    KL_u = 0
    prior_Kuu = np.zeros((m.M, m.M))
    if m.split_space:
        prior_Kuu += gpflow.covariances.Kuu(m.Zs, m.kernel_s, jitter=gpflow.default_jitter())
    for k in range(2):
        prior_Kuu_k = gpflow.covariances.Kuu(m.Zp, m.kernel_K[k], jitter=gpflow.default_jitter())
        KL_u += gpflow.kullback_leiblers.gauss_kl(q_mu=m.q_mu[k], q_sqrt=m.q_sqrt[k], K=prior_Kuu+prior_Kuu_k)
    return KL_u
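Note that klu hard-codes range(2), which presumably matches a two-component model in the source. For reference, a minimal standalone call to gpflow.kullback_leiblers.gauss_kl (an illustrative sketch, not part of the listing):
import tensorflow as tf
import gpflow

M, L = 5, 1  # inducing points, output dims
q_mu = tf.zeros((M, L), dtype=gpflow.default_float())
q_sqrt = tf.eye(M, batch_shape=[L], dtype=gpflow.default_float())
kl = gpflow.kullback_leiblers.gauss_kl(q_mu, q_sqrt)  # KL(N(0, I) || N(0, I)) == 0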
Example #4
    def predict_f(self, Xnew, full_cov=False):
        M = tf.shape(self.X)[0]
        K = self.kernel.K(self.X)
        Phi = tf.nn.softmax(self.logPhi)
        # try squashing Phi to avoid numerical errors
        Phi = (1 - 2e-6) * Phi + 1e-6
        sigma2 = self.likelihood.variance
        L = (tf.linalg.cholesky(K) +
             tf.eye(M, dtype=gpflow.default_float()) * gpflow.default_jitter())
        W = tf.transpose(L) * tf.sqrt(tf.math.reduce_sum(Phi, 0)) / tf.sqrt(sigma2)
        P = tf.linalg.matmul(W, tf.transpose(W)) + tf.eye(
            M, dtype=gpflow.default_float())
        R = tf.linalg.cholesky(P)
        PhiY = tf.linalg.matmul(tf.transpose(Phi), self.Y)
        LPhiY = tf.linalg.matmul(tf.transpose(L), PhiY)
        c = tf.linalg.triangular_solve(R, LPhiY, lower=True) / sigma2
        Kus = self.kernel.K(self.X, Xnew)
        tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
        tmp2 = tf.linalg.triangular_solve(R, tmp1, lower=True)
        mean = tf.linalg.matmul(tf.transpose(tmp2), c)
        if full_cov:
            var = (self.kernel.K(Xnew) +
                   tf.linalg.matmul(tf.transpose(tmp2), tmp2) -
                   tf.linalg.matmul(tf.transpose(tmp1), tmp1))
            shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = (self.kernel.K_diag(Xnew) +
                   tf.math.reduce_sum(tf.math.square(tmp2), 0) -
                   tf.math.reduce_sum(tf.math.square(tmp1), 0))
            shape = tf.stack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean, var
Example #5
    def build_cholesky_if_needed(self):
        # note: the "compute this once" caching guard is commented out,
        # so the Cholesky is recomputed on every call
        # if self.needs_build_cholesky:
        self.Ku = covs.Kuu(self.feature,
                           self.kern,
                           jitter=gpflow.default_jitter())
        self.Lu = tf.linalg.cholesky(self.Ku)
        self.Ku_tiled = tf.tile(self.Ku[None, :, :], [self.num_outputs, 1, 1])
        self.Lu_tiled = tf.tile(self.Lu[None, :, :], [self.num_outputs, 1, 1])
Example #6
    def __call__(self, Xnew, full_cov=False, full_output_cov=False):
        q_mu = self.q_mu  # M x K x O
        q_sqrt = self.q_sqrt  # K x O x M x M

        Kuu = covariances.Kuu(self.inducing_variables,
                              self.kernel,
                              jitter=default_jitter())  # K x M x M
        Kuf = covariances.Kuf(self.inducing_variables, self.kernel,
                              Xnew)  # K x M x N
        Knn = self.kernel.K(Xnew, full_output_cov=False)
Example #7
    def __init__(self,
                 kern,
                 Z,
                 num_outputs,
                 mean_function,
                 white=False,
                 input_prop_dim=None,
                 **kwargs):
        """
        A sparse variational GP layer, optionally in the whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v ~ N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
        :param mean_function: The mean function
        :param white: whether to use the whitened representation; if False,
            q_sqrt is initialised to the prior Cholesky
        """
        super().__init__(input_prop_dim=input_prop_dim, **kwargs)
        self.num_inducing = Z.shape[0]

        # Variational mean of the inducing outputs
        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu, name="q_mu")
        # Square-root of the variational covariance of the inducing outputs
        q_sqrt = np.tile(
            np.eye(self.num_inducing)[None, :, :], [num_outputs, 1, 1])
        self.q_sqrt = Parameter(q_sqrt, transform=triangular(), name="q_sqrt")

        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function

        self.num_outputs = num_outputs
        self.white = white

        if not self.white:  # initialize to prior
            Ku = self.kern.K(Z)
            Lu = np.linalg.cholesky(Ku + np.eye(Z.shape[0]) *
                                    gpflow.default_jitter())
            self.q_sqrt = Parameter(np.tile(Lu[None, :, :],
                                            [num_outputs, 1, 1]),
                                    transform=triangular(),
                                    name="q_sqrt")

        self.Ku, self.Lu, self.Ku_tiled, self.Lu_tiled = None, None, None, None
        self.needs_build_cholesky = True
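The listing does not show the class statement; assuming the surrounding class is named SVGPLayer (a hypothetical name), construction might look like this sketch:
import numpy as np
import gpflow

kern = gpflow.kernels.SquaredExponential(lengthscales=1.0)
Z = np.random.randn(20, 3)  # M=20 inducing points, D_in=3
layer = SVGPLayer(kern, Z, num_outputs=2,
                  mean_function=gpflow.mean_functions.Zero())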
Example #8
class Datum:
    M, N = 5, 4

    mu = rng.randn(M, N)  # [M, N]
    A = rng.randn(M, M)
    I = np.eye(M)  # [M, M]
    K = A @ A.T + default_jitter() * I  # [M, M]
    sqrt = make_sqrt(N, M)  # [N, M, M]
    sqrt_diag = rng.randn(M, N)  # [M, N]
    K_batch = make_K_batch(N, M)
    K_cholesky = np.linalg.cholesky(K)
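The listing does not include rng, make_sqrt, or make_K_batch; a plausible reconstruction consistent with the shape comments above (an assumption, not the original test fixtures):
import numpy as np
from gpflow.config import default_jitter

rng = np.random.RandomState(0)  # assumed module-level RNG

def make_sqrt(N, M):
    # [N, M, M] batch of lower-triangular square-root factors (assumed shape)
    return np.tril(rng.randn(N, M, M))

def make_K_batch(N, M):
    # [N, M, M] batch of positive-definite matrices (assumed shape)
    A = rng.randn(N, M, M)
    return A @ np.transpose(A, (0, 2, 1)) + default_jitter() * np.eye(M)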
Example #9
def _cholesky_with_jitter(cov: TensorType) -> tf.Tensor:
    """
    Compute the Cholesky of the covariance, adding jitter (determined by
    :func:`gpflow.default_jitter`) to the diagonal to improve stability.

    :param cov: full covariance with shape ``[..., N, D, D]``.
    """
    # cov [..., N, D, D]
    cov_shape = tf.shape(cov)
    batch_shape = cov_shape[:-2]
    D = cov_shape[-2]
    jittermat = default_jitter() * tf.eye(
        D, batch_shape=batch_shape, dtype=cov.dtype
    )  # [..., N, D, D]
    return tf.linalg.cholesky(cov + jittermat)  # [..., N, D, D]
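A quick check of _cholesky_with_jitter on a random PSD batch (a sketch; TensorType comes from gpflow.base):
import numpy as np
import tensorflow as tf

A = tf.constant(np.random.randn(4, 3, 3))
cov = tf.matmul(A, A, transpose_b=True)  # random PSD batch, [4, 3, 3]
chol = _cholesky_with_jitter(cov)        # lower triangular, [4, 3, 3]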
Example #10
    def maximum_log_likelihood_objective(self):
        print("assignegp_dense compiling model (build_likelihood)")
        N = tf.cast(tf.shape(self.Y)[0], dtype=gpflow.default_float())
        M = tf.shape(self.X)[0]
        D = tf.cast(tf.shape(self.Y)[1], dtype=gpflow.default_float())
        if self.KConst is not None:
            K = tf.cast(self.KConst, gpflow.default_float())
        else:
            K = self.kernel.K(self.X)
        Phi = tf.nn.softmax(self.logPhi)
        # try squashing Phi to avoid numerical errors
        Phi = (1 - 2e-6) * Phi + 1e-6
        sigma2 = self.likelihood.variance
        tau = 1.0 / self.likelihood.variance
        L = (tf.linalg.cholesky(K) +
             tf.eye(M, dtype=gpflow.default_float()) * gpflow.default_jitter())
        W = tf.transpose(L) * tf.sqrt(tf.reduce_sum(Phi, 0)) / tf.sqrt(sigma2)
        P = tf.linalg.matmul(W, tf.transpose(W)) + tf.eye(
            M, dtype=gpflow.default_float())
        R = tf.linalg.cholesky(P)
        PhiY = tf.linalg.matmul(tf.transpose(Phi), self.Y)
        LPhiY = tf.linalg.matmul(tf.transpose(L), PhiY)
        if self.fDebug:
            tf.print(Phi, [tf.shape(P), P], name="P", summarize=10)
            tf.print(Phi, [tf.shape(LPhiY), LPhiY], name="LPhiY", summarize=10)
            tf.print(Phi, [tf.shape(K), K], name="K", summarize=10)
            tf.print(Phi, [tau], name="tau", summarize=10)
        c = tf.linalg.triangular_solve(R, LPhiY, lower=True) / sigma2
        # compute KL
        KL = self.build_KL(Phi)
        a1 = -0.5 * N * D * tf.math.log(2.0 * np.pi / tau)
        a2 = (-0.5 * D * tf.math.reduce_sum(
            tf.math.log(tf.math.square(tf.linalg.diag_part(R)))))
        a3 = -0.5 * tf.math.reduce_sum(tf.math.square(self.Y)) / sigma2
        a4 = +0.5 * tf.math.reduce_sum(tf.math.square(c))
        a5 = -KL
        if self.fDebug:
            tf.print(a1, [a1], name="a1=")
            tf.print(a2, [a2], name="a2=")
            tf.print(a3, [a3], name="a3=")
            tf.print(a4, [a4], name="a4=")
            tf.print(a5, [a5, Phi], name="a5 and Phi=", summarize=10)
        return a1 + a2 + a3 + a4 + a5
Example #11
    def maximum_log_likelihood_objective(self):
        if self.fDebug:
            print("assignegp_denseSparse compiling model (build_likelihood)")
        N = tf.cast(tf.shape(self.Y)[0], dtype=gpflow.default_float())
        M = tf.shape(self.ZExpanded)[0]
        D = tf.cast(tf.shape(self.Y)[1], dtype=gpflow.default_float())

        Phi = tf.nn.softmax(self.logPhi)
        # try squashing Phi to avoid numerical errors
        Phi = (1 - 2e-6) * Phi + 1e-6

        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(self.likelihood.variance)
        Kuu = (
            self.kernel.K(self.ZExpanded) +
            tf.eye(M, dtype=gpflow.default_float()) * gpflow.default_jitter())
        Kuf = self.kernel.K(self.ZExpanded, self.X)

        Kdiag = self.kernel.K_diag(self.X)
        L = tf.linalg.cholesky(Kuu)
        A = tf.math.reduce_sum(Phi, 0)
        LiKuf = tf.linalg.triangular_solve(L, Kuf)
        W = LiKuf * tf.sqrt(A) / sigma
        P = tf.linalg.matmul(W, tf.transpose(W)) + tf.eye(
            M, dtype=gpflow.default_float())
        traceTerm = -0.5 * tf.math.reduce_sum(
            Kdiag * A) / sigma2 + 0.5 * tf.math.reduce_sum(tf.math.square(W))
        R = tf.linalg.cholesky(P)
        tmp = tf.linalg.matmul(LiKuf,
                               tf.linalg.matmul(tf.transpose(Phi), self.Y))
        c = tf.linalg.triangular_solve(R, tmp, lower=True) / sigma2
        if self.fDebug:
            # trace term should be 0 for Z=X (full data)
            tf.print([traceTerm], name="traceTerm", summarize=10)

        self.bound = (
            traceTerm - 0.5 * N * D * tf.math.log(2 * np.pi * sigma2) -
            0.5 * D * tf.math.reduce_sum(
                tf.math.log(tf.math.square(tf.linalg.diag_part(R)))) -
            0.5 * tf.math.reduce_sum(tf.math.square(self.Y)) / sigma2 +
            0.5 * tf.math.reduce_sum(tf.math.square(c)) - self.build_KL(Phi))

        return self.bound
Example #12
def gauss_kl(q_mu, q_sqrt, K=None):
    """
    Wrapper for gauss_kl from gpflow that returns the negative log prob if q_sqrt is None. This is
    useful for HMC: all that is required is to set q_sqrt to None, and this function substitutes the
    negative log prob for the KL (so there is no need to set q_mu.prior = gpflow.priors.Gaussian(0, 1)).
    This also allows the use of HMC in the unwhitened case.
    """
    if q_sqrt is None:
        # return negative log prob with q_mu as 'x', with mean 0 and cov K (or I, if None)
        M, D = tf.shape(q_mu)[0], tf.shape(q_mu)[1]
        I = tf.eye(M, dtype=q_mu.dtype)

        if K is None:
            L = I
        else:
            L = tf.linalg.cholesky(K + I * gpflow.default_jitter())

        return -tf.reduce_sum(
            gpflow.logdensities.multivariate_normal(q_mu, tf.zeros_like(q_mu),
                                                    L))

    else:
        # return kl
        return gauss_kl_gpflow(q_mu, q_sqrt, K=K)
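A hedged sketch of the q_sqrt=None branch, which turns the wrapper into a potential-energy term for HMC (assumes gauss_kl_gpflow is gpflow.kullback_leiblers.gauss_kl imported under that alias):
import tensorflow as tf
import gpflow

M, D = 4, 2
q_mu = tf.random.normal((M, D), dtype=gpflow.default_float())
K = tf.eye(M, dtype=gpflow.default_float())
neg_log_prob = gauss_kl(q_mu, None, K=K)  # -log N(q_mu; 0, K + jitter*I), summed over the D columns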
Example #13
    def K(self, X, Y=None):
        if Y is None:
            Y = X  # hack to avoid duplicating code below

        if self.fDebug:
            print("Compiling kernel")
        t1s = tf.expand_dims(X[:, 0], 1)  # N X 1
        t2s = tf.expand_dims(Y[:, 0], 1)
        i1s_r = tf.expand_dims(X[:, 1], 1)
        i2s_r = tf.expand_dims(Y[:, 1], 1)
        i1s = i1s_r
        i2s = i2s_r
        if self.fDebug:
            snl = 10  # how many entries to print
            tf.print([tf.shape(i1s_r), i1s_r], name="i1sdebug",
                     summarize=snl)  # will print message
            tf.print([tf.shape(i2s_r), i2s_r], name="i2sdebug",
                     summarize=snl)  # will print message

        i1s_matrix = tf.tile(i1s, tf.reverse(tf.shape(i2s), [0]))
        i2s_matrix = tf.tile(i2s, tf.reverse(tf.shape(i1s), [0]))
        i2s_matrixT = tf.transpose(i2s_matrix)

        Ktts = self.kern.K(t1s, t2s)  # N*M X N*M
        with tf.name_scope("kttscope"):  # scope
            same_functions = tf.equal(i1s_matrix,
                                      tf.transpose(i2s_matrix),
                                      name="FiEQFj")
            K_s = tf.where(
                same_functions, Ktts, Ktts,
                name="selectFiEQFj")  # just setup matrix with block diagonal

        m = self.fm.shape[0]
        for fi in range(m):
            for fj in range(m):
                if fi != fj:
                    with tf.name_scope("f" + str(fi) + "f" + str(fj)):  # scope
                        # much easier to remove nans before tensorflow
                        bnan = self.fm[fi, fj, ~np.isnan(self.fm[fi, fj, :])]
                        fi_s = tf.constant(fi + 1,
                                           tf.int32,
                                           name="function" + str(fi))
                        fj_s = tf.constant(fj + 1,
                                           tf.int32,
                                           name="function" + str(fj))

                        i1s_matrixInt = tf.cast(i1s_matrix,
                                                tf.int32,
                                                name="casti1s")
                        i2s_matrixTInt = tf.cast(i2s_matrixT,
                                                 tf.int32,
                                                 name="casti2s")

                        fiFilter = fi_s * tf.ones_like(
                            i1s_matrixInt, tf.int32, name="fiFilter")
                        fjFilter = fj_s * tf.ones_like(
                            i2s_matrixTInt, tf.int32,
                            name="fjFilter")  # must be transpose

                        f1F = tf.equal(i1s_matrixInt,
                                       fiFilter,
                                       name="indexF" + str(fi))
                        f2F = tf.equal(i2s_matrixTInt,
                                       fjFilter,
                                       name="indexF" + str(fj))

                        t12F = tf.logical_and(f1F,
                                              f2F,
                                              name="F" + str(fi) + "andF" +
                                              str(fj))

                        # Get the actual values of the Bs = B[index of relevant branching points]
                        bint = bnan.astype(
                            int)  # convert to int - set of indexes
                        Br = self.Bv
                        if self.fDebug:
                            tf.print([tf.shape(self.Bv), self.Bv],
                                     name="Bv",
                                     summarize=3)  # will print message
                        Bs = tf.concat(
                            [tf.slice(Br, [i - 1, 0], [1, 1]) for i in bint],
                            0)

                        kbb = (self.kern.K(Bs) + tf.linalg.diag(
                            tf.ones(tf.shape(Bs)[:1],
                                    dtype=gpflow.default_float())) *
                               gpflow.default_jitter())
                        if self.fDebug:
                            tf.print([tf.shape(kbb), kbb],
                                     name="kbb",
                                     summarize=10)
                            tf.print(
                                [self.kern.lengthscales.numpy()],
                                name="lenscales",
                                summarize=10,
                            )
                            tf.print(
                                [self.kern.variance.numpy()],
                                name="variance",
                                summarize=10,
                            )
                            tf.print([Bs], name="Bs", summarize=10)

                        Kbbs_inv = tf.linalg.inv(kbb, name="invKbb")  # B X B
                        Kb1s = self.kern.K(t1s, Bs)  # N*m X B
                        Kb2s = self.kern.K(t2s, Bs)  # N*m X B

                        a = tf.linalg.matmul(Kb1s, Kbbs_inv)
                        K_crosss = tf.linalg.matmul(a,
                                                    tf.transpose(Kb2s),
                                                    name="Kt1_Bi_invBB_KBt2")

                        K_s = tf.where(t12F, K_crosss, K_s, name="selectIndex")
        return K_s
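The cross-block above is the standard conditional cross-covariance through the branching points, K(t1, B) K(B, B)^{-1} K(B, t2). A tiny numpy sketch of the same formula (cross_cov is a hypothetical helper for illustration only):
import numpy as np

def cross_cov(k, t1, t2, B, jitter=1e-6):
    # K(t1, B) K(B, B)^{-1} K(B, t2) for a generic kernel function k
    Kbb = k(B, B) + jitter * np.eye(len(B))
    return k(t1, B) @ np.linalg.solve(Kbb, k(B, t2))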
Example #14
def vaele_jitter():
    return gpflow.default_jitter()
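vaele_jitter simply forwards GPflow's configured jitter; the value can be changed globally (a sketch):
import gpflow

gpflow.config.set_default_jitter(1e-5)  # affects all subsequent calls
assert vaele_jitter() == 1e-5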