Example #1
        def w_init(shape, dtype=dtype):
            # Sample $u ~ N(f(Z), \epsilon)$ from the prior
            prior_f = prior_fn(Z.Z)
            prior_u = prior_f + (default_jitter() ** 0.5) * \
                        tf.random.normal(prior_f.shape, dtype=prior_f.dtype)

            # Compute matrix square root $Cov(u, u)^{1/2}$
            Suu = gpflow.covariances.Kuu(Z,
                                         latent_kernel,
                                         jitter=default_jitter())
            Luu = tf.linalg.cholesky(Suu)

            # Sample $u ~ N(q_mu, q_sqrt q_sqrt^T)$ from the inducing distribution $q(u)$
            q_mu = model.q_mu[:, latent_dim:latent_dim + 1]  # Mx1
            q_sqrt = model.q_sqrt[latent_dim]  # MxM
            rvs = tf.random.normal(shape=shape, dtype=dtype)
            induced_u = q_mu + tf.matmul(q_sqrt, rvs, transpose_b=True)
            if model.whiten:
                induced_u = Luu @ induced_u

            # Solve for $Cov(u, u)^{-1} (u - f(Z))$
            init = tf.linalg.adjoint(
                parallel_solve(solver=tf.linalg.cholesky_solve,
                               lhs=Luu,
                               rhs=induced_u - prior_u))

            assert tuple(init.shape) == tuple(shape)
            return tf.cast(init, dtype)
Example #2
    def _init_backwards_layers(self,
                               X,
                               Y,
                               Z,
                               mean_function=Zero(),
                               optimize_inducing_location=True,
                               Layer=SVGPLayer,
                               white=False):
        backlayers = []
        num_inputs = X.shape[1]
        num_outputs = Y.shape[1]
        num_inducing = Z.shape[0]

        for i in range(num_outputs):
            if i == 0: inducing_points = Z[:, :num_inputs]
            else: inducing_points = Z[:, num_inputs + num_outputs - i][:, None]
            layer = Layer(
                SquaredExponential(),
                inducing_points,
                Z[:, num_inputs + num_outputs - i - 1],
                [default_jitter()] * num_inducing,
                mean_function,
                optimize_inducing_location=optimize_inducing_location,
                white=white)
            backlayers.append(layer)
        return backlayers
Example #3
    def sample(self, batch_size, train_size, num_context, x_min, x_max):
        # [batch_size, train_size]
        x = np.random.uniform(x_min, x_max, size=(batch_size, train_size))
        x = np.expand_dims(
            x, 2)  # [batch_size, train_size=num_context + num_target, 1]

        knn = self.kernel(x)  # [batch_size, train_size, train_size]
        knn = ops.add_to_diagonal(knn, default_jitter())
        Lnn = np.linalg.cholesky(knn)
        Vnn = np.random.normal(size=(batch_size, train_size, 1))

    y = Lnn @ Vnn  # [batch_size, train_size, 1]

        idx = [
            np.random.permutation(train_size)[:num_context]
            for i in range(batch_size)
        ]
        x_context = [x[i, idx[i], :] for i in range(batch_size)]
        x_context = np.array(x_context)
        y_context = [y[i, idx[i], :] for i in range(batch_size)]
        y_context = np.array(y_context)

        return x_context, y_context, x, y
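
A minimal, self-contained NumPy sketch of the same sampling scheme (jittered Cholesky of a batched kernel matrix, followed by a random context subset). The RBF kernel, sizes and jitter value below are assumptions for illustration, not taken from the snippet above.

import numpy as np

def rbf(x, lengthscale=1.0):
    # x: [batch, n, 1] -> pairwise kernel matrix [batch, n, n]
    d = x - np.transpose(x, (0, 2, 1))
    return np.exp(-0.5 * (d / lengthscale) ** 2)

batch, n, num_context, jitter = 4, 50, 10, 1e-6
x = np.random.uniform(-2.0, 2.0, size=(batch, n, 1))
K = rbf(x) + jitter * np.eye(n)                       # stabilise the Cholesky
y = np.linalg.cholesky(K) @ np.random.normal(size=(batch, n, 1))

idx = [np.random.permutation(n)[:num_context] for _ in range(batch)]
x_context = np.stack([x[i, idx[i]] for i in range(batch)])  # [batch, num_context, 1]
y_context = np.stack([y[i, idx[i]] for i in range(batch)])  # [batch, num_context, 1]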
Example #4
def test_independent_interdomain_conditional_whiten(whiten):
    """
    This test checks the effect of the `white` flag, which changes the projection matrix `A`.

    The impact of the flag on the value of `A` can be easily verified by its effect on the
    predicted mean. While the predicted covariance is also a function of `A` this test does not
    inspect that value.
    """
    N, P = Data.N, Data.P

    Lm = np.random.randn(1, 1, 1).astype(np.float32) ** 2
    Kmm = Lm * Lm + default_jitter()

    Kmn = tf.ones((1, 1, N, P))

    Knn = tf.ones((N, P))
    f = np.random.randn(1, 1).astype(np.float32)

    mean, _ = independent_interdomain_conditional(
        Kmn,
        Kmm,
        Knn,
        f,
        white=whiten,
    )

    if whiten:
        expected_mean = (f * Kmn) / Lm
    else:
        expected_mean = (f * Kmn) / Kmm

    np.testing.assert_allclose(mean, expected_mean[0][0], rtol=1e-2)
Example #5
    def _create_update_fn(batch_shape, prior_fn):
        Z, u = model.data
        sigma2 = model.likelihood.variance + default_jitter()
        if model.mean_function is not None:
            u = u - model.mean_function(Z)

        m = Z.shape[-2]
        Kuu = model.kernel(Z, full_cov=True)
        Suu = tf.linalg.set_diag(Kuu, tf.linalg.diag_part(Kuu) + sigma2)
        Luu = tf.linalg.cholesky(Suu)
        basis = KernelBasis(kernel=model.kernel, centers=Z)

        def w_init(shape, dtype=dtype):
            prior_f = prior_fn(Z)
            prior_u = prior_f + (sigma2 ** 0.5) * \
                        tf.random.normal(prior_f.shape, dtype=prior_f.dtype)

            init = tf.linalg.adjoint(
                parallel_solve(solver=tf.linalg.cholesky_solve,
                               lhs=Luu,
                               rhs=u - prior_u))
            assert tuple(init.shape) == tuple(shape)
            return tf.cast(init, dtype)

        weights = w_init(shape=batch_shape + [m])
        return BayesianLinearSampler(basis=basis,
                                     weights=weights,
                                     weight_initializer=w_init)
Example #6
def _sample_joint_conv2d(kern,
                         Z,
                         Xnew,
                         num_samples: int,
                         L: TensorLike = None,
                         diag: Union[float, tf.Tensor] = None):
    """
  Sample from the joint distribution of $f(X), g(Z)$ via a
  location-scale transform.
  """
    if diag is None:
        diag = default_jitter()

    # Construct joint covariance and compute matrix square root
    if L is None:
        Zp = Z.as_patches  # [M, patch_len]
        Xp = kern.get_patches(Xnew, full_spatial=False)
        P = tf.concat([Zp, tf.reshape(Xp, [-1, Xp.shape[-1]])], axis=0)
        K = kern.kernel(P, full_cov=True)
        K = tf.linalg.set_diag(K, tf.linalg.diag_part(K) + diag)
        L = tf.linalg.cholesky(K)
        L = tf.tile(L[None], [kern.channels_out, 1, 1])  # TODO: Improve me

    # Draw samples using a location-scale transform
    spatial_in = Xnew.shape[-3:-1]
    spatial_out = kern.get_spatial_out(spatial_in)
    rvs = tf.random.normal(list(L.shape[:-1]) + [num_samples], dtype=floatx())
    draws = tf.transpose(L @ rvs)  # [S, M + P, L]
    fz, fx = tf.split(draws, [len(Z), -1], axis=1)

    # Reorganize $f(X)$ as a 3d feature map
    fx_shape = [num_samples, Xnew.shape[0]] + spatial_out + [kern.channels_out]
    fx = tf.reshape(fx, fx_shape)
    return (fz, fx), L
Example #7
    def __init__(self,
                 kernel,
                 inducing_variables,
                 mean_function,
                 white=False,
                 **kwargs):
        super().__init__(**kwargs)

        self.inducing_points = inducing_variables

        self.num_inducing = inducing_variables.shape[0]
        m = inducing_variables.shape[1]

        # Initialise q_mu to all zeros
        q_mu = np.zeros((self.num_inducing, 1))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt to near deterministic. Store as lower triangular matrix L.
        q_sqrt = 1e-4 * np.eye(self.num_inducing, dtype=default_float())
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        self.kernel = kernel
        self.mean_function = mean_function
        self.white = white

        # Initialise to prior (Ku) + jitter.
        if not self.white:
            Ku = self.kernel(self.inducing_points)
            Ku += default_jitter() * tf.eye(self.num_inducing, dtype=Ku.dtype)
            Lu = tf.linalg.cholesky(Ku)
            q_sqrt = Lu
            self.q_sqrt = Parameter(q_sqrt, transform=triangular())
Example #8
def _linear_fallback(Z: TensorLike,
                     u: TensorLike,
                     f: TensorLike,
                     *,
                     L: TensorLike = None,
                     diag: TensorLike = None,
                     basis: AbstractBasis = None,
                     **kwargs):

    u_shape = tuple(u.shape)
    f_shape = tuple(f.shape)
    assert u_shape[-1] == 1, "Received multiple output features"
    assert u_shape == f_shape[-len(u_shape):], "Incompatible shapes detected"

    # Prepare diagonal term
    if diag is None:  # used by <GPflow.conditionals>
        diag = default_jitter()
    if isinstance(diag, float):
        diag = tf.convert_to_tensor(diag, dtype=f.dtype)
    diag = tf.expand_dims(diag, axis=-1)  # [M, 1] or [1, 1] or [1]

    # Extract "features" of Z
    if basis is None:
        if isinstance(Z, inducing_variables.InducingVariables):
            feat = inducing_to_tensor(Z)  # [M, D]
        else:
            feat = Z
    else:
        feat = basis(Z)  # [M, D] (maybe a different "D" than above)

    # Compute error term and matrix square root $Cov(u, u)^{1/2}$
    err = swap_axes(u - f, -3, -1)  # [1, M, S]
    err -= tf.sqrt(diag) * tf.random.normal(err.shape, dtype=err.dtype)
    M, D = feat.shape[-2:]
    if L is None:
        if D < M:
            feat_iDiag = feat * tf.math.reciprocal(diag)
            S = tf.matmul(feat_iDiag, feat, transpose_a=True)  # [D, D]
            L = tf.linalg.cholesky(S + tf.eye(S.shape[-1], dtype=S.dtype))
        else:
            K = tf.matmul(feat, feat, transpose_b=True)  # [M, M]
            K = tf.linalg.set_diag(K, tf.linalg.diag_part(K) + diag[..., 0])
            L = tf.linalg.cholesky(K)
    else:
        assert L.shape[-1] == min(M, D)  # TODO: improve me

    # Solve for $Cov(u, u)^{-1}(u - f(Z))$
    if D < M:
        feat_iDiag = feat * tf.math.reciprocal(diag)
        weights = tf.linalg.adjoint(
            tf.linalg.cholesky_solve(
                L, tf.matmul(feat_iDiag, err, transpose_a=True)))
    else:
        iK_err = tf.linalg.cholesky_solve(L, err)  # [S, M, 1]
        weights = tf.matmul(iK_err, feat, transpose_a=True)  # [S, 1, D]

    return DenseSampler(basis=basis,
                        weights=move_axis(weights, -2, -3),
                        **kwargs)
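
The D < M and M <= D branches above return the same weights; only the size of the solved system changes. This relies on the push-through identity (F F^T + D)^{-1} F = D^{-1} F (I + F^T D^{-1} F)^{-1}. A hedged NumPy check on random matrices (all names here are illustrative, not from the snippet):

import numpy as np

M, Dim = 8, 3
F = np.random.randn(M, Dim)                      # "features" of Z, [M, D]
d = np.full(M, 1e-3)                             # diagonal term
err = np.random.randn(M, 1)                      # u - f(Z)

# M-space solve: err^T (F F^T + diag)^{-1} F
K = F @ F.T + np.diag(d)
w_big = np.linalg.solve(K, err).T @ F            # [1, D]

# D-space solve: ((I + F^T diag^{-1} F)^{-1} F^T diag^{-1} err)^T
Fi = F / d[:, None]                              # diag^{-1} F
S = np.eye(Dim) + F.T @ Fi
w_small = np.linalg.solve(S, Fi.T @ err).T       # [1, D]

np.testing.assert_allclose(w_big, w_small, rtol=1e-6)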
Example #9
    def predict_f(self,
                  Xnew: InputData,
                  full_cov: bool = False,
                  full_output_cov: bool = False) -> MeanAndVariance:
        """
        Compute the mean and variance of the latent function at some new points.
        Note that this is very similar to the SGPR prediction, for which
        there are notes in the SGPR notebook.

        Note: This model does not allow full output covariances.

        :param Xnew: points at which to predict
        """
        if full_output_cov:
            raise NotImplementedError

        pX = DiagonalGaussian(self.X_data_mean, self.X_data_var)

        Y_data = self.data
        num_inducing = self.inducing_variable.num_inducing
        psi1 = expectation(pX, (self.kernel, self.inducing_variable))
        psi2 = tf.reduce_sum(
            expectation(pX, (self.kernel, self.inducing_variable),
                        (self.kernel, self.inducing_variable)),
            axis=0,
        )
        jitter = default_jitter()
        Kus = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
        sigma2 = self.likelihood.variance
        sigma = tf.sqrt(sigma2)
        L = tf.linalg.cholesky(
            covariances.Kuu(self.inducing_variable, self.kernel,
                            jitter=jitter))

        A = tf.linalg.triangular_solve(L, tf.transpose(psi1),
                                       lower=True) / sigma
        tmp = tf.linalg.triangular_solve(L, psi2, lower=True)
        AAT = tf.linalg.triangular_solve(L, tf.transpose(tmp),
                                         lower=True) / sigma2
        B = AAT + tf.eye(num_inducing, dtype=default_float())
        LB = tf.linalg.cholesky(B)
        c = tf.linalg.triangular_solve(
            LB, tf.linalg.matmul(A, Y_data), lower=True) / sigma
        tmp1 = tf.linalg.triangular_solve(L, Kus, lower=True)
        tmp2 = tf.linalg.triangular_solve(LB, tmp1, lower=True)
        mean = tf.linalg.matmul(tmp2, c, transpose_a=True)
        if full_cov:
            var = (self.kernel(Xnew) +
                   tf.linalg.matmul(tmp2, tmp2, transpose_a=True) -
                   tf.linalg.matmul(tmp1, tmp1, transpose_a=True))
            shape = tf.stack([1, 1, tf.shape(Y_data)[1]])
            var = tf.tile(tf.expand_dims(var, 2), shape)
        else:
            var = (self.kernel(Xnew, full_cov=False) +
                   tf.reduce_sum(tf.square(tmp2), axis=0) -
                   tf.reduce_sum(tf.square(tmp1), axis=0))
            shape = tf.stack([1, tf.shape(Y_data)[1]])
            var = tf.tile(tf.expand_dims(var, 1), shape)
        return mean + self.mean_function(Xnew), var
Example #10
def test_inducing_variables_psd_schur(input_dim, inducing_variable, kernel):
    # Conditional variance must be PSD.
    X = np.random.randn(5, input_dim)
    Kuf_values = Kuf(inducing_variable, kernel, X)
    Kuu_values = Kuu(inducing_variable, kernel, jitter=default_jitter())
    Kff_values = kernel(X)
    Qff_values = Kuf_values.numpy().T @ np.linalg.solve(Kuu_values, Kuf_values)
    assert np.all(np.linalg.eig(Kff_values - Qff_values)[0] > 0.0)
Example #11
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_test, config.input_dims],
                                  dtype=floatx())
            allK = []
            allZ = []
            Z_shape = config.num_cond, config.input_dims
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                rel_variance = tf.random.uniform(shape=[],
                                                 minval=0.9,
                                                 maxval=1.1,
                                                 dtype=floatx())

                allK.append(
                    cls(lengthscales=lenscales,
                        variance=config.kernel_variance * rel_variance))

                allZ.append(
                    InducingPoints(tf.random.uniform(Z_shape, dtype=floatx())))

            kern = kernels.SeparateIndependent(allK)
            Z = SeparateIndependentInducingVariables(allZ)

            Kuu = covariances.Kuu(Z,
                                  kern,
                                  jitter=gpflow_config.default_jitter())
            q_sqrt = tf.linalg.cholesky(Kuu)\
                     * tf.random.uniform(shape=[kern.num_latent_gps, 1, 1],
                                         minval=0.0,
                                         maxval=0.5,
                                         dtype=floatx())

            const = tf.random.normal([len(kern.kernels)], dtype=floatx())
            model = SVGP(kernel=kern,
                         likelihood=None,
                         inducing_variable=Z,
                         mean_function=mean_functions.Constant(c=const),
                         q_sqrt=q_sqrt,
                         whiten=False,
                         num_latent_gps=len(allK))

            mf, Sff = subroutine(config, model, X)
            mg, Sgg = model.predict_f(X, full_cov=True)
            tol = config.error_tol
            assert allclose(mf, mg, tol, tol)
            assert allclose(Sff, Sgg, tol, tol)
Example #12
 def KL(self):
     """The KL divergence from variational distribution to the prior."""
     if self.white:
         return kullback_leiblers.gauss_kl(self.q_mu,
                                           self.q_sqrt[None, :, :], None)
     else:
         K = self.kernel(self.inducing_points)
         K += default_jitter() * tf.eye(self.num_inducing, dtype=K.dtype)
         return kullback_leiblers.gauss_kl(self.q_mu,
                                           self.q_sqrt[None, :, :], K)
Example #13
    def compute_qu(self, full_cov: bool = True) -> Tuple[tf.Tensor, tf.Tensor]:
        """
        Computes the mean and variance of q(u) = N(mu, cov), the variational distribution on
        inducing outputs. SVGP with this q(u) should predict identically to
        SGPR.
        The derivation is as follows:
        q(u) = N(u | m, S)
        with precision
        S^{-1} = Kuu^{-1} + beta * Kuu^{-1} Kuf Kfu Kuu^{-1}
        and mean
        m = S Kuu^{-1} Kuf y beta

        where beta = sigma^{-2}
        
        :return: mu, cov
        """

        Y_data = self.data

        X_data_mean, X_data_var = self.encoder(Y_data)

        pX = DiagonalGaussian(X_data_mean, X_data_var)

        # num_inducing = self.inducing_variable.num_inducing

        #E_qx[Kfu]
        psi1 = expectation(pX, (self.kernel, self.inducing_variable))
        #E_qx[Kuf@Kfu]
        psi2 = tf.reduce_sum(
            expectation(pX, (self.kernel, self.inducing_variable),
                        (self.kernel, self.inducing_variable)),
            axis=0)

        kuu = covariances.Kuu(self.inducing_variable,
                              self.kernel,
                              jitter=default_jitter())
        kuf = tf.transpose(psi1)

        sig = kuu + psi2 * (self.likelihood.variance**-1)
        sig_sqrt = tf.linalg.cholesky(sig)

        sig_sqrt_kuu = tf.linalg.triangular_solve(sig_sqrt, kuu)
        # [M,M] -> [M(M +1)//2] =/= [M,D]

        cov = tf.linalg.matmul(sig_sqrt_kuu, sig_sqrt_kuu, transpose_a=True)

        err = Y_data - self.mean_function(X_data_mean)

        mu = (tf.linalg.matmul(sig_sqrt_kuu,
                               tf.linalg.triangular_solve(
                                   sig_sqrt, tf.linalg.matmul(kuf, err)),
                               transpose_a=True) / self.likelihood.variance)
        if not full_cov:
            return mu, cov
        else:
            return mu, tf.tile(cov[None, :, :], [mu.shape[-1], 1, 1])
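
A small NumPy sketch of the closed form used above, written for the plain SGPR setting with fixed inputs (so psi1 and psi2 reduce to Kfu and Kuf Kfu); the toy kernel and sizes are assumptions:

import numpy as np

M, N, sigma2 = 4, 20, 0.1
Z = np.random.randn(M, 1)
X = np.random.randn(N, 1)
y = np.random.randn(N, 1)
k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)   # toy RBF kernel
beta = 1.0 / sigma2

Kuu = k(Z, Z) + 1e-6 * np.eye(M)
Kuf = k(Z, X)

Sigma = Kuu + beta * Kuf @ Kuf.T                   # matches `sig` above
cov = Kuu @ np.linalg.solve(Sigma, Kuu)            # S = Kuu Sigma^{-1} Kuu
mu = beta * Kuu @ np.linalg.solve(Sigma, Kuf @ y)  # m = beta Kuu Sigma^{-1} Kuf y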
Example #14
  def build_cache(cls, model: gpflow.models.SVGP):
    assert model.q_sqrt.shape.ndims == 3 and model.q_sqrt.shape[0] == 1
    q_mu = model.q_mu
    q_sqrt = model.q_sqrt[0]

    Z = model.inducing_variable
    Suu = gpflow.covariances.Kuu(Z, model.kernel, jitter=default_jitter())
    return CacheLocationScaleSamplerSVGP(Z=Z,
                                         Luu=tf.linalg.cholesky(Suu),
                                         q_mu=q_mu,
                                         q_sqrt=q_sqrt)
Example #15
    def conditional_ND(self, X, full_cov=False):
        # X is [S,N,D]
        Kmm = Kuu(self.inducing_points, self.kernel, jitter=default_jitter())
        Lmm = tf.linalg.cholesky(Kmm)
        Kmm_tiled = tf.tile(tf.expand_dims(Kmm, 0), (self.num_outputs, 1, 1))
        Lmm_tiled = tf.tile(tf.expand_dims(Lmm, 0), (self.num_outputs, 1, 1))

        Kmn = Kuf(self.inducing_points, self.kernel, X)  # K(Z,X)
        # alpha(X) = k(Z,Z)^{-1}k(Z,X), = L^{-T}L^{-1}k(Z,X)
        A = tf.linalg.triangular_solve(Lmm, Kmn, lower=True)  # L^{-1}k(Z,X)
        if not self.white:
            # L^{-T}L^{-1}K(Z,X) is [M,N]
            A = tf.linalg.triangular_solve(tf.transpose(Lmm), A, lower=False)

        # m = alpha(X)^T(q_mu - m(Z)) = alpha(X)^T(q_mu) if zero mean function.
        mean = tf.matmul(A, self.q_mu, transpose_a=True)  # [N, D_out]

        # [D_out,M,N]
        A_tiled = tf.tile(A[None, :, :], [self.num_outputs, 1, 1])
        I = tf.eye(self.num_inducing, dtype=default_float())[None, :, :]

        # var = k(X,X) - alpha(X)^T(k(Z,Z)-q_sqrtq_sqrt^T)alpha(X)
        if self.white:
            SK = -I
        else:
            # -k(Z,Z)
            SK = -Kmm_tiled  # [D_out,M,M]

        if self.q_sqrt is not None:
            # SK = -k(Z,Z) + q_sqrtq_sqrt^T
            # [D_out,M,M]
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)

        # B = -(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
        B = tf.matmul(SK, A_tiled)  # [D_out,M,N]

        if full_cov:
            # delta_cov = -alpha(X)^T(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
            delta_cov = tf.matmul(A_tiled, B, transpose_a=True)  # [D_out,N,N]
            # Knn = k(X,X)
            Knn = self.kernel.K(X)
        else:
            # Summing A_tiled * B over the inducing dimension gives the
            # diagonal of A^T B, i.e. the per-point variance correction
            # (equivalent to the commented-out diag_part expression below).
            delta_cov = tf.reduce_sum(A_tiled * B, 1)
            #delta_cov = tf.linalg.diag_part(tf.matmul(A_tiled, B,
            #    transpose_a=True)) # [D_out,N]
            Knn = self.kernel.K_diag(X)  # [N]

        var = tf.expand_dims(Knn, 0) + delta_cov  # [D_out,N]
        var = tf.transpose(var)

        return mean + self.mean_function(X), var
Example #16
    def __init__(self,
                 kernel,
                 inducing_variables,
                 num_outputs,
                 mean_function,
                 input_prop_dim=None,
                 white=False,
                 **kwargs):
        super().__init__(input_prop_dim, **kwargs)

        self.num_inducing = inducing_variables.shape[0]
        self.mean_function = mean_function
        self.num_outputs = num_outputs
        self.white = white

        self.kernels = []
        for i in range(self.num_outputs):
            self.kernels.append(copy.deepcopy(kernel))

        # Initialise q_mu to all zeros
        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu, dtype=default_float())

        # Initialise q_sqrt to the identity matrix
        #q_sqrt = tf.tile(tf.expand_dims(tf.eye(self.num_inducing,
        #    dtype=default_float()), 0), (num_outputs, 1, 1))
        q_sqrt = [
            np.eye(self.num_inducing, dtype=default_float())
            for _ in range(num_outputs)
        ]
        q_sqrt = np.array(q_sqrt)
        # Store as lower triangular matrix L.
        self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        # Initialise to prior (Ku) + jitter.
        if not self.white:
            Kus = [
                self.kernels[i].K(inducing_variables)
                for i in range(self.num_outputs)
            ]
            Lus = [
                np.linalg.cholesky(Kus[i] + np.eye(self.num_inducing) *
                                   default_jitter())
                for i in range(self.num_outputs)
            ]
            q_sqrt = Lus
            q_sqrt = np.array(q_sqrt)
            self.q_sqrt = Parameter(q_sqrt, transform=triangular())

        self.inducing_points = []
        for i in range(self.num_outputs):
            self.inducing_points.append(
                inducingpoint_wrapper(inducing_variables))
Example #17
  def build_cache(cls, model: gpflow.models.GPR):
    Z, err = model.data
    sigma2 = model.likelihood.variance + default_jitter()
    if model.mean_function is not None:
      err -= model.mean_function(Z)

    Kuu = model.kernel(Z, full_cov=True)
    Suu = tf.linalg.set_diag(Kuu, tf.linalg.diag_part(Kuu) + sigma2)
    Luu = tf.linalg.cholesky(Suu)
    iLuu_err = parallel_solve(tf.linalg.triangular_solve, Luu, err)
  return CacheLocationScaleSamplerGPR(Z=Z,
                                      Luu=Luu,
                                      iLuu_err=iLuu_err)
Example #18
def residual_variances(model):
    X_data, Y_data = model.data

    Kdiag = model.kernel(X_data, full_cov=False)
    kuu = Kuu(model.inducing_variable, model.kernel, jitter=default_jitter())
    kuf = Kuf(model.inducing_variable, model.kernel, X_data)

    L = tf.linalg.cholesky(kuu)
    A = tf.linalg.triangular_solve(L, kuf, lower=True)

    c = Kdiag - tf.reduce_sum(tf.square(A), 0)

    return c.numpy()
Example #19
def reparameterise(mean, var, z, full_cov=False):
    """Implements the reparameterisation trick for the Gaussian, either full
    rank or diagonal.

    If z is a sample from N(0,I), the output is a sample from N(mean,var).

    :mean: A tensor, the mean of shape [S,N,1].
    :var: A tensor, the covariance of shape [S,N,1] or [S,N,N].
    :z: A tensor, samples from a unit Gaussian of shape [S,N,1].
    :full_cov: A boolean, indicates the shape of var."""
    if var is None:
        return mean

    if full_cov is False:
        return mean + z * (var + default_jitter())**0.5

    else:
        S, N = tf.shape(mean)[0], tf.shape(mean)[1]
        I = default_jitter() * tf.eye(N, dtype=default_float())\
                [None, :, :] # [1,N,N]
        chol = tf.linalg.cholesky(var + I)  # [S,N,N]
        f = mean + tf.matmul(chol, z)
        return f  # [S,N,1]
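
A minimal usage sketch, assuming `reparameterise` as defined above and an importable GPflow; shapes follow the docstring:

import tensorflow as tf
from gpflow.config import default_float

S, N = 3, 5
mean = tf.zeros([S, N, 1], dtype=default_float())
z = tf.random.normal([S, N, 1], dtype=default_float())

# Diagonal covariance: element-wise scaling of z.
var_diag = tf.ones([S, N, 1], dtype=default_float())
f_diag = reparameterise(mean, var_diag, z)                   # [S, N, 1]

# Full covariance: scale z by the Cholesky factor of each [N, N] block.
var_full = tf.eye(N, batch_shape=[S], dtype=default_float())
f_full = reparameterise(mean, var_full, z, full_cov=True)    # [S, N, 1]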
Example #20
def _conditional_train(
    Xnew: tf.Tensor,
    inducing_variable: InducingVariables,
    kernel: Kernel,
    f: tf.Tensor,
    *,
    full_cov=False,
    full_output_cov=False,
    q_sqrt=None,
    white=False,
):
    """
    Single-output GP conditional.

    The covariance matrices used to calculate the conditional have the following shape:
    - Kuu: [M, M]
    - Kuf: [M, N]
    - Kff: [N, N]

    Further reference
    -----------------
    - See `gpflow.conditionals._conditional` (below) for a detailed explanation of
      conditional in the single-output case.
    - See the multioutput notebook for more information about the multioutput framework.

    Parameters
    ----------
    :param Xnew: data matrix, size [N, D].
    :param f: data matrix, [M, R]
    :param full_cov: return the covariance between the datapoints
    :param full_output_cov: return the covariance between the outputs.
           NOTE: as we are using a single-output kernel with repetitions
                 these covariances will be zero.
    :param q_sqrt: matrix of standard-deviations or Cholesky matrices,
        size [M, R] or [R, M, M].
    :param white: boolean of whether to use the whitened representation
    :return:
        - mean:     [N, R]
        - variance: [N, R], [R, N, N], [N, R, R] or [N, R, N, R]
        Please see `gpflow.conditional._expand_independent_outputs` for more information
        about the shape of the variance, depending on `full_cov` and `full_output_cov`.
    """
    Kmm = Kuu(inducing_variable, kernel, jitter=default_jitter())  # [M, M]
    Kmn = Kuf(inducing_variable, kernel, Xnew)  # [M, N]
    Knn = kernel.diag_tr()  # uses an optimized function to calculate the covariances
    fmean, fvar = base_conditional(
        Kmn, Kmm, Knn, f, full_cov=full_cov, q_sqrt=q_sqrt, white=white
    )  # [N, R],  [R, N, N] or [N, R]
    return fmean, expand_independent_outputs(fvar, full_cov, full_output_cov)
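
For reference, a hedged NumPy sketch of the quantities `base_conditional` is expected to return here in the unwhitened case with a full [M, M] `q_sqrt` (the whitened case uses A = Lm^{-1} Kmn instead of Kmm^{-1} Kmn); the toy kernel and sizes are assumptions:

import numpy as np

M, N = 5, 7
k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)   # toy RBF kernel, k(x, x) = 1
Z = np.random.randn(M, 1)
Xnew = np.random.randn(N, 1)

Kmm = k(Z, Z) + 1e-6 * np.eye(M)                 # Kuu + jitter
Kmn = k(Z, Xnew)                                 # Kuf
Knn = np.ones(N)                                 # diagonal of Kff
f = np.random.randn(M, 1)                        # q_mu
q_sqrt = 0.1 * np.tril(np.random.randn(M, M))

A = np.linalg.solve(Kmm, Kmn)                    # Kmm^{-1} Kmn, [M, N]
fmean = A.T @ f                                  # [N, 1]
fvar = Knn - np.sum(Kmn * A, axis=0) \
           + np.sum((q_sqrt.T @ A) ** 2, axis=0)  # marginal variances, [N]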
Example #21
 def _generate_u(self, num_samples: int, L: tf.Tensor = None):
   """
   Returns samples $u ~ q(u)$.
   """
   q_sqrt = tf.linalg.band_part(self.q_sqrt, -1, 0)
   shape = self.num_latent_gps, q_sqrt.shape[-1], num_samples
   rvs = tf.random.normal(shape, dtype=default_float())  # [L, M, S]
   uT = q_sqrt @ rvs + tf.transpose(self.q_mu)[..., None]
   if self.whiten:
     if L is None:
       Z = self.inducing_variable
       K = covariances.Kuu(Z, self.kernel, jitter=default_jitter())
       L = tf.linalg.cholesky(K)
     uT = L @ uT
   return tf.transpose(uT)  # [S, M, L]
Example #22
def test_fully_correlated_conditional_repeat_shapes(func, R):
    L, M, N, P = Data.L, Data.M, Data.N, Data.P

    Kmm = tf.ones((L * M, L * M)) + default_jitter() * tf.eye(L * M)
    Kmn = tf.ones((L * M, N, P))
    Knn = tf.ones((N, P))
    f = tf.ones((L * M, R))
    q_sqrt = None
    white = True

    m, v = func(
        Kmn, Kmm, Knn, f, full_cov=False, full_output_cov=False, q_sqrt=q_sqrt, white=white,
    )

    assert v.shape.as_list() == m.shape.as_list()
Example #23
  def __call__(self, X: TensorType,
               sample_shape: List[int] = None,
               full_cov: bool = None) -> tf.Tensor:

    if full_cov is None:
      full_cov = self.full_cov

    if sample_shape is None:
      sample_shape = self.sample_shape

    # Get or compute required terms
    Z, Luu, q_mu, q_sqrt = self.cache
    rvs = tf.random.normal(shape=list(sample_shape) + list(X.shape[:-1]),
                           dtype=X.dtype)

    # Solve for $Cov(u, u)^{-1/2} Cov(u, f)$
    # [!] Fix me: doesn't broadcast in the desired way
    #     Kuf = gpflow.covariances.Kuf(Z, self.model.kernel, X)
    Kuf = tf.linalg.adjoint(self.model.kernel(X, Z.Z))
    iLuu_Kuf = parallel_solve(tf.linalg.triangular_solve, Luu, Kuf)

    # Compute and draw samples from posterior
    if self.model.whiten:
      m = tf.matmul(iLuu_Kuf, q_mu, transpose_a=True)
      A = tf.matmul(iLuu_Kuf, q_sqrt, transpose_a=True)
    else:
      iSuu_Kuf = parallel_solve(tf.linalg.triangular_solve,
                                tf.linalg.adjoint(Luu),
                                iLuu_Kuf,
                                lower=False)
      m = tf.matmul(iSuu_Kuf, q_mu, transpose_a=True)
      A = tf.matmul(iSuu_Kuf, q_sqrt, transpose_a=True)

    if self.model.mean_function is not None:
      m += self.model.mean_function(X)

    if full_cov:
      S = self.model.kernel(X, full_cov=True) \
          + tf.matmul(A, A, transpose_b=True) \
          - tf.matmul(iLuu_Kuf, iLuu_Kuf, transpose_a=True)

      L = tf.linalg.cholesky(
            tf.linalg.set_diag(S, tf.linalg.diag_part(S) + default_jitter()))
      return m + tf.expand_dims(tf.linalg.matvec(L, rvs), -1)

    v = self.model.kernel(X, full_cov=False) \
        + tf.reduce_sum(tf.square(A) - tf.square(iLuu_Kuf), axis=-2)
    return m + tf.expand_dims(tf.sqrt(v) * rvs, axis=-1)
Example #24
def test_equivalence_vgp_and_opper_archambeau():
    kernel = gpflow.kernels.Matern52()
    likelihood = gpflow.likelihoods.StudentT()

    vgp_oa_model = _create_vgpao_model(kernel, likelihood, DatumVGP.q_alpha,
                                       DatumVGP.q_lambda)

    K = kernel(DatumVGP.X) + np.eye(DatumVGP.N) * default_jitter()
    L = np.linalg.cholesky(K)
    L_inv = np.linalg.inv(L)
    K_inv = np.linalg.inv(K)

    mean = K @ DatumVGP.q_alpha

    prec_dnn = K_inv[None, :, :] + np.array(
        [np.diag(l**2) for l in DatumVGP.q_lambda.T])
    var_dnn = np.linalg.inv(prec_dnn)

    svgp_model_unwhitened = _create_svgp_model(kernel,
                                               likelihood,
                                               mean,
                                               np.linalg.cholesky(var_dnn),
                                               whiten=False)

    mean_white_nd = L_inv.dot(mean)
    var_white_dnn = np.einsum('nN,dNM,mM->dnm', L_inv, var_dnn, L_inv)
    q_sqrt_nnd = np.linalg.cholesky(var_white_dnn)

    vgp_model = _create_vgp_model(kernel, likelihood, mean_white_nd,
                                  q_sqrt_nnd)

    likelihood_vgp = vgp_model.log_likelihood()
    likelihood_vgp_oa = vgp_oa_model.log_likelihood()
    likelihood_svgp_unwhitened = svgp_model_unwhitened.log_likelihood(
        DatumVGP.data)

    assert_allclose(likelihood_vgp, likelihood_vgp_oa, rtol=1e-2)
    assert_allclose(likelihood_vgp, likelihood_svgp_unwhitened, rtol=1e-2)

    vgp_oa_mu, vgp_oa_var = vgp_oa_model.predict_f(DatumVGP.Xs)
    svgp_unwhitened_mu, svgp_unwhitened_var = svgp_model_unwhitened.predict_f(
        DatumVGP.Xs)
    vgp_mu, vgp_var = vgp_model.predict_f(DatumVGP.Xs)

    assert_allclose(vgp_oa_mu, vgp_mu)
    assert_allclose(vgp_oa_var, vgp_var, rtol=1e-4)  # jitter?
    assert_allclose(svgp_unwhitened_mu, vgp_mu)
    assert_allclose(svgp_unwhitened_var, vgp_var, rtol=1e-4)
Example #25
def _test_cg_svgp(config: ConfigDense,
                  model: SVGP,
                  Xnew: tf.Tensor) -> tf.Tensor:
  """
  Sample generation subroutine common to each unit test
  """
  # Prepare preconditioner for CG
  Z = model.inducing_variable
  Kff = covariances.Kuu(Z, model.kernel, jitter=0)
  max_rank = config.num_cond//(2 if config.num_cond > 1 else 1)
  preconditioner = get_default_preconditioner(Kff,
                                              diag=default_jitter(),
                                              max_rank=max_rank)

  count = 0
  samples = []
  L_joint = None
  while count < config.num_samples:
    # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
    size = min(config.shard_size, config.num_samples - count)
    shape = model.num_latent_gps, config.num_cond, size
    rvs = tf.random.normal(shape=shape, dtype=floatx())
    u = tf.transpose(model.q_sqrt @ rvs)

    # Generate draws from the joint distribution $p(f(X), g(Z))$
    (f, fnew), L_joint = common.sample_joint(model.kernel,
                                             Z,
                                             Xnew,
                                             num_samples=size,
                                             L=L_joint)

    # Solve for update functions
    update_fns = cg_update(model.kernel,
                           Z,
                           u,
                           f,
                           tol=1e-6,
                           max_iter=config.num_cond,
                           preconditioner=preconditioner)

    samples.append(fnew + update_fns(Xnew))
    count += size

  samples = tf.concat(samples, axis=0)
  if model.mean_function is not None:
    samples += model.mean_function(Xnew)
  return samples
Example #26
def _exact_independent(kern: kernels.MultioutputKernel,
                       Z: TensorLike,
                       u: TensorLike,
                       f: TensorLike,
                       *,
                       L: TensorLike = None,
                       diag: TensorLike = None,
                       basis: AbstractBasis = None,
                       multioutput_axis: int = 0,
                       **kwargs):
    """
  Return (independent) pathwise updates for each of the latent prior processes
  $f$ subject to the condition $p(f | u) = N(f | u, diag)$ on $f = f(Z)$.
  """
    u_shape = tuple(u.shape)
    f_shape = tuple(f.shape)
    assert u_shape[
        -1] == kern.num_latent_gps, "Num. outputs != num. latent GPs"
    assert u_shape == f_shape[-len(u_shape):], "Incompatible shapes detected"
    if basis is None:  # finite-dimensional basis used to express the update
        basis = kernel_basis(kern, centers=Z)

    # Prepare diagonal term
    if diag is None:  # used by <GPflow.conditionals>
        diag = default_jitter()
    if isinstance(diag, float):
        diag = tf.convert_to_tensor(diag, dtype=f.dtype)
    diag = tf.expand_dims(diag, axis=-1)  # ([L] or []) + ([M] or []) + [1]

    # Compute error term and matrix square root $Cov(u, u)^{1/2}$
    err = swap_axes(u - f, -3, -1)  # [L, M, S]
    err -= tf.sqrt(diag) * tf.random.normal(err.shape, dtype=err.dtype)
    if L is None:
        if isinstance(Z, inducing_variables.InducingVariables):
            K = covariances.Kuu(Z, kern, jitter=0.0)
        else:
            K = kern(Z, full_cov=True, full_output_cov=False)
        K = tf.linalg.set_diag(K, tf.linalg.diag_part(K) + diag[..., 0])
        L = tf.linalg.cholesky(K)

    # Solve for $Cov(u, u)^{-1}(u - f(Z))$
    weights = move_axis(tf.linalg.cholesky_solve(L, err), -1, -3)  # [S, L, M]
    return MultioutputDenseSampler(basis=basis,
                                   weights=weights,
                                   multioutput_axis=multioutput_axis,
                                   **kwargs)
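
The weights solved for above realise the pathwise (Matheron) update f(.) + k(., Z) Cov(u, u)^{-1} (u - f(Z) - eps). A hedged single-GP NumPy sketch of the same rule, with a toy kernel and illustrative names:

import numpy as np

k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)   # toy RBF kernel
M, diag = 6, 1e-6
Z = np.linspace(-2.0, 2.0, M)[:, None]
u = np.random.randn(M, 1)                        # target values at Z
f_Z = np.random.randn(M, 1)                      # prior sample evaluated at Z
eps = np.sqrt(diag) * np.random.randn(M, 1)      # noise matching the diag term

K = k(Z, Z) + diag * np.eye(M)
weights = np.linalg.solve(K, u - f_Z - eps)      # Cov(u, u)^{-1} (u - f(Z) - eps)

def update_fn(x):
    # Pathwise correction added to the prior sample at new points x: [N, 1].
    return k(x, Z) @ weights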
Example #27
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_test, config.input_dims],
                                  dtype=floatx())
            Z_shape = config.num_cond, config.input_dims
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                base = cls(lengthscales=lenscales,
                           variance=config.kernel_variance)
                kern = kernels.SharedIndependent(base, output_dim=2)

                Z = SharedIndependentInducingVariables(
                    InducingPoints(tf.random.uniform(Z_shape, dtype=floatx())))
                Kuu = covariances.Kuu(Z,
                                      kern,
                                      jitter=gpflow_config.default_jitter())
                q_sqrt = tf.stack([
                    tf.zeros(2 * [config.num_cond], dtype=floatx()),
                    tf.linalg.cholesky(Kuu)
                ])

                const = tf.random.normal([2], dtype=floatx())
                model = SVGP(kernel=kern,
                             likelihood=None,
                             inducing_variable=Z,
                             mean_function=mean_functions.Constant(c=const),
                             q_sqrt=q_sqrt,
                             whiten=False,
                             num_latent_gps=2)

                mf, Sff = subroutine(config, model, X)
                mg, Sgg = model.predict_f(X, full_cov=True)
                tol = config.error_tol
                assert allclose(mf, mg, tol, tol)
                assert allclose(Sff, Sgg, tol, tol)
Example #28
    def conditional(self, X, full_cov=False):
        # X is [N,D] or [S*N,D]

        #Kmm = Kuu(self.inducing_points, self.kernel, jitter=default_jitter()) #[M,M]
        Kmm = self.kernel(self.inducing_points)
        Kmm += default_jitter() * tf.eye(self.num_inducing, dtype=Kmm.dtype)
        Lmm = tf.linalg.cholesky(Kmm)
        #Kmn = Kuf(self.inducing_points, self.kernel, X) #[M,N]
        Kmn = self.kernel(self.inducing_points, X)

        # alpha(X) = k(Z,Z)^{-1}k(Z,X), = L^{-T}L^{-1}k(Z,X)
        A = tf.linalg.triangular_solve(Lmm, Kmn, lower=True)  # L^{-1}k(Z,X)
        if not self.white:
            # L^{-T}L^{-1}K(Z,X) is [M,N]
            A = tf.linalg.triangular_solve(tf.transpose(Lmm), A, lower=False)

        # m = alpha(X)^T(q_mu - m(Z))
        mean = tf.matmul(A,
                         self.q_mu - self.mean_function(self.inducing_points),
                         transpose_a=True)  # [N,1]

        I = tf.eye(self.num_inducing, dtype=default_float())

        # var = k(X,X) - alpha(X)^T(k(Z,Z)-q_sqrtq_sqrt^T)alpha(X)
        if self.white: SK = -I
        else: SK = -Kmm

        if self.q_sqrt is not None:  # SK = -k(Z,Z) + q_sqrtq_sqrt^T
            SK += tf.matmul(self.q_sqrt, self.q_sqrt, transpose_b=True)

        # B = -(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
        B = tf.matmul(SK, A)  #[M,N]

        if full_cov:
            # delta_cov = -alpha(X)^T(k(Z,Z) - q_sqrtq_sqrt^T)alpha(X)
            delta_cov = tf.matmul(A, B, transpose_a=True)  # [N,N]
            Knn = self.kernel(X, full_cov=True, presliced=False)
        else:
            delta_cov = tf.reduce_sum(A * B, 0)
            Knn = self.kernel(X, full_cov=False, presliced=False)

        var = Knn + delta_cov
        var = tf.transpose(var)

        return mean + self.mean_function(X), var
Example #29
        def log_likelihood(self):
            """
            Computes the log likelihood.
            """
            x, y = self.data
            K = self.kernel(x)
            num_data = x.shape[0]
            k_diag = tf.linalg.diag_part(K)
            s_diag = tf.convert_to_tensor(self.likelihood.variance)
            jitter = tf.cast(tf.fill([num_data], default_jitter()),
                             'float64')  # stabilize K matrix w/jitter
            ks = tf.linalg.set_diag(K, k_diag + s_diag + jitter)
            L = tf.linalg.cholesky(ks)
            m = self.mean_function(x)

            # [R,] log-likelihoods for each independent dimension of Y
            log_prob = multivariate_normal(y, m, L)
            return tf.reduce_sum(log_prob)
Example #30
def _sample_joint_inducing(kern,
                           Z,
                           Xnew,
                           num_samples: int,
                           L: TensorLike = None,
                           diag: Union[float, tf.Tensor] = None):
    """
  Sample from the joint distribution of $f(X), g(Z)$ via a
  location-scale transform.
  """
    if diag is None:
        diag = default_jitter()

    # Construct joint covariance and compute matrix square root
    has_multiple_outputs = isinstance(kern, MultioutputKernel)
    if L is None:
        if has_multiple_outputs:
            Kff = kern(Xnew, full_cov=True, full_output_cov=False)
        else:
            Kff = kern(Xnew, full_cov=True)
        Kuu = covariances.Kuu(Z, kern, jitter=0.0)
        Kuf = covariances.Kuf(Z, kern, Xnew)
        if isinstance(kern, SharedIndependent) and \
           isinstance(Z, SharedIndependentInducingVariables):
            Kuu = tf.tile(Kuu[None], [Kff.shape[0], 1, 1])
            Kuf = tf.tile(Kuf[None], [Kff.shape[0], 1, 1])

        K = tf.concat([
            tf.concat([Kuu, Kuf], axis=-1),
            tf.concat([tf.linalg.adjoint(Kuf), Kff], axis=-1)
        ],
                      axis=-2)

        K = tf.linalg.set_diag(K, tf.linalg.diag_part(K) + diag)
        L = tf.linalg.cholesky(K)

    # Draw samples using a location-scale transform
    rvs = tf.random.normal(list(L.shape[:-1]) + [num_samples], dtype=floatx())
    draws = L @ rvs  # [L, M + N, S] or [M + N, S]
    if not has_multiple_outputs:
        draws = tf.expand_dims(draws, 0)

    return tf.split(tf.transpose(draws), [-1, Xnew.shape[0]], axis=-2), L
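
A hedged single-output NumPy sketch of the same location-scale construction (toy kernel and sizes assumed): build the joint covariance over [g(Z); f(X)], add jitter to the diagonal, and push standard normal draws through its Cholesky factor.

import numpy as np

k = lambda a, b: np.exp(-0.5 * (a - b.T) ** 2)   # toy RBF kernel
M, N, S, jitter = 4, 10, 3, 1e-6
Z = np.random.randn(M, 1)
X = np.random.randn(N, 1)

K = np.block([[k(Z, Z), k(Z, X)],
              [k(X, Z), k(X, X)]]) + jitter * np.eye(M + N)
L = np.linalg.cholesky(K)

draws = L @ np.random.randn(M + N, S)            # [M + N, S]
gz, fx = draws[:M].T, draws[M:].T                # [S, M] and [S, N]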