Example #1
def test_compare_mixed_kernel(session_tf):
    data = DataMixedKernel

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X,
              data.Y,
              k1,
              Gaussian(),
              feat=f1,
              q_mu=data.mu_data,
              q_sqrt=data.sqrt_data)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.MixedKernelSharedMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X,
              data.Y,
              k2,
              Gaussian(),
              feat=f2,
              q_mu=data.mu_data,
              q_sqrt=data.sqrt_data)

    check_equality_predictions(session_tf, [m1, m2])
Example #2
def test_MixedKernelSeparateMof():
    data = DataMixedKernel

    kern_list = [SquaredExponential() for _ in range(data.L)]
    inducing_variable_list = [
        InducingPoints(data.X[:data.M, ...]) for _ in range(data.L)
    ]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SeparateIndependentInducingVariables(inducing_variable_list)
    model_1 = SVGP(k1,
                   Gaussian(),
                   inducing_variable=f1,
                   q_mu=data.mu_data,
                   q_sqrt=data.sqrt_data)

    kern_list = [SquaredExponential() for _ in range(data.L)]
    inducing_variable_list = [
        InducingPoints(data.X[:data.M, ...]) for _ in range(data.L)
    ]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SeparateIndependentInducingVariables(inducing_variable_list)
    model_2 = SVGP(k2,
                   Gaussian(),
                   inducing_variable=f2,
                   q_mu=data.mu_data,
                   q_sqrt=data.sqrt_data)

    check_equality_predictions(Data.data, [model_1, model_2])
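
In Example #2, both models use a LinearCoregionalization kernel, which mixes L latent GPs g(x) into the P outputs as f(x) = W g(x), with W of shape [P, L]. The variational parameters are defined over the latent processes, which is why q_mu has shape [M, L] and q_sqrt has shape [L, M, M] rather than being sized by the number of outputs.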
Example #3
def test_multioutput_with_diag_q_sqrt(session_tf):
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L,
                       axis=0) * 2  # L x M x M

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k1 = mk.SeparateMixedMok(kern_list, W=data.W)
    f1 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m1 = SVGP(data.X,
              data.Y,
              k1,
              Gaussian(),
              feat=f1,
              q_mu=data.mu_data,
              q_sqrt=q_sqrt_diag,
              q_diag=True)

    kern_list = [RBF(data.D) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, W=data.W)
    f2 = mf.SharedIndependentMof(InducingPoints(data.X[:data.M, ...].copy()))
    m2 = SVGP(data.X,
              data.Y,
              k2,
              Gaussian(),
              feat=f2,
              q_mu=data.mu_data,
              q_sqrt=q_sqrt,
              q_diag=False)

    check_equality_predictions(session_tf, [m1, m2])
Example #4
def test_multioutput_with_diag_q_sqrt():
    data = DataMixedKernel

    q_sqrt_diag = np.ones((data.M, data.L)) * 2
    q_sqrt = np.repeat(np.eye(data.M)[None, ...], data.L,
                       axis=0) * 2  # L x M x M

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k1 = mk.LinearCoregionalization(kern_list, W=data.W)
    f1 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_1 = SVGP(
        k1,
        Gaussian(),
        inducing_variable=f1,
        q_mu=data.mu_data,
        q_sqrt=q_sqrt_diag,
        q_diag=True,
    )

    kern_list = [SquaredExponential() for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, W=data.W)
    f2 = mf.SharedIndependentInducingVariables(
        InducingPoints(data.X[:data.M, ...]))
    model_2 = SVGP(
        k2,
        Gaussian(),
        inducing_variable=f2,
        q_mu=data.mu_data,
        q_sqrt=q_sqrt,
        q_diag=False,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
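
The two models in Example #4 are meant to parameterize the same posterior: with q_diag=True, q_sqrt stores only the [M, L] diagonal entries, which correspond to the [L, M, M] diagonal matrices passed to the second model. A minimal numpy sketch of that correspondence (standalone illustration, not part of the test):

import numpy as np

M, L = 4, 3  # illustrative sizes, not the DataMixedKernel constants
q_sqrt_diag = np.ones((M, L)) * 2                                        # [M, L]
q_sqrt_full = np.stack([np.diag(q_sqrt_diag[:, l]) for l in range(L)])   # [L, M, M]
assert np.allclose(q_sqrt_full, np.repeat(np.eye(M)[None, ...], L, axis=0) * 2)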
Example #5
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_test, config.input_dims],
                                  dtype=floatx())
            Z_shape = config.num_cond, config.input_dims
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                q_sqrt = tf.zeros([1] + 2 * [config.num_cond], dtype=floatx())
                kern = cls(lengthscales=lenscales,
                           variance=config.kernel_variance)
                Z = InducingPoints(tf.random.uniform(Z_shape, dtype=floatx()))

                const = tf.random.normal([1], dtype=floatx())
                model = SVGP(kernel=kern,
                             likelihood=None,
                             inducing_variable=Z,
                             mean_function=mean_functions.Constant(c=const),
                             q_sqrt=q_sqrt)

                mf, Sff = subroutine(config, model, X)
                mg, Sgg = model.predict_f(X, full_cov=True)

                tol = config.error_tol
                assert allclose(mf, mg, tol, tol)
                assert allclose(Sff, Sgg, tol, tol)
Example #6
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_test, config.input_dims],
                                  dtype=floatx())
            allK = []
            allZ = []
            Z_shape = config.num_cond, config.input_dims
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                rel_variance = tf.random.uniform(shape=[],
                                                 minval=0.9,
                                                 maxval=1.1,
                                                 dtype=floatx())

                allK.append(
                    cls(lengthscales=lenscales,
                        variance=config.kernel_variance * rel_variance))

                allZ.append(
                    InducingPoints(tf.random.uniform(Z_shape, dtype=floatx())))

            kern = kernels.SeparateIndependent(allK)
            Z = SeparateIndependentInducingVariables(allZ)

            Kuu = covariances.Kuu(Z,
                                  kern,
                                  jitter=gpflow_config.default_jitter())
            q_sqrt = tf.linalg.cholesky(Kuu)\
                     * tf.random.uniform(shape=[kern.num_latent_gps, 1, 1],
                                         minval=0.0,
                                         maxval=0.5,
                                         dtype=floatx())

            const = tf.random.normal([len(kern.kernels)], dtype=floatx())
            model = SVGP(kernel=kern,
                         likelihood=None,
                         inducing_variable=Z,
                         mean_function=mean_functions.Constant(c=const),
                         q_sqrt=q_sqrt,
                         whiten=False,
                         num_latent_gps=len(allK))

            mf, Sff = subroutine(config, model, X)
            mg, Sgg = model.predict_f(X, full_cov=True)
            tol = config.error_tol
            assert allclose(mf, mg, tol, tol)
            assert allclose(Sff, Sgg, tol, tol)
Example #7
def test_mixed_mok_with_Id_vs_independent_mok(session_tf):
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependentMok(RBF(data.D, variance=0.5, lengthscales=1.2),
                                 data.L)
    f1 = InducingPoints(data.X[:data.M, ...].copy())
    m1 = SVGP(data.X,
              data.Y,
              k1,
              Gaussian(),
              f1,
              q_mu=data.mu_data_full,
              q_sqrt=data.sqrt_data_full)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=data.MAXITER)

    # Mixed Model
    kern_list = [
        RBF(data.D, variance=0.5, lengthscales=1.2) for _ in range(data.L)
    ]
    k2 = mk.SeparateMixedMok(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...].copy())
    m2 = SVGP(data.X,
              data.Y,
              k2,
              Gaussian(),
              f2,
              q_mu=data.mu_data_full,
              q_sqrt=data.sqrt_data_full)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
Example #8
def _test_linear_svgp(config: ConfigDense, model: SVGP,
                      Xnew: tf.Tensor) -> tf.Tensor:
    """
  Sample generation subroutine common to each unit test
  """
    Z = model.inducing_variable
    count = 0
    basis = fourier_basis(model.kernel, num_bases=config.num_bases)
    L_joint = None
    samples = []
    while count < config.num_samples:
        # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
        size = min(config.shard_size, config.num_samples - count)
        shape = model.num_latent_gps, config.num_cond, size
        rvs = tf.random.normal(shape=shape, dtype=floatx())
        u = tf.transpose(model.q_sqrt @ rvs)

        # Generate draws from the joint distribution $p(f(X), g(Z))$
        (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                 Z,
                                                 Xnew,
                                                 num_samples=size,
                                                 L=L_joint)

        # Solve for update functions
        update_fns = linear_update(Z, u, f, basis=basis)
        samples.append(fnew + update_fns(Xnew))
        count += size

    samples = tf.concat(samples, axis=0)
    if model.mean_function is not None:
        samples += model.mean_function(Xnew)
    return samples
Example #9
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...])
    model_1 = SVGP(k1,
                   Gaussian(),
                   f1,
                   q_mu=data.mu_data_full,
                   q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Mixed Model
    kern_list = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(data.L)
    ]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...])
    model_2 = SVGP(k2,
                   Gaussian(),
                   f2,
                   q_mu=data.mu_data_full,
                   q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2])
Example #10
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X = tf.random.uniform([config.num_test, config.input_dims],
                                  dtype=floatx())
            Z_shape = config.num_cond, config.input_dims
            for cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (config.input_dims**0.5)
                maxval = config.rel_lengthscales_max * (config.input_dims**0.5)
                lenscales = tf.random.uniform(shape=[config.input_dims],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                base = cls(lengthscales=lenscales,
                           variance=config.kernel_variance)
                kern = kernels.SharedIndependent(base, output_dim=2)

                Z = SharedIndependentInducingVariables(
                    InducingPoints(tf.random.uniform(Z_shape, dtype=floatx())))
                Kuu = covariances.Kuu(Z,
                                      kern,
                                      jitter=gpflow_config.default_jitter())
                q_sqrt = tf.stack([
                    tf.zeros(2 * [config.num_cond], dtype=floatx()),
                    tf.linalg.cholesky(Kuu)
                ])

                const = tf.random.normal([2], dtype=floatx())
                model = SVGP(kernel=kern,
                             likelihood=None,
                             inducing_variable=Z,
                             mean_function=mean_functions.Constant(c=const),
                             q_sqrt=q_sqrt,
                             whiten=False,
                             num_latent_gps=2)

                mf, Sff = subroutine(config, model, X)
                mg, Sgg = model.predict_f(X, full_cov=True)
                tol = config.error_tol
                assert allclose(mf, mg, tol, tol)
                assert allclose(Sff, Sgg, tol, tol)
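
The stacked q_sqrt in Example #10 gives the two latent GPs deliberately different posteriors: with whiten=False and the default q_mu of zeros, the zero block means the first output has zero variational covariance at Z, while the cholesky(Kuu) block leaves the second output at its prior, since Kxx - Kxz Kzz^{-1} Kzx + Kxz Kzz^{-1} S Kzz^{-1} Kzx reduces to Kxx when S = Kuu.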
Example #11
def test_separate_independent_mok(session_tf):
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kern_list_1 = [
        RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)
    ]
    kernel_1 = mk.SeparateIndependentMok(kern_list_1)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X,
              Data.Y,
              kernel_1,
              Gaussian(),
              feature_1,
              q_mu=q_mu_1,
              q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list_2 = [
        RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)
    ]
    kernel_2 = mk.SeparateIndependentMok(kern_list_2)
    feature_2 = mf.SharedIndependentMof(
        InducingPoints(Data.X[:Data.M, ...].copy()))
    m2 = SVGP(Data.X,
              Data.Y,
              kernel_2,
              Gaussian(),
              feature_2,
              q_mu=q_mu_2,
              q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
Example #12
def test_mixed_mok_with_Id_vs_independent_mok():
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependent(SquaredExponential(variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[: data.M, ...])
    model_1 = SVGP(k1, Gaussian(), f1, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Mixed Model
    kern_list = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.LinearCoregionalization(kern_list, data.W)
    f2 = InducingPoints(data.X[: data.M, ...])
    model_2 = SVGP(k2, Gaussian(), f2, q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
Example #13
def init_spectral(x, y, M, Q, kern, n_inits=10, minibatch_size=256, noise_var=10.0, ARD=True, likelihood=None):
    print('Initializing a spectral kernel...')
    best_loglik = -np.inf
    best_m = None
    N, input_dim = x.shape
    for k in range(n_inits):
        try:
            #gpflow.reset_default_graph_and_session()
            with gpflow.defer_build():
                Z = random_Z(x, N, M)
                dists = pdist(Z, 'euclidean').ravel()
                max_freq = min(10.0, 1./np.min(dists[dists > 0.0]))
                max_len = min(5.0, np.max(dists) * (2*np.pi))
                k = kern(input_dim=input_dim, max_freq=max_freq, Q=Q, ARD=ARD, max_len=max_len)
                if likelihood is not None:
                    likhood = likelihood
                else:
                    likhood = gpflow.likelihoods.Gaussian(noise_var)
                    likhood.variance.prior = gpflow.priors.LogNormal(mu=0, var=1)
                model = SVGP(X=x, Y=y, Z=Z, kern=k, likelihood=likhood,
                             minibatch_size=minibatch_size)
                model.feature.Z.prior = gpflow.priors.Gaussian(0, 1)
            model.compile()
            loglik = model.compute_log_likelihood()
            if loglik > best_loglik:
                best_loglik = loglik
                best_m = model
                #best_dir = tempfile.TemporaryDirectory()
                #gpflow.saver.Saver().save(best_dir.name + 'model.gpflow', best_m)
            del model
            gc.collect()
        except tf.errors.InvalidArgumentError:  # cholesky fails sometimes (with really bad init?)
            pass
    print('Best init: %f' % best_loglik)
    print(best_m)
    #gpflow.reset_default_graph_and_session()
    #best_m = gpflow.saver.Saver().load(best_dir.name + 'model.gpflow')
    #best_m.compile()
    #print(best_m)
    return best_m
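
random_Z is not defined in this snippet. A minimal sketch consistent with how it is called above (a hypothetical helper that picks M of the N training inputs as initial inducing locations; the original may differ):

import numpy as np

def random_Z(x, N, M):
    # Hypothetical: choose M distinct rows of x as the initial inducing inputs.
    idx = np.random.choice(N, size=M, replace=False)
    return x[idx].copy()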
Example #14
def test_separate_independent_mok():
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (Inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP

    kern_list_1 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_1 = mk.SeparateIndependent(kern_list_1)
    inducing_variable_1 = InducingPoints(Data.X[: Data.M, ...])
    model_1 = SVGP(
        kernel_1, Gaussian(), inducing_variable_1, num_latent_gps=1, q_mu=q_mu_1, q_sqrt=q_sqrt_1,
    )
    set_trainable(model_1, False)
    set_trainable(model_1.q_sqrt, True)
    set_trainable(model_1.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_1.training_loss_closure(Data.data),
        variables=model_1.trainable_variables,
        method="BFGS",
        compile=True,
    )

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array(
        [np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)]
    )  # P x M x M
    kern_list_2 = [SquaredExponential(variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_2 = mk.SeparateIndependent(kern_list_2)
    inducing_variable_2 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[: Data.M, ...])
    )
    model_2 = SVGP(
        kernel_2,
        Gaussian(),
        inducing_variable_2,
        num_latent_gps=Data.P,
        q_mu=q_mu_2,
        q_sqrt=q_sqrt_2,
    )
    set_trainable(model_2, False)
    set_trainable(model_2.q_sqrt, True)
    set_trainable(model_2.q_mu, True)

    gpflow.optimizers.Scipy().minimize(
        model_2.training_loss_closure(Data.data),
        variables=model_2.trainable_variables,
        method="BFGS",
        compile=True,
    )

    check_equality_predictions(Data.data, [model_1, model_2])
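
Most of the efficiency gap between the two models in Example #14 lies in the size of q_sqrt: the fully-coupled parameterization stores one (MP) x (MP) factor, while the independent one stores P separate M x M factors. A quick element-count comparison (illustrative sizes, not the Data constants used in the test):

M, P = 50, 3
full_elements = (M * P) ** 2                # single 1 x MP x MP q_sqrt
independent_elements = P * M ** 2           # P x M x M q_sqrt
print(full_elements, independent_elements)  # 22500 vs 7500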
Example #15
def test_separate_independent_mof(session_tf):
    """
    Same test as above but we use different (i.e. separate) inducing features
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M,...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kernel_2 = mk.SharedIndependentMok(RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feat_list_2 = [InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)]
    feature_2 = mf.SeparateIndependentMof(feat_list_2)
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3 (Inefficient): an identical feature is used P times,
    # and treated as a separate feature.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kern_list = [RBF(Data.D, variance=0.5, lengthscales=1.2)  for _ in range(Data.P)]
    kernel_3 = mk.SeparateIndependentMok(kern_list)
    feat_list_3 = [InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)]
    feature_3 = mf.SeparateIndependentMof(feat_list_3)
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3, q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    m3.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
Example #16
def test_separate_independent_mok(session_tf):
    """
    We use different independent kernels for each of the output dimensions.
    We can achieve this in two ways:
        1) efficient: SeparateIndependentMok with Shared/SeparateIndependentMof
        2) inefficient: SeparateIndependentMok with InducingPoints
    However, both methods should return the same conditional,
    and after optimization return the same log likelihood.
    """
    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kern_list_1 = [RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_1 = mk.SeparateIndependentMok(kern_list_1)
    feature_1 = InducingPoints(Data.X[:Data.M,...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kern_list_2 = [RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)]
    kernel_2 = mk.SeparateIndependentMok(kern_list_2)
    feature_2 = mf.SharedIndependentMof(InducingPoints(Data.X[:Data.M, ...].copy()))
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
Example #17
def test_shared_independent_mok(session_tf):
    """
    In this test we use the same kernel and the same inducing features
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an inefficient one, where we use a SharedIndependentMok with InducingPoints.
           This combination uses a Kff of size N x P x N x P and a Kfu of size N x P x M x P,
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof.
           This combination uses the most efficient form of the matrices.
        3) the old way (also efficient): using a plain Kernel and InducingPoints.
        Models 2) and 3) follow more or less the same code path.
    """
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P, Data.M * Data.P))[None, ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M,...].copy())
    m1 = SVGP(Data.X, Data.Y, kernel_1, Gaussian(), feature_1, q_mu=q_mu_1, q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kernel_2 = RBF(Data.D, variance=0.5, lengthscales=1.2)
    feature_2 = InducingPoints(Data.X[:Data.M, ...].copy())
    m2 = SVGP(Data.X, Data.Y, kernel_2, Gaussian(), feature_2, q_mu=q_mu_2, q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)])  # P x M x M
    kernel_3 = mk.SharedIndependentMok(RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_3 = mf.SharedIndependentMof(InducingPoints(Data.X[:Data.M, ...].copy()))
    m3 = SVGP(Data.X, Data.Y, kernel_3, Gaussian(), feature_3, q_mu=q_mu_3, q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
Example #18
def init_neural(x, y, M, Q, n_inits=1, minibatch_size=256, noise_var=0.1, likelihood=None, hidden_sizes=None):
    print('Initializing neural spectral kernel...')
    best_loglik = -np.inf
    best_m = None
    N, input_dim = x.shape
    for k in range(n_inits):
        try:
            # gpflow.reset_default_graph_and_session()
            with gpflow.defer_build():
                Z = random_Z(x, N, M)
                k = NeuralSpectralKernel(input_dim=input_dim, Q=Q, hidden_sizes=hidden_sizes)
                if likelihood is not None:
                    likhood = likelihood
                else:
                    likhood = gpflow.likelihoods.Gaussian(noise_var)
                    likhood.variance.prior = gpflow.priors.LogNormal(mu=0, var=1)
                model = SVGP(X=x, Y=y, Z=Z, kern=k, likelihood=likhood,
                             minibatch_size=minibatch_size)
                model.feature.Z.prior = gpflow.priors.Gaussian(0, 1)
            model.compile()
            loglik = model.compute_log_likelihood()
            if loglik > best_loglik:
                best_loglik = loglik
                best_m = model
                # best_dir = tempfile.TemporaryDirectory()
                # gpflow.saver.Saver().save(best_dir.name + 'model.gpflow', best_m)
            del model
            gc.collect()
        except tf.errors.InvalidArgumentError:  # cholesky fails sometimes (with really bad init?)
            pass
    print('Best init: %f' % best_loglik)
    print(best_m)
    # gpflow.reset_default_graph_and_session()
    # best_m = gpflow.saver.Saver().load(best_dir.name + 'model.gpflow')
    # best_m.compile()
    # print(best_m)
    return best_m
Example #19
def _test_cg_svgp(config: ConfigDense,
                  model: SVGP,
                  Xnew: tf.Tensor) -> tf.Tensor:
  """
  Sample generation subroutine common to each unit test
  """
  # Prepare preconditioner for CG
  Z = model.inducing_variable
  Kff = covariances.Kuu(Z, model.kernel, jitter=0)
  max_rank = config.num_cond//(2 if config.num_cond > 1 else 1)
  preconditioner = get_default_preconditioner(Kff,
                                              diag=default_jitter(),
                                              max_rank=max_rank)

  count = 0
  samples = []
  L_joint = None
  while count < config.num_samples:
    # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
    size = min(config.shard_size, config.num_samples - count)
    shape = model.num_latent_gps, config.num_cond, size
    rvs = tf.random.normal(shape=shape, dtype=floatx())
    u = tf.transpose(model.q_sqrt @ rvs)

    # Generate draws from the joint distribution $p(f(X), g(Z))$
    (f, fnew), L_joint = common.sample_joint(model.kernel,
                                             Z,
                                             Xnew,
                                             num_samples=size,
                                             L=L_joint)

    # Solve for update functions
    update_fns = cg_update(model.kernel,
                           Z,
                           u,
                           f,
                           tol=1e-6,
                           max_iter=config.num_cond,
                           preconditioner=preconditioner)

    samples.append(fnew + update_fns(Xnew))
    count += size

  samples = tf.concat(samples, axis=0)
  if model.mean_function is not None:
    samples += model.mean_function(Xnew)
  return samples
Example #20
def _test_exact_svgp(config: Union[ConfigDense, ConfigConv2d], model: SVGP,
                     Xnew: tf.Tensor) -> tf.Tensor:
    """
  Sample generation subroutine common to each unit test
  """
    # Precompute Cholesky factor (optional)
    Z = model.inducing_variable
    Kuu = covariances.Kuu(Z, model.kernel, jitter=default_jitter())
    Luu = tf.linalg.cholesky(Kuu)

    count = 0
    L_joint = None
    samples = []
    while count < config.num_samples:
        # Sample $u ~ N(q_mu, q_sqrt q_sqrt^{T})$
        size = min(config.shard_size, config.num_samples - count)
        shape = model.num_latent_gps, config.num_cond, size
        rvs = tf.random.normal(shape=shape, dtype=floatx())
        u = tf.transpose(model.q_sqrt @ rvs)

        # Generate draws from the joint distribution $p(f(X), g(Z))$
        (f, fnew), L_joint = common.sample_joint(model.kernel,
                                                 Z,
                                                 Xnew,
                                                 num_samples=size,
                                                 L=L_joint)

        # Solve for update functions
        update_fns = exact_update(model.kernel, Z, u, f, L=Luu)
        samples.append(fnew + update_fns(Xnew))
        count += size

    samples = tf.concat(samples, axis=0)
    if model.mean_function is not None:
        samples += model.mean_function(Xnew)
    return samples
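
The three sample-generation subroutines in this collection (_test_linear_svgp, _test_cg_svgp and _test_exact_svgp) follow the same pathwise-conditioning scheme: draw (f(Z), f(Xnew)) jointly from the prior, draw u from the variational distribution, then correct the prior draw so that it agrees with u at the inducing points. In the exact case this correction is Matheron's rule, f(Xnew) + K(Xnew, Z) K(Z, Z)^{-1} (u - f(Z)); the CG variant performs the same solve with preconditioned conjugate gradients, and the linear variant computes the update in an explicit (here Fourier) feature basis.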
Example #21
def test_mixed_mok_with_Id_vs_independent_mok(session_tf):
    data = DataMixedKernelWithEye
    # Independent model
    k1 = mk.SharedIndependentMok(RBF(data.D, variance=0.5, lengthscales=1.2), data.L)
    f1 = InducingPoints(data.X[:data.M, ...].copy())
    m1 = SVGP(data.X, data.Y, k1, Gaussian(), f1,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=data.MAXITER)

    # Mixed Model
    kern_list = [RBF(data.D, variance=0.5, lengthscales=1.2) for _ in range(data.L)]
    k2 = mk.SeparateMixedMok(kern_list, data.W)
    f2 = InducingPoints(data.X[:data.M, ...].copy())
    m2 = SVGP(data.X, data.Y, k2, Gaussian(), f2,
              q_mu=data.mu_data_full, q_sqrt=data.sqrt_data_full)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2])
Example #22
def test_separate_independent_mof(session_tf):
    """
    Same test as above but we use different (i.e. separate) inducing features
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X,
              Data.Y,
              kernel_1,
              Gaussian(),
              feature_1,
              q_mu=q_mu_1,
              q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feat_list_2 = [
        InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)
    ]
    feature_2 = mf.SeparateIndependentMof(feat_list_2)
    m2 = SVGP(Data.X,
              Data.Y,
              kernel_2,
              Gaussian(),
              feature_2,
              q_mu=q_mu_2,
              q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3 (Inefficient): an identical feature is used P times,
    # and treated as a separate feature.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list = [
        RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)
    ]
    kernel_3 = mk.SeparateIndependentMok(kern_list)
    feat_list_3 = [
        InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)
    ]
    feature_3 = mf.SeparateIndependentMof(feat_list_3)
    m3 = SVGP(Data.X,
              Data.Y,
              kernel_3,
              Gaussian(),
              feature_3,
              q_mu=q_mu_3,
              q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    m3.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
Example #23
        def main(config):
            assert config is not None, ValueError
            tf.random.set_seed(config.seed)
            gpflow_config.set_default_float(config.floatx)
            gpflow_config.set_default_jitter(config.jitter)

            X_shape = [config.num_test
                       ] + config.image_shape + [config.channels_in]
            X = tf.reshape(tf.range(tf.reduce_prod(X_shape), dtype=floatx()),
                           X_shape)
            X /= tf.reduce_max(X)

            patch_len = config.channels_in * int(
                tf.reduce_prod(config.patch_shape))
            for base_cls in SupportedBaseKernels:
                minval = config.rel_lengthscales_min * (patch_len**0.5)
                maxval = config.rel_lengthscales_max * (patch_len**0.5)
                lenscales = tf.random.uniform(shape=[patch_len],
                                              minval=minval,
                                              maxval=maxval,
                                              dtype=floatx())

                base = base_cls(lengthscales=lenscales,
                                variance=config.kernel_variance)
                Z_shape = [config.num_cond
                           ] + config.patch_shape + [config.channels_in]
                for cls in (kernels_ext.Conv2d, kernels_ext.Conv2dTranspose):
                    kern = cls(kernel=base,
                               image_shape=config.image_shape,
                               patch_shape=config.patch_shape,
                               channels_in=config.channels_in,
                               channels_out=config.num_latent_gps,
                               strides=config.strides,
                               padding=config.padding,
                               dilations=config.dilations)

                    Z = InducingImages(
                        tf.random.uniform(Z_shape, dtype=floatx()))
                    q_sqrt = tf.linalg.cholesky(covariances.Kuu(Z, kern))
                    q_sqrt *= tf.random.uniform([config.num_latent_gps, 1, 1],
                                                minval=0.0,
                                                maxval=0.5,
                                                dtype=floatx())

                    # TODO: GPflow's SVGP class is not setup to support outputs defined
                    #       as spatial feature maps. For now, we content ourselves with
                    #       the following hack...
                    const = tf.random.normal([config.num_latent_gps],
                                             dtype=floatx())
                    mean_function = lambda x: const

                    model = SVGP(kernel=kern,
                                 likelihood=None,
                                 mean_function=mean_function,
                                 inducing_variable=Z,
                                 q_sqrt=q_sqrt,
                                 whiten=False,
                                 num_latent_gps=config.num_latent_gps)

                    mf, Sff = subroutine(config, model, X)
                    mg, Sgg = model.predict_f(X, full_cov=True)

                    tol = config.error_tol
                    assert allclose(mf, mg, tol, tol)
                    assert allclose(Sff, Sgg, tol, tol)
Example #24
def test_separate_independent_mof():
    """
    Same test as above but we use different (i.e. separate) inducing variables
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (INefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP

    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_1 = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable_1,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1)
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True
    model_1.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       method='BFGS')

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_list_2 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_2 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_2)
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True
    model_2.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       method='BFGS')

    # Model 3 (Inefficient): an identical inducing variable is used P times,
    # and treated as a separate one.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list = [
        SquaredExponential(variance=0.5, lengthscale=1.2)
        for _ in range(Data.P)
    ]
    kernel_3 = mk.SeparateIndependent(kern_list)
    inducing_variable_list_3 = [
        InducingPoints(Data.X[:Data.M, ...]) for _ in range(Data.P)
    ]
    inducing_variable_3 = mf.SeparateIndependentInducingVariables(
        inducing_variable_list_3)
    model_3 = SVGP(kernel_3,
                   Gaussian(),
                   inducing_variable_3,
                   q_mu=q_mu_3,
                   q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True
    model_3.q_mu.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
Example #25
def test_shared_independent_mok():
    """
    In this test we use the same kernel and the same inducing variables
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an inefficient one, where we use a SharedIndependentMok with InducingPoints.
           This combination uses a Kff of size N x P x N x P and a Kfu of size N x P x M x P,
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof.
           This combination uses the most efficient form of the matrices.
        3) the old way (also efficient): using a plain Kernel and InducingPoints.
        Models 2) and 3) follow more or less the same code path.
    """
    np.random.seed(0)
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable = InducingPoints(Data.X[:Data.M, ...])
    model_1 = SVGP(kernel_1,
                   Gaussian(),
                   inducing_variable,
                   q_mu=q_mu_1,
                   q_sqrt=q_sqrt_1,
                   num_latent=Data.Y.shape[-1])
    set_trainable(model_1, False)
    model_1.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure1():
        return -model_1.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure1,
                                       variables=model_1.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = SquaredExponential(variance=0.5, lengthscale=1.2)
    inducing_variable_2 = InducingPoints(Data.X[:Data.M, ...])
    model_2 = SVGP(kernel_2,
                   Gaussian(),
                   inducing_variable_2,
                   num_latent=Data.P,
                   q_mu=q_mu_2,
                   q_sqrt=q_sqrt_2)
    set_trainable(model_2, False)
    model_2.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure2():
        return -model_2.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure2,
                                       variables=model_2.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependent(
        SquaredExponential(variance=0.5, lengthscale=1.2), Data.P)
    inducing_variable_3 = mf.SharedIndependentInducingVariables(
        InducingPoints(Data.X[:Data.M, ...]))
    model_3 = SVGP(kernel_3,
                   Gaussian(),
                   inducing_variable_3,
                   num_latent=Data.P,
                   q_mu=q_mu_3,
                   q_sqrt=q_sqrt_3)
    set_trainable(model_3, False)
    model_3.q_sqrt.trainable = True

    @tf.function(autograph=False)
    def closure3():
        return -model_3.log_marginal_likelihood(Data.X, Data.Y)

    gpflow.optimizers.Scipy().minimize(closure3,
                                       variables=model_3.trainable_variables,
                                       options=dict(maxiter=500),
                                       method='BFGS')

    check_equality_predictions(Data.X, Data.Y, [model_1, model_2, model_3])
Example #26
def _svgp(inducing_variable: tf.Tensor) -> SVGP:
    return SVGP(gpflow.kernels.Linear(), gpflow.likelihoods.Gaussian(), inducing_variable)
Example #27
def __init__(self, *args, paths: AbstractSampler = None, **kwargs):
    SVGP.__init__(self, *args, **kwargs)
    self._paths = paths
Example #28
Z = np.linspace(-2, 2, 100)[:, None]

with tf.Session(graph=tf.Graph()) as sess:
    with gp.defer_build():

        # Define the likelihood
        likelihood = gp.likelihoods.Gaussian()
        # Define the underlying GP mean and kernel
        mean = gp.mean_functions.Zero()
        kernel = gp.kernels.RBF(1)
        # Create the HGP (note the slightly different order from SVGP)
        model = SVGP(X,
                     Y,
                     kernel,
                     likelihood,
                     mean_function=mean,
                     minibatch_size=100,
                     num_latent=1,
                     num_data=None,
                     whiten=False,
                     Z=Z)
        model.compile()

    run_with_adam(model, 1e-3, iterations, PrintAction(model, "Adam"))
    # Prediction uses stochastic sampling and produces
    # output of shape [num_samples, N, D]
    ystar, varstar = model.predict_y(X)

# In[5]:

plt.figure(figsize=(4, 4))
plt.plot(X[:, 0], ystar, alpha=1, c='r', label='vanilla-inferred')
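
run_with_adam and PrintAction are not defined in this snippet. A minimal sketch of what such a helper could do with the GPflow 1.x optimizers used elsewhere in these examples (hypothetical; the original helper may differ, and the print action is omitted here):

def run_with_adam(model, learning_rate, iterations, action=None):
    # Hypothetical helper: run Adam on the compiled model for a fixed number of steps.
    opt = gp.training.AdamOptimizer(learning_rate)
    opt.minimize(model, maxiter=iterations)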
Example #29
def test_shared_independent_mok(session_tf):
    """
    In this test we use the same kernel and the same inducing features
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an inefficient one, where we use a SharedIndependentMok with InducingPoints.
           This combination uses a Kff of size N x P x N x P and a Kfu of size N x P x M x P,
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof.
           This combination uses the most efficient form of the matrices.
        3) the old way (also efficient): using a plain Kernel and InducingPoints.
        Models 2) and 3) follow more or less the same code path.
    """
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X,
              Data.Y,
              kernel_1,
              Gaussian(),
              feature_1,
              q_mu=q_mu_1,
              q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = RBF(Data.D, variance=0.5, lengthscales=1.2)
    feature_2 = InducingPoints(Data.X[:Data.M, ...].copy())
    m2 = SVGP(Data.X,
              Data.Y,
              kernel_2,
              Gaussian(),
              feature_2,
              q_mu=q_mu_2,
              q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_3 = mf.SharedIndependentMof(
        InducingPoints(Data.X[:Data.M, ...].copy()))
    m3 = SVGP(Data.X,
              Data.Y,
              kernel_3,
              Gaussian(),
              feature_3,
              q_mu=q_mu_3,
              q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
Example #30
    def fit(self, X, Y, Xval, Yval):
        N = X.shape[0]

        if self.var_dist == "diag":
            q_diag = True
        elif self.var_dist == "full":
            q_diag = False
        else:
            raise NotImplementedError(
                "GPFlow cannot implement %s variational distribution" %
                (self.var_dist))

        if self.do_classif:
            if self.num_classes == 2:
                likelihood = gpflow.likelihoods.Bernoulli()
                num_latent = 1
            else:
                # Softmax better than Robustmax (apparently per the gpflow slack)
                #likelihood = gpflow.likelihoods.MultiClass(self.num_classes, invlink=invlink)  # Multiclass likelihood
                likelihood = gpflow.likelihoods.Softmax(self.num_classes)
                num_latent = self.num_classes
                # Y must be 1D for the multiclass model to actually work.
                Y = np.argmax(Y, 1).reshape((-1, 1)).astype(int)
        else:
            num_latent = 1
            likelihood = gpflow.likelihoods.Gaussian()

        self.model = SVGP(kernel=self.kernel,
                          likelihood=likelihood,
                          inducing_variable=self.Z,
                          num_data=N,
                          num_latent_gps=num_latent,
                          whiten=False,
                          q_diag=q_diag)
        # Setup training
        if not self.train_hyperparams:
            set_trainable(self.model.inducing_variable.Z, False)
            set_trainable(self.kernel.lengthscales, False)
            set_trainable(self.kernel.variance, False)
        if self.natgrad_lr > 0:
            set_trainable(self.model.q_mu, False)
            set_trainable(self.model.q_sqrt, False)
            variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        # Create the optimizers
        adam_opt = tf.optimizers.Adam(self.lr)
        if self.natgrad_lr > 0:
            natgrad_opt = NaturalGradient(gamma=self.natgrad_lr)

        # Print
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)

        # Giacomo: If shuffle buffer is too large it will run OOM
        if self.num_classes == 2:
            Y = (Y + 1) / 2
            Yval = (Yval + 1) / 2
        generator = partial(data_generator, X, Y)
        #train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)) \
        train_dataset = tf.data.Dataset.from_generator(generator, args=(self.batch_size, ), output_types=(tf.float32, tf.float32)) \
            .prefetch(self.batch_size * 10) \
            .repeat() \
            .shuffle(min(N // self.batch_size, 1_000_000 // self.batch_size)) \
            .batch(1)
        train_iter = iter(train_dataset)

        loss = self.model.training_loss_closure(train_iter)
        t_elapsed = 0
        for step in range(self.num_iter):
            t_s = time.time()
            if self.natgrad_lr > 0:
                natgrad_opt.minimize(loss, var_list=variational_params)
            adam_opt.minimize(loss, var_list=self.model.trainable_variables)
            t_elapsed += time.time() - t_s
            if step % 700 == 0:
                print("Step %d -- Elapsed %.2fs" % (step, t_elapsed),
                      flush=True)
            if (step + 1) % self.error_every == 0:
                preds = self.predict(Xval)
                val_err, err_name = self.err_fn(Yval, preds)
                print(
                    f"Step {step + 1} - {t_elapsed:7.2f}s Elapsed - "
                    f"Validation {err_name} {val_err:7.5f}",
                    flush=True)

        preds = self.predict(Xval)
        val_err, err_name = self.err_fn(Yval, preds)
        print(
            f"Finished optimization - {t_elapsed:7.2f}s Elapsed - "
            f"Validation {err_name} {val_err:7.5f}",
            flush=True)
        print("Final model is ")
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)
        return self
Example #31
# Compare GPR and VGP lengthscales after optimization:

# %%
print(f"GPR lengthscales = {gpr.kernel.lengthscales.numpy():.04f}")
print(f"VGP lengthscales = {vgp.kernel.lengthscales.numpy():.04f}")

# %% [markdown]
# ### Natural gradients also work for the sparse model
# Similarly, natural gradients turn SVGP into SGPR in the Gaussian likelihood case. <br>
# We can again combine natural gradients with Adam to update both variational parameters and hyperparameters too.<br>
# Here we'll just do a single natural step demonstration.

# %%
svgp = SVGP(
    kernel=gpflow.kernels.Matern52(),
    likelihood=gpflow.likelihoods.Gaussian(),
    inducing_variable=inducing_variable,
)
sgpr = SGPR(data,
            kernel=gpflow.kernels.Matern52(),
            inducing_variable=inducing_variable)

for model in svgp, sgpr:
    model.likelihood.variance.assign(0.1)

# %% [markdown]
# Analytically optimal sparse model ELBO:

# %%
sgpr.elbo().numpy()
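
The single natural-gradient step mentioned above is not shown in this excerpt; a minimal sketch of it (assuming the data tuple and inducing_variable defined earlier in the notebook) is:

from gpflow.optimizers import NaturalGradient

natgrad_opt = NaturalGradient(gamma=1.0)
variational_params = [(svgp.q_mu, svgp.q_sqrt)]
natgrad_opt.minimize(svgp.training_loss_closure(data), var_list=variational_params)

# With a Gaussian likelihood, one full step (gamma=1.0) should bring the SVGP ELBO
# up to the analytically optimal SGPR ELBO printed above.
svgp.elbo(data).numpy()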
Example #32
class TrainableSVGP():
    def __init__(self,
                 kernel,
                 inducing_points,
                 batch_size,
                 num_iter,
                 err_fn,
                 var_dist,
                 classif=None,
                 error_every=100,
                 train_hyperparams: bool = True,
                 lr: float = 0.001,
                 natgrad_lr: float = 0.01):
        self.train_hyperparams = train_hyperparams
        self.lr = lr
        self.natgrad_lr = natgrad_lr
        self.kernel = kernel
        self.Z = inducing_points.copy()
        self.batch_size = batch_size
        self.num_iter = num_iter
        self.err_fn = err_fn
        self.error_every = error_every
        self.do_classif = classif is not None and classif > 0
        self.num_classes = 1
        if self.do_classif:
            self.num_classes = int(classif)
        self.model = None
        self.var_dist = var_dist

    def fit(self, X, Y, Xval, Yval):
        N = X.shape[0]

        if self.var_dist == "diag":
            q_diag = True
        elif self.var_dist == "full":
            q_diag = False
        else:
            raise NotImplementedError(
                "GPFlow cannot implement %s variational distribution" %
                (self.var_dist))

        if self.do_classif:
            if self.num_classes == 2:
                likelihood = gpflow.likelihoods.Bernoulli()
                num_latent = 1
            else:
                # Softmax better than Robustmax (apparently per the gpflow slack)
                #likelihood = gpflow.likelihoods.MultiClass(self.num_classes, invlink=invlink)  # Multiclass likelihood
                likelihood = gpflow.likelihoods.Softmax(self.num_classes)
                num_latent = self.num_classes
                # Y must be 1D for the multiclass model to actually work.
                Y = np.argmax(Y, 1).reshape((-1, 1)).astype(int)
        else:
            num_latent = 1
            likelihood = gpflow.likelihoods.Gaussian()

        self.model = SVGP(kernel=self.kernel,
                          likelihood=likelihood,
                          inducing_variable=self.Z,
                          num_data=N,
                          num_latent_gps=num_latent,
                          whiten=False,
                          q_diag=q_diag)
        # Setup training
        if not self.train_hyperparams:
            set_trainable(self.model.inducing_variable.Z, False)
            set_trainable(self.kernel.lengthscales, False)
            set_trainable(self.kernel.variance, False)
        if self.natgrad_lr > 0:
            set_trainable(self.model.q_mu, False)
            set_trainable(self.model.q_sqrt, False)
            variational_params = [(self.model.q_mu, self.model.q_sqrt)]
        # Create the optimizers
        adam_opt = tf.optimizers.Adam(self.lr)
        if self.natgrad_lr > 0:
            natgrad_opt = NaturalGradient(gamma=self.natgrad_lr)

        # Print
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)

        # Giacomo: If shuffle buffer is too large it will run OOM
        if self.num_classes == 2:
            Y = (Y + 1) / 2
            Yval = (Yval + 1) / 2
        generator = partial(data_generator, X, Y)
        #train_dataset = tf.data.Dataset.from_tensor_slices((X, Y)) \
        train_dataset = tf.data.Dataset.from_generator(generator, args=(self.batch_size, ), output_types=(tf.float32, tf.float32)) \
            .prefetch(self.batch_size * 10) \
            .repeat() \
            .shuffle(min(N // self.batch_size, 1_000_000 // self.batch_size)) \
            .batch(1)
        train_iter = iter(train_dataset)

        loss = self.model.training_loss_closure(train_iter)
        t_elapsed = 0
        for step in range(self.num_iter):
            t_s = time.time()
            if self.natgrad_lr > 0:
                natgrad_opt.minimize(loss, var_list=variational_params)
            adam_opt.minimize(loss, var_list=self.model.trainable_variables)
            t_elapsed += time.time() - t_s
            if step % 700 == 0:
                print("Step %d -- Elapsed %.2fs" % (step, t_elapsed),
                      flush=True)
            if (step + 1) % self.error_every == 0:
                preds = self.predict(Xval)
                val_err, err_name = self.err_fn(Yval, preds)
                print(
                    f"Step {step + 1} - {t_elapsed:7.2f}s Elapsed - "
                    f"Validation {err_name} {val_err:7.5f}",
                    flush=True)

        preds = self.predict(Xval)
        val_err, err_name = self.err_fn(Yval, preds)
        print(
            f"Finished optimization - {t_elapsed:7.2f}s Elapsed - "
            f"Validation {err_name} {val_err:7.5f}",
            flush=True)
        print("Final model is ")
        gpflow.utilities.print_summary(self.model)
        print("", flush=True)
        return self

    def predict(self, X):
        preds = []
        dset = tf.data.Dataset.from_tensor_slices((X, )).batch(self.batch_size)
        for X_batch in iter(dset):
            batch_preds = self.model.predict_y(X_batch[0])[0].numpy()
            if self.do_classif:
                batch_preds = batch_preds.reshape((X_batch[0].shape[0], -1))
            preds.append(batch_preds)
        preds = np.concatenate(preds, axis=0)
        return preds

    @property
    def inducing_points(self):
        return self.model.inducing_variable.Z.numpy()

    def __str__(self):
        return ((
            "TrainableSVGP<kernel=%s, num_inducing_points=%d, batch_size=%d, "
            "num_iter=%d, lr=%f, natgrad_lr=%f, error_every=%d, train_hyperparams=%s, "
            "var_dist=%s, do_classif=%s, model=%s") %
                (self.kernel, self.Z.shape[0], self.batch_size, self.num_iter,
                 self.lr, self.natgrad_lr, self.error_every,
                 self.train_hyperparams, self.var_dist, self.do_classif,
                 self.model))
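
A hypothetical usage sketch of TrainableSVGP (the synthetic data, the data_generator helper that fit() relies on, and the error function are all illustrative assumptions, not part of the original):

import numpy as np
import gpflow

rng = np.random.default_rng(0)
X = rng.uniform(size=(1000, 4))
Y = np.sin(X.sum(axis=-1, keepdims=True))
Xval, Yval = X[:200], Y[:200]

def data_generator(X, Y, batch_size):
    # Hypothetical minibatch generator matching fit()'s tf.data.Dataset.from_generator call.
    N = X.shape[0]
    while True:
        idx = np.random.choice(N, size=int(batch_size), replace=False)
        yield X[idx].astype(np.float32), Y[idx].astype(np.float32)

def rmse(y_true, y_pred):
    # err_fn must return a (value, name) pair, matching how fit() unpacks it.
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2))), "RMSE"

kernel = gpflow.kernels.SquaredExponential(lengthscales=np.ones(X.shape[1]))
Z0 = X[rng.choice(X.shape[0], size=100, replace=False)]
trainer = TrainableSVGP(kernel, Z0, batch_size=256, num_iter=1000,
                        err_fn=rmse, var_dist="full", classif=None)
trainer.fit(X, Y, Xval, Yval)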