def test_optimize(self):
    with defer_build():
        input_layer = InputLayer(input_dim=1, output_dim=1,
                                 num_inducing=self.M,
                                 kernel=RBF(1) + White(1),
                                 multitask=True)
        output_layer = OutputLayer(input_dim=1, output_dim=1,
                                   num_inducing=self.M,
                                   kernel=RBF(1) + White(1),
                                   multitask=True)
        seq = MultitaskSequential([input_layer, output_layer])
        model = MultitaskDSDGP(X=self.X, Y=self.Y, Z=self.Z, layers=seq,
                               likelihood=SwitchedLikelihood([Gaussian(), Gaussian()]),
                               num_latent=1)
    model.compile()
    before = model.compute_log_likelihood()
    opt = gpflow.train.AdamOptimizer(0.01)
    opt.minimize(model, maxiter=100)
    after = model.compute_log_likelihood()
    self.assertGreaterEqual(after, before)
def setUp(self):
    self.test_graph = tf.Graph()
    self.rng = np.random.RandomState(42)
    # Use the seeded RNG throughout so the fixture is reproducible
    # (the original drew from the unseeded np.random module here).
    # The final column of each input carries the integer task index.
    self.X1_ind = np.array([0, 1, 2, 2, 1, 0, 1, 0, 2, 1])[:, None]
    self.X1 = np.hstack([self.rng.randn(10, 3), self.X1_ind]).astype(
        gpflow.settings.float_type)
    self.X2_ind = np.array([0, 1, 2, 2, 1])[:, None]
    self.X2 = np.hstack([self.rng.randn(5, 3), self.X2_ind]).astype(
        gpflow.settings.float_type)
    with defer_build():
        K1 = DummyKernel(3, 1.0)
        K2 = DummyKernel(3, 2.0)
        K3 = DummyKernel(3, 3.0)
        kern_list = [K1, K2, K3]
        self.kernel = SwitchedKernel(kern_list, 3)
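# A minimal shape-check sketch, assuming SwitchedKernel subclasses the standard
# gpflow kernel (so the compute_K / compute_K_symm autoflow methods are
# available after compile()) and that the enclosing class is a GPflowTestCase
# providing test_context(). The last input column selects the sub-kernel.
def test_gram_shapes(self):
    with self.test_context():
        self.kernel.compile()
        K11 = self.kernel.compute_K_symm(self.X1)
        self.assertEqual(K11.shape, (10, 10))
        K12 = self.kernel.compute_K(self.X1, self.X2)
        self.assertEqual(K12.shape, (10, 5))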
def prepare(self):
    N = 100
    M = 10
    rng = np.random.RandomState(42)
    X = rng.randn(N, 2)
    Y = rng.randn(N, 1)
    Z = rng.randn(M, 2)
    X_ind = rng.randint(0, 2, (N, 1))
    Z_ind = rng.randint(0, 2, (M, 1))
    X = np.hstack([X, X_ind])
    Y = np.hstack([Y, X_ind])
    Z = np.hstack([Z, Z_ind])
    Xs = rng.randn(M, 2)
    Xs_ind = rng.randint(0, 2, (M, 1))
    Xs = np.hstack([Xs, Xs_ind])
    with defer_build():
        lik = SwitchedLikelihood([Gaussian(), Gaussian()])
        input_layer = InputLayer(input_dim=2, output_dim=1, num_inducing=M,
                                 kernel=RBF(2) + White(2),
                                 mean_function=Linear(A=np.ones((3, 1))),
                                 multitask=True)
        output_layer = OutputLayer(input_dim=1, output_dim=1, num_inducing=M,
                                   kernel=RBF(1) + White(1),
                                   multitask=True)
        seq = MultitaskSequential([input_layer, output_layer])
        model = MultitaskDSDGP(X=X, Y=Y, Z=Z, layers=seq, likelihood=lik,
                               num_latent=1)
    model.compile()
    return model, Xs
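# A minimal sketch of how prepare() might be used in a test. It assumes
# MultitaskDSDGP inherits gpflow's predict_y autoflow method and that the task
# index in the final column of Xs routes each row to the matching likelihood;
# both are assumptions about this model class, not confirmed by the source.
def test_predict(self):
    model, Xs = self.prepare()
    mean, var = model.predict_y(Xs)
    self.assertEqual(mean.shape[0], Xs.shape[0])
    self.assertTrue(np.all(var > 0))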
def test_pos_def():
    # N = 10
    # Dx = 3
    # Dy = 1
    # K = 5
    from bayesian_benchmarks.data import get_regression_data
    from scipy.cluster.vq import kmeans2

    data = get_regression_data("wilson_3droad")
    X = data.X_train
    Y = data.Y_train
    M = 128
    # Initialise the inducing inputs with k-means on the benchmark inputs.
    Z = kmeans2(X, M, minit="points")[0]
    N, Dx = X.shape
    Dy = Y.shape[1]
    K = 1
    lik = gpflow.likelihoods.Gaussian(variance=0.1)
    kern = gpflow.kernels.RBF(Dx, lengthscales=0.1)
    # The benchmark data is then replaced by random inputs/targets of the same
    # shape; only Z retains information from the real data.
    X = np.random.randn(N, Dx)
    Y = np.random.randn(N, Dy)
    layers_vi = [LatentVariableLayer(Dx, XY_dim=Dx + Dy), GPLayer(kern, Z, Dy)]
    layers_iw = [LatentVariableLayer(Dx, XY_dim=Dx + Dy), GPLayer(kern, Z, Dy)]
    m_dgp_vi = DGP_VI(X, Y, layers_vi, lik, num_samples=K, minibatch_size=512)
    with defer_build():
        m_dgp_iw = DGP_IWVI(X, Y,
                            encoder_minibatch_size=None,
                            layers=layers_iw,
                            likelihood=lik,
                            num_samples=K,
                            minibatch_size=512)
    m_dgp_iw.compile()
    for model in [m_dgp_vi, m_dgp_iw]:
        # q(u) of the final layer is trained by natural gradients, so it is
        # excluded from the Adam variables.
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)
        optimizer_adam = gpflow.train.AdamOptimizer(0.005)
        adam_op = optimizer_adam.make_optimize_tensor(model)
        optimizer_ng = gpflow.train.NatGradOptimizer(gamma=0.01)
        ng_op = optimizer_ng.make_optimize_tensor(
            model, var_list=[[model.layers[-1].q_mu, model.layers[-1].q_sqrt]])
        sess = model.enquire_session()
        for step in range(10):
            print("{} {:.2f}".format(step, sess.run(model.likelihood_tensor)))
            sess.run(ng_op)
            sess.run(adam_op)
    # Average the stochastic bound estimates over repeated evaluations.
    L_vi = np.average([m_dgp_vi.compute_log_likelihood() for _ in range(100)])
    L_iw = np.average([m_dgp_iw.compute_log_likelihood() for _ in range(100)])
    print(L_vi, L_iw)
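# A possible strengthening of test_pos_def (an assumption, not part of the
# original test): since the test's point is that training runs without Cholesky
# failures, the averaged bounds could be asserted finite at the end of the
# function rather than only printed:
#
#     assert np.isfinite(L_vi) and np.isfinite(L_iw)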