def test_2layer_vs_nat_grad(self):
        Ns, N, M = 5, 1, 50
        D_X, D_Y = 1, 1

        lik_var = 0.1

        X = np.random.uniform(size=(N, D_X))
        Y = np.random.uniform(size=(N, D_Y))
        Z = np.random.uniform(size=(M, D_X))  # inducing inputs live in the input space (D_X)
        Xs = np.random.uniform(size=(Ns, D_X))

        Z[:N, :] = X[:M, :]  # place the data points at the first inducing inputs

        def kerns():
            return [RBF(D_X, lengthscales=0.1), RBF(D_X, lengthscales=0.5)]

        layers_col = init_layers_linear(X, Y, Z, kerns())
        layers_ng = init_layers_linear(X, Y, Z, kerns())

        def lik():
            l = Gaussian()
            l.variance = lik_var
            return l

        last_layer = SGPR_Layer(layers_col[-1].kern,
                                layers_col[-1].feature.Z.read_value(), D_Y,
                                layers_col[-1].mean_function)

        layers_col = layers_col[:-1] + [last_layer]
        m_col = DGP_Collapsed(X, Y, lik(), layers_col)
        m_ng = DGP_Quad(X, Y, lik(), layers_ng, H=200)

        q_mu1 = np.random.randn(M, D_X)
        q_sqrt1 = np.random.randn(M, M)
        q_sqrt1 = np.tril(q_sqrt1)[None, :, :]

        for m in m_col, m_ng:
            m.layers[0].q_mu = q_mu1
            m.layers[0].q_sqrt = q_sqrt1

        p = [[m_ng.layers[-1].q_mu, m_ng.layers[-1].q_sqrt]]
        NatGradOptimizer(gamma=1.).minimize(m_ng, var_list=p, maxiter=1)

        assert_allclose(m_col.compute_log_likelihood(),
                        m_ng.compute_log_likelihood())
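
The check above relies on a standard result: with a Gaussian likelihood, one natural-gradient step of size gamma=1 on (q_mu, q_sqrt) of the final layer lands exactly on the optimal variational distribution, which is what the collapsed SGPR_Layer computes in closed form, so the two bounds coincide. A minimal single-layer sketch of the same equivalence in plain GPflow 1.x (SVGP, SGPR and NatGradOptimizer are standard GPflow names, not part of the DGP code above):

import numpy as np
import gpflow
from gpflow.training import NatGradOptimizer

np.random.seed(0)
X = np.random.uniform(size=(6, 1))
Y = np.random.uniform(size=(6, 1))

# SVGP keeps an explicit q(u); SGPR collapses q(u) analytically
m_svgp = gpflow.models.SVGP(X, Y, gpflow.kernels.RBF(1),
                            gpflow.likelihoods.Gaussian(), Z=X.copy())
m_sgpr = gpflow.models.SGPR(X, Y, gpflow.kernels.RBF(1), Z=X.copy())

# one natural-gradient step of size 1 recovers the optimal q(u) exactly
NatGradOptimizer(gamma=1.).minimize(
    m_svgp, var_list=[(m_svgp.q_mu, m_svgp.q_sqrt)], maxiter=1)

np.testing.assert_allclose(m_svgp.compute_log_likelihood(),
                           m_sgpr.compute_log_likelihood(), atol=1e-5)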
Example #2
    def __init__(
            self,
            X,
            Y,
            Z,
            kernels,
            likelihood,
            num_outputs=None,
            mean_function=Zero(),  # the final-layer mean function
            white=False,
            **kwargs):
        layers = init_layers_linear(X,
                                    Y,
                                    Z,
                                    kernels,
                                    num_outputs=num_outputs,
                                    mean_function=mean_function,
                                    white=white)
        DGP_Base.__init__(self, X, Y, likelihood, layers, **kwargs)
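
A minimal usage sketch for the constructor above, assuming it belongs to the DGP convenience class from the doubly-stochastic DGP codebase (the enclosing class statement is not shown, so the name DGP below is an assumption):

import numpy as np
from gpflow.kernels import RBF
from gpflow.likelihoods import Gaussian

np.random.seed(0)
X = np.random.uniform(size=(100, 2))
Y = np.sin(3 * X[:, :1]) + 0.1 * np.random.randn(100, 1)
Z = X[::10].copy()  # inducing inputs taken from the data

# one kernel per layer; init_layers_linear sizes the hidden layer from them
kernels = [RBF(2, lengthscales=0.5), RBF(2, lengthscales=0.5)]

# DGP is the assumed name of the class whose __init__ is shown above
model = DGP(X, Y, Z, kernels, Gaussian(), num_samples=5)  # extra kwargs go to DGP_Base
print(model.compute_log_likelihood())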
Example #3
    def test_single_layer(self):
        kern = RBF(1, lengthscales=0.1)
        layers = init_layers_linear(self.X, self.Y, self.X, [kern])

        lik = Gaussian()
        lik.variance = self.lik_var

        last_layer = SGPR_Layer(layers[-1].kern,
                                layers[-1].feature.Z.read_value(), self.D_Y,
                                layers[-1].mean_function)
        layers = layers[:-1] + [last_layer]

        m_dgp = DGP_Collapsed(self.X, self.Y, lik, layers)
        L_dgp = m_dgp.compute_log_likelihood()
        mean_dgp, var_dgp = m_dgp.predict_f_full_cov(self.Xs, 1)

        m_exact = GPR(self.X, self.Y, kern)
        m_exact.likelihood.variance = self.lik_var
        L_exact = m_exact.compute_log_likelihood()
        mean_exact, var_exact = m_exact.predict_f_full_cov(self.Xs)

        assert_allclose(L_dgp, L_exact, atol=1e-5, rtol=1e-5)
        assert_allclose(mean_dgp[0], mean_exact, atol=1e-5, rtol=1e-5)
        assert_allclose(var_dgp[0], var_exact, atol=1e-5, rtol=1e-5)
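
Background for the comparison above: when the inducing inputs coincide with the training inputs, the collapsed (Titsias) bound is tight, so a single collapsed layer reproduces exact GP regression. A sketch of the same equivalence in plain GPflow 1.x:

import numpy as np
import gpflow

np.random.seed(0)
X = np.random.uniform(size=(20, 1))
Y = np.sin(10 * X) + 0.1 * np.random.randn(20, 1)

m_sgpr = gpflow.models.SGPR(X, Y, gpflow.kernels.RBF(1), Z=X.copy())  # Z == X
m_gpr = gpflow.models.GPR(X, Y, gpflow.kernels.RBF(1))

np.testing.assert_allclose(m_sgpr.compute_log_likelihood(),
                           m_gpr.compute_log_likelihood(),
                           atol=1e-5, rtol=1e-5)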
Example #4
        def test_quadrature(self):
            N = 2
            np.random.seed(0)
            X = np.random.uniform(size=(N, 1))
            Y = np.sin(20 * X) + np.random.randn(*X.shape) * 0.001

            kernels = lambda: [
                RBF(1, lengthscales=0.1),
                RBF(1, lengthscales=0.1)
            ]
            layers = lambda: init_layers_linear(X, Y, X, kernels())

            def lik():
                l = Gaussian()
                l.variance = 0.01
                return l

            m_stochastic = DGP_Base(X, Y, lik(), layers(), num_samples=100)
            # around H=300 seems to be necessary, which suggests this integral is not easy for quadrature
            m_quad = DGP_Quad(X, Y, lik(), layers(), H=300)

            # q_mu_0 = np.random.randn(N, 1)
            # q_sqrt_0 = np.random.randn(1, N, N)**2
            #
            # q_mu_1 = np.random.randn(N, 1)
            # q_sqrt_1 = np.random.randn(1, N, N)**2

            for model in m_quad, m_stochastic:
                model.set_trainable(False)
                for layer in model.layers:
                    layer.q_mu.set_trainable(True)
                    layer.q_sqrt.set_trainable(True)

            ScipyOptimizer().minimize(m_quad, maxiter=500)

            q_mu_0 = m_quad.layers[0].q_mu.read_value()
            q_sqrt_0 = m_quad.layers[0].q_sqrt.read_value()

            q_mu_1 = m_quad.layers[1].q_mu.read_value()
            q_sqrt_1 = m_quad.layers[1].q_sqrt.read_value()

            for model in m_stochastic, m_quad:
                model.layers[0].q_mu = q_mu_0
                model.layers[0].q_sqrt = q_sqrt_0

                model.layers[1].q_mu = q_mu_1
                model.layers[1].q_sqrt = q_sqrt_1

            Ls_quad = [m_quad.compute_log_likelihood() for _ in range(2)]
            Ls_stochastic = [
                m_stochastic.compute_log_likelihood() for _ in range(1000)
            ]

            assert_allclose(Ls_quad[0], Ls_quad[1])  # quadrature should be deterministic
            m = np.average(Ls_stochastic)
            std_err = np.std(Ls_stochastic) / (float(len(Ls_stochastic))**0.5)
            print('sampling average {}'.format(m))
            print('sampling std err {}'.format(std_err))
            print('quad val {}'.format(Ls_quad[0]))
            assert np.abs(Ls_quad[0] - m) < std_err * 3  # 99.73% CI
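
The last assertion treats the Gauss-Hermite value as deterministic ground truth and only requires the Monte Carlo estimate to fall within about three standard errors. A self-contained sketch of that comparison for a one-dimensional Gaussian expectation (the integrand is arbitrary, chosen only for illustration):

import numpy as np

np.random.seed(0)
mu, var = 0.3, 0.25
f = lambda x: np.log(1.0 + x ** 2)  # any smooth integrand

# Gauss-Hermite quadrature for E[f(x)] with x ~ N(mu, var)
nodes, weights = np.polynomial.hermite.hermgauss(300)
quad = np.sum(weights * f(mu + np.sqrt(2 * var) * nodes)) / np.sqrt(np.pi)

# Monte Carlo estimate with its standard error
samples = f(mu + np.sqrt(var) * np.random.randn(100000))
mc, std_err = samples.mean(), samples.std() / np.sqrt(samples.size)

assert np.abs(quad - mc) < 3 * std_err  # same 3-sigma criterion as the test above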