def test_message_to_parents(self):
    np.random.seed(42)

    N = 5
    D1 = 3
    D2 = 4
    D3 = 2

    X1 = Gaussian(np.random.randn(N, D1), random.covariance(D1))
    X2 = Gaussian(np.random.randn(N, D2), random.covariance(D2))
    X3 = np.random.randn(N, D3)

    Z = ConcatGaussian(X1, X2, X3)

    Y = Gaussian(Z, random.covariance(D1 + D2 + D3))
    Y.observe(np.random.randn(*(Y.plates + Y.dims[0])))

    self.assert_message_to_parent(Y, X1, eps=1e-7, rtol=1e-5, atol=1e-5)
    self.assert_message_to_parent(Y, X2, eps=1e-7, rtol=1e-5, atol=1e-5)

    pass
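# Editor's sketch (not part of the suite; assumes ConcatGaussian exposes
# standard Gaussian moments through get_moments, as other BayesPy nodes
# do): the first moment of Z should simply be the parents' means stacked
# along the event axis, which could be checked directly inside the test:
#
#     z = Z.get_moments()[0]
#     x1 = X1.get_moments()[0]
#     x2 = X2.get_moments()[0]
#     np.testing.assert_allclose(z, np.concatenate([x1, x2, X3], axis=-1))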
def test_lower_bound(self):
    """
    Test the Wishart VB lower bound
    """
    #
    # With the Wishart node as the only latent node, VB is exact, so the
    # VB lower bound equals the true marginal log likelihood. Check that
    # they agree: the true marginal likelihood of the observation is a
    # multivariate Student-t distribution.
    #
    np.random.seed(42)

    D = 3
    n = (D - 1) + np.random.uniform(0.1, 0.5)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    mu = np.random.randn(D)
    Y = Gaussian(mu, Lambda)
    y = np.random.randn(D)
    Y.observe(y)
    Lambda.update()
    L = Y.lower_bound_contribution() + Lambda.lower_bound_contribution()

    # Parameters of the marginal Student-t distribution
    nu = n + 1 - D
    Cov = V / nu

    self.assertAllClose(L, _student_logpdf(y, mu, Cov, nu))

    pass
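# _student_logpdf is defined elsewhere in this test module. For reference,
# a minimal sketch of a multivariate Student-t log-density with mean mu,
# scale matrix Cov and nu degrees of freedom could look like this
# (editor's sketch; the module's actual helper may differ in details):
import numpy as np
from scipy import special

def _student_logpdf_sketch(y, mu, Cov, nu):
    D = len(mu)
    d = y - mu
    # Mahalanobis distance of the observation from the mean
    maha = np.dot(d, np.linalg.solve(Cov, d))
    return (special.gammaln((nu + D) / 2)
            - special.gammaln(nu / 2)
            - 0.5 * D * np.log(nu * np.pi)
            - 0.5 * np.linalg.slogdet(Cov)[1]
            - 0.5 * (nu + D) * np.log(1 + maha / nu))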
def test_riemannian_gradient(self):
    """Test Riemannian gradient of a Gaussian node."""
    D = 3

    #
    # Without observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    g = X.get_riemannian_gradient()
    # Parameters after VB-EM update
    X.update()
    phi1 = X.phi
    # Check
    self.assertAllClose(g[0], phi1[0] - phi0[0])
    self.assertAllClose(g[1], phi1[1] - phi0[1])

    # TODO/FIXME: Actually, the gradient should be zero because the cost
    # function is zero without observations! Use the mask!

    #
    # With observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    V = random.covariance(D)
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(D))
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    g = X.get_riemannian_gradient()
    # Parameters after VB-EM update
    X.update()
    phi1 = X.phi
    # Check
    self.assertAllClose(g[0], phi1[0] - phi0[0])
    self.assertAllClose(g[1], phi1[1] - phi0[1])

    pass
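# Why the check above works (editor's note, not from the test suite): for
# a conjugate-exponential-family node, the Riemannian (natural) gradient
# of the VB lower bound with respect to the natural parameters phi is
#
#     g = phi_opt - phi,
#
# where phi_opt is the fixed-point VB-EM update. One full update is thus
# exactly a unit step along the natural gradient, which is why g must
# equal phi1 - phi0.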
def test_moments(self):
    """
    Test the moments of Wishart node
    """
    np.random.seed(42)

    # Test prior moments
    D = 3
    n = (D - 1) + np.random.uniform(0.1, 2)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    Lambda.update()
    u = Lambda.get_moments()
    self.assertAllClose(u[0],
                        n * np.linalg.inv(V),
                        msg='Mean incorrect')
    self.assertAllClose(u[1],
                        (np.sum(special.digamma((n - np.arange(D)) / 2))
                         + D * np.log(2)
                         - np.linalg.slogdet(V)[1]),
                        msg='Log determinant incorrect')

    # Test posterior moments
    D = 3
    n = (D - 1) + np.random.uniform(0.1, 2)
    V = random.covariance(D)
    Lambda = Wishart(n, V)
    mu = np.random.randn(D)
    Y = Gaussian(mu, Lambda)
    y = np.random.randn(D)
    Y.observe(y)
    Lambda.update()
    u = Lambda.get_moments()
    n = n + 1
    V = V + np.outer(y - mu, y - mu)
    self.assertAllClose(u[0],
                        n * np.linalg.inv(V),
                        msg='Mean incorrect')
    self.assertAllClose(u[1],
                        (np.sum(special.digamma((n - np.arange(D)) / 2))
                         + D * np.log(2)
                         - np.linalg.slogdet(V)[1]),
                        msg='Log determinant incorrect')

    pass
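# A hedged Monte-Carlo cross-check of the closed-form moments used above
# (editor's sketch, not part of the suite). It assumes BayesPy's
# Wishart(n, V) corresponds to scipy.stats.wishart with df=n and
# scale=inv(V), so that E[Lambda] = n inv(V), as the assertions above
# imply.
import numpy as np
from scipy import stats

def _wishart_moments_mc(n, V, n_samples=100000, seed=0):
    W = np.linalg.inv(V)
    samples = stats.wishart(df=n, scale=W).rvs(size=n_samples,
                                               random_state=seed)
    mean = np.mean(samples, axis=0)                  # should approach n*inv(V)
    logdet = np.mean(np.linalg.slogdet(samples)[1])  # should approach E[log|Lambda|]
    return (mean, logdet)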
def test_message_to_parents(self):
    """
    Check the gradient passed to the parent nodes
    """
    D = 3

    X = Gaussian(np.random.randn(D), random.covariance(D))
    V = Wishart(D + np.random.rand(), random.covariance(D))

    Y = Gaussian(X, V)

    self.assert_moments(
        Y,
        lambda u: [u[0], u[1] + u[1].T]
    )

    Y.observe(np.random.randn(D))

    self.assert_message_to_parent(Y, X)
    #self.assert_message_to_parent(Y, V)

    pass
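# Editor's note (not from the suite): the transform passed to
# assert_moments symmetrizes the second moment, so the comparison does
# not depend on how the (symmetric) matrix u[1] is stored and is
# insensitive to asymmetric numerical noise.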
def test_rotate_plates(self):

    # Basic test for Gaussian vectors
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    (u0, u1) = X.get_moments()
    Cov = u1 - linalg.outer(u0, u0, ndim=1)
    Q = np.random.randn(3, 3)
    Qu0 = np.einsum('ik,kj->ij', Q, u0)
    QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
    Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
    X.rotate_plates(Q, plate_axis=-1)
    (u0, u1) = X.get_moments()
    self.assertAllClose(u0, Qu0)
    self.assertAllClose(u1, Qu1)

    # Test full covariance, that is, with observations
    X = GaussianARD(np.random.randn(3, 2),
                    np.random.rand(3, 2),
                    shape=(2,),
                    plates=(3,))
    Y = Gaussian(X, [[2.0, 1.5], [1.5, 3.0]], plates=(3,))
    Y.observe(np.random.randn(3, 2))
    X.update()
    (u0, u1) = X.get_moments()
    Cov = u1 - linalg.outer(u0, u0, ndim=1)
    Q = np.random.randn(3, 3)
    Qu0 = np.einsum('ik,kj->ij', Q, u0)
    QCov = np.einsum('k,kij->kij', np.sum(Q, axis=0)**2, Cov)
    Qu1 = QCov + linalg.outer(Qu0, Qu0, ndim=1)
    X.rotate_plates(Q, plate_axis=-1)
    (u0, u1) = X.get_moments()
    self.assertAllClose(u0, Qu0)
    self.assertAllClose(u1, Qu1)

    pass
def test_gradient(self):
    """Test standard gradient of a Gaussian node."""
    D = 3

    np.random.seed(42)

    #
    # Without observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    Q = VB(X)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound(ignore_masked=False)
    g_num = [np.zeros(D), np.zeros((D, D))]
    for i in range(D):
        e = np.zeros(D)
        e[i] = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound(ignore_masked=False)
        g_num[0][i] = (l1 - l0) / eps
    for i in range(D):
        for j in range(i + 1):
            e = np.zeros((D, D))
            e[i, j] += eps
            e[j, i] += eps
            p1 = p0[1] + e
            X.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound(ignore_masked=False)
            g_num[1][i, j] = (l1 - l0) / (2 * eps)
            g_num[1][j, i] = (l1 - l0) / (2 * eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    #
    # With observations
    #

    # Construct model
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda)
    # Random initialization
    mu0 = np.random.randn(D)
    Lambda0 = random.covariance(D)
    X.initialize_from_parameters(mu0, Lambda0)
    V = random.covariance(D)
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(D))
    Q = VB(Y, X)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound()
    g_num = [np.zeros(D), np.zeros((D, D))]
    for i in range(D):
        e = np.zeros(D)
        e[i] = eps
        p1 = p0[0] + e
        X.set_parameters([p1, p0[1]])
        l1 = Q.compute_lowerbound()
        g_num[0][i] = (l1 - l0) / eps
    for i in range(D):
        for j in range(i + 1):
            e = np.zeros((D, D))
            e[i, j] += eps
            e[j, i] += eps
            p1 = p0[1] + e
            X.set_parameters([p0[0], p1])
            l1 = Q.compute_lowerbound()
            g_num[1][i, j] = (l1 - l0) / (2 * eps)
            g_num[1][j, i] = (l1 - l0) / (2 * eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    #
    # With plates
    #

    # Construct model
    K = D + 1
    mu = np.random.randn(D)
    Lambda = random.covariance(D)
    X = Gaussian(mu, Lambda, plates=(K,))
    V = random.covariance(D, size=(K,))
    Y = Gaussian(X, V)
    Y.observe(np.random.randn(K, D))
    Q = VB(Y, X)
    # Random initialization
    mu0 = np.random.randn(*(X.get_shape(0)))
    Lambda0 = random.covariance(D, size=X.plates)
    X.initialize_from_parameters(mu0, Lambda0)
    # Initial parameters
    phi0 = X.phi
    # Gradient
    rg = X.get_riemannian_gradient()
    g = X.get_gradient(rg)
    # Numerical gradient
    eps = 1e-6
    p0 = X.get_parameters()
    l0 = Q.compute_lowerbound()
    g_num = [np.zeros(X.get_shape(0)), np.zeros(X.get_shape(1))]
    for k in range(K):
        for i in range(D):
            e = np.zeros(X.get_shape(0))
            e[k, i] = eps
            p1 = p0[0] + e
            X.set_parameters([p1, p0[1]])
            l1 = Q.compute_lowerbound()
            g_num[0][k, i] = (l1 - l0) / eps
        for i in range(D):
            for j in range(i + 1):
                e = np.zeros(X.get_shape(1))
                e[k, i, j] += eps
                e[k, j, i] += eps
                p1 = p0[1] + e
                X.set_parameters([p0[0], p1])
                l1 = Q.compute_lowerbound()
                g_num[1][k, i, j] = (l1 - l0) / (2 * eps)
                g_num[1][k, j, i] = (l1 - l0) / (2 * eps)
    # Check
    self.assertAllClose(g[0], g_num[0])
    self.assertAllClose(g[1], g_num[1])

    pass
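# Editor's aside (not from the suite): the loops above use one-sided
# forward differences, whose truncation error is O(eps). A hypothetical
# central-difference helper such as this one would reduce the error to
# O(eps**2) at twice the number of lower-bound evaluations:
import numpy as np

def _central_difference(f, x, eps=1e-6):
    """Numerically differentiate scalar-valued f at array x, entrywise."""
    g = np.zeros_like(x, dtype=float)
    for i in range(x.size):
        e = np.zeros_like(x, dtype=float)
        e.flat[i] = eps
        g.flat[i] = (f(x + e) - f(x - e)) / (2 * eps)
    return g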
def test_lowerbound(self):
    """
    Test the variational Bayesian lower bound term for GaussianARD.
    """

    # Test vector formula with full noise covariance
    m = np.random.randn(2)
    alpha = np.random.rand(2)
    y = np.random.randn(2)
    X = GaussianARD(m, alpha, ndim=1)
    V = np.array([[3, 1], [1, 3]])
    Y = Gaussian(X, V)
    Y.observe(y)
    X.update()
    Cov = np.linalg.inv(np.diag(alpha) + V)
    mu = np.dot(Cov, np.dot(V, y) + alpha * m)
    x2 = np.outer(mu, mu) + Cov
    logH_X = (+ 2 * 0.5 * (1 + np.log(2 * np.pi))
              + 0.5 * np.log(np.linalg.det(Cov)))
    logp_X = (- 2 * 0.5 * np.log(2 * np.pi)
              + 0.5 * np.log(np.linalg.det(np.diag(alpha)))
              - 0.5 * np.sum(np.diag(alpha) * (x2
                                               - np.outer(mu, m)
                                               - np.outer(m, mu)
                                               + np.outer(m, m))))
    self.assertAllClose(logp_X + logH_X,
                        X.lower_bound_contribution())

    def check_lower_bound(shape_mu, shape_alpha, plates_mu=(), **kwargs):
        M = GaussianARD(np.ones(plates_mu + shape_mu),
                        np.ones(plates_mu + shape_mu),
                        shape=shape_mu,
                        plates=plates_mu)
        if not ('ndim' in kwargs or 'shape' in kwargs):
            kwargs['ndim'] = len(shape_mu)
        X = GaussianARD(M,
                        2 * np.ones(shape_alpha),
                        **kwargs)
        Y = GaussianARD(X,
                        3 * np.ones(X.get_shape(0)),
                        **kwargs)
        Y.observe(4 * np.ones(Y.get_shape(0)))
        X.update()
        Cov = 1 / (2 + 3)
        mu = Cov * (2 * 1 + 3 * 4)
        x2 = mu**2 + Cov
        logH_X = (+ 0.5 * (1 + np.log(2 * np.pi))
                  + 0.5 * np.log(Cov))
        logp_X = (- 0.5 * np.log(2 * np.pi)
                  + 0.5 * np.log(2)
                  - 0.5 * 2 * (x2 - 2 * mu * 1 + 1**2 + 1))
        r = np.prod(X.get_shape(0))
        self.assertAllClose(r * (logp_X + logH_X),
                            X.lower_bound_contribution())

    # Test scalar formula
    check_lower_bound((), ())

    # Test array formula
    check_lower_bound((2, 3), (2, 3))

    # Test dim-broadcasting of mu
    check_lower_bound((3, 1), (2, 3, 4))

    # Test dim-broadcasting of alpha
    check_lower_bound((2, 3, 4), (3, 1))

    # Test dim-broadcasting of mu and alpha
    check_lower_bound((3, 1), (3, 1), shape=(2, 3, 4))

    # Test dim-broadcasting of mu with plates
    check_lower_bound((), (),
                      plates_mu=(),
                      shape=(),
                      plates=(5,))

    # BUG: Scalar parents for array variable caused einsum error
    check_lower_bound((), (), shape=(3,))

    # BUG: Log-det was summed over plates
    check_lower_bound((), (), shape=(3,), plates=(4,))

    pass
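# Editor's note (not from the suite): the hand-computed posterior inside
# check_lower_bound is the usual precision-weighted combination for a
# scalar Gaussian with prior mean 1, prior precision 2, and likelihood
# precision 3 on the observation 4:
#
#     posterior precision = 2 + 3
#     posterior mean      = (2*1 + 3*4) / (2 + 3)
#
# which is exactly what the lines Cov = 1/(2+3) and mu = Cov*(2*1 + 3*4)
# encode.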