def test_cholesky_grad_indef():
    """Check how the Cholesky gradient reacts to a non positive-definite input.

    With ``on_error="raise"`` evaluating the gradient must raise
    ``scipy.linalg.LinAlgError``; with ``on_error="nan"`` every entry of the
    returned gradient must be NaN.  Skipped when scipy cannot be imported.
    """
    scipy = pytest.importorskip("scipy")

    # Symmetric, but with a negative diagonal entry.
    indefinite = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    x = tensor.matrix()

    def compile_grad(on_error):
        # Gradient of the summed lower Cholesky factor w.r.t. the input.
        op = Cholesky(lower=True, on_error=on_error)
        return function([x], grad(op(x).sum(), [x]))

    raising_grad = compile_grad("raise")
    with pytest.raises(scipy.linalg.LinAlgError):
        raising_grad(indefinite)

    nan_grad = compile_grad("nan")
    assert np.all(np.isnan(nan_grad(indefinite)))
def test_cholesky_indef():
    """Check the two ``on_error`` modes of Cholesky on an indefinite matrix.

    ``on_error='raise'`` must raise ``scipy.linalg.LinAlgError`` when the
    compiled function is evaluated; ``on_error='nan'`` must return an output
    whose entries are all NaN.  Raises ``SkipTest`` when scipy is missing.
    """
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")

    # Symmetric, but with a negative diagonal entry.
    indefinite = np.array([[1, 0.2], [0.2, -2]]).astype(config.floatX)
    x = tensor.matrix()

    strict_chol = function([x], Cholesky(lower=True, on_error='raise')(x))
    with assert_raises(scipy.linalg.LinAlgError):
        strict_chol(indefinite)

    lenient_chol = function([x], Cholesky(lower=True, on_error='nan')(x))
    assert np.all(np.isnan(lenient_chol(indefinite)))
def test_cholesky_grad():
    """Numerically verify the Cholesky gradient for all three op variants.

    The variants are the default ``cholesky`` instance, ``Cholesky(lower=True)``
    and ``Cholesky(lower=False)``.  Skipped when scipy cannot be imported.
    """
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    base = rng.randn(5, 5).astype(config.floatX)

    def check(decompose):
        # The product with the transpose is formed inside the differentiated
        # graph, so verify_grad perturbs ``base`` rather than the matrix that
        # is handed to Cholesky (original note: "Cholesky needs separable
        # matrices").
        utt.verify_grad(lambda m: decompose(m.dot(m.T)), [base], 3, rng)

    # Check the default.
    check(cholesky)
    # Explicit lower-triangular.
    check(Cholesky(lower=True))
    # Explicit upper-triangular.
    check(Cholesky(lower=False))
def nlml(Y, hyp, i, X, EyeN, nigp=None, y_var=None):
    """Build the Cholesky factor and linear solves for one output dimension.

    Relies on names from the enclosing scope: ``idims``, ``self.covs``,
    ``cov``, ``partial``, ``tt``, ``Cholesky`` and the triangular solvers.

    Parameters
    ----------
    Y : symbolic vector
        Targets; appended as the last column of the right-hand side.
    hyp : symbolic vector
        Hyper-parameters; split into ``hyp[:idims + 1]`` and
        ``hyp[idims + 1]`` before being handed to ``cov.Sum``.
    i : int
        Index used to pick the row of ``nigp`` / ``y_var``.
    X : symbolic matrix
        Inputs passed to the kernel function.
    EyeN : symbolic matrix
        Left part of the right-hand side (presumably the N x N identity,
        which makes ``iK`` the inverse of ``K`` -- confirm with the caller).
    nigp : optional
        When not ``None``, ``tt.diag(nigp[i])`` is added to ``K``
        (input-noise contribution).
    y_var : optional
        When not ``None``, ``tt.diag(y_var[i])`` is added to ``K``
        (output-uncertainty contribution, acts as a weight).

    Returns
    -------
    tuple
        ``(iK, L, beta)``: all but the last column of the solve, the
        Cholesky factor of ``K``, and the last column of the solve.
    """
    # initialise the (before compilation) kernel function
    hyps = (hyp[:idims + 1], hyp[idims + 1])
    kernel_func = partial(cov.Sum, hyps, self.covs)

    # kernel matrix for this output dimension
    K = kernel_func(X)

    # Compare against None explicitly: the truth value of an array-like is
    # ambiguous (NumPy raises ValueError for multi-element arrays).
    if nigp is not None:
        # add the contribution from the input noise
        K += tt.diag(nigp[i])
    if y_var is not None:
        # add the contribution from the output uncertainty (acts as weight)
        K += tt.diag(y_var[i])

    # compute chol(K)
    L = Cholesky()(K)

    # Solve K * sol = [EyeN | Y] with one forward and one backward
    # triangular solve.
    rhs = tt.concatenate([EyeN, Y[:, None]], axis=1)
    sol = solve_upper_triangular(L.T, solve_lower_triangular(L, rhs))
    iK = sol[:, :-1]
    beta = sol[:, -1]

    return iK, L, beta
def nlml(Y, hyp, X, X_sp, EyeM):
    """Build the sparse (pseudo-input) loss and its cached factors.

    Relies on names from the enclosing scope: ``idims``, ``self.covs``,
    ``cov``, ``partial``, ``tt``, ``theano``, ``np``, ``Cholesky`` and the
    triangular solvers.

    Returns the tuple ``(loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp)``, i.e. the
    loss expression followed by the intermediate factors and solves built
    along the way, in that order.
    """
    # TODO allow for different pseudo inputs for each dimension
    # initialise the (before compilation) kernel function
    hyps = [hyp[:idims + 1], hyp[idims + 1]]
    kernel_func = partial(cov.Sum, hyps, self.covs)

    signal_var = hyp[idims]**2
    noise_var = hyp[idims + 1]**2
    n_points = X.shape[0].astype(theano.config.floatX)
    jitter = 1e-6

    # Kernel among the pseudo inputs (plus a small ridge on the diagonal)
    # and between pseudo inputs and inputs.
    K_mm = kernel_func(X_sp) + jitter * EyeM
    K_mn = kernel_func(X_sp, X)
    chol_mm = Cholesky()(K_mm)

    # One forward solve against [EyeM | Kmn]; the two column groups are
    # split apart afterwards.
    n_pseudo = EyeM.shape[0]
    forward = solve_lower_triangular(
        chol_mm, tt.concatenate([EyeM, K_mn], axis=1))
    inv_K_mm = solve_upper_triangular(chol_mm.T, forward[:, :n_pseudo])
    L_mn = forward[:, n_pseudo:]
    diag_Q_nn = (L_mn**2).sum(0)

    # Gamma = diag(Knn - Qnn) + sn2*I
    gamma = signal_var + noise_var - diag_Q_nn
    inv_sqrt_gamma = tt.sqrt(1.0 / gamma)

    # these operations are done to avoid inverting K_sp = (Qnn+Gamma)
    L_mn_scaled = L_mn * inv_sqrt_gamma      # Lmn * Gamma^-.5
    Y_scaled = Y * inv_sqrt_gamma            # Gamma^-.5 * Y

    # I + Lmn * Gamma^-1 * Lnm
    B_mm = tt.eye(K_mm.shape[0]) + L_mn_scaled.dot(L_mn_scaled.T)
    chol_B = Cholesky()(B_mm)
    chol_combined = chol_mm.dot(chol_B)

    K_mn_dot_Y = K_mn.dot(Y_scaled * inv_sqrt_gamma)
    stacked = tt.concatenate([EyeM, K_mn_dot_Y[:, None]], axis=1)
    solved = solve_upper_triangular(
        chol_combined.T, solve_lower_triangular(chol_combined, stacked))
    inv_B_mm = solved[:, :-1]
    beta = solved[:, -1]

    log_det = tt.sum(tt.log(gamma)) + 2 * tt.sum(tt.log(tt.diag(chol_B)))

    data_fit = Y_scaled.dot(Y_scaled) - K_mn_dot_Y.dot(beta)
    loss = (data_fit + (log_det + n_points * np.log(2 * np.pi))) * 0.5

    return loss, inv_K_mm, chol_mm, chol_B, inv_B_mm, beta
def test_cholesky_grad():
    """Yield one gradient check per Cholesky variant (generator-style test).

    Each yielded callable runs ``utt.verify_grad`` on a graph that multiplies
    the random matrix by its own transpose before applying the op.  Raises
    ``SkipTest`` when scipy is missing.
    """
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)

    # The dots are inside the graph since Cholesky needs separable matrices

    def check_default():
        # Check the default.
        return utt.verify_grad(lambda m: cholesky(m.dot(m.T)), [r], 3, rng)

    def check_lower():
        # Explicit lower-triangular.
        return utt.verify_grad(
            lambda m: Cholesky(lower=True)(m.dot(m.T)), [r], 3, rng)

    def check_upper():
        # Explicit upper-triangular.
        return utt.verify_grad(
            lambda m: Cholesky(lower=False)(m.dot(m.T)), [r], 3, rng)

    yield check_default
    yield check_lower
    yield check_upper
def test_cholesky_grad():
    """Yield one gradient check per Cholesky variant (generator-style test).

    The ops are verified directly on ``r . r^T`` computed with numpy.  An
    explicit ``eps`` of 2e-8 is passed to ``utt.verify_grad`` when ``floatX``
    is float64; otherwise ``eps`` is ``None``.  Raises ``SkipTest`` when
    scipy is missing.
    """
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    pd = numpy.dot(r, r.T)
    eps = 2e-8 if config.floatX == "float64" else None

    def make_check(op):
        # Bind ``op`` now so every yielded callable keeps its own variant.
        return lambda: utt.verify_grad(op, [pd], 3, rng, eps=eps)

    # Check the default.
    yield make_check(cholesky)
    # Explicit lower-triangular.
    yield make_check(Cholesky(lower=True))
    # Explicit upper-triangular.
    yield make_check(Cholesky(lower=False))
def nlml(A, phidotY, EyeM):
    """Factor ``A`` and solve against ``[EyeM | phidotY]`` in one pass.

    Returns ``(iA, Lmm, beta_ss)``: every column of the solution except the
    last, the Cholesky factor of ``A``, and the last column of the solution.
    """
    chol_A = Cholesky()(A)

    # Stack phidotY as an extra column so a single pair of triangular
    # solves handles both right-hand sides.
    stacked = tt.concatenate([EyeM, phidotY[:, None]], axis=1)
    forward = solve_lower_triangular(chol_A, stacked)
    solved = solve_upper_triangular(chol_A.T, forward)

    return solved[:, :-1], chol_A, solved[:, -1]
def test_cholesky():
    """Yield triangularity checks for each Cholesky variant (generator test).

    Every yielded tuple is ``(checker, pd, compiled_function)`` where ``pd``
    is ``r . r^T`` for a random 5x5 matrix ``r``.  Raises ``SkipTest`` when
    scipy is missing.
    """
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    pd = np.dot(r, r.T)
    x = tensor.matrix()

    cases = (
        # Check the default.
        (cholesky, check_lower_triangular),
        # Explicit lower-triangular.
        (Cholesky(lower=True), check_lower_triangular),
        # Explicit upper-triangular.
        (Cholesky(lower=False), check_upper_triangular),
    )
    for decompose, checker in cases:
        ch_f = function([x], decompose(x))
        yield checker, pd, ch_f
def test_cholesky_and_cholesky_grad_shape():
    """Check shape functions of Cholesky and of its gradient.

    For each variant, functions returning only the shape of the factor and of
    the gradient are compiled.  Unless the mode is ``FAST_COMPILE``, neither
    compiled graph may contain a ``Cholesky`` / ``CholeskyGrad`` node.  The
    reported shapes are then compared with ``(n, n)`` for n in 2, 3, 5.
    Skipped when scipy cannot be imported.
    """
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    x = tensor.matrix()

    variants = (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x))
    for factor in variants:
        f_chol = theano.function([x], factor.shape)
        grad_expr = tensor.grad(factor.sum(), x)
        f_cholgrad = theano.function([x], grad_expr.shape)

        chol_nodes = f_chol.maker.fgraph.toposort()
        cholgrad_nodes = f_cholgrad.maker.fgraph.toposort()
        if config.mode != "FAST_COMPILE":
            # Exact class comparison, as in the original check.
            assert not any(
                node.op.__class__ == Cholesky for node in chol_nodes)
            assert not any(
                node.op.__class__ == CholeskyGrad for node in cholgrad_nodes)

        for size in (2, 3, 5):
            sample = np.cov(rng.randn(size, size + 10)).astype(config.floatX)
            expected = (size, size)
            np.testing.assert_equal(f_chol(sample), expected)
            np.testing.assert_equal(f_cholgrad(sample), expected)
def test_solve_correctness(self):
    """Compare ``self.op`` with scipy's solvers on general and triangular input.

    Three compiled functions are checked with ``np.allclose``: a general
    solve against ``scipy.linalg.solve``, and solves fed with the lower and
    upper Cholesky factors against ``scipy.linalg.solve_triangular``.
    Skipped when scipy cannot be imported.
    """
    scipy = pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())

    A = theano.tensor.matrix()
    b = theano.tensor.matrix()

    general_solve = theano.function([A, b], self.op(A, b))

    # The symbolic Cholesky factors are themselves used as function inputs,
    # so the triangular matrices are supplied directly at call time.
    L = Cholesky(lower=True)(A)
    lower_solve = theano.function([L, b], self.op(L, b))

    U = Cholesky(lower=False)(A)
    upper_solve = theano.function([U, b], self.op(U, b))

    b_val = np.asarray(rng.rand(5, 1), dtype=config.floatX)

    # 1-test general case
    A_val = np.asarray(rng.rand(5, 5), dtype=config.floatX)
    # positive definite matrix:
    A_val = np.dot(A_val.transpose(), A_val)
    expected = scipy.linalg.solve(A_val, b_val)
    assert np.allclose(expected, general_solve(A_val, b_val))

    # 2-test lower triangular case, 3-test upper triangular case
    for is_lower, tri_solve in ((True, lower_solve), (False, upper_solve)):
        tri_val = scipy.linalg.cholesky(A_val, lower=is_lower)
        expected = scipy.linalg.solve_triangular(
            tri_val, b_val, lower=is_lower)
        assert np.allclose(expected, tri_solve(tri_val, b_val))
def test_cholesky_and_cholesky_grad_shape():
    """Yield shape checks for Cholesky and its gradient (generator test).

    For each variant, functions returning only the shape of the factor and of
    the gradient are compiled.  Unless the mode is ``FAST_COMPILE``, neither
    compiled graph may contain a ``Cholesky`` / ``CholeskyGrad`` node.  One
    ``numpy.testing.assert_equal`` check per function is yielded for
    n in 2, 3, 5.  Raises ``SkipTest`` when scipy is missing.
    """
    if not imported_scipy:
        raise SkipTest("Scipy needed for the Cholesky op.")
    rng = numpy.random.RandomState(utt.fetch_seed())
    x = tensor.matrix()

    variants = (cholesky(x), Cholesky(lower=True)(x), Cholesky(lower=False)(x))
    for factor in variants:
        f_chol = theano.function([x], factor.shape)
        grad_expr = tensor.grad(factor.sum(), x)
        f_cholgrad = theano.function([x], grad_expr.shape)

        chol_nodes = f_chol.maker.fgraph.toposort()
        cholgrad_nodes = f_cholgrad.maker.fgraph.toposort()
        if config.mode != 'FAST_COMPILE':
            # Exact class comparison, as in the original check.
            assert not any(
                node.op.__class__ == Cholesky for node in chol_nodes)
            assert not any(
                node.op.__class__ == CholeskyGrad for node in cholgrad_nodes)

        for size in (2, 3, 5):
            sample = numpy.cov(
                rng.randn(size, size + 10)).astype(config.floatX)
            expected = (size, size)
            yield numpy.testing.assert_equal, f_chol(sample), expected
            yield numpy.testing.assert_equal, f_cholgrad(sample), expected
def test_cholesky():
    """Run the triangularity helpers on every Cholesky variant.

    The input is ``r . r^T`` for a random 5x5 matrix ``r``.  Each compiled
    function is handed to ``check_lower_triangular`` or
    ``check_upper_triangular`` together with that matrix.  Skipped when scipy
    cannot be imported.
    """
    pytest.importorskip("scipy")
    rng = np.random.RandomState(utt.fetch_seed())
    r = rng.randn(5, 5).astype(config.floatX)
    pd = np.dot(r, r.T)
    x = tensor.matrix()

    cases = (
        # Check the default.
        (cholesky, check_lower_triangular),
        # Explicit lower-triangular.
        (Cholesky(lower=True), check_lower_triangular),
        # Explicit upper-triangular.
        (Cholesky(lower=False), check_upper_triangular),
        # Upper-triangular with the NaN error policy.
        (Cholesky(lower=False, on_error="nan"), check_upper_triangular),
    )
    for decompose, checker in cases:
        checker(pd, function([x], decompose(x)))
def test_magma_opt_float16(self):
    """Check that float16 graphs of each listed op compile to the paired GPU op.

    Each op is applied to a float16 matrix and compiled with
    ``mode_with_gpu`` excluding ``"cusolver"``; the compiled graph must
    contain at least one node whose op is an instance of the paired class.
    """
    cpu_to_gpu = [
        (MatrixInverse(), GpuMagmaMatrixInverse),
        (SVD(), GpuMagmaSVD),
        (QRFull(mode="reduced"), GpuMagmaQR),
        (QRIncomplete(mode="r"), GpuMagmaQR),
        # TODO: add support for float16 to Eigh numpy
        # (Eigh(), GpuMagmaEigh),
        (Cholesky(), GpuMagmaCholesky),
    ]
    for cpu_op, expected_gpu_op in cpu_to_gpu:
        A = theano.tensor.matrix("A", dtype="float16")
        mode = mode_with_gpu.excluding("cusolver")
        fn = theano.function([A], cpu_op(A), mode=mode)
        compiled_nodes = fn.maker.fgraph.toposort()
        assert any(
            isinstance(node.op, expected_gpu_op) for node in compiled_nodes)
def MvNormalLogp():
    """Build an op computing the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    The function itself takes no arguments.  It returns a
    ``theano.OpFromGraph`` (built with ``inline=True``) whose inputs are the
    two symbolic matrices below and whose gradient is supplied by the nested
    ``dlogp`` through ``grad_overrides``.

    Inputs of the returned op
    -------------------------
    cov : tt.matrix
        The covariance matrix.
    delta : tt.matrix
        Array of deviations from the mean.

    Output of the returned op
    -------------------------
    logp : scalar
        ``-0.5 * (n*k*log(2*pi) + 2*n*sum(log(diag(L))) + sum((L^-1 delta^T)^2))``
        with ``n, k = delta.shape`` and ``L`` the lower Cholesky factor of
        ``cov``; ``-inf`` when the diagonal of ``L`` is not strictly positive.
    """
    cov = tt.matrix('cov')
    # Test values so the graph can be evaluated while it is being built.
    cov.tag.test_value = floatX(np.eye(3))
    delta = tt.matrix('delta')
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = tt.slinalg.Solve(A_structure='lower_triangular')
    solve_upper = tt.slinalg.Solve(A_structure='upper_triangular')
    # on_error='nan' makes a failed factorisation return NaNs instead of
    # raising; the ``ok`` flag below detects that case.
    cholesky = Cholesky(lower=True, on_error='nan')

    n, k = delta.shape
    # ``f`` comes from module scope (presumably a floatX cast -- confirm).
    n, k = f(n), f(k)
    chol_cov = cholesky(cov)
    diag = tt.nlinalg.diag(chol_cov)
    # NaN > 0 is False, so ``ok`` is also False after a failed factorisation.
    ok = tt.all(diag > 0)

    # Replace an invalid factor by all ones so the solve below is not fed
    # NaNs; the result is discarded by the final switch in that case.
    chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
    delta_trans = solve_lower(chol_cov, delta.T).T

    result = n * k * tt.log(f(2) * np.pi)
    result += f(2) * n * tt.sum(tt.log(diag))
    result += (delta_trans**f(2)).sum()
    result = f(-.5) * result
    logp = tt.switch(ok, result, -np.inf)

    def dlogp(inputs, gradients):
        """Return the gradients of ``logp`` w.r.t. ``cov`` and ``delta``.

        ``inputs`` is ``[cov, delta]`` and ``gradients`` holds the single
        upstream gradient of ``logp``.  Both returned gradients are NaN when
        the Cholesky factor of ``cov`` is not valid.
        """
        g_logp, = gradients
        cov, delta = inputs

        g_logp.tag.test_value = floatX(1.)
        n, k = delta.shape

        # Recompute the factorisation on the gradient's own inputs.
        chol_cov = cholesky(cov)
        diag = tt.nlinalg.diag(chol_cov)
        ok = tt.all(diag > 0)

        chol_cov = tt.switch(ok, chol_cov, tt.fill(chol_cov, 1))
        delta_trans = solve_lower(chol_cov, delta.T).T

        inner = n * tt.eye(k) - tt.dot(delta_trans.T, delta_trans)
        # Two solves against L^T, with a transpose in between.
        g_cov = solve_upper(chol_cov.T, inner)
        g_cov = solve_upper(chol_cov.T, g_cov.T)

        tau_delta = solve_upper(chol_cov.T, delta_trans.T)
        g_delta = tau_delta.T

        g_cov = tt.switch(ok, g_cov, -np.nan)
        g_delta = tt.switch(ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return theano.OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)