Example No. 1
    def _CalculateNecessaryMatrices(self, scales, nuggets):
        # Kernel matrix with the nugget (noise) term added to the diagonal.
        K = self.covariance(self.X, self.X, scales)
        K = K + nuggets * nuggets * np.eye(K.shape[0])
        # Cholesky factor L with K = L L^T, then alpha = K^{-1} Y via two
        # triangular solves: L z = Y followed by L^T alpha = z.
        L = np.linalg.cholesky(K)
        self.alpha = solve_triangular(
            L.transpose(), solve_triangular(L, self.Y, lower=True), lower=False)
        self.cholesky = L
        self.cov_matrix = K
Example No. 2
def solve_via_cholesky(k_chol, y):
    """Solves a positive definite linear system via a Cholesky decomposition.

    Args:
        k_chol: The Cholesky factor of the matrix to solve. A lower triangular
            matrix, perhaps more commonly known as L.
        y: The right-hand side vector to solve for.

    Returns:
        The solution b of (L L^T) b = y.
    """

    # Solve L s = y
    s = spl.solve_triangular(k_chol, y, lower=True)

    # Solve L^T b = s
    b = spl.solve_triangular(k_chol.T, s, lower=False)

    return b
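For reference, a minimal usage sketch (not from the original project) showing the same two triangular solves against a small SPD system; `spl` in the snippet above is assumed to be `scipy.linalg`:

import numpy as np
import scipy.linalg as spl

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
K = A @ A.T + 5 * np.eye(5)                    # symmetric positive definite
y = rng.standard_normal(5)

L = np.linalg.cholesky(K)                      # K = L L^T, L lower triangular
s = spl.solve_triangular(L, y, lower=True)     # forward solve:  L s = y
b = spl.solve_triangular(L.T, s, lower=False)  # backward solve: L^T b = s
assert np.allclose(K @ b, y)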
Example No. 3
def _testfunc_mahal(a, b, use_my):
    # Squared Mahalanobis-type term b^T a^{-1} b via the (lower) Cholesky
    # factor of a: x = L^{-1} b, result = sum(x^2).
    if use_my:
        l = cholesky_factorization(a)
    else:
        l = anp.linalg.cholesky(a)
    x = aspl.solve_triangular(l, b, lower=True)
    return anp.sum(anp.square(x))
Example No. 4
    def Predict(self, X):
        # Cross-covariance between test inputs X and training inputs self.X.
        kstar = self.covariance(X, self.X, self.scales)
        predictive_mean = np.matmul(np.transpose(kstar), self.alpha)
        # v = L^{-1} k*, so the predictive covariance is k(X, X) - v^T v.
        v = solve_triangular(self.cholesky, kstar, lower=True)
        predictive_variance = self.covariance(X, X, self.scales) - np.matmul(
            np.transpose(v), v)
        return predictive_mean.reshape(
            -1, 1), np.diag(predictive_variance).reshape(-1, 1, 1)
Example No. 5
    def predict(self, coords=None):
        """Return posterior mean and variance"""
        S = self.eval_S(self.kappa, self.sigma_f)
        K_chol = self.eval_K_chol(S, self.sigma_n, self.sigma_f)

        woodbury_vector = solve_triangular(K_chol, self.Y, lower=True)
        phi_S_phistar = (self.eigenfunctions[self.X] * S[None, :]) @ \
            self.eigenfunctions.T
        W = solve_triangular(K_chol, phi_S_phistar, lower=True)

        mean = np.einsum('nt,n->t', W, woodbury_vector)

        # W already equals K_chol^{-1} (Phi S Phi*^T); reuse it instead of
        # repeating the identical triangular solve.
        variance = np.sum((self.eigenfunctions * S[None, :]).T *
                          self.eigenfunctions.T, axis=0) - np.sum(W**2, axis=0)

        return mean, variance
Example No. 6
def cholesky_computations(features,
                          targets,
                          mean,
                          kernel,
                          noise_variance,
                          debug_log=False,
                          test_intermediates=None):
    """
    Given input matrix X (features), target matrix Y (targets), mean and kernel
    function, compute posterior state {L, P}, where L is the Cholesky factor
    of
        k(X, X) + sigsq_final * I
    and
        L P = Y - mean(X)
    Here, sigsq_final >= noise_variance is minimal such that the Cholesky
    factorization does not fail.

    :param features: Input matrix X (n,d)
    :param targets: Target matrix Y (n,m)
    :param mean: Mean function
    :param kernel: Kernel function
    :param noise_variance: Noise variance (may be increased)
    :param debug_log: Debug output during add_jitter CustomOp?
    :param test_intermediates: If given, all intermediates are written into
        this dict
    :return: L, P
    """
    kernel_mat = kernel(features, features)
    #   Add jitter to noise_variance (if needed) in order to guarantee that
    #   Cholesky factorization works
    sys_mat = AddJitterOp(flatten_and_concat(kernel_mat, noise_variance),
                          initial_jitter_factor=NOISE_VARIANCE_LOWER_BOUND,
                          debug_log='true' if debug_log else 'false')
    chol_fact = cholesky_factorization(sys_mat)
    centered_y = targets - anp.reshape(mean(features), (-1, 1))
    pred_mat = aspl.solve_triangular(chol_fact, centered_y, lower=True)

    if test_intermediates is not None:
        assert isinstance(test_intermediates, dict)
        test_intermediates.update({
            'features': features,
            'targets': targets,
            'noise_variance': noise_variance,
            'kernel_mat': kernel_mat,
            'sys_mat': sys_mat,
            'chol_fact': chol_fact,
            'pred_mat': pred_mat,
            'centered_y': centered_y
        })
        test_intermediates.update(kernel.get_params())
        test_intermediates.update(mean.get_params())

    return chol_fact, pred_mat
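Stripped of the AddJitterOp machinery, the posterior state {L, P} described in the docstring can be sketched with plain NumPy/SciPy. This is a simplified illustration assuming a fixed noise variance, not the project's actual implementation:

import numpy as np
import scipy.linalg as spl

def cholesky_state(kernel_mat, targets, mean_vec, noise_variance):
    # L L^T = k(X, X) + sigma^2 I  and  L P = Y - mean(X)
    n = kernel_mat.shape[0]
    L = np.linalg.cholesky(kernel_mat + noise_variance * np.eye(n))
    P = spl.solve_triangular(L, targets - mean_vec.reshape(-1, 1), lower=True)
    return L, P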
Example No. 7
    def objective(self, params):

        kern_variance, kern_lengthscale, noise_variance, Z = self.to_params(
            params)

        sigma2 = noise_variance
        N = self.num_data
        Saa = self.Su_old
        ma = self.mu_old

        # a is old inducing points, b is new
        # f is training points
        Kfdiag = self.Kdiag(self.X, kern_variance)
        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD, Lbinv_Kba, LDinv_Lbinv_c,
         err, Qff) = self._build_objective_terms(Z, kern_variance,
                                                 kern_lengthscale,
                                                 noise_variance)

        LSa = anp.linalg.cholesky(Saa)
        Lainv_ma = solve_triangular(LSa, ma, lower=True)

        bound = 0

        # constant term
        bound = -0.5 * N * anp.log(2 * anp.pi)

        # quadratic term
        bound += -0.5 * anp.sum(anp.square(err)) / sigma2
        bound += -0.5 * anp.sum(anp.square(Lainv_ma))
        bound += 0.5 * anp.sum(anp.square(LDinv_Lbinv_c))

        # log det term
        bound += -0.5 * N * anp.sum(anp.log(sigma2))
        bound += -anp.sum(anp.log(anp.diag(LD)))

        # delta 1: trace term
        bound += -0.5 * anp.sum(Kfdiag) / sigma2
        bound += 0.5 * anp.sum(anp.diag(Qff))

        # delta 2: a and b difference
        bound += anp.sum(anp.log(anp.diag(La)))
        bound += -anp.sum(anp.log(anp.diag(LSa)))

        Kaadiff = Kaa_cur - anp.matmul(anp.transpose(Lbinv_Kba), Lbinv_Kba)
        Sainv_Kaadiff = anp.linalg.solve(Saa, Kaadiff)
        Kainv_Kaadiff = anp.linalg.solve(Kaa, Kaadiff)

        bound += -0.5 * anp.sum(
            anp.diag(Sainv_Kaadiff) - anp.diag(Kainv_Kaadiff))

        return bound
Example No. 8
def chol_inv(W):
    """Invert a symmetric positive definite matrix W via its Cholesky factor."""
    EYEN = np.eye(W.shape[0])
    try:
        tri_W = np.linalg.cholesky(W)
        # tri_W_inv = L^{-1}, so W^{-1} = L^{-T} L^{-1}.
        tri_W_inv = splg.solve_triangular(tri_W, EYEN, lower=True)
        W_inv = np.matmul(tri_W_inv.T, tri_W_inv)
        # Symmetrize to clean up round-off error.
        W_inv = (W_inv + W_inv.T) / 2
        return W_inv
    except Exception:
        # Typically a LinAlgError: W is not positive definite.
        return False
Example No. 9
File: mniw.py  Project: lfywork/svae
    def sample_invwishart(S, nu):
        n = S.shape[0]
        chol = np.linalg.cholesky(S)

        # Draw a Wishart(nu, I) square root: nu Gaussian rows directly when nu
        # is a small integer, otherwise the Bartlett decomposition.
        if (nu <= 81 + n) and (nu == np.round(nu)):
            x = npr.randn(nu, n)
        else:
            x = np.diag(np.sqrt(np.atleast_1d(chi2.rvs(nu - np.arange(n)))))
            x[np.triu_indices_from(x, 1)] = npr.randn(n * (n - 1) // 2)
        R = np.linalg.qr(x, 'r')
        T = solve_triangular(R.T, chol.T, lower=True).T
        return np.dot(T, T.T)
Example No. 10
    def sample_invwishart(S, nu):
        n = S.shape[0]
        chol = np.linalg.cholesky(S)

        if (nu <= 81 + n) and (nu == np.round(nu)):
            x = npr.randn(nu, n)
        else:
            x = np.diag(np.sqrt(np.atleast_1d(chi2.rvs(nu - np.arange(n)))))
            x[np.triu_indices_from(x, 1)] = npr.randn(n*(n-1)//2)
        R = np.linalg.qr(x, 'r')
        T = solve_triangular(R.T, chol.T, lower=True).T
        return np.dot(T, T.T)
Example No. 11
    def fit(self, kernel, y, jitter):
        """Solve (kernel + diag(jitter)) alpha = y; len(y) == len(jitter)."""
        reg = jitter

        if self.memory_eff is True:
            # In-place Cholesky via cho_factor to avoid copying the kernel.
            kernel[np.diag_indices_from(kernel)] += reg
            kernel, lower = cho_factor(kernel,
                                       lower=False,
                                       overwrite_a=True,
                                       check_finite=False)
            L = kernel
            alpha = cho_solve((L, lower), y, overwrite_b=False).reshape(
                (1, -1))
        elif self.memory_eff == 'autograd':
            # Autograd-friendly dense solve.
            alpha = np.linalg.solve(kernel + np.diag(reg), y).reshape((1, -1))
        else:
            # Explicit Cholesky followed by two triangular solves.
            L = np.linalg.cholesky(kernel + np.diag(reg))
            z = solve_triangular(L, y, lower=True)
            alpha = solve_triangular(L.T, z, lower=False,
                                     overwrite_b=True).reshape((1, -1))

        return alpha
Example No. 12
    def _neg_log_likelihood_alt(self, sigma_f, kappa, sigma_n):
        if self.Y is None:
            return 0.0

        S = self.eval_S(kappa, sigma_f)

        K_chol = self.eval_K_chol(S, sigma_n, sigma_f)
        K_chol_inv_Y = solve_triangular(K_chol, self.Y, lower=True)

        data_fit = 0.5 * np.sum(K_chol_inv_Y * K_chol_inv_Y)
        penalty = np.sum(np.log(np.diag(K_chol)))
        objective = data_fit + penalty + 0.5 * len(self.Y) * np.log(2*np.pi)

        return objective
Example No. 13
    def posterior_samples(self, nsamples):
        """Compute `nsamples` posterior samples with Matheron's rule."""
        prior = self.prior_samples(nsamples)

        S = self.eval_S(self.kappa, self.sigma_f)
        K_chol = self.eval_K_chol(S, self.sigma_n, self.sigma_f)

        residue = (self.Y - prior[..., self.X]).T  # shape (n, s)

        K_chol_inv_R = solve_triangular(K_chol,
                                        residue, lower=True)  # shape (n, s)

        phi_S_phistar = (self.eigenfunctions[self.X] * S[None, :]) @ \
            self.eigenfunctions.T  # shape (n, t)

        K_chol_inv_phi_S_phistar = solve_triangular(K_chol,
                                                    phi_S_phistar,
                                                    lower=True)  # shape (n, t)
        update_term = np.einsum('nt,ns->st',
                                K_chol_inv_phi_S_phistar,
                                K_chol_inv_R)

        return prior + update_term  # shape (s, t)
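Matheron's rule used above amounts to f_post(x*) = f_prior(x*) + k(x*, X) K^{-1} (y - f_prior(X)). Below is a hedged dense-GP sketch of that update with a hypothetical helper, not the class method itself:

import numpy as np
import scipy.linalg as spl

def matheron_update(prior_star, prior_X, y, K, K_star):
    # prior_star: (s, t) prior draws at test points; prior_X: (s, n) draws at X.
    L = np.linalg.cholesky(K)                                    # K = L L^T
    resid = spl.solve_triangular(L, (y - prior_X).T, lower=True) # (n, s)
    v = spl.solve_triangular(L, K_star, lower=True)              # (n, t)
    return prior_star + np.einsum('nt,ns->st', v, resid)         # (s, t)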
Example No. 14
def sample_posterior_joint(features,
                           mean,
                           kernel,
                           chol_fact,
                           pred_mat,
                           test_features,
                           num_samples=1):
    """
    Draws num_sample samples from joint posterior distribution over inputs
    test_features. This is done by computing mean and covariance matrix of
    this posterior, and using the Cholesky decomposition of the latter. If
    pred_mat is a matrix with m columns, the samples returned have shape
    (n_test, m, num_samples).

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :param num_samples: Number of samples to draw
    :return: Samples, shape (n_test, num_samples) or (n_test, m, num_samples)
    """
    k_tr_te = kernel(features, test_features)
    linv_k_tr_te = aspl.solve_triangular(chol_fact, k_tr_te, lower=True)
    posterior_mean = anp.matmul(anp.transpose(linv_k_tr_te), pred_mat) + \
                     anp.reshape(mean(test_features), (-1, 1))
    posterior_cov = kernel(test_features, test_features) - anp.dot(
        anp.transpose(linv_k_tr_te), linv_k_tr_te)
    jitter_init = anp.ones((1, )) * (1e-5)
    sys_mat = AddJitterOp(flatten_and_concat(posterior_cov, jitter_init),
                          initial_jitter_factor=NOISE_VARIANCE_LOWER_BOUND)
    lfact = cholesky_factorization(sys_mat)
    # Draw samples
    # posterior_mean.shape = (n_test, m), where m is number of cols of pred_mat
    # Reshape to (n_test, m, 1)
    n_test = getval(posterior_mean.shape)[0]
    posterior_mean = anp.expand_dims(posterior_mean, axis=-1)
    n01_vecs = [
        anp.random.normal(size=getval(posterior_mean.shape))
        for _ in range(num_samples)
    ]
    n01_mat = anp.reshape(anp.concatenate(n01_vecs, axis=-1), (n_test, -1))
    samples = anp.reshape(anp.dot(lfact, n01_mat), (n_test, -1, num_samples))
    samples = samples + posterior_mean

    if samples.shape[1] == 1:
        samples = anp.reshape(samples, (n_test, -1))  # (n_test, num_samples)

    return samples
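The core of the sampling step above, in plain NumPy. This is a sketch with made-up argument names; the original adds jitter through AddJitterOp rather than a fixed constant:

import numpy as np

def sample_joint(post_mean, post_cov, num_samples=1, jitter=1e-5):
    # post_mean: (n_test, m); returns samples of shape (n_test, m, num_samples).
    n_test, m = post_mean.shape
    L = np.linalg.cholesky(post_cov + jitter * np.eye(n_test))
    eps = np.random.normal(size=(n_test, m * num_samples))
    samples = (L @ eps).reshape(n_test, m, num_samples)
    return samples + post_mean[:, :, None]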
Example No. 15
    def _build_objective_terms(self, Z, kern_variance, kern_lengthscale, noise_variance):

        Mb = anp.shape(Z)[0]
        Ma = self.M_old
        jitter = 1e-3
        sigma2 = noise_variance
        sigma = anp.sqrt(sigma2)

        Saa = self.Su_old
        ma = self.mu_old

        # a is old inducing points, b is new
        # f is training points
        # s is test points
        Kbf = self.K(kern_variance, kern_lengthscale, Z, self.X)
        Kbb = self.K(kern_variance, kern_lengthscale, Z) + anp.eye(Mb, dtype=float_type) * jitter

        Kba = self.K(kern_variance, kern_lengthscale, Z, self.Z_old)
        Kaa_cur = self.K(kern_variance, kern_lengthscale, self.Z_old) + anp.eye(Ma, dtype=float_type) * jitter
        Kaa = self.Kaa_old + anp.eye(Ma, dtype=float_type) * jitter

        err = self.Y 

        Sainv_ma = anp.linalg.solve(Saa, ma)
        Sinv_y = self.Y / sigma2
        c1 = anp.matmul(Kbf, Sinv_y)
        c2 = anp.matmul(Kba, Sainv_ma)
        c = c1 + c2

        Lb = anp.linalg.cholesky(Kbb)
        Lbinv_c = solve_triangular(Lb, c, lower=True)
        Lbinv_Kba = solve_triangular(Lb, Kba, lower=True)
        Lbinv_Kbf = solve_triangular(Lb, Kbf, lower=True) / sigma
        d1 = anp.matmul(Lbinv_Kbf, anp.transpose(Lbinv_Kbf))
        
        LSa = anp.linalg.cholesky(Saa)
        Kab_Lbinv = anp.transpose(Lbinv_Kba)
        LSainv_Kab_Lbinv = solve_triangular(
            LSa, Kab_Lbinv, lower=True)
        d2 = anp.matmul(anp.transpose(LSainv_Kab_Lbinv), LSainv_Kab_Lbinv)

        La = anp.linalg.cholesky(Kaa)
        Lainv_Kab_Lbinv = solve_triangular(
            La, Kab_Lbinv, lower=True)
        d3 = anp.matmul(anp.transpose(Lainv_Kab_Lbinv), Lainv_Kab_Lbinv)

        D = anp.eye(Mb, dtype=float_type) + d1 + d2 - d3
        D = D + anp.eye(Mb, dtype=float_type) * jitter

        LD = anp.linalg.cholesky(D)

        LDinv_Lbinv_c = solve_triangular(LD, Lbinv_c, lower=True)

        return (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
                Lbinv_Kba, LDinv_Lbinv_c, err, d1)
Example No. 16
    def compute_iKy(self, params, yknt, xt):
        k, n, t = yknt.shape
        Idt = np.eye(self.d * t)
        It = np.eye(t)

        kern_params, C, r, mu = self.unpack_params(params)
        KXX = self.kernel.build_Kxx(xt, xt, kern_params)

        KXX = KXX + np.eye(KXX.shape[0]) * self.fudge
        L = np.linalg.cholesky(KXX)
        iR12 = 1 / (np.sqrt(r))
        A = L.T @ (np.kron(C.T @ np.diag(iR12), It))  # NT by NT

        B = Idt + L.T @ (np.kron(C.T @ np.diag(1 / r) @ C, It)) @ L
        M = np.linalg.cholesky(B)
        R_yk_nt = (yknt * iR12[None, :, None]).reshape([k, -1])
        Ay = (A @ R_yk_nt.T).T  # k by ...
        x = solve_triangular(M, Ay.T, lower=True)
        iBARy = solve_triangular(M.T, x, lower=False).T
        AiBARy = (A.T @ iBARy.T).T
        Idt_AiBARy = R_yk_nt - AiBARy  # k by nt
        x = (Idt_AiBARy.reshape([k, n, t]) * iR12[None, :, None]).reshape(
            [k, -1])
        return x, M, KXX
Example No. 17
def predict_posterior_marginals(features,
                                mean,
                                kernel,
                                chol_fact,
                                pred_mat,
                                test_features,
                                test_intermediates=None):
    """
    Computes posterior means and variances for test_features.
    If pred_mat is a matrix, so will be posterior_means, but not
    posterior_variances. Reflects the fact that for GP regression and fixed
    hyperparameters, the posterior mean depends on the targets y, but the
    posterior covariance does not.

    :param features: Training inputs
    :param mean: Mean function
    :param kernel: Kernel function
    :param chol_fact: Part L of posterior state
    :param pred_mat: Part P of posterior state
    :param test_features: Test inputs
    :return: posterior_means, posterior_variances
    """
    k_tr_te = kernel(features, test_features)
    linv_k_tr_te = aspl.solve_triangular(chol_fact, k_tr_te, lower=True)
    posterior_means = anp.matmul(anp.transpose(linv_k_tr_te), pred_mat) + \
                      anp.reshape(mean(test_features), (-1, 1))
    posterior_variances = kernel.diagonal(test_features) - anp.sum(
        anp.square(linv_k_tr_te), axis=0)
    if test_intermediates is not None:
        assert isinstance(test_intermediates, dict)
        test_intermediates.update({
            'k_tr_te': k_tr_te,
            'linv_k_tr_te': linv_k_tr_te,
            'test_features': test_features,
            'pred_means': posterior_means,
            'pred_vars': anp.reshape(
                anp.maximum(posterior_variances, MIN_POSTERIOR_VARIANCE),
                (-1, ))
        })

    return posterior_means, anp.reshape(
        anp.maximum(posterior_variances, MIN_POSTERIOR_VARIANCE), (-1, ))
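In plain terms: with v = L^{-1} k(X, X*), the posterior means are v^T P + mean(X*) and the posterior variances are diag k(X*, X*) - sum_i v_i^2. A small illustrative sketch with hypothetical names, not the project's API:

import numpy as np
import scipy.linalg as spl

def predict_marginals(L, P, k_tr_te, k_te_diag, mean_te, min_var=1e-9):
    v = spl.solve_triangular(L, k_tr_te, lower=True)        # L^{-1} k(X, X*)
    means = v.T @ P + mean_te.reshape(-1, 1)
    variances = np.maximum(k_te_diag - np.sum(v ** 2, axis=0), min_var)
    return means, variances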
Example No. 18
def batch_mahalanobis(L, x):
    """
    Compute the squared Mahalanobis distance.
    :math:`x^T M^{-1} x` for a factored :math:`M = LL^T`.

    Copied from PyTorch torch.distributions.multivariate_normal.

    Parameters
    ----------
    L : array_like (..., D, D)
        Cholesky factorization(s) of covariance matrix

    x : array_like (..., D)
        Points at which to evaluate the quadratic term

    Returns
    -------
    y : array_like (...,)
        squared Mahalanobis distance :math:`x^T (LL^T)^{-1} x`

        x^T (LL^T)^{-1} x = x^T L^{-T} L^{-1} x
    """
    # The most common shapes are x: (T, D) and L : (D, D)
    # Special case that one
    if x.ndim == 2 and L.ndim == 2:
        xs = solve_triangular(L, x.T, lower=True)
        return np.sum(xs**2, axis=0)

    # Flatten the Cholesky into a (-1, D, D) array
    flat_L = flatten_to_dim(L, 2)
    # Invert each of the K arrays and reshape like L
    L_inv = np.reshape(np.array([np.linalg.inv(Li.T) for Li in flat_L]),
                       L.shape)
    # xs = x @ L_inv, i.e. each row becomes (L^{-1} x); square and sum.
    xs = np.einsum('...i,...ij->...j', x, L_inv)
    return np.sum(xs**2, axis=-1)
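A quick consistency check of the fast 2-D path against the explicit-inverse definition (illustrative only):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(1)
A = rng.standard_normal((3, 3))
M = A @ A.T + 3 * np.eye(3)                     # SPD covariance
L = np.linalg.cholesky(M)
x = rng.standard_normal((5, 3))                 # five query points

xs = solve_triangular(L, x.T, lower=True)       # L^{-1} x^T, shape (3, 5)
fast = np.sum(xs**2, axis=0)
direct = np.einsum('td,dk,tk->t', x, np.linalg.inv(M), x)
assert np.allclose(fast, direct)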
Example No. 19
def cholesky_factorization_backward(l, lbar):
    # Backward (reverse-mode) rule for the Cholesky factorization A = L L^T:
    # Abar = 0.5 * L^{-T} copyltu(L^T Lbar) L^{-1}, where copyltu copies the
    # lower triangle onto the upper (cf. Murray, 2016).
    abar = copyltu(anp.matmul(anp.transpose(l), lbar))
    abar = anp.transpose(aspl.solve_triangular(l, abar, lower=True, trans='T'))
    abar = aspl.solve_triangular(l, abar, lower=True, trans='T')
    return 0.5 * abar
Example No. 20
def fun(B):
    return to_scalar(
        spla.solve_triangular(A, B, trans=trans, lower=lower))
Example No. 21
File: util.py  Project: lfywork/svae
def solve_triangular(L, x, trans='N'):
    return spla.solve_triangular(L, x, lower=True, trans=trans)
Example No. 22
def fun(B):
    return to_scalar(spla.solve_triangular(A, B, trans=trans, lower=lower))
Example No. 23
def solve_triangular(L, x, trans='N'):
    return spla.solve_triangular(L, x, lower=True, trans=trans)
Example No. 24
def _triangular_solve(a_, b_):
    return asla.solve_triangular(a_,
                                 b_,
                                 trans="N",
                                 lower=lower_a,
                                 check_finite=False)
Example No. 25
def _compute_lvec(features, chol_fact, kernel, feature):
    kvec = anp.reshape(kernel(features, feature), (-1, 1))
    return anp.reshape(aspl.solve_triangular(chol_fact, kvec, lower=True),
                       (1, -1))