Example #1
def infExact(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """ Exact inference for a GP with Gaussian likelihood. Compute a parametrization
     of the posterior, the negative log marginal likelihood and its derivatives
     w.r.t. the hyperparameters.
    """

    if not (likfunc[0] == "likelihoods.likGauss"):  # NOTE: no explicit call to likGauss
        raise Exception("Exact inference only possible with Gaussian likelihood")

    n, D = x.shape
    K = src.Tools.general.feval(covfunc, hyp.cov, x)  # evaluate covariance matrix
    m = src.Tools.general.feval(meanfunc, hyp.mean, x)  # evaluate mean vector

    sn2 = np.exp(2.0 * hyp.lik)  # noise variance of likGauss
    try:
        L = np.linalg.cholesky(K / sn2 + np.eye(n)).T  # Cholesky factor of covariance with noise
    except np.linalg.LinAlgError:
        # fall back to the nearest positive-definite approximation if the factorization fails
        L = np.linalg.cholesky(nearPD(K / sn2 + np.eye(n))).T
    alpha = solve_chol(L, y - m) / sn2

    post = postStruct()

    post.alpha = alpha  # return the posterior parameters
    post.sW = np.ones((n, 1)) / np.sqrt(sn2)  # sqrt of noise precision vector
    post.L = L  # L = chol(eye(n)+sW*sW'.*K)

    if nargout > 1:  # do we want the marginal likelihood?
        nlZ = (
            np.dot((y - m).T, alpha / 2) + np.log(np.diag(L)).sum() + n * np.log(2 * np.pi * sn2) / 2.0
        )  # -log marg lik
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlzStruct(hyp)  # allocate space for derivatives
            Q = solve_chol(L, np.eye(n)) / sn2 - np.dot(alpha, alpha.T)  # precompute for convenience
            for ii in range(len(hyp.cov)):
                dnlZ.cov[ii] = (Q * src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)).sum() / 2.0

            dnlZ.lik = sn2 * np.trace(Q)
            for ii in range(len(hyp.mean)):
                dnlZ.mean[ii] = np.dot(-src.Tools.general.feval(meanfunc, hyp.mean, x, ii).T, alpha)

            return [post, nlZ[0][0], dnlZ]

        return [post, nlZ[0][0]]

    return [post]
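
infExact leans on several helpers that the snippet does not define: solve_chol, nearPD, postStruct and dnlzStruct. As a rough, hedged sketch of what they might look like (the library's actual versions may differ; the hyp fields cov/mean/lik are assumed from the calls above), solve_chol performs the two triangular solves for A*x = B given the upper-triangular factor L with A = L'*L, and the structs are plain containers:

import numpy as np
from scipy.linalg import solve_triangular

def solve_chol(L, B):
    # x = A \ B for A = L'*L, with L the upper-triangular Cholesky factor
    # produced by np.linalg.cholesky(...).T in the functions above
    return solve_triangular(L, solve_triangular(L, B, trans='T', lower=False), lower=False)

class postStruct(object):
    # container for the posterior parametrization (alpha, sW, L)
    def __init__(self):
        self.alpha, self.sW, self.L = None, None, None

class dnlzStruct(object):
    # derivative storage mirroring the layout of the hyperparameter struct
    def __init__(self, hyp):
        self.cov = [0.0] * len(hyp.cov)
        self.mean = [0.0] * len(hyp.mean)
        self.lik = [0.0] * len(hyp.lik)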
Example #2
def infEP(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """ Expectation Propagation approximation to the posterior Gaussian Process.
     The function takes a specified covariance function (see kernels.py) and
     likelihood function (see likelihoods.py), and is designed to be used with
     gp.py. In the EP algorithm the sites are meant to be updated in random order,
     which improves performance when cases are ordered according to the targets
     (this port currently visits them sequentially; see the rperm comment below).
    """

    tol = 1e-4  # tolerance to stop EP iterations
    max_sweep = 10  # maximum number of EP sweeps
    min_sweep = 2  # minimum number of EP sweeps

    inffunc = "inferences.infEP"
    n = x.shape[0]

    K = src.Tools.general.feval(covfunc, hyp.cov, x)  # evaluate the covariance matrix

    m = src.Tools.general.feval(meanfunc, hyp.mean, x)  # evaluate the mean vector

    # A note on naming: variables are given short but descriptive names in
    # accordance with Rasmussen & Williams "GPs for Machine Learning" (2006): mu
    # and s2 are mean and variance, nu and tau are natural parameters. A leading t
    # means tilde, a subscript _ni means "not i" (for cavity parameters), or _n
    # for a vector of cavity parameters.

    # marginal likelihood for ttau = tnu = zeros(n,1); equals n*log(2) for likCum*
    nlZ0 = -src.Tools.general.feval(
        likfunc, hyp.lik, y, m, np.reshape(np.diag(K), (np.diag(K).shape[0], 1)), inffunc
    ).sum()
    if "last_ttau" not in infEP.__dict__:  # find starting point for tilde parameters
        ttau = np.zeros((n, 1))  # initialize to zero if we have no better guess
        tnu = np.zeros((n, 1))
        Sigma = K  # initialize Sigma and mu, the parameters of ..
        mu = np.zeros((n, 1))  # .. the Gaussian posterior approximation
        nlZ = nlZ0
    else:
        ttau = infEP.last_ttau  # try the tilde values from previous call
        tnu = infEP.last_tnu
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)
        if nlZ > nlZ0:  # if zero is better ..
            ttau = np.zeros((n, 1))  # .. then initialize with zero instead
            tnu = np.zeros((n, 1))
            Sigma = K  # initialize Sigma and mu, the parameters of ..
            mu = np.zeros((n, 1))  # .. the Gaussian posterior approximation
            nlZ = nlZ0

    nlZ_old = np.inf
    sweep = 0
    # iterate until convergence, but always between min_sweep and max_sweep sweeps
    while (np.abs(nlZ - nlZ_old) > tol and sweep < max_sweep) or (sweep < min_sweep):
        nlZ_old = nlZ
        sweep += 1
        rperm = range(n)  # sites visited in order here; the MATLAB original uses randperm(n)
        for ii in rperm:  # iterate EP updates (in random order) over examples
            tau_ni = 1 / Sigma[ii, ii] - ttau[ii]  #  first find the cavity distribution ..
            nu_ni = mu[ii] / Sigma[ii, ii] + m[ii] * tau_ni - tnu[ii]  # .. params tau_ni and nu_ni
            # compute the desired derivatives of the individual log partition function
            vargout = src.Tools.general.feval(likfunc, hyp.lik, y[ii], nu_ni / tau_ni, 1 / tau_ni, inffunc, None, 3)
            lZ = vargout[0]
            dlZ = vargout[1]
            d2lZ = vargout[2]
            ttau_old = copy(ttau[ii])  # then find the new tilde parameters, keep copy of old

            ttau[ii] = -d2lZ / (1.0 + d2lZ / tau_ni)
            ttau[ii] = max(ttau[ii], 0)  # enforce positivity i.e. lower bound ttau by zero
            tnu[ii] = (dlZ + (m[ii] - nu_ni / tau_ni) * d2lZ) / (1.0 + d2lZ / tau_ni)

            ds2 = ttau[ii] - ttau_old  # finally rank-1 update Sigma ..
            si = np.reshape(Sigma[:, ii], (Sigma.shape[0], 1))
            Sigma = Sigma - ds2 / (1.0 + ds2 * si[ii]) * np.dot(si, si.T)  # takes 70% of total time
            mu = np.dot(Sigma, tnu)  # .. and recompute mu

        # recompute since repeated rank-one updates can destroy numerical precision
        [Sigma, mu, nlZ, L] = epComputeParams(K, y, ttau, tnu, likfunc, hyp, m, inffunc)

    if sweep == max_sweep:
        raise Exception("maximum number of sweeps reached in function infEP")

    infEP.last_ttau = ttau
    infEP.last_tnu = tnu  # remember for next call

    sW = np.sqrt(ttau)
    alpha = tnu - sW * solve_chol(L, sW * np.dot(K, tnu))

    post = postStruct()

    post.alpha = alpha  # return the posterior params
    post.sW = sW
    post.L = L

    if nargout > 1:
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlzStruct(hyp)  # allocate space for derivatives
            V = np.linalg.solve(L.T, np.tile(sW, (1, n)) * K)
            Sigma = K - np.dot(V.T, V)
            mu = np.dot(Sigma, tnu)
            Dsigma = np.reshape(np.diag(Sigma), (np.diag(Sigma).shape[0], 1))
            tau_n = 1 / Dsigma - ttau  # compute the log marginal likelihood
            nu_n = mu / Dsigma - tnu  # vectors of cavity parameters
            F = np.dot(alpha, alpha.T) - np.tile(sW, (1, n)) * solve_chol(
                L, np.diag(np.reshape(sW, (sW.shape[0],)))
            )  # covariance hypers
            for ii in range(len(hyp.cov)):
                dK = src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)
                dnlZ.cov[ii] = -(F * dK).sum() / 2.0

            for ii in range(len(hyp.lik)):
                dlik = src.Tools.general.feval(likfunc, hyp.lik, y, nu_n / tau_n, 1 / tau_n, inffunc, ii, 1)
                dnlZ.lik[ii] = -dlik.sum()

            [junk, dlZ] = src.Tools.general.feval(
                likfunc, hyp.lik, y, nu_n / tau_n, 1 / tau_n, inffunc, None, 2
            )  # mean hyps
            for ii in range(len(hyp.mean)):
                dm = src.Tools.general.feval(meanfunc, hyp.mean, x, ii)
                dnlZ.mean[ii] = -np.dot(dlZ.T, dm)[0, 0]

            vargout = [post, nlZ, dnlZ]
        else:
            vargout = [post, nlZ]
    else:
        vargout = [post]

    return vargout
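
The inner EP loop updates Sigma with a Sherman-Morrison rank-one step: raising the i-th site precision by ds2 shifts the posterior covariance by -ds2 / (1 + ds2*Sigma[i,i]) * si*si'. A small self-contained check of that identity (the matrix, index and increment below are illustrative only, not from the library):

import numpy as np

rng = np.random.RandomState(1)
A = rng.randn(5, 5)
Sigma = np.dot(A, A.T) + 5.0 * np.eye(5)  # a positive definite "posterior" covariance
i, ds2 = 2, 0.7                           # site index and precision increment
si = Sigma[:, [i]]                        # i-th column of Sigma as a column vector
updated = Sigma - ds2 / (1.0 + ds2 * Sigma[i, i]) * np.dot(si, si.T)
P = np.linalg.inv(Sigma)
P[i, i] += ds2                            # the same change in natural (precision) parameters
assert np.allclose(updated, np.linalg.inv(P))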
Example #3
def infLaplace(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """ Laplace approximation to the posterior Gaussian process.
     The function takes a specified covariance function (see kernels.py) and
     likelihood function (see likelihoods.py).
    """

    tol = 1e-6  # tolerance for when to stop the Newton iterations
    smax = 2  # line search parameters: maximum step size, ..
    Nline = 20  # .. number of line search steps, ..
    thr = 1e-4  # .. and threshold
    maxit = 20  # max number of Newton steps in f

    inffunc = "inferences.infLaplace"

    K = src.Tools.general.feval(covfunc, hyp.cov, x)  # evaluate the covariance
    m = src.Tools.general.feval(meanfunc, hyp.mean, x)  # evaluate the mean vector

    n, D = x.shape

    Psi_old = np.inf  # make sure while loop starts by the largest old objective val
    if "last_alpha" not in infLaplace.__dict__:  # find a good starting point for alpha and f
        alpha = np.zeros((n, 1))
        f = np.dot(K, alpha) + m  # start at the prior mean
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        W = -d2lp
        Psi_new = -lp.sum()
    else:
        alpha = infLaplace.last_alpha  # try the alpha from the previous call
        f = np.dot(K, alpha) + m
        vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
        lp = vargout[0]
        dlp = vargout[1]
        d2lp = vargout[2]
        W = -d2lp
        Psi_new = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum()  # objective for last alpha
        vargout = -src.Tools.general.feval(likfunc, hyp.lik, y, m, None, inffunc, None, 1)
        Psi_def = vargout[0]  # objective for default init f==m
        if Psi_def < Psi_new:  # if default is better, we use it
            alpha = np.zeros((n, 1))
            f = np.dot(K, alpha) + m
            vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
            lp = vargout[0]
            dlp = vargout[1]
            d2lp = vargout[2]
            W = -d2lp
            Psi_new = -lp.sum()

    isWneg = np.any(W < 0)  # flag indicating negative values of W; this happens e.g. for the Student's t likelihood
    it = 0

    while (Psi_old - Psi_new > tol) and it < maxit:  # begin Newton
        Psi_old = Psi_new
        it += 1
        if isWneg:  # stabilise the Newton direction in case W has negative values
            W = np.maximum(W, 0)  # stabilise the Hessian to guarantee positive definiteness
            tol = 1e-10
            # increase accuracy to also get the derivatives right
            # In Vanhatalo et al., "GPR with Student's t likelihood", NIPS 2009, they use
            # a more conservative strategy than we do, equivalent to the two lines below:
            # nu  = exp(hyp.lik(1));                    # degree of freedom hyperparameter
            # W  = W + 2/(nu+1)*dlp.^2;                 # add ridge according to Vanhatalo

        sW = np.sqrt(W)
        L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
        b = W * (f - m) + dlp
        dalpha = b - sW * solve_chol(L, sW * np.dot(K, b)) - alpha
        vargout = brentmin(0, smax, Nline, thr, _Psi_line, 4, dalpha, alpha, hyp, K, m, likfunc, y, inffunc)
        s = vargout[0]
        Psi_new = vargout[1]
        Nfun = vargout[2]
        alpha = vargout[3]
        f = vargout[4]
        dlp = vargout[5]
        W = vargout[6]
        isWneg = np.any(W < 0)

    infLaplace.last_alpha = alpha  # remember for next call
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 4)
    lp = vargout[0]
    dlp = vargout[1]
    d2lp = vargout[2]
    d3lp = vargout[3]

    W = -d2lp
    isWneg = np.any(W < 0)
    post = postStruct()
    post.alpha = alpha  # return the posterior parameters
    post.sW = np.sqrt(np.abs(W)) * np.sign(W)  # preserve sign in case of negative
    if isWneg:
        [ldA, iA, post.L] = _logdetA(K, W, 3)
        nlZ = np.dot(alpha.T, (f - m)) / 2.0 - lp.sum() + ldA / 2.0
        nlZ = nlZ[0]
    else:
        sW = post.sW
        post.L = np.linalg.cholesky(np.eye(n) + np.dot(sW, sW.T) * K).T
        nlZ = np.dot(alpha.T, (f - m)) / 2.0 + (np.log(np.diag(post.L)) - np.reshape(lp, (lp.shape[0],))).sum()
        nlZ = nlZ[0]

    if nargout > 2:  # do we want derivatives?
        dnlZ = dnlzStruct(hyp)  # allocate space for derivatives
        if isWneg:  # switch between Cholesky and LU decomposition mode
            Z = -post.L  # inv(K+inv(W))
            g = np.atleast_2d((iA * K).sum(axis=1)).T / 2  # deriv. of ln|B| wrt W; g = diag(inv(inv(K)+diag(W)))/2
        else:
            Z = np.tile(sW, (1, n)) * solve_chol(
                post.L, np.diag(np.reshape(sW, (sW.shape[0],)))
            )  # sW*inv(B)*sW=inv(K+inv(W))
            C = np.linalg.solve(post.L.T, np.tile(sW, (1, n)) * K)  # deriv. of ln|B| wrt W
            g = np.atleast_2d((np.diag(K) - (C ** 2).sum(axis=0).T)).T / 2.0  # g = diag(inv(inv(K)+W))/2

        dfhat = g * d3lp  # deriv. of nlZ wrt. fhat
        for ii in range(len(hyp.cov)):  # covariance hypers
            dK = src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)
            dnlZ.cov[ii] = (Z * dK).sum() / 2.0 - np.dot(alpha.T, np.dot(dK, alpha))[0, 0] / 2.0  # explicit part
            b = np.dot(dK, dlp)
            dnlZ.cov[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))[0, 0]  # implicit part

        for ii in range(len(hyp.lik)):  # likelihood hypers
            [lp_dhyp, dlp_dhyp, d2lp_dhyp] = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, ii, 3)
            dnlZ.lik[ii] = -np.dot(g.T, d2lp_dhyp)[0, 0] - lp_dhyp.sum()  # explicit part
            b = np.dot(K, dlp_dhyp)
            dnlZ.lik[ii] -= np.dot(dfhat.T, b - np.dot(K, np.dot(Z, b)))[0, 0]  # implicit part

        for ii in range(len(hyp.mean)):  # mean hypers
            dm = src.Tools.general.feval(meanfunc, hyp.mean, x, ii)
            dnlZ.mean[ii] = -np.dot(alpha.T, dm)[0, 0]  # explicit part
            dnlZ.mean[ii] -= np.dot(dfhat.T, dm - np.dot(K, np.dot(Z, dm)))[0, 0]  # implicit part

        vargout = [post, nlZ, dnlZ]
    else:
        vargout = [post, nlZ]

    return vargout
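
brentmin and _Psi_line are external to this snippet. For orientation, _Psi_line evaluates the Newton objective along the search direction and returns exactly the tuple the loop unpacks; below is a minimal sketch modeled on the likelihood calls above (the feval signature and src.Tools.general module are assumptions carried over from infLaplace itself):

import numpy as np

def _Psi_line(s, dalpha, alpha, hyp, K, m, likfunc, y, inffunc):
    # evaluate Psi(alpha + s*dalpha) = alpha'*(f - m)/2 - sum(log p(y|f))
    # with f = K*alpha + m, the objective minimized by the Newton iteration
    alpha = alpha + s * dalpha
    f = np.dot(K, alpha) + m
    vargout = src.Tools.general.feval(likfunc, hyp.lik, y, f, None, inffunc, None, 3)
    lp, dlp, d2lp = vargout[0], vargout[1], vargout[2]
    W = -d2lp
    Psi = np.dot(alpha.T, f - m) / 2.0 - lp.sum()
    return Psi, alpha, f, dlp, W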
Example #4
def infFITC(hyp, meanfunc, covfunc, likfunc, x, y, nargout=1):
    """ FITC approximation to the posterior Gaussian process. The function is
     equivalent to infExact with the covariance function:
    
     Kt = Q + G; G = diag(g); g = diag(K-Q);  Q = Ku' * inv(Quu) * Ku;
    
     where Ku and Kuu are covariances w.r.t. the inducing inputs xu, snu2 = sn2/1e6
     is the noise of the inducing inputs, and Quu = Kuu + snu2 * eye(nu).
     We fix the standard deviation of the inducing inputs snu to one per mil
     of the measurement noise's standard deviation sn.
     The implementation exploits the Woodbury matrix identity
     inv(Kt) = inv(G) - inv(G) * V' * inv(eye(nu) + V * inv(G) * V') * V * inv(G)
     in order to be applicable to large datasets. The computational complexity
     is O(n nu^2) where n is the number of data points x and nu the number of
     inducing inputs in xu.
     The function takes a specified covariance function (see kernels.py) and
     likelihood function (see likelihoods.py), and is designed to be used with
     gp.py and in conjunction with covFITC and likGauss. 
    """

    if not (likfunc[0] == "likelihoods.likGauss"):  # NOTE: no explicit call to likGauss
        raise Exception("Exact inference only possible with Gaussian likelihood")

    cov1 = covfunc[0]
    if not cov1 == ["kernels.covFITC"]:
        raise Exception("Only covFITC supported.")  # check cov

    diagK, Kuu, Ku = src.Tools.general.feval(covfunc, hyp.cov, x)  # evaluate covariance matrix
    m = src.Tools.general.feval(meanfunc, hyp.mean, x)  # evaluate mean vector
    n, D = x.shape
    nu = Kuu.shape[0]

    sn2 = np.exp(2 * hyp.lik[0])  # noise variance of likGauss
    snu2 = 1.0e-6 * sn2  # hard coded inducing inputs noise
    Luu = np.linalg.cholesky(Kuu + snu2 * np.eye(nu)).T  # Kuu + snu2*I = Luu'*Luu
    V = np.linalg.solve(Luu.T, Ku)  # V = inv(Luu')*Ku => V'*V = Q
    g_sn2 = diagK + sn2 - np.array([(V * V).sum(axis=0)]).T  # g + sn2 = diag(K) + sn2 - diag(Q)
    Lu = np.linalg.cholesky(np.eye(nu) + np.dot(V / np.tile(g_sn2.T, (nu, 1)), V.T)).T  # Lu'*Lu=I+V*diag(1/g_sn2)*V'
    r = (y - m) / np.sqrt(g_sn2)
    be = np.linalg.solve(Lu.T, np.dot(V, r / np.sqrt(g_sn2)))
    iKuu = solve_chol(Luu, np.eye(nu))  # inv(Kuu + snu2*I) = iKuu

    post = postStruct()

    post.alpha = np.linalg.solve(Luu, np.linalg.solve(Lu, be))  # return the posterior parameters
    post.L = solve_chol(np.dot(Lu, Luu), np.eye(nu)) - iKuu  # Sigma-inv(Kuu)
    post.sW = np.ones((n, 1)) / np.sqrt(sn2)  # unused for FITC prediction with gp.py

    if nargout > 1:  # do we want the marginal likelihood
        nlZ = (
            np.log(np.diag(Lu)).sum()
            + (np.log(g_sn2).sum() + n * np.log(2 * np.pi) + np.dot(r.T, r) - np.dot(be.T, be)) / 2.0
        )
        if nargout > 2:  # do we want derivatives?
            dnlZ = dnlzStruct(hyp)  # allocate space for derivatives
            al = r / np.sqrt(g_sn2) - np.dot(V.T, np.linalg.solve(Lu, be)) / g_sn2  # al = (Kt+sn2*eye(n))\y
            B = np.dot(iKuu, Ku)
            w = np.dot(B, al)
            W = np.linalg.solve(Lu.T, V / np.tile(g_sn2.T, (nu, 1)))
            for ii in range(len(hyp.cov)):
                [ddiagKi, dKuui, dKui] = src.Tools.general.feval(covfunc, hyp.cov, x, None, ii)  # eval cov deriv
                R = 2.0 * dKui - np.dot(dKuui, B)
                v = ddiagKi - np.array([(R * B).sum(axis=0)]).T  # diag part of cov deriv
                dnlZ.cov[ii] = (
                    np.dot(ddiagKi.T, 1.0 / g_sn2)
                    + np.dot(w.T, (np.dot(dKuui, w) - 2.0 * np.dot(dKui, al)))
                    - np.dot(al.T, (v * al))
                    - np.dot(np.array([(W * W).sum(axis=0)]), v)
                    - (np.dot(R, W.T) * np.dot(B, W.T)).sum()
                ) / 2.0

            dnlZ.lik = sn2 * ((1.0 / g_sn2).sum() - (np.array([(W * W).sum(axis=0)])).sum() - np.dot(al.T, al))
            # since snu2 is a fixed fraction of sn2, there is a covariance-like term in the derivative as well
            dKuui = 2 * snu2
            R = -dKuui * B
            v = -np.array([(R * B).sum(axis=0)]).T  # diag part of cov deriv
            dnlZ.lik += (
                np.dot(w.T, np.dot(dKuui, w))
                - np.dot(al.T, (v * al))
                - np.dot(np.array([(W * W).sum(axis=0)]), v)
                - (np.dot(R, W.T) * np.dot(B, W.T)).sum()
            ) / 2.0
            dnlZ.lik = dnlZ.lik[0]
            for ii in range(len(hyp.mean)):
                dnlZ.mean[ii] = np.dot(-src.Tools.general.feval(meanfunc, hyp.mean, x, ii).T, al)[0, 0]

            return [post, nlZ[0, 0], dnlZ]

        return [post, nlZ[0, 0]]

    return [post]
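
The docstring's O(n nu^2) complexity claim rests on the Woodbury identity quoted there: with Kt = G + V'*V, inverting Kt only requires a nu x nu solve plus diagonal work. A quick numeric verification of that identity (the sizes and random inputs below are illustrative):

import numpy as np

rng = np.random.RandomState(0)
n, nu = 6, 3
V = rng.randn(nu, n)                      # plays the role of inv(Luu')*Ku above
g = rng.rand(n, 1) + 0.5                  # positive diagonal of G
G = np.diag(g.ravel())
Kt = G + np.dot(V.T, V)                   # Kt = G + Q: diagonal plus low rank
iG = np.diag(1.0 / g.ravel())
inner = np.eye(nu) + V.dot(iG).dot(V.T)   # eye(nu) + V*inv(G)*V'
iKt = iG - iG.dot(V.T).dot(np.linalg.solve(inner, V.dot(iG)))
assert np.allclose(iKt, np.linalg.inv(Kt))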