Example #1
    def getCovariances(self, logtheta):
        """[L,Alpha] = getCovariances()
        - special overridden version of getCovariance (gpr.py)
        - here: EP updates are employed"""


        if (logtheta == self.logtheta).all() and (self.cached_L is not None):
            return [self.cached_L, self.cached_alpha]

        #1. copy logtheta
        self.logtheta = logtheta.copy()
        
        assert (self.Nlogtheta) == logtheta.shape[0], "incorrect shape of kernel parameter matrix"

        #2. vanilla Kernel matrix
        K = self.covar.K(logtheta[self.IlogthetaK], self.x)

        #3. run EP updates
        #EP effectively creates a new Kernel matrix (with input dependent noise) and new effective training means
        #in addition we store a 0th moment which is used for the lMl calculation
        self.updateEP(K, logtheta[self.IlogthetaL])
        #updateEP computes the site parameters which we use here to calculate the full covariance for test predictions
        Keff = (K + SP.diag(self.vEP))

        self.cached_L = linalg.cholesky(Keff)
        self.cached_alpha = solve_chol(self.cached_L.transpose(), self.muEP)
        return [self.cached_L, self.cached_alpha]
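
The cached pair [L, alpha] is what a GP predictor typically reuses: alpha solves Keff * alpha = muEP through the Cholesky factor instead of an explicit inverse. A minimal sketch of that linear algebra, using scipy's cho_factor/cho_solve rather than the repository's solve_chol helper:

import numpy as np
from scipy.linalg import cho_factor, cho_solve

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
Keff = A @ A.T + 5 * np.eye(5)      # symmetric positive definite "effective" kernel
m = rng.standard_normal(5)          # effective training targets (muEP in the snippet)

c, low = cho_factor(Keff)           # factorise once ...
alpha = cho_solve((c, low), m)      # ... and reuse the factor for every solve

assert np.allclose(Keff @ alpha, m)
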
    def find_mode_newton(self, return_full=False):
        """
        Newton search for mode of p(y|f)p(f)
        
        from GP book, algorithm 3.1, added step size
        """
        K = self.gp.K

        if self.newton_start is None:
            f = zeros(len(K))
        else:
            f = self.newton_start

        if return_full:
            steps = [f]

        iteration = 0
        norm_difference = inf
        objective_value = -inf

        while iteration < self.newton_max_iterations and norm_difference > self.newton_epsilon:
            # from GP book, algorithm 3.1, added step size
            # scale log_lik_grad_vector and K^-1 f = a

            w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
            w_sqrt = sqrt(w)

            # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
            L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
            b = f * w + self.newton_step * self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

            # a=b-diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt).dot(diag(w_sqrt).dot(K.dot(b))))
            a = w_sqrt * (K.dot(b))
            a = solve_triangular(L, a, lower=True)
            a = solve_triangular(L.T, a, lower=False)
            a = w_sqrt * a
            a = b - a

            f_new = K.dot(self.newton_step * a)

            # convergence stuff and next iteration
            objective_value_new = -0.5 * a.T.dot(f) + sum(self.gp.likelihood.log_lik_vector(self.gp.y, f))
            norm_difference = norm(f - f_new)

            if objective_value_new > objective_value:
                f = f_new
                if return_full:
                    steps.append(f)
            else:
                self.newton_step /= 2

            iteration += 1
            objective_value = objective_value_new

        self.computed = True

        if return_full:
            return f, L, asarray(steps)
        else:
            return f
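
The numerically delicate part of Algorithm 3.1 is solving with (I + W K) without forming it directly; the code instead factorises B = I + W^(1/2) K W^(1/2), which is symmetric and well conditioned. A minimal sketch, with illustrative names, checking that the triangular-solve route above agrees with a direct solve:

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
n = 6
A = rng.standard_normal((n, n))
K = A @ A.T + n * np.eye(n)          # SPD "kernel" matrix
w = rng.uniform(0.5, 2.0, n)         # negative log-likelihood Hessian diagonal
w_sqrt = np.sqrt(w)
b = rng.standard_normal(n)

# identity from the comment: diag(w_sqrt) @ K @ diag(w_sqrt) == (K.T*w_sqrt).T*w_sqrt
assert np.allclose(np.diag(w_sqrt) @ K @ np.diag(w_sqrt), (K.T * w_sqrt).T * w_sqrt)

L = np.linalg.cholesky(np.eye(n) + (K.T * w_sqrt).T * w_sqrt)   # lower factor of B
a = w_sqrt * (K @ b)
a = solve_triangular(L, a, lower=True)
a = solve_triangular(L.T, a, lower=False)
a = b - w_sqrt * a

# same result as solving (I + W K) a = b directly
assert np.allclose(a, np.linalg.solve(np.eye(n) + np.diag(w) @ K, b))
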
    def sample_conditional(self, index):
        if index < 0 or index >= self.dimension:
            raise ValueError("Conditional index out of bounds")
        
        # all indices but the current
        cond_inds = hstack((arange(0, index), arange(index + 1, self.dimension)))
#         print "conditioning on index %d" % index
#         print "other indices:", cond_inds
        
        # partition the Gaussian x|y, precompute matrix inversion
        mu_x = self.full_target.mu[index]
        Sigma_xx = self.full_Sigma[index, index]
        mu_y = self.full_target.mu[cond_inds]
        # plain fancy indexing with (cond_inds, cond_inds) would only pick out the diagonal,
        # so select the full conditioning sub-block instead
        Sigma_yy = self.full_Sigma[cond_inds][:, cond_inds]
        L_yy = cholesky(Sigma_yy)
        Sigma_xy = self.full_Sigma[index, cond_inds]
        Sigma_yx = self.full_Sigma[cond_inds, index]
        
        y = self.current_state[cond_inds]
        
        # mu=mu_x+Sigma_xy Sigma_yy^(-1)(y-mu_y)
        mu = mu_x + Sigma_xy.dot(MatrixTools.cholesky_solve(L_yy, y - mu_y))
        
        # Sigma = Sigma_xx - Sigma_xy Sigma_yy^(-1) Sigma_yx = Sigma_xx - Sigma_xy L_yy^(-T) L_yy^(-1) Sigma_yx
        Sigma = Sigma_xx - Sigma_xy.dot(MatrixTools.cholesky_solve(L_yy, Sigma_yx))
        
        # return sample from x|y
        conditional_sample = randn() * sqrt(Sigma) + mu
        return conditional_sample
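
sample_conditional is the standard Gaussian conditioning formula mu_{x|y} = mu_x + Sigma_xy Sigma_yy^(-1) (y - mu_y) and Sigma_{x|y} = Sigma_xx - Sigma_xy Sigma_yy^(-1) Sigma_yx. A standalone sketch of the same computation in plain numpy (without the MatrixTools helper), conditioning the first coordinate of a fixed 2-D Gaussian on the second:

import numpy as np

mu = np.array([1.0, -2.0])
Sigma = np.array([[2.0, 0.8],
                  [0.8, 1.0]])
y = 0.5                                   # observed value of the second coordinate

# mu_{x|y} = mu_x + Sigma_xy Sigma_yy^(-1) (y - mu_y)
mu_cond = mu[0] + Sigma[0, 1] / Sigma[1, 1] * (y - mu[1])
# Sigma_{x|y} = Sigma_xx - Sigma_xy Sigma_yy^(-1) Sigma_yx
var_cond = Sigma[0, 0] - Sigma[0, 1] ** 2 / Sigma[1, 1]

rng = np.random.default_rng(0)
sample = rng.standard_normal() * np.sqrt(var_cond) + mu_cond
print(mu_cond, var_cond, sample)
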
Example #4
 def __init__(self, mu=asarray([0, 0]), Sigma=eye(2), is_cholesky=False):
     DensityFunction.__init__(self, len(Sigma))
     
     assert(len(shape(mu)) == 1)
     assert(max(shape(Sigma)) == len(mu))
     self.mu = mu
     if is_cholesky: 
         self.L = Sigma
     else: 
         assert(shape(Sigma)[0] == shape(Sigma)[1])
         self.L = cholesky(Sigma)
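
The constructor only stores mu and a Cholesky factor of Sigma. Assuming a lower-triangular factor with L @ L.T == Sigma (numpy's convention), a minimal sketch of how such a factor is typically used to draw correlated samples:

import numpy as np

mu = np.array([0.0, 0.0])
Sigma = np.array([[1.0, 0.5],
                  [0.5, 2.0]])
L = np.linalg.cholesky(Sigma)

rng = np.random.default_rng(0)
samples = mu + rng.standard_normal((1000, len(mu))) @ L.T   # each row ~ N(mu, Sigma)
print(np.cov(samples.T))                                    # close to Sigma
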
Example #5
    def predict(self, X_test, f_mode=None):
        """
        Predictions for GP with Laplace approximation.
        
        from GP book, algorithm 3.2,
        
        """
        if f_mode is None:
            f_mode = self.find_mode_newton()

        predictions = zeros(len(X_test))

        K = self.gp.K
        K_train_test = self.gp.covariance.compute(self.gp.X, X_test)

        w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f_mode)
        w_sqrt = sqrt(w)

        # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
        L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)

        # iterator for all testing points
        for i in range(len(X_test)):
            k = K_train_test[:, i]
            k_self = self.gp.covariance.compute([X_test[i]], [X_test[i]])[0]

            f_mean = k.dot(
                self.gp.likelihood.log_lik_grad_vector(self.gp.y, f_mode))
            v = solve_triangular(L, w_sqrt * k, lower=True)
            f_var = k_self - v.T.dot(v)

            predictions[i] = integrate.quad(
                lambda x: norm.pdf(x, f_mean, f_var), -inf, inf)[0]
#            # integrate over Gaussian using some crude numerical integration
#            samples=randn(1000)*sqrt(f_var) + f_mean
#
#            log_liks=self.gp.likelihood.log_lik_vector(1.0, samples)
#            predictions[i]=1.0/len(samples)*GPTools.log_sum_exp(log_liks)

        return predictions
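
Each test point gets a Gaussian approximation N(f_mean, f_var) of its latent value, and the prediction is that Gaussian pushed through the likelihood. A minimal sketch of that last step, assuming a logistic link (an assumption here, since the likelihood object is not shown); note that scipy.stats.norm takes a standard deviation as its scale argument, hence the sqrt(f_var):

import numpy as np
from scipy import integrate
from scipy.stats import norm

def predictive_probability(f_mean, f_var):
    # integrate sigmoid(f) against the latent Gaussian N(f_mean, f_var)
    sigmoid = lambda f: 1.0 / (1.0 + np.exp(-f))
    integrand = lambda f: sigmoid(f) * norm.pdf(f, loc=f_mean, scale=np.sqrt(f_var))
    return integrate.quad(integrand, -np.inf, np.inf)[0]

print(predictive_probability(f_mean=0.3, f_var=1.5))
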
Example #7
def sigma_points(xm, P, kappa):
    """
    Calculate the Sigma Points of an unscented Kalman filter

    Mark Wickert December 2017
    Translated P. Kim's program from m-code
    """
    n = xm.size
    Xi = np.zeros((n, 2 * n + 1))  # sigma points = col of Xi
    W = np.zeros(2 * n + 1)
    Xi[:, 0, None] = xm
    W[0] = kappa / (n + kappa)

    U = cholesky((n + kappa) * P)  # U'*U = (n+kappa)*P

    for k in range(n):
        Xi[:, k + 1, None] = xm + U[k, None, :].T  # row of U
        W[k + 1] = 1 / (2 * (n + kappa))

    for k in range(n):
        Xi[:, n + k + 1, None] = xm - U[k, None, :].T
        W[n + k + 1] = 1 / (2 * (n + kappa))

    return Xi, W
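
A quick usage sketch for the function above with a 2-state filter, assuming numpy is imported as np and cholesky is scipy.linalg.cholesky, whose default upper factor satisfies U'*U = (n+kappa)*P as the comment states; the weights sum to one and the weighted mean of the sigma points recovers xm:

import numpy as np

xm = np.array([[1.0], [2.0]])        # 2-state mean as a column vector
P = np.array([[4.0, 1.0],
              [1.0, 3.0]])           # state covariance
kappa = 1.0

Xi, W = sigma_points(xm, P, kappa)
print(Xi.shape, W.shape)             # (2, 5) sigma points, 5 weights
print(W.sum())                       # weights sum to 1
print(Xi @ W)                        # weighted mean recovers xm
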
Example #8
        exit()
    
    experiment_dir_base = str(sys.argv[1])
    n = int(str(sys.argv[2]))
    
    # loop over parameters here
    
    experiment_dir = experiment_dir_base + str(os.path.abspath(sys.argv[0])).split(os.sep)[-1].split(".")[0] + os.sep
    print "running experiments", n, "times at base", experiment_dir
   
    # load data
    data,labels=GPData.get_glass_data()

    # normalise and whiten dataset
    data-=mean(data, 0)
    L=cholesky(cov(data.T))
    data=solve_triangular(L, data.T, lower=True).T
    dim=shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior=Gaussian(mu=0*ones(dim), Sigma=eye(dim)*5)
    distribution=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)

    sigma = 23.0
    print "using sigma", sigma
    kernel = GaussianKernel(sigma=sigma)
    
    for i in range(n):
        
        savetxt(graphlab_lines.add_edge(node, out_message,"R_t"), Rt)
        savetxt(graphlab_lines.add_edge(node, out_message,"P_t"), Pt)
        
        savetxt(graphlab_lines.add_edge(node, out_message,"W"), Ws.dot(Wt.T))

print "precomputing systems for messages from non-observed nodes"
graphlab_lines.lines.append(os.linesep + "# edges with non-observed targets")
for edge in edges:
    # exclude edges which involve observed nodes
    is_edge_target_observed=len(Set(observations.keys()).intersection(Set(edge)))>0
    if not is_edge_target_observed:
        graphlab_lines.new_edge_observed_target(edge[1], edge[0])
        
        data_source=data[edge][0]
        Ks=kernel.kernel(data_source)
        Ls=cholesky(Ks+eye(shape(Ks)[0])*reg_lambda)
        
        Ls_filename=graphlab_lines.add_edge(edge[1], edge[0],"L_s")
#        print Ls_filename
        savetxt(Ls_filename, Ls)

print "precomputing (non-symmetric) kernels for incoming messages at a node"
graphlab_lines.lines.append("# non-observed nodes")
for node in graph:
    added_node=False
    
    for in_message in graph[node]:
        for out_message in graph[node]:
            if in_message==out_message:
                continue
            
Example #10
if __name__ == '__main__':
    # load data
    data, labels = GPData.get_glass_data()

    # throw away some data
    n = 250
    seed(1)
    idx = permutation(len(data))
    idx = idx[:n]
    data = data[idx]
    labels = labels[idx]

    # normalise and whiten dataset
    data -= mean(data, 0)
    L = cholesky(cov(data.T))
    data = solve_triangular(L, data.T, lower=True).T
    dim = shape(data)[1]

    # prior on theta and posterior target estimate
    theta_prior = Gaussian(mu=0 * ones(dim), Sigma=eye(dim) * 5)
    target=PseudoMarginalHyperparameterDistribution(data, labels, \
                                                    n_importance=100, prior=theta_prior, \
                                                    ridge=1e-3)

    # create sampler
    burnin = 10000
    num_iterations = burnin + 300000
    kernel = GaussianKernel(sigma=23.0)
    sampler = KameleonWindowLearnScale(target, kernel, stop_adapt=burnin)
    #    sampler=AdaptiveMetropolisLearnScale(target)
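
Both this example and Example #8 whiten the data by centring it and solving against the Cholesky factor of its covariance. A standalone sketch of that preprocessing step, assuming a lower-triangular factor (consistent with lower=True in the solve):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
raw = rng.standard_normal((500, 3)) @ np.array([[2.0, 0.0, 0.0],
                                                [0.5, 1.0, 0.0],
                                                [0.3, 0.2, 0.5]]).T

data = raw - np.mean(raw, 0)
L = np.linalg.cholesky(np.cov(data.T))
data = solve_triangular(L, data.T, lower=True).T   # solve L z = x for every point

print(np.cov(data.T))   # approximately the 3x3 identity after whitening
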
Example #11
    def find_mode_newton(self, return_full=False):
        """
        Newton search for mode of p(y|f)p(f)
        
        from GP book, algorithm 3.1, added step size
        """
        K = self.gp.K

        if self.newton_start is None:
            f = zeros(len(K))
        else:
            f = self.newton_start

        if return_full:
            steps = [f]

        iteration = 0
        norm_difference = inf
        objective_value = -inf

        while iteration < self.newton_max_iterations and norm_difference > self.newton_epsilon:
            # from GP book, algorithm 3.1, added step size
            # scale log_lik_grad_vector and K^-1 f = a

            w = -self.gp.likelihood.log_lik_hessian_vector(self.gp.y, f)
            w_sqrt = sqrt(w)

            # diag(w_sqrt).dot(K.dot(diag(w_sqrt))) == (K.T*w_sqrt).T*w_sqrt
            L = cholesky(eye(len(K)) + (K.T * w_sqrt).T * w_sqrt)
            b = f * w + self.newton_step * \
                self.gp.likelihood.log_lik_grad_vector(self.gp.y, f)

            # a=b-diag(w_sqrt).dot(inv(eye(len(K)) + (K.T*w_sqrt).T*w_sqrt).dot(diag(w_sqrt).dot(K.dot(b))))
            a = (w_sqrt * (K.dot(b)))
            a = solve_triangular(L, a, lower=True)
            a = solve_triangular(L.T, a, lower=False)
            a = w_sqrt * a
            a = b - a

            f_new = K.dot(self.newton_step * a)

            # convergence stuff and next iteration
            objective_value_new = -0.5 * a.T.dot(f) + \
                                sum(self.gp.likelihood.log_lik_vector(self.gp.y, f))
            norm_difference = norm(f - f_new)

            if objective_value_new > objective_value:
                f = f_new
                if return_full:
                    steps.append(f)
            else:
                self.newton_step /= 2

            iteration += 1
            objective_value = objective_value_new

        self.computed = True

        if return_full:
            return f, L, asarray(steps)
        else:
            return f
Example #12
    def updateEP(self, K, logthetaL=None):
        """update a kernel matrix K using Ep approximation
        [K,t,C0] = updateEP(K,logthetaL)
        logthetaL: likelihood hyperparameters
        t: new means of training targets
        K: new effective kernel matrix
        C0:0th moments
        """
        assert K.shape[0] == K.shape[1], "Kernel matrix must be square"
        assert K.shape[0] == self.n, "Kernel matrix has wrong dimension"
        #approximate site parameters; 3 moments
        # note g is in natural parameter representation (1,2)
        g = SP.zeros([self.n, 2])
        # a copy for damping
        g2 = SP.zeros([self.n, 2])
        # the 0th moment is just captured in z
        z = SP.zeros([self.n])
        # damping factors
        damp = SP.ones([self.n])
        #approx is
        #p(f) = N(f|mu,Sigma)
        # where Sigma = (K^{-1} + PI^{-1})^{-1}; PI is created from the diagonal
        # entries in g; PI = diag(Var(g))
        # mu = Sigma*PI^{-1}*Mean(g)
        # where the mean is also taken from the site parameters in g

        #add some jitter to make it invertible
        K += SP.eye(K.shape[0]) * 1E-6
        #initialize current approx. of full covariance
        Sigma = K.copy()
        #invert Kernel matrix; which is used later on
        #TODO: replace by chol
        KI = linalg.inv(K)
        #current approx. mean
        mu = SP.zeros([self.n])
        
        #conversion nat. parameter/moment representation
        n2mode = lambda x: SP.array([x[0] / x[1], 1 / x[1]])
        #set hyperparameter of likelihood object
        self.likelihood.setLogtheta(logthetaL)

        for nep in range(self.Nep):
            #get order of site function update
            perm = SP.random.permutation(self.n)
            perm = SP.arange(self.n)
            for ni in perm:
                #cavity as natural parameter representation
                cav_np = n2mode([mu[ni], Sigma[ni, ni]]) - g[ni]
                #ensure we don't have negative variances. good idea?
                cav_np[1] = abs(cav_np[1])
                #calculate expectation values (int_, int_y,int_y^2)
                ML = self.likelihood.calcExpectations(self.t[ni], cav_np, x=self.x[ni])
                #the 1st and 2nd moments can be back-calculated to new site parameters
                #update the site parameters;
                #in natural parameters this is just dividing out the site function; v. convenient
                gn = n2mode(ML[0:2]) - cav_np
                #delta gn in nat. parameters
                dg = gn - g[ni]
                #difference of second moment (old-new)
                ds2 = gn[1] - g[ni, 1]
                #update with damping factor damp[ni]
                g[ni] = g[ni] + damp[ni] * dg
                if(g[ni, 1] < 0):
                    g[ni, 1] = 1E-10
                z[ni] = ML[2]
                if 1:
                    #rank one updates
                    Sigma2 = Sigma
                    Sigma = Sigma - ds2 / (1 + ds2 * Sigma[ni, ni]) * SP.outer(Sigma[:, ni], Sigma[ni, :])
                    if 1:
                        #check that Sigma is still pos. definite, otherwise we need to do some damping...
                        try:
                            Csigma = linalg.cholesky(Sigma)
                        #except linalg.linalg.LinAlgError:
                        except LinAlgError:
                            logging.debug('damping')
                            Sigma = Sigma2
                            g[ni] = g2[ni]
                            #increase damping factor
                            damp[ni] *= 0.9
                            pass
                    #update mu; mu[i] = Sigma[i,i]*(1/Var(g[i]))*Mean(g[i])
                    #as g is in nat. parameters this is always like this
                    mu = SP.dot(Sigma, g[:, 0])
                else:
                    #slow updates
                    Sigma = linalg.inv(KI + SP.diag(g[:, 1]));
                    mu = SP.dot(Sigma, g[:, 0])
                pass
            #after every sweep recalculate entire covariance structure
            [Sigma, mu, lml] = self.epComputeParams(K, KI, g)
            
            #create a copy for damping
            g2 = g.copy()
            pass
            
        if nep == (self.Nep - 1):
            #LG.warn('maximum number of EP iterations reached')
            pass
        #update site parameters
        self.muEP = g[:, 0] / g[:, 1]
        self.vEP = 1 / g[:, 1]
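
The inner loop refreshes Sigma with a Sherman-Morrison rank-one update rather than re-inverting (K^{-1} + diag(g[:,1])). A minimal numeric check, with illustrative names, that the two routes agree when a single site precision changes by ds2:

import numpy as np

rng = np.random.default_rng(0)
n, i, ds2 = 5, 2, 0.7
A = rng.standard_normal((n, n))
K = A @ A.T + n * np.eye(n)              # SPD kernel matrix
prec = rng.uniform(0.5, 2.0, n)          # current site precisions g[:,1]

Sigma = np.linalg.inv(np.linalg.inv(K) + np.diag(prec))

# rank-one downdate after increasing site i's precision by ds2
Sigma_fast = Sigma - ds2 / (1 + ds2 * Sigma[i, i]) * np.outer(Sigma[:, i], Sigma[i, :])

prec_new = prec.copy()
prec_new[i] += ds2
Sigma_slow = np.linalg.inv(np.linalg.inv(K) + np.diag(prec_new))

assert np.allclose(Sigma_fast, Sigma_slow)
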
 def precompute(self):
     # collect lines for Graphlab graph definition file for full rank case
     graphlab_lines=GraphlabLines(output_filename=self.output_filename)
                                         
     # compute all non-symmetric kernels for incoming messages at a node
     print "precomputing (non-symmetric) kernels for incoming messages at a node"
     graphlab_lines.lines.append("# non-observed nodes")
     for node in self.graph:
         added_node=False
         
         for in_message in self.graph[node]:
             for out_message in self.graph[node]:
                 if in_message==out_message:
                     continue
                 
                 # don't add nodes which have no kernels, and only do so once if they do
                 if not added_node:
                     graphlab_lines.new_non_observed_node(node)
                     added_node=True
                     
                 edge_in_message=(node, in_message)
                 edge_out_message=(out_message, node)
                 
                 lhs=self.data[edge_in_message][0]
                 rhs=self.data[edge_out_message][1]
                 lhs=reshape(lhs, (len(lhs),1))
                 rhs=reshape(rhs, (len(rhs),1))
                 K=self.kernel.kernel(lhs,rhs)
                 graphlab_lines.add_non_observed_node(node, out_message, in_message, K)
         
     print "precomputing kernel (vectors) at observed nodes"
     graphlab_lines.lines.append(os.linesep + "# observed nodes")
     for node, observation in self.observations.items():
         graphlab_lines.new_observed_node(node)
         
         for out_message in self.graph[node]:
             edge=(out_message, node)
             lhs=self.data[edge][1]
             lhs=reshape(lhs, (len(lhs), 1))
             rhs=[[observation]]
             K=self.kernel.kernel(lhs, rhs)
             graphlab_lines.add_observed_node(node, out_message, K)
             
     
     # now precompute systems for inference
     
     print "precomputing systems for messages from observed nodes"
     graphlab_lines.lines.append(os.linesep + "# edges with observed targets")
     for node, observation in self.observations.items():
         for out_message in self.graph[node]:
             edge=(out_message, node)
             graphlab_lines.new_edge_observed_target(node, out_message)
             
             data_source=self.data[edge][0]
             data_source=reshape(data_source, (len(data_source), 1))
             data_target=self.data[edge][1]
             data_target=reshape(data_target, (len(data_target), 1))
     
             Ks=self.kernel.kernel(data_source)
             Kt=self.kernel.kernel(data_target)
             
             Ls=cholesky(Ks+eye(shape(Ks)[0])*self.reg_lambda)
             Lt=cholesky(Kt+eye(shape(Kt)[0])*self.reg_lambda)
             
             graphlab_lines.add_edge(node, out_message,"L_s", Ls)
             graphlab_lines.add_edge(node, out_message,"L_t", Lt)
     
     print "precomputing systems for messages from non-observed nodes"
     graphlab_lines.lines.append(os.linesep + "# edges with non-observed targets")
     for edge in self.edges:
         # exclude edges which involve observed nodes
         is_edge_target_observed=len(Set(self.observations.keys()).intersection(Set(edge)))>0
         if not is_edge_target_observed:
             graphlab_lines.new_edge_observed_target(edge[1], edge[0])
             
             data_source=self.data[edge][0]
             data_source=reshape(data_source, (len(data_source), 1))
             Ks=self.kernel.kernel(data_source)
             Ls=cholesky(Ks+eye(shape(Ks)[0])*self.reg_lambda)
             graphlab_lines.add_edge(edge[1], edge[0],"L_s", Ls)
             
     # write graph definition file to disc
     graphlab_lines.flush()
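
precompute only writes out Cholesky factors of regularised kernel matrices; the downstream GraphLab program (not shown here) presumably solves against them. A minimal sketch of what such a stored factor buys, namely solving (Ks + lambda*I) x = b with two triangular solves instead of an explicit inverse:

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
X = rng.standard_normal((20, 1))
Ks = np.exp(-0.5 * (X - X.T) ** 2)                 # Gaussian kernel matrix (illustrative)
reg_lambda = 1e-3
Ls = np.linalg.cholesky(Ks + np.eye(len(Ks)) * reg_lambda)   # lower-triangular factor

b = rng.standard_normal(20)
x = solve_triangular(Ls, b, lower=True)
x = solve_triangular(Ls.T, x, lower=False)

assert np.allclose((Ks + np.eye(len(Ks)) * reg_lambda) @ x, b)
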