Example #1
    def predict(self):
        """
        GP predictions
        Returns: predictions
        """
        Ks = []
        for d in range(self.X.shape[1]):
            K = self.kernels[d].eval(self.kernels[d].params, self.X_dims[d])
            Ks.append(K)
        f_pred = kron_mvp(Ks, kron_mvp(self.K_invs, self.q_mu))
        return f_pred
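
Every snippet on this page leans on kron_mvp, which is not shown anywhere here. Below is a minimal sketch of what such a Kronecker matrix-vector product could look like, assuming Ks is a list of square factors ordered from the slowest-varying grid dimension to the fastest; it is an illustration, not the helper these classes actually import.

import numpy as np

def kron_mvp_sketch(Ks, v):
    # Sketch only: compute (K_1 kron K_2 kron ... kron K_D) @ v without ever
    # materialising the full Kronecker product. Each pass applies one factor
    # to a reshaped view of the vector and rotates the processed axis to the back.
    x = v
    for K in Ks:
        m = K.shape[0]
        x = K @ x.reshape(m, -1)   # apply this factor along its own axis
        x = x.T.reshape(-1)        # move that axis to the end
    return x

# Quick self-check against the dense Kronecker product on small factors:
A = np.random.randn(3, 3)
B = np.random.randn(4, 4)
v = np.random.randn(12)
assert np.allclose(kron_mvp_sketch([A, B], v), np.kron(A, B) @ v)
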
Example #2
    def predict(self):
        """
        GP predictions
        Returns: predictions

        """
        Ks = []
        for i in range(self.X.shape[1]):
            K = self.kernels[i].eval(
                self.kernels[i].params,
                np.expand_dims(np.unique(self.X[:, i]), 1))
            Ks.append(K)
        f_pred = kron_mvp(Ks, kron_mvp(self.K_invs, self.q_mu))
        return f_pred
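
Both predict variants compute the same quantity, f_pred = kron_mvp(Ks, kron_mvp(self.K_invs, self.q_mu)), i.e. K (K^-1 q_mu) assembled factor by factor. The only difference is how the factors are rebuilt: Example #1 evaluates each kernel on precomputed per-dimension inputs (self.X_dims[d]), while Example #2 evaluates it on the unique values of each input column.
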
Example #3
    def grad_like(self, r, eps):
        """
        Gradient of likelihood w.r.t variational parameters
        Args:
            r (): Transformed random sample
            eps (): Random sample

        Returns: gradient w.r.t covariance, gradient w.r.t mean

        """
        if self.obs_idx is not None:
            r_obs = r[self.obs_idx]
        else:
            r_obs = r
        dr = self.likelihood_grad(r_obs, self.y)
        dr[np.isnan(dr)] = 0.
        self.dr = dr
        grads_R = []
        for d in range(len(self.Rs)):
            Rs_copy = deepcopy(self.Rs)
            n = Rs_copy[d].shape[0]
            grad_R = np.zeros((n, n))
            for i, j in zip(*np.triu_indices(n)):
                R_d = np.zeros((n, n))
                R_d[i, j] = 1.
                Rs_copy[d] = R_d
                dR_eps = kron_mvp(Rs_copy, eps)
                if self.obs_idx is not None:
                    dR_eps = dR_eps[self.obs_idx]
                grad_R[i, j] = np.sum(np.multiply(dr, dR_eps))
            grads_R.append(grad_R)
        grad_mu = np.zeros(self.n)
        if self.obs_idx is not None:
            grad_mu[self.obs_idx] = dr
        else:
            grad_mu = dr

        return grads_R, grad_mu
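
Spelled out: since r = q_mu + (R_1 kron ... kron R_D) eps, the inner loop replaces one factor with a one-hot matrix at a time, so grad_R[d][i, j] = dr^T (R_1 kron ... kron E_ij kron ... kron R_D) eps, where E_ij has a single 1 at position (i, j) and dr is restricted to the observed indices when obs_idx is set.
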
Example #4
    def grad_KL_mu(self):
        """
        Gradient of KL divergence w.r.t variational mean
        Returns: returns gradient

        """
        return kron_mvp(self.K_invs, self.q_mu - self.mu)
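
This is the closed-form gradient of the Gaussian KL term: for q = N(q_mu, S) and p = N(mu, K), d KL / d q_mu = K^-1 (q_mu - mu), which the snippet evaluates with a Kronecker MVP instead of an explicit inverse.
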
Example #5
    def line_search(self, Rs_grads, mu_grads, obj_init, r, eps):
        """
        Performs line search to find optimal step size

        Args:
            Rs_grads (): Gradients of R (variational covariances)
            mu_grads (): Gradients of mu (variational mean)
            obj_init (): Initial objective value
            r (): transformed random Gaussian sample
            eps (): random Gaussian sample

        Returns: Updated covariances, mean, objective, and step size,
         or None if no acceptable step is found

        """
        step = 1.

        while step > 1e-15:

            R_search = [
                np.clip(R + step * R_grad, 0., np.max(R))
                for (R_grad, R) in Rs_grads
            ]
            mu_search = mu_grads[1] + step * mu_grads[0]
            r_search = mu_search + kron_mvp(R_search, eps)
            obj_search, kl_search, like_search = self.eval_obj(
                R_search, mu_search, r_search)
            if obj_init - obj_search > step:
                pos_def = True
                for R in R_search:
                    if not np.all(np.linalg.eigvals(R) > 0):
                        pos_def = False
                if pos_def:
                    return R_search, mu_search, obj_search, step
            step = step * 0.5
        return None
Example #6
    def grad_KL_mu(self):
        """
        Natural gradient of KL divergence w.r.t variational mean
        Returns: returns gradient
        """
        return np.multiply(np.exp(self.q_S),
                           -kron_mvp(self.K_invs, self.mu - self.q_mu))
Example #7
    def variance(self, n_s):
        """
        Stochastic approximator of predictive variance.
         Follows "Massively Scalable GPs"
        Args:
            n_s (int): Number of iterations to run stochastic approximation

        Returns: Approximate predictive variance at grid points

        """

        if self.root_eigdecomp is None:
            self.sqrt_eig()
        if self.obs_idx is not None:
            root_K = self.root_eigdecomp[self.obs_idx, :]
        else:
            root_K = self.root_eigdecomp

        diag = kron_list_diag(self.Ks)
        samples = []
        for i in range(n_s):
            g_m = np.random.normal(size=self.m)
            g_n = np.random.normal(size=self.n)
            # W is applied elementwise everywhere else, so scale by sqrt(W)
            # elementwise here as well (a dot product would collapse to a scalar)
            right_side = np.multiply(np.sqrt(self.W), root_K.dot(g_m)) +\
                         np.sqrt(self.noise) * g_n
            r = self.opt.cg(self.Ks, right_side)
            if self.obs_idx is not None:
                Wr = np.zeros(self.m)
                Wr[self.obs_idx] = np.multiply(np.sqrt(self.W), r)
            else:
                Wr = np.multiply(np.sqrt(self.W), r)
            samples.append(kron_mvp(self.Ks, Wr))
        var = np.var(samples, axis=0)
        return np.clip(diag - var, 0, 1e12).flatten(), var
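
A hedged reading of what the sample variance estimates: under a Gaussian or Laplace-style approximation with site precisions W, the posterior covariance is (K^-1 + W)^-1 = K - K (K + W^-1)^-1 K, so the predictive variance is diag(K) minus the diagonal of the subtracted term. The Monte-Carlo draws above approximate that subtracted diagonal, which is why the method returns the kernel diagonal minus the sample variance.
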
Example #8
    def search_step(self, obj_prev, min_obj, delta_alpha,
                    step_size, max_it, t, opt_step):
        """
        Executes one step of a backtracking line search
        Args:
            obj_prev (np.array): previous objective
            min_obj (np.array): current minimum objective
            delta_alpha (np.array): change in step size
            step_size (np.array): current step size
            max_it (int): maximum number of line search iterations
            t (np.array): current line search iteration
            opt_step (np.array): optimal step size until now

        Returns: updated parameters
        """
        alpha_search = np.squeeze(self.alpha + step_size * delta_alpha)
        f_search = np.squeeze(kron_mvp(self.Ks, alpha_search)) + self.mu

        if self.k_diag is not None:
            f_search += np.multiply(self.k_diag, alpha_search)

        obj_search = self.log_joint(f_search, alpha_search)

        if min_obj > obj_search:
            opt_step = step_size
            min_obj = obj_search
        step_size = self.tau * step_size

        t = t + 1

        return obj_prev, min_obj, delta_alpha,\
            step_size, max_it, t, opt_step
Example #9
    def sample_post(self):
        """
        Draws a sample from the GPR posterior
        Returns: sample

        """

        eps = np.random.normal(size=self.n)
        return self.q_mu + kron_mvp(self.Rs, eps)
Example #10
    def cg_prod(self, Ks, p):
        """

        Args:
            p (): potential solution to linear system

        Returns: product Ap (left side of linear system)

        """
        if self.precondition is None:
            return p + np.multiply(np.sqrt(self.W),
                                   kron_mvp(Ks,
                                   np.multiply(np.sqrt(self.W), p)))
        Cp = np.multiply(self.precondition, p)
        noise = np.multiply(np.multiply(self.precondition,
                                        np.multiply(self.W, self.k_diag)), Cp)
        wkw = np.multiply(np.multiply(self.precondition, np.sqrt(self.W)),
                          kron_mvp(Ks, np.multiply(np.sqrt(self.W), Cp)))
        return noise + wkw + np.multiply(self.precondition, Cp)
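
Written out (treating W and the preconditioner as diagonals stored as vectors): the unpreconditioned branch returns (I + W^{1/2} K W^{1/2}) p, while the preconditioned branch returns C (I + diag(W * k_diag) + W^{1/2} K W^{1/2}) C p with C the diagonal preconditioner, i.e. the same system in symmetrically preconditioned form.
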
Example #11
    def run(self, its):
        """
        Runs stochastic variational inference
        Args:
            its (): Number of iterations

        Returns: Nothing, but updates instance variables

        """

        t = trange(its, leave=True)

        for i in t:
            self.calc_trace_term()
            KL_grad_R = self.grad_KL_R()
            KL_grad_mu = self.grad_KL_mu()

            eps = np.random.normal(size=self.n)
            r = self.q_mu + kron_mvp(self.Rs, eps)
            like_grad_R, like_grad_mu = self.grad_like(r, eps)
            grad_R = [
                -KL_grad_R[i] + like_grad_R[i] for i in range(len(KL_grad_R))
            ]
            grad_mu = -KL_grad_mu + like_grad_mu
            R_and_grads = list(zip(grad_R, self.Rs))
            mu_and_grad = (grad_mu, self.q_mu)

            obj, kl, like = self.eval_obj(self.Rs, self.q_mu, r)
            self.elbos.append(-obj)

            if self.linesearch:
                ls_res = self.line_search(R_and_grads, mu_and_grad, obj, r,
                                          eps)
                step = 0.
                if ls_res is not None:
                    step = ls_res[-1]
                t.set_description("ELBO: " + '{0:.2f}'.format(-obj) +
                                  " | KL: " + '{0:.2f}'.format(kl) +
                                  " | logL: " + '{0:.2f}'.format(like) +
                                  " | step: " + str(step))
                if ls_res is not None:
                    self.Rs = ls_res[0]
                    self.q_mu = ls_res[1]
            else:
                t.set_description("ELBO: " + '{0:.2f}'.format(-obj) +
                                  " | KL: " + '{0:.2f}'.format(kl) +
                                  " | logL: " + '{0:.2f}'.format(like))
                self.q_mu, self.mu_params = \
                    self.optimizer.step(mu_and_grad, self.mu_params)
                for d in range(self.d):
                    self.Rs[d], self.R_params[d] = \
                        self.optimizer.step(R_and_grads[d], self.R_params[d])
        self.f_pred = self.predict()
        return
Example #12
    def variance_pmap(self, n_s=30):
        """
        Stochastic approximator of predictive variance.
         Follows "Massively Scalable GPs"
        Args:
            n_s (int): Number of iterations to run stochastic approximation

        Returns: Approximate predictive variance at grid points

        """
        if self.eigvals is None or self.eigvecs is None:
            self.eig_decomp()

        Q = self.eigvecs
        Q_t = [v.T for v in self.eigvecs]
        Vr = [np.nan_to_num(np.sqrt(e)) for e in self.eigvals]

        diag = kron_list_diag(self.Ks) + self.noise
        samples = []

        for i in range(n_s):
            g_m = np.random.normal(size=self.m)
            g_n = np.random.normal(size=self.n)

            Kroot_g = kron_mvp(Q, kron_mvp(Vr, kron_mvp(Q_t, g_m)))
            if self.obs_idx is not None:
                Kroot_g = Kroot_g[self.obs_idx]
            right_side = Kroot_g + np.sqrt(self.noise) * g_n

            r = self.cg_opt.cg(self.Ks, right_side)
            if self.obs_idx is not None:
                Wr = np.zeros(self.m)
                Wr[self.obs_idx] = r
            else:
                Wr = r
            samples.append(kron_mvp(self.Ks, Wr))

        est = np.var(samples, axis=0)
        return np.clip(diag - est, 0, a_max=None).flatten()
Example #13
    def KLqp(self, S, q_mu):
        """
        Calculates KL divergence between q and p
        Args:
            S (): Variational variances
            q_mu (): Variational mean
        Returns: KL divergence between q and p
        """

        k_inv_mu = kron_mvp(self.K_invs, self.mu - q_mu)
        mu_penalty = np.sum(np.multiply(self.mu - q_mu, k_inv_mu))
        det_S = np.sum(S)
        trace_term = np.sum(np.multiply(self.k_inv_diag, np.exp(S)))
        kl = 0.5 * (self.det_K - self.m - det_S + trace_term + mu_penalty)
        return kl
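
For reference, this assembles the usual closed form of the KL divergence between Gaussians, with S holding the log of the variational variances and (assuming) det_K holding log|K|:

    KL(q || p) = 0.5 * ( log|K| - m - sum(S)
                         + tr(K^-1 diag(exp(S)))
                         + (mu - q_mu)^T K^-1 (mu - q_mu) )

det_S = sum(S) is therefore the log-determinant of the diagonal variational covariance, and trace_term uses only the diagonal of K^-1.
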
Example #14
    def KL_calc(self, Rs, q_mu):
        """
        Calculates KL divergence between q and p
        Args:
            Rs (): Variational covariance
            q_mu (): Variational mean

        Returns: KL divergence between q and p

        """
        k_inv_mu = kron_mvp(self.K_invs, self.mu - q_mu)
        mu_penalty = np.sum(np.multiply(self.mu - q_mu, k_inv_mu))
        det_S = self.log_det_S(Rs)
        trace_term = self.calc_trace_term(Rs)[0]
        kl = 0.5 * (self.det_K - self.n - det_S + trace_term + mu_penalty)
        return max(0, kl)
Example #15
    def step(self, max_it, it, delta):
        """
        Runs one step of Kronecker inference
        Args:
            max_it (int): maximum number of Kronecker iterations
            it (int): current iteration
            delta (np.array): convergence measure from the previous iteration

        Returns: max iterations, current iteration, convergence measure,
         step size, objective value

        """

        self.f = kron_mvp(self.Ks, self.alpha) + self.mu
        if self.k_diag is not None:
            self.f += np.multiply(self.alpha, self.k_diag)
        psi = self.log_joint(self.f, self.alpha)
        self.update_derivs()

        b = np.multiply(self.W, self.f - self.mu) + self.grads
        if self.precondition is not None:
            z = self.opt.cg(self.Ks, np.multiply(self.precondition,
                            np.multiply(1.0/np.sqrt(self.W), b)))
        else:
            z = self.opt.cg(self.Ks, np.multiply(1.0/np.sqrt(self.W), b))

        delta_alpha = np.multiply(np.sqrt(self.W), z) - self.alpha
        step_size = self.line_search(delta_alpha, psi, 20)
        delta = step_size
        if delta > 1e-9:
            self.alpha = self.alpha + delta_alpha*step_size
            self.alpha = np.where(np.isnan(self.alpha),
                                  np.ones_like(self.alpha) * 1e-9, self.alpha)

        it = it + 1

        return max_it, it, delta, step_size, psi
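
A hedged reading: this is the standard Newton step for a Laplace approximation. With b = W (f - mu) + grad log p(y | f), the CG call solves (I + W^{1/2} K W^{1/2}) z = W^{-1/2} b (see cg_prod in Example #10), so the candidate update W^{1/2} z equals (K + W^{-1})^{-1} (f - mu + W^{-1} grad log p(y | f)), computed using only Kronecker MVPs; the line search then damps the step.
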
Example #16
    def run(self, max_it):
        """
        Runs Kronecker inference. Updates instance variables.

        Args:
            max_it (int): maximum number of iterations.

        Returns: Nothing, but updates instance variables

        """
        if self.obs_idx is not None:
            k_diag = np.ones(self.X.shape[0]) * 1e12
            k_diag[self.obs_idx] = self.noise
            self.k_diag = k_diag
            self.precondition = np.clip(1.0 / np.sqrt(self.k_diag), 0, 1)
        else:
            self.k_diag = None
            self.precondition = None

        delta = sys.float_info.max
        it = 0

        t = trange(max_it)

        for i in t:
            max_it, it, delta, step, psi = self.step(max_it, it, delta)
            t.set_description("Objective: " + '{0:.2f}'.format(psi) +
                              " | Step Size: " + '{0:.2f}'.format(step))
            if delta < 1e-9:
                break

        self.f_pred = kron_mvp(self.Ks, self.alpha) + self.mu
        self.update_derivs()

        return