    def test_par_gradient(self):
        """Call ``RBFGauss.der_par`` on a small 2D point set.

        The point set consists of the origin followed by the positive and the
        negative unit vectors. The visible body makes no assertion on the
        returned derivative; it only exercises the call.
        """
        dim = 2
        # columns: origin, +e_1, +e_2, -e_1, -e_2
        points = np.hstack((np.zeros((dim, 1)), np.eye(dim), -np.eye(dim)))
        first_coord = points[0, :]  # first row of the point set (not used below)

        par = np.array([[1, 1, 3]], dtype=float)
        kern = RBFGauss(dim, par)
        # der_par receives the parameters squeezed to a 1D array
        dK_dpar = kern.der_par(par.squeeze(), points)
 @classmethod
 def setUpClass(cls):
     """Create the kernels and data sets shared by the tests of this class.

     unittest invokes ``setUpClass()`` on the class without arguments, so the
     hook has to be a classmethod in order to receive ``cls``.

     Attributes set on the class:

     * ``par_1d``, ``par_2d`` -- kernel parameter arrays of shape (1, 2) and
       (1, 3).
     * ``kern_rbf_1d``, ``kern_rbf_2d`` -- ``RBFGauss`` kernels for input
       dimension 1 and 2 built from those parameters.
     * ``data_1d`` -- array of shape (1, 3).
     * ``data_2d`` -- array of shape (2, 5): origin, +/- unit vectors.
     * ``test_data_1d`` -- 50 equally spaced points on [-5, 5], shape (1, 50).
     * ``test_data_2d`` -- 50 random draws from a standard 2D normal
       distribution, shape (2, 50).
     """
     cls.par_1d = np.array([[1, 3]])
     cls.par_2d = np.array([[1, 3, 3]])
     cls.kern_rbf_1d = RBFGauss(1, cls.par_1d)
     cls.kern_rbf_2d = RBFGauss(2, cls.par_2d)
     cls.data_1d = np.array([[1, -1, 0]], dtype=float)
     # columns: origin, +e_1, +e_2, -e_1, -e_2
     cls.data_2d = np.hstack((np.zeros((2, 1)), np.eye(2), -np.eye(2)))
     cls.test_data_1d = np.atleast_2d(np.linspace(-5, 5, 50))
     # multivariate_normal returns (50, 2); transpose to one point per column
     cls.test_data_2d = np.random.multivariate_normal(
         np.zeros((2, )), np.eye(2), 50).T
    def exp_x_kxdkx(self, par, x, scaling=False, which_der=None):
        """Expectation E_x[k_ff(x_n, x) k_fd(x, x_m)].

        Parameters
        ----------
        par
            Kernel parameters; unpacked with ``RBFGauss._unpack_parameters``
            and also forwarded to ``exp_x_kxkx``.
        x : numpy.ndarray, shape (dim, num_pts)
            Points, one per column.
        scaling : bool, optional
            Forwarded unchanged to ``exp_x_kxkx``.
        which_der : sequence of int, optional
            Column indices of ``x`` for which derivative terms are computed.
            Defaults to all columns.

        Returns
        -------
        numpy.ndarray, shape (num_pts, num_der * dim)
            Row ``j`` holds, for every selected point ``which_der[i]``, a
            block of ``dim`` consecutive entries.
        """
        dim, num_pts = x.shape
        which_der = np.arange(num_pts) if which_der is None else which_der
        num_der = len(which_der)

        _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
        inv_lam = sqrt_inv_lam**2
        # reciprocal of the diagonal of inv_lam, placed back on a diagonal
        lam = np.diag(inv_lam.diagonal()**-1)
        eye_d = np.eye(dim)

        # quantities for covariance weights
        Sig_q = cho_solve(cho_factor(inv_lam + eye_d), eye_d)  # B^-1*I
        eta = Sig_q.dot(x)  # (D,N) Sig_q*x
        inn = inv_lam.dot(x)  # inp / el[:, na]**2
        Q = self.exp_x_kxkx(par, par, x, scaling)  # (N,N)

        cho_LamSig = cho_factor(lam + Sig_q)
        # Lambda^-1(Lambda+Sig_q)^-1*eta
        eta_tilde = inv_lam.dot(cho_solve(cho_LamSig, eta))  # (D,N)
        # Pairwise sums eta_tilde[:, n] + eta_tilde[:, m], shape (D,N,N).
        # The trailing new axis is required: a leading one would broadcast to
        # (D,D,N) and break the point indexing mu_Q[:, i_d, j] below.
        mu_Q = eta_tilde[..., na] + eta_tilde[:, na, :]

        E_dfff = np.empty((num_der * dim, num_pts))
        for i in range(num_der):
            i_d = which_der[i]  # index of the point with a derivative term
            istart, iend = i * dim, i * dim + dim
            for j in range(num_pts):
                E_dfff[istart:iend, j] = Q[i_d, j] * (
                    mu_Q[:, i_d, j] - inn[:, i_d])

        return E_dfff.T  # (num_pts, num_der*dim)
    def exp_x_xdkx(self, par, x, scaling=False, which_der=None):
        """Expectation E_x[x k_fd(x, x_m)].

        ``x`` holds one point per column, shape (dim, num_pts). ``which_der``
        selects the columns for which derivative terms are computed and
        defaults to all of them. ``scaling`` is forwarded to ``exp_x_kx``.

        Returns an array of shape (dim, num_der * dim); the ``i``-th block of
        ``dim`` columns belongs to the point ``which_der[i]``.
        """
        dim, num_pts = x.shape
        if which_der is None:
            which_der = np.arange(num_pts)
        num_der = len(which_der)

        _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
        inv_lam = sqrt_inv_lam**2
        identity = np.eye(dim)

        kern_mean = self.exp_x_kx(par, x, scaling)
        # (inv_lam + I)^-1 obtained from a Cholesky factorization
        inv_b = cho_solve(cho_factor(inv_lam + identity), identity)
        mu = inv_lam.dot(inv_b.dot(x))  # (D,N)
        offset = mu[:, which_der] - x[:, which_der]
        r = kern_mean[na, which_der] * inv_lam.dot(offset)  # (D, num_der)

        # quantities for cross-covariance "weights"
        scaled_inv_b = inv_lam.dot(inv_b)  # (D,D)
        blocks = np.empty((dim, num_der * dim))
        for blk, pt in enumerate(which_der):
            cols = slice(blk * dim, blk * dim + dim)
            blocks[:, cols] = (kern_mean[pt] * scaled_inv_b +
                               np.outer(mu[:, pt], r[:, blk].T))

        return blocks  # (dim, num_der*dim)
    def eval(self, par, x1, x2=None, diag=False, scaling=True, which_der=None):
        """Evaluate the kernel together with its derivative blocks.

        Parameters
        ----------
        par
            Kernel parameters, unpacked with ``RBFGauss._unpack_parameters``
            into a scaling ``alpha`` and a matrix ``sqrt_inv_lam``.
        x1 : numpy.ndarray, shape (D, N)
            First set of points, one per column.
        x2 : numpy.ndarray, shape (D, Ns), optional
            Second set of points. Defaults to a copy of ``x1``.
        diag : bool, optional
            When True, ``x1`` and ``x2`` must have the same shape. The
            derivative blocks index the full matrix ``Kff``, so the full
            matrix is computed regardless of this flag.
        scaling : bool, optional
            When False, ``alpha`` is replaced by 1.0.
        which_der : sequence of int, optional
            Column indices of ``x1`` that carry derivative observations.
            Defaults to all columns.

        Returns
        -------
        numpy.ndarray
            ``[[Kff, Kfd], [Kfd.T, Kdd]]`` when ``Ns == N``, otherwise
            ``[Kff, Kfd]``.
        """
        if x2 is None:
            x2 = x1.copy()

        alpha, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
        alpha = 1.0 if not scaling else alpha

        x1 = sqrt_inv_lam.dot(x1)  # sqrt(Lam^-1) * x
        x2 = sqrt_inv_lam.dot(x2)
        if diag:
            assert x1.shape == x2.shape
        # Kff must be bound on every path: previously it was assigned only
        # inside the ``diag`` branch, so the default diag=False hit a
        # NameError in the loops below.
        # NOTE(review): Kff is indexed below as [point of x2, point of x1];
        # confirm that maha(x1.T, x2.T) has that orientation when x2 is not x1.
        Kff = np.exp(2 * np.log(alpha) - 0.5 * maha(x1.T, x2.T))

        x1, x2 = np.atleast_2d(x1), np.atleast_2d(x2)
        D, N = x1.shape
        Ds, Ns = x2.shape
        assert Ds == D
        which_der = np.arange(N) if which_der is None else which_der
        Nd = len(which_der)  # points w/ derivative observations

        # Second multiplication by sqrt_inv_lam on top of the one above;
        # presumably this yields Lambda^-1 * X for a diagonal sqrt_inv_lam.
        x1 = sqrt_inv_lam.dot(x1)
        x2 = sqrt_inv_lam.dot(x2)
        inv_lam = sqrt_inv_lam**2
        XmX = x2[..., na] - x1[:, na, :]  # pair-wise differences, (D, Ns, N)

        # NOTE: benchmark vs. np.kron(), replace with np.kron() if possible, but which_der complicates the matter
        Kfd = np.zeros((Ns, D * Nd))  # cov(f(xi), df(xj))
        for i in range(Ns):
            for j in range(Nd):
                jstart, jend = j * D, j * D + D
                j_d = which_der[j]
                Kfd[i, jstart:jend] = Kff[i, j_d] * XmX[:, i, j_d]

        Kdd = np.zeros((D * Nd, D * Nd))  # cov(df(xi), df(xj))
        for i in range(Nd):
            for j in range(Nd):
                istart, iend = i * D, i * D + D
                jstart, jend = j * D, j * D + D
                # indices of points with derivatives
                i_d, j_d = which_der[i], which_der[j]
                Kdd[istart:iend, jstart:jend] = Kff[i_d, j_d] * (
                    inv_lam - np.outer(XmX[:, i_d, j_d], XmX[:, i_d, j_d]))

        if Ns == N:
            return np.vstack((np.hstack((Kff, Kfd)), np.hstack((Kfd.T, Kdd))))
        # Different number of points in x2: only the [Kff, Kfd] row block.
        # This return used to be unreachable, so the method returned None.
        return np.hstack((Kff, Kfd))
    def exp_x_dkx(self, par, x, scaling=False, which_der=None):
        """Expectation E_x[k_fd(x, x_n)].

        Parameters
        ----------
        par
            Kernel parameters; unpacked with ``RBFGauss._unpack_parameters``
            and also forwarded to ``exp_x_kx``.
        x : numpy.ndarray, shape (dim, num_pts)
            Points, one per column.
        scaling : bool, optional
            Forwarded unchanged to ``exp_x_kx``.
        which_der : sequence of int, optional
            Column indices of ``x`` for which derivative terms are computed.
            Defaults to all columns.

        Returns
        -------
        numpy.ndarray, shape (num_der * dim,)
            Flattened point by point: the ``dim`` entries of the first
            selected point come first, then those of the second, and so on.
        """
        dim, num_pts = x.shape
        # only the second unpacked value is needed here
        _, sqrt_inv_lam = RBFGauss._unpack_parameters(par)
        inv_lam = sqrt_inv_lam**2
        which_der = np.arange(num_pts) if which_der is None else which_der

        q = self.exp_x_kx(par, x, scaling)  # kernel mean E_x[k_ff(x, x_n)]

        eye_d = np.eye(dim)
        Sig_q = cho_solve(cho_factor(inv_lam + eye_d), eye_d)  # B^-1*I
        eta = Sig_q.dot(x)  # (D,N) Sig_q*x
        mu_q = inv_lam.dot(eta)  # (D,N)
        r = q[na, which_der] * inv_lam.dot(
            mu_q[:, which_der] - x[:, which_der])  # (D, n_der)

        return r.T.ravel()  # (n_der*D,)
    def test_total_nlml_gradient(self):
        """Check ``_total_nlml_grad`` against a finite-difference gradient.

        ``scipy.optimize.check_grad`` compares ``_total_nlml_grad`` with a
        numerical gradient of ``_total_nlml`` at fixed log-parameters; the
        reported error must not exceed 1e-5.
        """
        # nonlinear vector function taken from a state-space model
        transition = CoordinatedTurnTransition(GaussRV(5), GaussRV(5))

        # random inputs, one per column
        n_inputs = 20
        inputs = 10 + np.random.randn(transition.dim_in, n_inputs)

        # function values for every input column
        outputs = np.apply_along_axis(transition.dyn_eval, 0, inputs, None)

        # kernel and the log-parameters at which the gradient is checked
        from ssmtoybox.bq.bqkern import RBFGauss
        log_par = np.log([1.0] + 5 * [3.0])
        kern = RBFGauss(transition.dim_in, self.ker_par_5d)

        from scipy.optimize import check_grad
        # extra positional arguments handed to both callables by check_grad
        extra_args = (kern, outputs.T, inputs)
        grad_err = check_grad(self._total_nlml, self._total_nlml_grad,
                              log_par, *extra_args)
        message = 'Gradient error: {:.4f}'.format(grad_err)
        self.assertTrue(grad_err <= 1e-5, message)