Example No. 1
        def predict_odim(Lmm, Amm, beta_sp, hyp, X_sp, x):
            hyps = (hyp[:idims+1], hyp[idims+1])
            kernel_func = partial(cov.Sum, hyps, self.covs)

            k = kernel_func(x, X_sp).flatten()
            mean = k.dot(beta_sp)
            kL = solve_lower_triangular(Lmm, k)
            kA = solve_lower_triangular(Amm, Lmm.T.dot(k))
            variance = kernel_func(x, all_pairs=False)
            variance += -(kL.dot(kL) + kA.dot(kA))
            # clamp to avoid negative variance from numerical cancellation
            variance = tt.largest(variance, 0.0) + 1e-3

            return mean, variance
Example No. 2
        def nlml(Y, hyp, i, X, EyeN, nigp=None, y_var=None):
            # initialise the (symbolic, pre-compilation) kernel function
            hyps = (hyp[:idims + 1], hyp[idims + 1])
            kernel_func = partial(cov.Sum, hyps, self.covs)

            # compute the kernel matrix (one per output dimension)
            K = kernel_func(X)

            # add the contribution from the input noise
            if nigp is not None:
                K += tt.diag(nigp[i])
            # add the contribution from the output uncertainty (acts as a weight)
            if y_var is not None:
                K += tt.diag(y_var[i])

            # compute chol(K)
            L = Cholesky()(K)

            # compute K^-1 and (K^-1)dot(y)
            rhs = tt.concatenate([EyeN, Y[:, None]], axis=1)
            sol = solve_upper_triangular(L.T, solve_lower_triangular(L, rhs))
            iK = sol[:, :-1]
            beta = sol[:, -1]

            return iK, L, beta
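A detail worth noting in `nlml` above: concatenating the identity and the targets into a single right-hand side lets one pair of triangular solves return both K⁻¹ (`iK`) and K⁻¹y (`beta`). A minimal NumPy/SciPy sketch of the same trick (matrix sizes here are illustrative):

import numpy as np
from scipy.linalg import cholesky, solve_triangular

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 5))
K = A @ A.T + 5 * np.eye(5)          # SPD "kernel" matrix
y = rng.standard_normal(5)

L = cholesky(K, lower=True)
rhs = np.concatenate([np.eye(5), y[:, None]], axis=1)
sol = solve_triangular(L.T, solve_triangular(L, rhs, lower=True), lower=False)
iK, beta = sol[:, :-1], sol[:, -1]

assert np.allclose(iK, np.linalg.inv(K))
assert np.allclose(beta, np.linalg.solve(K, y))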
Example No. 3
    def predict_symbolic(self, mx, Sx):
        odims = self.E
        idims = self.D

        # compute the mean and variance for each output dimension
        mean = [[]]*odims
        variance = [[]]*odims
        for i in range(odims):
            sr = self.sr[i]
            M = sr.shape[0].astype(floatX)
            sf2 = self.hyp[i, idims]**2
            sn2 = self.hyp[i, idims+1]**2
            # project the input onto the sampled spectral frequencies
            srdotX = sr.dot(mx)
            # convert to sin cos
            phi_x = tt.concatenate([tt.sin(srdotX), tt.cos(srdotX)])

            mean[i] = phi_x.T.dot(self.beta_ss[i])
            phi_x_L = solve_lower_triangular(self.Lmm[i], phi_x)
            variance[i] = sn2*(1 + (sf2/M)*phi_x_L.dot(phi_x_L)) + 1e-6

        # reshape output variables
        M = tt.stack(mean).T.flatten()
        S = tt.diag(tt.stack(variance).T.flatten())
        V = tt.zeros((self.D, self.E))

        return M, S, V
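The `phi_x` built here stacks sines and cosines of `sr.dot(mx)`, the feature map of a sparse spectrum GP; `self.sr` presumably holds the sampled spectral frequencies, with `beta_ss` and `Lmm` playing the roles of the fitted weight vector and Cholesky factor. Example 14 below extends the same feature map to Gaussian-distributed inputs.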
Example No. 4
            def nlml(Y, hyp, X, X_sp, EyeM):
                # TODO allow for different pseudo inputs for each dimension
                # initialise the (symbolic, pre-compilation) kernel function
                hyps = [hyp[:idims+1], hyp[idims+1]]
                kernel_func = partial(cov.Sum, hyps, self.covs)

                sf2 = hyp[idims]**2
                sn2 = hyp[idims+1]**2
                N = X.shape[0].astype(theano.config.floatX)

                ridge = 1e-6
                Kmm = kernel_func(X_sp) + ridge*EyeM
                Kmn = kernel_func(X_sp, X)
                Lmm = cholesky(Kmm)
                rhs = tt.concatenate([EyeM, Kmn], axis=1)
                sol = solve_lower_triangular(Lmm, rhs)
                iKmm = solve_upper_triangular(Lmm.T, sol[:, :EyeM.shape[0]])
                Lmn = sol[:, EyeM.shape[0]:]
                diagQnn = (Lmn**2).sum(0)

                # Gamma = diag(Knn - Qnn) + sn2*I
                Gamma = sf2 + sn2 - diagQnn
                Gamma_inv = 1.0/Gamma

                # these operations avoid explicitly inverting (Qnn + Gamma)
                sqrtGamma_inv = tt.sqrt(Gamma_inv)
                Lmn_ = Lmn*sqrtGamma_inv                      # Lmn * Gamma^-.5
                Yi = Y*(sqrtGamma_inv)                        # Gamma^-.5 * Y
                # I + Lmn * Gamma^-1 * Lnm
                Bmm = tt.eye(Kmm.shape[0]) + (Lmn_).dot(Lmn_.T)
                Amm = cholesky(Bmm)
                LAmm = Lmm.dot(Amm)
                Kmn_dotYi = Kmn.dot(Yi*(sqrtGamma_inv))
                rhs = tt.concatenate([EyeM, Kmn_dotYi[:, None]], axis=1)
                sol = solve_upper_triangular(
                    LAmm.T, solve_lower_triangular(LAmm, rhs))
                iBmm = sol[:, :-1]
                beta_sp = sol[:, -1]

                log_det_K_sp = tt.sum(tt.log(Gamma))
                log_det_K_sp += 2*tt.sum(tt.log(tt.diag(Amm)))

                loss_sp = Yi.dot(Yi) - Kmn_dotYi.dot(beta_sp)
                loss_sp += log_det_K_sp + N*np.log(2*np.pi)
                loss_sp *= 0.5

                return loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp
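The `loss_sp` returned here is the negative log marginal likelihood of a sparse pseudo-input GP, and the `Lmm`, `Amm` and `beta_sp` factors returned alongside it appear to be exactly the quantities consumed by `predict_odim` in Example 1.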
Example No. 5
        def nlml(A, phidotY, EyeM):
            Lmm = Cholesky()(A)
            rhs = tt.concatenate([EyeM, phidotY[:, None]], axis=1)
            sol = solve_upper_triangular(
                Lmm.T, solve_lower_triangular(Lmm, rhs))
            iA = sol[:, :-1]
            beta_ss = sol[:, -1]

            return iA, Lmm, beta_ss
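Example 5 is the same concatenated right-hand-side trick as Example 2, applied to a feature-space Gram matrix `A`: one pair of triangular solves yields both `iA = A^-1` and `beta_ss = A^-1.dot(phidotY)`.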
Example No. 6
    def marginal_tgp(self):
        value = tt.vector('marginal_tgp')
        value.tag.test_value = zeros(1)
        delta = self.mapping.inv(value) - self.mean(self.space)
        cov = self.kernel.cov(self.space)
        cho = cholesky_robust(cov)
        L = sL.solve_lower_triangular(cho, delta)
        n = cov.shape[0].astype(th.config.floatX)
        log_density = (-np.float32(0.5) * (n * tt.log(np.float32(2.0 * np.pi)) + L.T.dot(L))
                       - tt.sum(tt.log(nL.extract_diag(cho)))
                       + self.mapping.logdet_dinv(value))
        return value, tt.exp(log_density)
Example No. 7
def logprob(x, m, S):
    delta = x - m
    L = cholesky(S)
    beta = solve_lower_triangular(L, delta.T).T
    lp = -0.5 * tt.square(beta).sum(-1)
    lp -= tt.sum(tt.log(tt.diagonal(L)))
    lp -= (0.5 * m.size * tt.log(2 * np.pi)).astype(theano.config.floatX)
    return lp
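The identity this relies on, ‖L⁻¹δ‖² = δᵀS⁻¹δ for S = LLᵀ, can be checked against SciPy's reference density. A minimal NumPy sketch for a single sample (all names illustrative):

import numpy as np
from scipy.linalg import cholesky, solve_triangular
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
m = rng.standard_normal(3)
A = rng.standard_normal((3, 3))
S = A @ A.T + 3 * np.eye(3)           # SPD covariance
x = rng.standard_normal(3)

L = cholesky(S, lower=True)
beta = solve_triangular(L, x - m, lower=True)
lp = (-0.5 * beta @ beta              # Mahalanobis term
      - np.sum(np.log(np.diag(L)))    # -0.5 * log|S|
      - 0.5 * m.size * np.log(2 * np.pi))

assert np.isclose(lp, multivariate_normal(m, S).logpdf(x))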
Example No. 8
        def predict_odim(L, beta, hyp, X, mx):
            hyps = (hyp[:idims + 1], hyp[idims + 1])
            kernel_func = partial(cov.Sum, hyps, self.covs)

            k = kernel_func(mx[None, :], X)
            mean = k.dot(beta)
            kc = solve_lower_triangular(L, k.flatten())
            # predictive variance: k(x,x) - k^T K^-1 k via the Cholesky factor
            variance = kernel_func(mx[None, :], all_pairs=False) - kc.dot(kc)

            return mean, variance
Example No. 9
    def th_scaling(self, prior=False, noise=False):
        if prior:
            return np.float32(1.0)
        np2 = np.float32(2.0)
        alpha = tsl.solve_lower_triangular(
            cholesky_robust(self.prior_kernel_inputs),
            self.mapping_outputs - self.prior_location_inputs)
        beta = alpha.T.dot(alpha)
        coeff = ((self.th_freedom(prior=True) + beta - np2)
                 / (self.th_freedom(prior=False) - np2))
        return coeff
Example No. 10
    def logp_cho(cls, value, mu, cho, mapping):
        """
        Calculates the log density of the parameters given the data.
        :param value: the data
        :param mu: the location (obtained from the hyperparameters)
        :param cho: the Cholesky decomposition of the dispersion matrix
        :param mapping: the mapping of the warped distribution
        :return: the log density of the parameters given the data (values)
        """
        delta = mapping.inv(value) - mu
        lcho = tsl.solve_lower_triangular(cho, delta)
        lcho2 = lcho.T.dot(lcho)

        npi = np.float32(-0.5) * cho.shape[0].astype(
            th.config.floatX) * tt.log(np.float32(2.0 * np.pi))
        dot2 = np.float32(-0.5) * lcho2
        det_k = -tt.sum(tt.log(tnl.diag(cho)))
        det_m = mapping.logdet_dinv(value)

        r = npi + dot2 + det_k + det_m

        cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
        cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
        cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
        cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
        return ifelse(
            cond1, np.float32(-1e30),
            ifelse(
                cond2, np.float32(-1e30),
                ifelse(cond3, np.float32(-1e30),
                       ifelse(cond4, np.float32(-1e30), r))))
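The nested `ifelse` chain at the end acts as a guard: whenever any intermediate (`delta`, `lcho`, the factor `cho`, or the warping log-determinant) is non-finite, the method returns a large negative constant instead of a NaN/inf log density, so a sampler simply rejects that parameter setting.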
Example No. 11
def energy_func_hier_binocular(state, y_data, consts):
    state_partition = [
        consts['n_bone_length_input'], consts['n_joint_angle_latent'],
        consts['n_joint_angle'] * 2, 1, 1, 1
    ]
    (log_bone_lengths, joint_ang_latent, joint_ang_cos_sin, cam_pos_x,
     cam_pos_y, log_cam_pos_z) = partition(state, state_partition)
    ang_cos_sin_mean, ang_cos_sin_std = joint_angles_cos_sin_vae_decoder(
        joint_ang_latent[None, :], consts['joint_angles_vae_decoder_layers'],
        consts['n_joint_angle'])
    joint_angles = tt.arctan2(joint_ang_cos_sin[consts['n_joint_angle']:],
                              joint_ang_cos_sin[:consts['n_joint_angle']])
    bone_lengths = tt.exp(log_bone_lengths)
    joint_pos_3d = tt.stack(
        theano_renderer.joint_positions(consts['skeleton'],
                                        joint_angles,
                                        consts['fixed_joint_angles'],
                                        lengths=bone_lengths,
                                        lengths_map=consts['bone_lengths_map'],
                                        skip=consts['joints_to_skip']), 1)
    cam_foc = consts['cam_foc']
    cam_pos = tt.concatenate([cam_pos_x, cam_pos_y, tt.exp(log_cam_pos_z)])
    cam_ang = consts['cam_ang']
    cam_mtx_1 = theano_renderer.camera_matrix(
        cam_foc, cam_pos + consts['cam_pos_offset'],
        cam_ang + consts['cam_ang_offset'])
    cam_mtx_2 = theano_renderer.camera_matrix(
        cam_foc, cam_pos - consts['cam_pos_offset'],
        cam_ang - consts['cam_ang_offset'])
    joint_pos_2d_hom_1 = tt.dot(cam_mtx_1, joint_pos_3d)
    joint_pos_2d_1 = joint_pos_2d_hom_1[:2] / joint_pos_2d_hom_1[2]
    joint_pos_2d_hom_2 = tt.dot(cam_mtx_2, joint_pos_3d)
    joint_pos_2d_2 = joint_pos_2d_hom_2[:2] / joint_pos_2d_hom_2[2]
    y_model = tt.concatenate(
        [joint_pos_2d_1.flatten(),
         joint_pos_2d_2.flatten()], 0)
    log_lengths_minus_mean = log_bone_lengths - consts['log_lengths_mean']
    return 0.5 * (
        (y_data - y_model).dot(y_data - y_model) /
        consts['output_noise_std']**2 +
        (((joint_ang_cos_sin - ang_cos_sin_mean) / ang_cos_sin_std)**2).sum() +
        joint_ang_latent.dot(joint_ang_latent) + log_lengths_minus_mean.dot(
            sla.solve_upper_triangular(
                consts['log_lengths_covar_chol'],
                sla.solve_lower_triangular(consts['log_lengths_covar_chol'].T,
                                           log_lengths_minus_mean))) +
        ((cam_pos_x - consts['cam_pos_x_mean']) / consts['cam_pos_x_std'])**2 +
        ((cam_pos_y - consts['cam_pos_y_mean']) / consts['cam_pos_y_std'])**2 +
        ((log_cam_pos_z - consts['log_cam_pos_z_mean']) /
         consts['log_cam_pos_z_std'])**2)[0]
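One convention to be aware of in the bone-length prior term: `solve_upper_triangular(chol, solve_lower_triangular(chol.T, d))` computes (UᵀU)⁻¹d, i.e. it is the correct Mahalanobis solve when `consts['log_lengths_covar_chol']` is an upper-triangular Cholesky factor (as `scipy.linalg.cholesky` returns by default); with a lower-triangular factor the two transposes would be swapped.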
Example No. 12
File: mmd.py Project: yux94/opt-mmd
def linear_mmd2_and_hotelling(X, Y, biased=True, reg=0):
    if not biased:
        raise ValueError("linear_mmd2_and_hotelling only works for biased est")

    n = X.shape[0]
    p = X.shape[1]
    Z = X - Y
    Z_bar = Z.mean(axis=0)
    mmd2 = Z_bar.dot(Z_bar)

    Z_cent = Z - Z_bar
    S = Z_cent.T.dot(Z_cent) / (n - 1)
    # z' inv(S) z = z' inv(L L') z = z' inv(L)' inv(L) z = ||inv(L) z||^2
    L = slinalg.cholesky(S + reg * T.eye(p))
    Linv_Z_bar = slinalg.solve_lower_triangular(L, Z_bar)
    lambda_ = n * Linv_Z_bar.dot(Linv_Z_bar)
    # happens on the CPU!
    return mmd2, lambda_
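The `# happens on the CPU!` comment presumably refers to Theano's `slinalg` ops (Cholesky and the triangular solves) having no GPU implementation, so this part of the graph executes on the host. The statistic itself is the Hotelling form n·Z̄ᵀS⁻¹Z̄ (with optional ridge `reg`), computed without an explicit inverse via the squared norm of `solve_lower_triangular(L, Z_bar)`.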
Example No. 13
    def logp_cho(cls, value, mu, cho, freedom, mapping):
        delta = mapping.inv(value) - mu

        lcho = tsl.solve_lower_triangular(cho, delta)
        beta = lcho.T.dot(lcho)
        n = cho.shape[0].astype(th.config.floatX)

        np5 = np.float32(0.5)
        np2 = np.float32(2.0)
        npi = np.float32(np.pi)

        r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
        r2 = ifelse(
            tt.le(np.float32(1e6), freedom), -n * np5 * np.log(np2 * npi),
            tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5) -
            np5 * n * tt.log((freedom - np2) * npi))
        r3 = -tt.sum(tt.log(tnl.diag(cho)))
        det_m = mapping.logdet_dinv(value)

        r1 = debug(r1, name='r1', force=True)
        r2 = debug(r2, name='r2', force=True)
        r3 = debug(r3, name='r3', force=True)
        det_m = debug(det_m, name='det_m', force=True)

        r = r1 + r2 + r3 + det_m

        cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
        cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
        cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
        cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
        return ifelse(
            cond1, np.float32(-1e30),
            ifelse(
                cond2, np.float32(-1e30),
                ifelse(cond3, np.float32(-1e30),
                       ifelse(cond4, np.float32(-1e30), r))))
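This is the Student-t counterpart of the Gaussian `logp_cho` in Example 10: `beta` is the same Mahalanobis term, `r1` and `r2` assemble the multivariate-t log density, and the `ifelse` on `freedom >= 1e6` substitutes the Gaussian normalising constant so that `gammaln` is never evaluated at effectively infinite degrees of freedom.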
Example No. 14
    def predict_symbolic(self, mx, Sx, unroll_scan=False):
        idims = self.D
        odims = self.E

        Ms = self.sr.shape[1]
        sf2M = (self.hyp[:, idims]**2)/tt.cast(Ms, floatX)
        sn2 = self.hyp[:, idims+1]**2

        # TODO this should just fallback to the method from the SSGP class
        if Sx is None:
            # first check if we received a vector [D] or a matrix [nxD]
            if mx.ndim == 1:
                mx = mx[None, :]

            srdotx = self.sr.dot(self.X.T).transpose(0, 2, 1)
            phi_x = tt.concatenate([tt.sin(srdotx), tt.cos(srdotx)], 2)
            M = (phi_x*self.beta_ss[:, None, :]).sum(-1)
            phi_x_L = tt.stack([
                solve_lower_triangular(self.Lmm[i], phi_x[i].T)
                for i in range(odims)])
            S = sn2[:, None]*(1 + (sf2M[:, None])*(phi_x_L**2).sum(-2)) + 1e-6

            return M, S

        # precompute some variables
        srdotx = self.sr.dot(mx)
        srdotSx = self.sr.dot(Sx)
        srdotSxdotsr = tt.sum(srdotSx*self.sr, 2)
        e = tt.exp(-0.5*srdotSxdotsr)
        cos_srdotx = tt.cos(srdotx)
        sin_srdotx = tt.sin(srdotx)
        cos_srdotx_e = cos_srdotx*e
        sin_srdotx_e = sin_srdotx*e

        # compute the mean vector
        mphi = tt.horizontal_stack(sin_srdotx_e, cos_srdotx_e)  # E x 2*Ms
        M = tt.sum(mphi*self.beta_ss, 1)

        # input output covariance
        mx_c = mx.dimshuffle(0, 'x')
        sin_srdotx_e_r = sin_srdotx_e.dimshuffle(0, 'x', 1)
        cos_srdotx_e_r = cos_srdotx_e.dimshuffle(0, 'x', 1)
        srdotSx_tr = srdotSx.transpose(0, 2, 1)
        c = tt.concatenate([mx_c*sin_srdotx_e_r + srdotSx_tr*cos_srdotx_e_r,
                            mx_c*cos_srdotx_e_r - srdotSx_tr*sin_srdotx_e_r],
                           axis=2)  # E x D x 2*Ms
        beta_ss_r = self.beta_ss.dimshuffle(0, 'x', 1)

        # input output covariance (notice this is not premultiplied by the
        # input covariance inverse)
        V = tt.sum(c*beta_ss_r, 2).T - tt.outer(mx, M)

        srdotSxdotsr_c = srdotSxdotsr.dimshuffle(0, 1, 'x')
        srdotSxdotsr_r = srdotSxdotsr.dimshuffle(0, 'x', 1)
        M2 = tt.zeros((odims, odims))

        # initialize indices
        triu_indices = np.triu_indices(odims)
        indices = [tt.as_index_variable(idx) for idx in triu_indices]

        def second_moments(i, j, M2, beta, iA, sn2, sf2M, sr, srdotSx,
                           srdotSxdotsr_c, srdotSxdotsr_r,
                           sin_srdotx, cos_srdotx, *args):
            # compute the second moments of the spectrum feature vectors
            siSxsj = srdotSx[i].dot(sr[j].T)  # Ms x Ms
            sijSxsij = -0.5*(srdotSxdotsr_c[i] + srdotSxdotsr_r[j])
            em = tt.exp(sijSxsij+siSxsj)      # MsxMs
            ep = tt.exp(sijSxsij-siSxsj)     # MsxMs
            si = sin_srdotx[i]       # Msx1
            ci = cos_srdotx[i]       # Msx1
            sj = sin_srdotx[j]       # Msx1
            cj = cos_srdotx[j]       # Msx1
            sicj = tt.outer(si, cj)  # MsxMs
            cisj = tt.outer(ci, sj)  # MsxMs
            sisj = tt.outer(si, sj)  # MsxMs
            cicj = tt.outer(ci, cj)  # MsxMs
            sm = (sicj-cisj)*em
            sp = (sicj+cisj)*ep
            cm = (sisj+cicj)*em
            cp = (cicj-sisj)*ep

            # Populate the second moment matrix of the feature vector
            Q_up = tt.concatenate([cm-cp, sm+sp], axis=1)
            Q_lo = tt.concatenate([sp-sm, cm+cp], axis=1)
            Q = tt.concatenate([Q_up, Q_lo], axis=0)

            # Compute the second moment of the output
            m2 = 0.5*matrix_dot(beta[i], Q, beta[j].T)

            m2 = theano.ifelse.ifelse(
                tt.eq(i, j),
                m2 + sn2[i]*(1.0 + sf2M[i]*tt.sum(self.iA[i]*Q)) + 1e-6,
                m2)
            M2 = tt.set_subtensor(M2[i, j], m2)
            return M2

        nseq = [self.beta_ss, self.iA, sn2, sf2M, self.sr, srdotSx,
                srdotSxdotsr_c, srdotSxdotsr_r, sin_srdotx, cos_srdotx,
                self.Lmm]

        if unroll_scan:
            from lasagne.utils import unroll_scan
            [M2_] = unroll_scan(second_moments, indices,
                                [M2], nseq, len(triu_indices[0]))
            updts = {}
        else:
            M2_, updts = theano.scan(fn=second_moments,
                                     sequences=indices,
                                     outputs_info=[M2],
                                     non_sequences=nseq,
                                     allow_gc=False,
                                     name="%s>M2_scan" % (self.name))

        M2 = M2_[-1]
        M2 = M2 + tt.triu(M2, k=1).T
        S = M2 - tt.outer(M, M)

        return M, S, V
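Since `second_moments` only fills entries with i ≤ j, the final `M2 + tt.triu(M2, k=1).T` reflects the strict upper triangle below the diagonal. A minimal NumPy sketch of that reflection (shapes illustrative):

import numpy as np

M2 = np.triu(np.arange(1., 10.).reshape(3, 3))  # only the upper triangle is filled
full = M2 + np.triu(M2, k=1).T                  # mirror the strict upper part
assert np.allclose(full, full.T)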
Example No. 15
    # Hyperpriors for mixture components' means/cov matrices
    mus = [pm.MvNormal('mu_' + str(k),
                       mu=np.zeros(D, dtype=np.float32),
                       cov=10000 * np.eye(D),
                       shape=(D,))
           for k in range(K)]

    taus = []
    sd_dist = pm.HalfCauchy.dist(beta=10000)
    for k in range(K):
        packed_chol = pm.LKJCholeskyCov('packed_chol' + str(k),
                                        n=D,
                                        eta=1,
                                        sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
        # precision matrix via triangular solve: (L^-1)' (L^-1) = Sigma^-1
        invchol = solve_lower_triangular(chol, np.eye(D))
        taus.append(tt.dot(invchol.T, invchol))

    # Mixture density
    pi = pm.Dirichlet('pi', a=np.ones(K), shape=(K,))
    B = pm.DensityDist('B', logp_gmix(mus, pi, taus), shape=(n_samples, D))

    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)),
                   axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)
    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)

with model:
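Both this snippet and Example 17 build a precision matrix from a lower-triangular Cholesky factor without ever calling a dense inverse: `solve_lower_triangular(chol, I)` gives L⁻¹, and L⁻ᵀL⁻¹ = (LLᵀ)⁻¹. A minimal NumPy check of the identity (names illustrative):

import numpy as np
from scipy.linalg import solve_triangular

rng = np.random.default_rng(2)
A = rng.standard_normal((4, 4))
Sigma = A @ A.T + 4 * np.eye(4)                 # SPD covariance
L = np.linalg.cholesky(Sigma)                   # lower factor, Sigma = L L'
invchol = solve_triangular(L, np.eye(4), lower=True)
assert np.allclose(invchol.T @ invchol, np.linalg.inv(Sigma))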
Example No. 16
    def inv(self, inputs, outputs, noise=False):
        if noise:
            cho = cholesky_robust(self.noisy.cov(inputs))
        else:
            cho = cholesky_robust(self.kernel.cov(inputs))
        return tsl.solve_lower_triangular(cho, outputs)
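In effect `inv` whitens: if `outputs` has covariance K = cho·choᵀ (optionally including the noise model), the returned residuals cho⁻¹·outputs have identity covariance, which makes them directly comparable across inputs.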
Example No. 17
    return logp_


with pm.Model() as model:
    # Hyperpriors for mixture components' means/cov matrices
    mu = pm.MvNormal('mu',
                     mu=np.zeros(D, dtype=np.float32),
                     cov=10000 * np.eye(D),
                     shape=(D, ))

    sd_dist = pm.HalfCauchy.dist(beta=10000)

    packed_chol = pm.LKJCholeskyCov('packed_chol', n=D, eta=1, sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
    invchol = solve_lower_triangular(chol, np.eye(D))
    tau = tt.dot(invchol.T, invchol)

    # Mixture density
    B = pm.DensityDist('B', logp_g(mu, tau), shape=(n_samples, D))

    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)),
                   axis=1)

    # Model error
    err = pm.HalfCauchy('err', beta=10)
    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2), observed=Y)

with model:
    approx = pm.variational.inference.fit(
Example No. 18
    def create_model(self):
        with pm.Model() as self.model:
            # Again, f_sample is just a dummy variable
            self.mean = pm.gp.mean.Zero()

            # covariance function
            l_L = pm.Gamma("l_L", alpha=2, beta=2, shape=self.dim)
            # informative, positive half-normal prior on the amplitude
            eta_L = pm.HalfNormal("eta_L", sd=5)
            self.cov_L = eta_L * pm.gp.cov.ExpQuad(self.dim, l_L)

            # covariance function
            l_H = pm.Gamma("l_H", alpha=2, beta=2, shape=self.dim)
            delta = pm.Normal("delta", sd=10)
            # informative, positive half-normal prior on the amplitude
            eta_H = pm.HalfNormal("eta_H", sd=5)
            self.cov_H = eta_H * pm.gp.cov.ExpQuad(self.dim, l_H)

            ###############################################################
            # compute Kuu
            K_LLu = self.cov_L(self.X_Lu)
            K_HHu = delta**2 * self.cov_L(self.X_Hu) + self.cov_H(self.X_Hu)
            K_LHu = delta * self.cov_L(self.X_Lu, self.X_Hu)

            K1 = tt.concatenate([K_LLu, K_LHu], axis=1)
            K2 = tt.concatenate([K_LHu.T, K_HHu], axis=1)
            self.Kuu = pm.gp.util.stabilize(tt.concatenate([K1, K2], axis=0))


            ##############################################################
            # compute Kuf
            K_LLuf = self.cov_L(self.X_Lu, self.X_L)  # uL x L
            K_HHuf = (delta**2 * self.cov_L(self.X_Hu, self.X_H)
                      + self.cov_H(self.X_Hu, self.X_H))  # uH x H
            K_LHuf = delta * self.cov_L(self.X_Lu, self.X_H)  # uL x H
            K_HLuf = delta * self.cov_L(self.X_Hu, self.X_L)  # uH x L

            K1 = tt.concatenate([K_LLuf, K_LHuf], axis=1)
            K2 = tt.concatenate([K_HLuf, K_HHuf], axis=1)
            self.Kuf = pm.gp.util.stabilize(tt.concatenate([K1, K2], axis=0))

            ##############################################################

            self.Luu = tt.slinalg.cholesky(self.Kuu)


            vu = pm.Normal("u_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.Xu))
            u = pm.Deterministic("u", self.Luu.dot(vu))

            Luuinv_u = solve_lower_triangular(self.Luu, u)
            A = solve_lower_triangular(self.Luu, self.Kuf)

            self.Qffd = tt.sum(A * A, 0)

            K_LLff = self.cov_L(self.X_L, diag=True)
            # diagonal of Kff at the high-fidelity training inputs X_H;
            # it must match the length of Qffd (N_L + N_H entries in total)
            K_HHff = (delta**2 * self.cov_L(self.X_H, diag=True)
                      + self.cov_H(self.X_H, diag=True))

            Kffd = tt.concatenate([K_LLff, K_HHff])
            self.Lamd = tt.clip(Kffd - self.Qffd, 0.0, np.inf) + self.sigma**2

            v = pm.Normal("fp_rotated_", mu=0.0, sd=1.0, shape=pm.gp.util.infer_shape(self.X))
            fp = pm.Deterministic("fp", tt.dot(tt.transpose(A), Luuinv_u) + tt.sqrt(self.Lamd)*v)

            p = pm.Deterministic("p", pm.math.invlogit(fp))
            y = pm.Bernoulli("y", p=p, observed=self.Y)
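Structurally this is a FITC-style sparse approximation for a GP classifier: `A = Luu⁻¹·Kuf` gives the diagonal of Qff as column norms (`Qffd`), `Lamd` is the clipped diagonal correction diag(Kff − Qff) plus noise, and `fp` is a non-centred parameterisation of the latent function that is finally squashed through `invlogit` into the Bernoulli likelihood.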