def predict_odim(Lmm, Amm, beta_sp, hyp, X_sp, x):
    """Sparse-GP predictive mean and variance for one output dimension.

    Uses the precomputed cholesky factors Lmm (of Kmm) and Amm, the
    sparse weights beta_sp, the kernel hyperparameters hyp, the inducing
    inputs X_sp and the test input x.
    """
    # split hyperparameters into (kernel params, noise) for the kernel closure
    kernel = partial(cov.Sum, (hyp[:idims+1], hyp[idims+1]), self.covs)
    # cross-covariance between the test input and the inducing inputs
    k_vec = kernel(x, X_sp).flatten()
    pred_mean = k_vec.dot(beta_sp)
    # variance corrections from both triangular factors
    v1 = solve_lower_triangular(Lmm, k_vec)
    v2 = solve_lower_triangular(Amm, Lmm.T.dot(k_vec))
    pred_var = kernel(x, all_pairs=False) - (v1.dot(v1) + v2.dot(v2))
    # clip at zero and add jitter for numerical stability
    pred_var = tt.largest(pred_var, 0.0) + 1e-3
    return pred_mean, pred_var
def nlml(Y, hyp, i, X, EyeN, nigp=None, y_var=None):
    """Factorize the kernel matrix for output dimension i.

    Returns (K^-1, chol(K), K^-1 y), computed with one pair of
    triangular solves against [I | y].
    """
    # instantiate the (pre-compilation) kernel with split hyperparameters
    kern = partial(cov.Sum, (hyp[:idims + 1], hyp[idims + 1]), self.covs)
    Kxx = kern(X)
    # optional diagonal contribution from the input noise
    if nigp:
        Kxx += tt.diag(nigp[i])
    # optional per-sample output uncertainty (acts as a weight)
    if y_var:
        Kxx += tt.diag(y_var[i])
    chol = Cholesky()(Kxx)
    # solving against [I | y] yields K^-1 and K^-1 y in a single pass
    stacked = tt.concatenate([EyeN, Y[:, None]], axis=1)
    solved = solve_upper_triangular(
        chol.T, solve_lower_triangular(chol, stacked))
    return solved[:, :-1], chol, solved[:, -1]
def predict_symbolic(self, mx, Sx):
    """Symbolic sparse-spectrum GP prediction at deterministic input mx.

    Returns (M, S, V): stacked per-dimension means, a diagonal
    covariance, and a zero input-output covariance. Sx is accepted for
    interface compatibility but not used.
    """
    n_out = self.E
    n_in = self.D
    means, variances = [], []
    for i in range(n_out):
        freqs = self.sr[i]
        n_feat = freqs.shape[0].astype(floatX)
        sig2 = self.hyp[i, n_in]**2
        noise2 = self.hyp[i, n_in+1]**2
        # project the input onto the spectral frequencies and build the
        # trigonometric feature vector [sin; cos]
        proj = freqs.dot(mx)
        feats = tt.concatenate([tt.sin(proj), tt.cos(proj)])
        means.append(feats.T.dot(self.beta_ss[i]))
        # whiten the features with the cholesky factor for the variance term
        feats_w = solve_lower_triangular(self.Lmm[i], feats)
        variances.append(
            noise2*(1 + (sig2/n_feat)*feats_w.dot(feats_w)) + 1e-6)
    # pack the per-dimension results into the expected output shapes
    M = tt.stack(means).T.flatten()
    S = tt.diag(tt.stack(variances).T.flatten())
    V = tt.zeros((self.D, self.E))
    return M, S, V
def nlml(Y, hyp, X, X_sp, EyeM):
    """Sparse (FITC-style) negative log marginal likelihood.

    Returns (loss, Kmm^-1, chol(Kmm), chol(B), B^-1, beta) where
    B = I + Lmm^-1 Kmn Gamma^-1 Knm Lmm^-T. All quantities are symbolic.
    """
    # TODO allow for different pseudo inputs for each dimension
    # initialise the (before compilation) kernel function
    hyps = [hyp[:idims+1], hyp[idims+1]]
    kernel_func = partial(cov.Sum, hyps, self.covs)
    sf2 = hyp[idims]**2
    sn2 = hyp[idims+1]**2
    N = X.shape[0].astype(theano.config.floatX)
    # jitter added to the inducing-point kernel for numerical stability
    ridge = 1e-6
    Kmm = kernel_func(X_sp) + ridge*EyeM
    Kmn = kernel_func(X_sp, X)
    Lmm = cholesky(Kmm)
    # one triangular solve against [I | Kmn] gives both Kmm^-1 and Lmm^-1 Kmn
    rhs = tt.concatenate([EyeM, Kmn], axis=1)
    sol = solve_lower_triangular(Lmm, rhs)
    iKmm = solve_upper_triangular(Lmm.T, sol[:, :EyeM.shape[0]])
    Lmn = sol[:, EyeM.shape[0]:]
    # diag(Qnn) = diag(Knm Kmm^-1 Kmn)
    diagQnn = (Lmn**2).sum(0)
    # Gamma = diag(Knn - Qnn) + sn2*I
    # assumes diag(Knn) == sf2 + sn2 for this kernel — TODO confirm
    Gamma = sf2 + sn2 - diagQnn
    Gamma_inv = 1.0/Gamma
    # the following operations avoid explicitly inverting (Qnn + Gamma)
    sqrtGamma_inv = tt.sqrt(Gamma_inv)
    Lmn_ = Lmn*sqrtGamma_inv   # (Lmm^-1 Kmn) * Gamma^-0.5
    Yi = Y*(sqrtGamma_inv)     # Gamma^-0.5 * Y
    # B = I + Lmn * Gamma^-1 * Lnm
    Bmm = tt.eye(Kmm.shape[0]) + (Lmn_).dot(Lmn_.T)
    Amm = cholesky(Bmm)
    LAmm = Lmm.dot(Amm)
    # note Yi already carries one Gamma^-0.5 factor, so this is Kmn Gamma^-1 Y
    Kmn_dotYi = Kmn.dot(Yi*(sqrtGamma_inv))
    # solve against [I | Kmn Gamma^-1 Y] for B^-1 and the weight vector beta
    rhs = tt.concatenate([EyeM, Kmn_dotYi[:, None]], axis=1)
    sol = solve_upper_triangular(
        LAmm.T, solve_lower_triangular(LAmm, rhs))
    iBmm = sol[:, :-1]
    beta_sp = sol[:, -1]
    # log-determinant of the approximate covariance via Gamma and chol(B)
    log_det_K_sp = tt.sum(tt.log(Gamma))
    log_det_K_sp += 2*tt.sum(tt.log(tt.diag(Amm)))
    # 0.5 * (data fit + log det + normalization constant)
    loss_sp = Yi.dot(Yi) - Kmn_dotYi.dot(beta_sp)
    loss_sp += log_det_K_sp + N*np.log(2*np.pi)
    loss_sp *= 0.5
    return loss_sp, iKmm, Lmm, Amm, iBmm, beta_sp
def nlml(A, phidotY, EyeM):
    """Factorize A and return (A^-1, chol(A), A^-1 phi'y).

    A single pair of triangular solves against [I | phi'y] produces both
    the inverse and the weight vector.
    """
    chol = Cholesky()(A)
    packed = tt.concatenate([EyeM, phidotY[:, None]], axis=1)
    solved = solve_upper_triangular(
        chol.T, solve_lower_triangular(chol, packed))
    return solved[:, :-1], chol, solved[:, -1]
def marginal_tgp(self):
    """Return (input variable, marginal density) of the transformed GP."""
    value = tt.vector('marginal_tgp')
    value.tag.test_value = zeros(1)
    # residual of the de-warped observations w.r.t. the prior mean
    resid = self.mapping.inv(value) - self.mean(self.space)
    kcov = self.kernel.cov(self.space)
    chol = cholesky_robust(kcov)
    white = sL.solve_lower_triangular(chol, resid)
    # gaussian log-density: normalization constant plus quadratic form ...
    quad = cov_dim = kcov.shape[0].astype(th.config.floatX) \
        * tt.log(np.float32(2.0 * np.pi)) + white.T.dot(white)
    # ... minus the log-determinant, plus the warping jacobian correction
    log_density = -np.float32(0.5) * quad \
        - tt.sum(tt.log(nL.extract_diag(chol))) \
        + self.mapping.logdet_dinv(value)
    return value, tt.exp(log_density)
def logprob(x, m, S):
    """Multivariate-normal log density of x under N(m, S)."""
    chol = cholesky(S)
    # whiten the residuals with the cholesky factor
    z = solve_lower_triangular(chol, (x - m).T).T
    quad_term = -0.5 * tt.square(z).sum(-1)
    logdet_term = tt.sum(tt.log(tt.diagonal(chol)))
    norm_term = (0.5 * m.size * tt.log(2 * np.pi)).astype(
        theano.config.floatX)
    return quad_term - logdet_term - norm_term
def predict_odim(L, beta, hyp, X, mx):
    """Dense-GP predictive mean and variance at mx for one output dim."""
    # split hyperparameters into (kernel params, noise) for the kernel closure
    kern = partial(cov.Sum, (hyp[:idims + 1], hyp[idims + 1]), self.covs)
    # covariance between the test point and the training inputs
    k_star = kern(mx[None, :], X)
    pred_mean = k_star.dot(beta)
    # variance reduction term from the triangular solve
    w = solve_lower_triangular(L, k_star.flatten())
    pred_var = kern(mx[None, :], all_pairs=False) - w.dot(w)
    return pred_mean, pred_var
def th_scaling(self, prior=False, noise=False):
    """Student-t dispersion scaling coefficient (1.0 under the prior)."""
    if prior:
        return np.float32(1.0)
    two = np.float32(2.0)
    # whitened residual of the mapped outputs under the prior kernel
    resid = self.mapping_outputs - self.prior_location_inputs
    white = tsl.solve_lower_triangular(
        cholesky_robust(self.prior_kernel_inputs), resid)
    quad = white.T.dot(white)
    return (self.th_freedom(prior=True) + quad - two) \
        / (self.th_freedom(prior=False) - two)
def logp_cho(cls, value, mu, cho, mapping):
    """
    Calculates the log p of the parameters given the data
    :param value: the data
    :param mu: the location (obtained from the hyperparameters)
    :param cho: the cholesky decomposition of the dispersion matrix
    :param mapping: the mapping of the warped.
    :return: it returns the value of the log p of the parameters
        given the data (values)
    """
    # residual of the de-warped data w.r.t. the location
    delta = mapping.inv(value) - mu
    # whiten with the cholesky factor; lcho2 is the quadratic form
    lcho = tsl.solve_lower_triangular(cho, delta)
    lcho2 = lcho.T.dot(lcho)
    # gaussian log-density pieces: normalizer, quadratic form,
    # log-determinant, and the warping jacobian correction
    npi = np.float32(-0.5) * cho.shape[0].astype(
        th.config.floatX) * tt.log(np.float32(2.0 * np.pi))
    dot2 = np.float32(-0.5) * lcho2
    det_k = -tt.sum(tt.log(tnl.diag(cho)))
    det_m = mapping.logdet_dinv(value)
    r = npi + dot2 + det_k + det_m
    # guard against numerical failure anywhere in the pipeline by
    # returning a very low (but finite) log-probability instead
    cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
    cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
    cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
    cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
    return ifelse(
        cond1, np.float32(-1e30),
        ifelse(
            cond2, np.float32(-1e30),
            ifelse(cond3, np.float32(-1e30),
                   ifelse(cond4, np.float32(-1e30), r))))
def energy_func_hier_binocular(state, y_data, consts):
    """Energy (negative log density up to a constant) of the hierarchical
    binocular pose model: decodes joint angles from a latent code, renders
    3D joint positions, projects them through two offset cameras, and sums
    the data-fit and prior terms.
    """
    # split the flat state vector into its named components
    state_partition = [
        consts['n_bone_length_input'],
        consts['n_joint_angle_latent'],
        consts['n_joint_angle'] * 2,
        1, 1, 1
    ]
    (log_bone_lengths, joint_ang_latent, joint_ang_cos_sin,
     cam_pos_x, cam_pos_y, log_cam_pos_z) = partition(state, state_partition)
    # decode the per-angle (cos, sin) mean/std from the VAE latent code
    ang_cos_sin_mean, ang_cos_sin_std = joint_angles_cos_sin_vae_decoder(
        joint_ang_latent[None, :],
        consts['joint_angles_vae_decoder_layers'],
        consts['n_joint_angle'])
    # recover angles from their (cos, sin) encoding: first half cos, second sin
    joint_angles = tt.arctan2(joint_ang_cos_sin[consts['n_joint_angle']:],
                              joint_ang_cos_sin[:consts['n_joint_angle']])
    bone_lengths = tt.exp(log_bone_lengths)
    joint_pos_3d = tt.stack(
        theano_renderer.joint_positions(consts['skeleton'], joint_angles,
                                        consts['fixed_joint_angles'],
                                        lengths=bone_lengths,
                                        lengths_map=consts['bone_lengths_map'],
                                        skip=consts['joints_to_skip']), 1)
    cam_foc = consts['cam_foc']
    # camera z-position parameterized in log space to keep it positive
    cam_pos = tt.concatenate([cam_pos_x, cam_pos_y, tt.exp(log_cam_pos_z)])
    cam_ang = consts['cam_ang']
    # binocular pair: the two cameras are symmetric offsets of one pose
    cam_mtx_1 = theano_renderer.camera_matrix(
        cam_foc, cam_pos + consts['cam_pos_offset'],
        cam_ang + consts['cam_ang_offset'])
    cam_mtx_2 = theano_renderer.camera_matrix(
        cam_foc, cam_pos - consts['cam_pos_offset'],
        cam_ang - consts['cam_ang_offset'])
    # perspective projection: divide homogeneous coords by the third row
    joint_pos_2d_hom_1 = tt.dot(cam_mtx_1, joint_pos_3d)
    joint_pos_2d_1 = joint_pos_2d_hom_1[:2] / joint_pos_2d_hom_1[2]
    joint_pos_2d_hom_2 = tt.dot(cam_mtx_2, joint_pos_3d)
    joint_pos_2d_2 = joint_pos_2d_hom_2[:2] / joint_pos_2d_hom_2[2]
    y_model = tt.concatenate(
        [joint_pos_2d_1.flatten(), joint_pos_2d_2.flatten()], 0)
    log_lengths_minus_mean = log_bone_lengths - consts['log_lengths_mean']
    # 0.5 * (observation error + decoder reconstruction + latent prior +
    #        bone-length gaussian prior + camera-pose priors)
    # NOTE(review): the bone-length prior solves with (chol, chol.T) in
    # (upper, lower) order — correct only if log_lengths_covar_chol is an
    # upper-triangular factor; confirm against where it is built.
    return 0.5 * (
        (y_data - y_model).dot(y_data - y_model)
        / consts['output_noise_std']**2 +
        (((joint_ang_cos_sin - ang_cos_sin_mean) / ang_cos_sin_std)**2).sum() +
        joint_ang_latent.dot(joint_ang_latent) +
        log_lengths_minus_mean.dot(
            sla.solve_upper_triangular(
                consts['log_lengths_covar_chol'],
                sla.solve_lower_triangular(consts['log_lengths_covar_chol'].T,
                                           log_lengths_minus_mean))) +
        ((cam_pos_x - consts['cam_pos_x_mean']) / consts['cam_pos_x_std'])**2 +
        ((cam_pos_y - consts['cam_pos_y_mean']) / consts['cam_pos_y_std'])**2 +
        ((log_cam_pos_z - consts['log_cam_pos_z_mean'])
         / consts['log_cam_pos_z_std'])**2)[0]
def linear_mmd2_and_hotelling(X, Y, biased=True, reg=0):
    """Biased linear-time MMD^2 estimate and the Hotelling test statistic."""
    if not biased:
        raise ValueError("linear_mmd2_and_hotelling only works for biased est")
    n, p = X.shape[0], X.shape[1]
    diffs = X - Y
    mean_diff = diffs.mean(axis=0)
    mmd2 = mean_diff.dot(mean_diff)
    # sample covariance of the centred paired differences
    centred = diffs - mean_diff
    S = centred.T.dot(centred) / (n - 1)
    # z' inv(S) z = z' inv(L L') z = z' inv(L)' inv(L) z = ||inv(L) z||^2
    chol = slinalg.cholesky(S + reg * T.eye(p))
    white = slinalg.solve_lower_triangular(chol, mean_diff)  # happens on the CPU!
    lambda_ = n * white.dot(white)
    return mmd2, lambda_
def logp_cho(cls, value, mu, cho, freedom, mapping):
    """Log-density of warped student-t observations given the location,
    the cholesky factor of the dispersion matrix, and the degrees of
    freedom; falls back to the gaussian normalizer for huge freedom.
    """
    # residual of the de-warped data w.r.t. the location
    delta = mapping.inv(value) - mu
    # whiten with the cholesky factor; beta is the quadratic form
    lcho = tsl.solve_lower_triangular(cho, delta)
    beta = lcho.T.dot(lcho)
    n = cho.shape[0].astype(th.config.floatX)
    np5 = np.float32(0.5)
    np2 = np.float32(2.0)
    npi = np.float32(np.pi)
    # r1: student-t kernel term
    r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
    # r2: normalizer — gaussian limit when freedom is very large (>= 1e6)
    r2 = ifelse(
        tt.le(np.float32(1e6), freedom),
        -n * np5 * np.log(np2 * npi),
        tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5)
        - np5 * n * tt.log((freedom - np2) * npi))
    # r3: log-determinant from the cholesky diagonal
    r3 = -tt.sum(tt.log(tnl.diag(cho)))
    # jacobian correction of the warping
    det_m = mapping.logdet_dinv(value)
    # NOTE(review): these debug(..., force=True) calls look like leftover
    # development instrumentation — confirm whether they can be removed
    r1 = debug(r1, name='r1', force=True)
    r2 = debug(r2, name='r2', force=True)
    r3 = debug(r3, name='r3', force=True)
    det_m = debug(det_m, name='det_m', force=True)
    r = r1 + r2 + r3 + det_m
    # guard against numerical failure by returning a very low finite logp
    cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
    cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
    cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
    cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
    return ifelse(
        cond1, np.float32(-1e30),
        ifelse(
            cond2, np.float32(-1e30),
            ifelse(cond3, np.float32(-1e30),
                   ifelse(cond4, np.float32(-1e30), r))))
def predict_symbolic(self, mx, Sx, unroll_scan=False):
    """Sparse-spectrum GP prediction with (optionally) uncertain inputs.

    With Sx None, returns per-point means and variances from the
    trigonometric features. With an input covariance Sx, performs
    moment matching and returns (M, S, V): output mean, output
    covariance, and input-output covariance (not premultiplied by the
    input covariance inverse).
    """
    idims = self.D
    odims = self.E
    Ms = self.sr.shape[1]
    # sf2/Ms scaling and noise variance, one per output dimension
    sf2M = (self.hyp[:, idims]**2)/tt.cast(Ms, floatX)
    sn2 = self.hyp[:, idims+1]**2
    # TODO this should just fallback to the method from the SSGP class
    if Sx is None:
        # first check if we received a vector [D] or a matrix [nxD]
        if mx.ndim == 1:
            mx = mx[None, :]
        # NOTE(review): this projects self.X rather than mx — confirm this
        # is intended (mx appears unused in this branch after reshaping)
        srdotx = self.sr.dot(self.X.T).transpose(0,2,1)
        phi_x = tt.concatenate([tt.sin(srdotx), tt.cos(srdotx)], 2)
        M = (phi_x*self.beta_ss[:, None, :]).sum(-1)
        phi_x_L = tt.stack([
            solve_lower_triangular(self.Lmm[i], phi_x[i].T)
            for i in range(odims)])
        S = sn2[:, None]*(1 + (sf2M[:, None])*(phi_x_L**2).sum(-2)) + 1e-6
        return M, S
    # precompute some variables
    srdotx = self.sr.dot(mx)
    srdotSx = self.sr.dot(Sx)
    srdotSxdotsr = tt.sum(srdotSx*self.sr, 2)
    e = tt.exp(-0.5*srdotSxdotsr)
    cos_srdotx = tt.cos(srdotx)
    sin_srdotx = tt.sin(srdotx)
    cos_srdotx_e = cos_srdotx*e
    sin_srdotx_e = sin_srdotx*e
    # compute the mean vector
    mphi = tt.horizontal_stack(sin_srdotx_e, cos_srdotx_e)  # E x 2*Ms
    M = tt.sum(mphi*self.beta_ss, 1)
    # input output covariance
    mx_c = mx.dimshuffle(0, 'x')
    sin_srdotx_e_r = sin_srdotx_e.dimshuffle(0, 'x', 1)
    cos_srdotx_e_r = cos_srdotx_e.dimshuffle(0, 'x', 1)
    srdotSx_tr = srdotSx.transpose(0, 2, 1)
    c = tt.concatenate([mx_c*sin_srdotx_e_r + srdotSx_tr*cos_srdotx_e_r,
                        mx_c*cos_srdotx_e_r - srdotSx_tr*sin_srdotx_e_r],
                       axis=2)  # E x D x 2*Ms
    beta_ss_r = self.beta_ss.dimshuffle(0, 'x', 1)
    # input output covariance (notice this is not premultiplied by the
    # input covariance inverse)
    V = tt.sum(c*beta_ss_r, 2).T - tt.outer(mx, M)
    srdotSxdotsr_c = srdotSxdotsr.dimshuffle(0, 1, 'x')
    srdotSxdotsr_r = srdotSxdotsr.dimshuffle(0, 'x', 1)
    M2 = tt.zeros((odims, odims))
    # initialize indices (upper triangle; symmetry fills the rest later)
    triu_indices = np.triu_indices(odims)
    indices = [tt.as_index_variable(idx) for idx in triu_indices]

    def second_moments(i, j, M2, beta, iA, sn2, sf2M, sr, srdotSx,
                       srdotSxdotsr_c, srdotSxdotsr_r,
                       sin_srdotx, cos_srdotx, *args):
        # compute the second moments of the spectrum feature vectors
        siSxsj = srdotSx[i].dot(sr[j].T)  # Ms x Ms
        sijSxsij = -0.5*(srdotSxdotsr_c[i] + srdotSxdotsr_r[j])
        em = tt.exp(sijSxsij+siSxsj)      # MsxMs
        ep = tt.exp(sijSxsij-siSxsj)      # MsxMs
        si = sin_srdotx[i]                # Msx1
        ci = cos_srdotx[i]                # Msx1
        sj = sin_srdotx[j]                # Msx1
        cj = cos_srdotx[j]                # Msx1
        sicj = tt.outer(si, cj)           # MsxMs
        cisj = tt.outer(ci, sj)           # MsxMs
        sisj = tt.outer(si, sj)           # MsxMs
        cicj = tt.outer(ci, cj)           # MsxMs
        sm = (sicj-cisj)*em
        sp = (sicj+cisj)*ep
        cm = (sisj+cicj)*em
        cp = (cicj-sisj)*ep
        # Populate the second moment matrix of the feature vector
        Q_up = tt.concatenate([cm-cp, sm+sp], axis=1)
        Q_lo = tt.concatenate([sp-sm, cm+cp], axis=1)
        Q = tt.concatenate([Q_up, Q_lo], axis=0)
        # Compute the second moment of the output; diagonal entries get
        # the extra noise + trace correction term
        m2 = 0.5*matrix_dot(beta[i], Q, beta[j].T)
        m2 = theano.ifelse.ifelse(
            tt.eq(i, j),
            m2 + sn2[i]*(1.0 + sf2M[i]*tt.sum(self.iA[i]*Q)) + 1e-6,
            m2)
        M2 = tt.set_subtensor(M2[i, j], m2)
        return M2

    nseq = [self.beta_ss, self.iA, sn2, sf2M, self.sr, srdotSx,
            srdotSxdotsr_c, srdotSxdotsr_r, sin_srdotx, cos_srdotx,
            self.Lmm]
    if unroll_scan:
        from lasagne.utils import unroll_scan
        [M2_] = unroll_scan(second_moments, indices, [M2], nseq,
                            len(triu_indices[0]))
        updts = {}
    else:
        M2_, updts = theano.scan(fn=second_moments, sequences=indices,
                                 outputs_info=[M2], non_sequences=nseq,
                                 allow_gc=False,
                                 name="%s>M2_scan" % (self.name))
    # keep only the final accumulated second-moment matrix
    M2 = M2_[-1]
    # mirror the upper triangle onto the lower triangle
    M2 = M2 + tt.triu(M2, k=1).T
    S = M2 - tt.outer(M, M)
    return M, S, V
# Hyperpriors for mixture components' means/cov matrices mus = [pm.MvNormal('mu_'+str(k), mu=np.zeros(D,dtype=np.float32), cov=10000*np.eye(D), shape=(D,)) for k in range(K)] taus = [] sd_dist = pm.HalfCauchy.dist(beta=10000) for k in range(K): packed_chol = pm.LKJCholeskyCov('packed_chol'+str(k), n=D, eta=1, sd_dist=sd_dist) chol = pm.expand_packed_triangular(n=D, packed=packed_chol) invchol = solve_lower_triangular(chol,np.eye(D)) taus.append(tt.dot(invchol.T,invchol)) # Mixture density pi = pm.Dirichlet('pi',a=np.ones(K),shape=(K,)) B = pm.DensityDist('B', logp_gmix(mus,pi,taus), shape=(n_samples,D)) Y_hat = tt.sum(X[:,:,np.newaxis]*B.reshape((n_samples,D//2,2)),axis=1) # Model error err = pm.HalfCauchy('err',beta=10) # Data likelihood Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err*np.eye(2), observed=Y) with model:
def inv(self, inputs, outputs, noise=False):
    """Whiten outputs with the cholesky factor of the kernel at inputs.

    When noise is True the noisy covariance is used instead of the
    noise-free kernel covariance.
    """
    source = self.noisy if noise else self.kernel
    chol = cholesky_robust(source.cov(inputs))
    return tsl.solve_lower_triangular(chol, outputs)
# NOTE(review): this `return` closes a logp factory whose `def` is above
# this chunk
        return logp_

with pm.Model() as model:
    # Hyperpriors for mixture components' means/cov matrices
    mu = pm.MvNormal('mu', mu=np.zeros(D, dtype=np.float32),
                     cov=10000 * np.eye(D), shape=(D, ))
    sd_dist = pm.HalfCauchy.dist(beta=10000)
    # LKJ prior on the cholesky factor of the component covariance
    packed_chol = pm.LKJCholeskyCov('packed_chol', n=D, eta=1,
                                    sd_dist=sd_dist)
    chol = pm.expand_packed_triangular(n=D, packed=packed_chol)
    # precision matrix: tau = (L^-1)' (L^-1) = (L L')^-1
    invchol = solve_lower_triangular(chol, np.eye(D))
    tau = tt.dot(invchol.T, invchol)
    # Mixture density
    B = pm.DensityDist('B', logp_g(mu, tau), shape=(n_samples, D))
    # linear response: contract the design matrix with reshaped coefficients
    Y_hat = tt.sum(X[:, :, np.newaxis] * B.reshape((n_samples, D // 2, 2)),
                   axis=1)
    # Model error
    err = pm.HalfCauchy('err', beta=10)
    # Data likelihood
    Y_logp = pm.MvNormal('Y_logp', mu=Y_hat, cov=err * np.eye(2),
                         observed=Y)
# fit by variational inference; the call continues beyond this chunk
with model:
    approx = pm.variational.inference.fit(
def create_model(self):
    """Build the multi-fidelity sparse GP classification model in PyMC3.

    Low- and high-fidelity outputs share the low-fidelity kernel cov_L,
    scaled by `delta`, with an additional discrepancy kernel cov_H; the
    latent function is parameterized through inducing points (FITC-style)
    and pushed through a logistic link to a Bernoulli likelihood.
    """
    with pm.Model() as self.model:
        # Again, f_sample is just a dummy variable
        self.mean = pm.gp.mean.Zero()
        # covariance function (low fidelity)
        l_L = pm.Gamma("l_L", alpha=2, beta=2, shape = self.dim)
        # informative, positive normal prior on the period
        eta_L = pm.HalfNormal("eta_L", sd=5)
        self.cov_L = eta_L * pm.gp.cov.ExpQuad(self.dim, l_L)
        # covariance function (high-fidelity discrepancy)
        l_H = pm.Gamma("l_H", alpha=2, beta=2, shape = self.dim)
        # scale coupling the low-fidelity kernel into the high-fidelity one
        delta = pm.Normal("delta", sd=10)
        # informative, positive normal prior on the period
        eta_H = pm.HalfNormal("eta_H", sd=5)
        self.cov_H = eta_H * pm.gp.cov.ExpQuad(self.dim, l_H)
        ###############################################################
        # compute Kuu (block covariance over the inducing inputs)
        K_LLu = self.cov_L(self.X_Lu)
        K_HHu = delta**2*self.cov_L(self.X_Hu) + self.cov_H(self.X_Hu)
        K_LHu = delta*self.cov_L(self.X_Lu, self.X_Hu)
        K1 = tt.concatenate([K_LLu, K_LHu], axis = 1)
        K2 = tt.concatenate([K_LHu.T, K_HHu], axis = 1)
        self.Kuu= pm.gp.util.stabilize(tt.concatenate([K1,K2], axis = 0))
        ##############################################################
        # compute Kuf (cross covariance inducing -> data)
        K_LLuf = self.cov_L(self.X_Lu, self.X_L)  # uL x L
        K_HHuf = delta**2*self.cov_L(self.X_Hu, self.X_H) \
            + self.cov_H(self.X_Hu, self.X_H)  # uH x H
        K_LHuf = delta*self.cov_L(self.X_Lu, self.X_H)  # uL x H
        K_HLuf = delta*self.cov_L(self.X_Hu, self.X_L)  # uH x L
        K1 = tt.concatenate([K_LLuf, K_LHuf], axis = 1)
        K2 = tt.concatenate([K_HLuf, K_HHuf], axis = 1)
        self.Kuf= pm.gp.util.stabilize(tt.concatenate([K1,K2], axis = 0))
        ##############################################################
        self.Luu = tt.slinalg.cholesky(self.Kuu)
        # whitened inducing values: u = Luu v, v ~ N(0, I)
        vu = pm.Normal("u_rotated_", mu=0.0, sd=1.0,
                       shape=pm.gp.util.infer_shape(self.Xu))
        u = pm.Deterministic("u", self.Luu.dot(vu))
        Luuinv_u = solve_lower_triangular(self.Luu,u)
        A = solve_lower_triangular(self.Luu, self.Kuf)
        # diag(Qff) = diag(Kfu Kuu^-1 Kuf)
        self.Qffd = tt.sum(A * A, 0)
        K_LLff = self.cov_L(self.X_L, diag = True)
        # NOTE(review): K_HHff is evaluated at the inducing inputs X_Hu
        # while K_LLff uses the data inputs X_L — verify X_Hu is not a
        # typo for X_H here
        K_HHff = delta**2*self.cov_L(self.X_Hu, diag = True) \
            + self.cov_H(self.X_Hu, diag = True)
        Kffd = tt.concatenate([K_LLff, K_HHff])
        # FITC diagonal correction, clipped at zero, plus observation noise
        self.Lamd = tt.clip(Kffd - self.Qffd , 0.0, np.inf) + self.sigma**2
        v = pm.Normal("fp_rotated_", mu=0.0, sd=1.0,
                      shape=pm.gp.util.infer_shape(self.X))
        # latent function values, then squashed to probabilities
        fp = pm.Deterministic("fp", tt.dot(tt.transpose(A), Luuinv_u)
                              + tt.sqrt(self.Lamd)*v)
        p = pm.Deterministic("p", pm.math.invlogit(fp))
        y = pm.Bernoulli("y", p=p, observed=self.Y)