def construct_posterior(N, conjugate_parameters, prior_points, partition, tables):
    posterior = Posterior(partition, prior_points)
    alphas, betas = conjugate_parameters
    alphas = numpy.array([alphas])
    betas = numpy.array([betas])
    beta_table, alpha_table = tables
    t = numpy.dot(numpy.array([1.] * (N - 1)),
                  (alphas.transpose() * alpha_table + betas.transpose() * beta_table))
    t = lognormalize(t)
    posterior.update(t)
    return posterior
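# The snippet above relies on a `lognormalize` helper; assuming it performs a standard
# log-sum-exp normalisation of the log-scores before they are handed to `posterior.update`,
# here is a minimal self-contained sketch of that assumed behaviour (names are illustrative
# only, not part of the original code base):
import numpy as np
from scipy.special import logsumexp

def lognormalize_sketch(log_scores):
    """Shift log-scores so that exp(result) sums to one (assumed behaviour of lognormalize)."""
    return log_scores - logsumexp(log_scores)

# e.g. np.exp(lognormalize_sketch(np.array([0.0, 1.0, 2.0]))).sum() -> 1.0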
def multinomial(cls, inputs, outputs, buckets=None, frequencies=None, zero=0.0):
    ''' Create a Naive Bayes Classifier with a Multinomial A Priori distribution '''
    print("Training Naive Bayes Classifier...")
    start = time.time()

    # Determine Priori and Posterior lambda functions:
    posterior = Posterior.immutable(inputs, outputs, zero=zero)
    priori = Priori.multinomial(outputs, frequencies=frequencies, zero=zero)

    print("Finished Training Naive Bayes Classifier (%.2fs)" % (time.time() - start))

    # Determine the classifier's classes:
    buckets = set(outputs) if buckets is None else buckets

    # Create & return the classifier
    return cls(priori, posterior, buckets)
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
    """
    Returns a Posterior class containing essential quantities of the posterior
    """
    YYT_factor = self.get_YYTfactor(Y)

    K = kern.K(X)
    Ky = K.copy()
    diag.add(Ky, likelihood.gaussian_variance(Y_metadata))

    Wi, LW, LWi, W_logdet = pdinv(Ky)
    alpha, _ = dpotrs(LW, YYT_factor, lower=1)

    log_marginal = 0.5 * (-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))

    dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
    dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)

    return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {
        'dL_dK': dL_dK,
        'dL_dthetaL': dL_dthetaL
    }
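# The exact-inference routine above computes the textbook GP-regression quantities: the
# Cholesky factor of K + sigma^2 I, the weights alpha = (K + sigma^2 I)^{-1} Y, and the
# Gaussian log marginal likelihood. A NumPy-only sketch of the same algebra, assuming an
# RBF kernel and with no dependence on the Posterior class or the GPy linalg helpers:
import numpy as np

def exact_gp_sketch(X, Y, lengthscale=1.0, signal_var=1.0, noise_var=0.1):
    """Exact GP regression quantities (cf. Rasmussen & Williams, Algorithm 2.1)."""
    sq_dists = np.square(X[:, None, :] - X[None, :, :]).sum(-1)
    K = signal_var * np.exp(-0.5 * sq_dists / lengthscale ** 2)   # assumed RBF kernel
    Ky = K + noise_var * np.eye(len(X))
    L = np.linalg.cholesky(Ky)                                    # plays the role of LW above
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, Y))           # plays the role of alpha above
    log_marginal = (-0.5 * np.sum(Y * alpha)
                    - Y.shape[1] * np.sum(np.log(np.diag(L)))
                    - 0.5 * Y.size * np.log(2 * np.pi))
    return L, alpha, log_marginal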
def nllcost(gp, Xt, Yt, log_sf2, log_rsn, log_W, Ht):
    sf2 = numpy.exp(log_sf2)
    noise = numpy.exp(log_sf2 + log_rsn)
    W = numpy.exp(log_W)
    if not numpy.isfinite(numpy.r_[sf2, noise, W]).all():
        return numpy.inf
    kernel = gp.KernelFun(sf2, W)
    Ktt = kernel(Xt, Xt)
    if not numpy.isfinite(Ktt).all():
        return numpy.inf
    post = Posterior(Ktt, Yt, noise, Ht)
    gp.post = post
    return gp._loolik(Ktt, Yt, Ht)
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
    num_data, output_dim = Y.shape
    assert output_dim == 1, "ep in 1D only (for now!)"

    K = kern.K(X)

    if self._ep_approximation is None:
        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = \
            self.expectation_propagation(K, Y, likelihood, Y_metadata)
    else:
        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

    Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1. / tau_tilde))
    alpha, _ = dpotrs(LW, mu_tilde, lower=1)

    log_marginal = 0.5 * (-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde))  # TODO: add log Z_hat?

    dL_dK = 0.5 * (tdot(alpha[:, None]) - Wi)
    dL_dthetaL = np.zeros(likelihood.size)  # TODO: derivatives of the likelihood parameters

    return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {
        'dL_dK': dL_dK,
        'dL_dthetaL': dL_dthetaL
    }
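# The EP routine above reuses the exact-GP machinery by treating the site parameters as
# pseudo-data: given tau_tilde and mu_tilde, the approximate posterior equals GP regression
# on mu_tilde with heteroscedastic noise 1/tau_tilde. A dense NumPy sketch of that identity
# (function and argument names are illustrative, not part of the library):
import numpy as np

def ep_posterior_from_sites(K, mu_tilde, tau_tilde):
    """Gaussian posterior implied by EP site parameters."""
    S_tilde = np.diag(1.0 / tau_tilde)              # site (pseudo-)noise covariance
    Ky = K + S_tilde
    alpha = np.linalg.solve(Ky, mu_tilde)           # same role as alpha above
    post_mean = K @ alpha
    post_cov = K - K @ np.linalg.solve(Ky, K)
    return post_mean, post_cov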
def optimize(gp, Xt, Yt, HPini=None, nelderMeadIters=50):
    if gp.KernelFun is None:
        gp.KernelFun = KernelSE
    KernelFun = gp.KernelFun

    if HPini is None:
        HPiniK = KernelFun.defaultHP(Xt)
        HPini = numpy.log(numpy.r_[Yt.var(), 1e-2, HPiniK])

    if gp.Basis is None:
        gp.Basis = BasisQuad()
    Basis = gp.Basis
    Ht = Basis(Xt) if Basis is not None else None

    f = lambda hp: nllcost(gp, Xt, Yt, hp[0], hp[1], hp[2:], Ht)

    # Start with a few Nelder-Mead iterations
    res = minimize(f, HPini, method='Nelder-Mead',
                   options={'maxiter': nelderMeadIters, 'disp': False})
    hpopt = res.x

    # Refine with a gradient-based search
    res = minimize(f, hpopt, method='SLSQP', options={'disp': False})
    HP = res.x

    kernel = KernelFun(numpy.exp(HP[0]), numpy.exp(HP[2:]))
    noise = numpy.exp(HP[0] + HP[1])
    Ktt = kernel(Xt, Xt)
    post = Posterior(Ktt, Yt, noise, Ht)
    return post, kernel, HP, noise, res.fun
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
    """
    Returns a Posterior class containing essential quantities of the posterior
    """
    # Compute K
    K = kern.K(X)

    # Find the mode
    if self.bad_fhat or self.first_run:
        Ki_f_init = np.zeros_like(Y)
        self.first_run = False  # was a no-op local assignment; the flag is presumably meant to persist
    else:
        Ki_f_init = self._previous_Ki_fhat

    f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
    self.f_hat = f_hat
    self.Ki_fhat = Ki_fhat
    self.K = K.copy()

    # Compute the Hessian and other variables at the mode
    log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(
        f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)

    self._previous_Ki_fhat = Ki_fhat.copy()
    return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {
        'dL_dK': dL_dK,
        'dL_dthetaL': dL_dthetaL
    }
meshwidth = 1. / 32.
variance = 1e-4
fem_ip = FEMInverseProblem(num_true_inputs, eval_pts, meshwidth, variance)
# print(fem_ip.locations)
# print(fem_ip.observations)
# print(fem_ip.true_observations)
# print(fem_ip.variance)

mean_fct = ZeroMean()
cov_fct = MaternCov(2.5)
gp = GaussianProcess(mean_fct, cov_fct)

num_design_pts = 25
design_ptset = Mesh1d.construct(num_design_pts)

posterior = Posterior(fem_ip)
approx_post = ApproximatePosterior(posterior, gp)
# print(approx_post.potential)
approx_post.approximate_likelihood(design_ptset)
# print(approx_post.potential)

# gp_data = ApproxDataPotential(design_ptset, 1, posterior)
# cond_gp = ConditionedGaussianProcess(gp, gp_data)
# print(gp_data.locations, gp_data.observations)

import matplotlib.pyplot as plt

# gp_v = GPVisual(approx_post.cond_gp)
# gp_v.addplot_mean()
# gp_v.addplot_deviation()
def inference_likelihood(self, kern, X, Z, likelihood, Y):
    """
    The first phase of inference:
    Compute: log-likelihood, dL_dKmm
    Cached intermediate results: Kmm, KmmInv,
    """
    num_data, output_dim = Y.shape
    input_dim = Z.shape[0]

    if self.mpi_comm is not None:
        num_data_all = np.array(num_data, dtype=np.int32)
        self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
        num_data = num_data_all

    uncertain_inputs = isinstance(X, VariationalPosterior)

    # see whether we've got a different noise variance for each datum
    beta = 1. / np.fmax(likelihood.variance, 1e-6)
    het_noise = beta.size > 1
    if het_noise:
        self.batchsize = 1

    psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(
        kern, X, Z, Y, beta, uncertain_inputs)

    #======================================================================
    # Compute Common Components
    #======================================================================

    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm, maxtries=100)

    LmInvPsi2LmInvT = backsub_both_sides(Lm, psi2_full, transpose='right')
    Lambda = np.eye(Kmm.shape[0]) + LmInvPsi2LmInvT
    LL = jitchol(Lambda, maxtries=100)
    logdet_L = 2. * np.sum(np.log(np.diag(LL)))

    b = dtrtrs(LL, dtrtrs(Lm, psi1Y_full.T)[0])[0]
    bbt = np.square(b).sum()
    v = dtrtrs(Lm, dtrtrs(LL, b, trans=1)[0], trans=1)[0]

    tmp = -backsub_both_sides(LL, tdot(b) + output_dim * np.eye(input_dim), transpose='left')
    dL_dpsi2R = backsub_both_sides(Lm, tmp + output_dim * np.eye(input_dim), transpose='left') / 2.

    # Cache intermediate results
    self.midRes['dL_dpsi2R'] = dL_dpsi2R
    self.midRes['v'] = v

    #======================================================================
    # Compute log-likelihood
    #======================================================================
    if het_noise:
        logL_R = -np.log(beta).sum()
    else:
        logL_R = -num_data * np.log(beta)
    logL = -(output_dim * (num_data * log_2_pi + logL_R + psi0_full - np.trace(LmInvPsi2LmInvT))
             + YRY_full - bbt) / 2. - output_dim * logdet_L / 2.

    #======================================================================
    # Compute dL_dKmm
    #======================================================================
    dL_dKmm = dL_dpsi2R - output_dim * backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left') / 2.

    #======================================================================
    # Compute the Posterior distribution of inducing points p(u|Y)
    #======================================================================
    if not self.Y_speedup or het_noise:
        wd_inv = backsub_both_sides(
            Lm,
            np.eye(input_dim) - backsub_both_sides(LL, np.identity(input_dim), transpose='left'),
            transpose='left')
        post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
    else:
        post = None

    #======================================================================
    # Compute dL_dthetaL for uncertain input and non-het noise
    #======================================================================
    if not het_noise:
        dL_dthetaL = (YRY_full * beta + beta * output_dim * psi0_full - num_data * output_dim * beta) / 2. \
                     - beta * (dL_dpsi2R * psi2_full).sum() - beta * (v.T * psi1Y_full).sum()
        self.midRes['dL_dthetaL'] = dL_dthetaL

    return logL, dL_dKmm, post
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
    _, output_dim = Y.shape
    uncertain_inputs = isinstance(X, VariationalPosterior)

    # see whether we've got a different noise variance for each datum
    beta = 1. / np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
    # VVT_factor is a matrix such that tdot(VVT_factor) = VVT... this is for efficiency!
    #self.YYTfactor = self.get_YYTfactor(Y)
    #VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
    het_noise = beta.size > 1
    if beta.ndim == 1:
        beta = beta[:, None]
    VVT_factor = beta * Y
    trYYT = self.get_trYYT(Y)

    # do the inference:
    num_inducing = Z.shape[0]
    num_data = Y.shape[0]

    # kernel computations, using BGPLVM notation
    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    if Lm is None:
        Lm = jitchol(Kmm)

    # The rather complex computations of A and the psi stats
    if uncertain_inputs:
        psi0 = kern.psi0(Z, X)
        psi1 = kern.psi1(Z, X)
        if het_noise:
            psi2_beta = np.sum([kern.psi2(Z, X[i:i + 1, :]) * beta_i
                                for i, beta_i in enumerate(beta)], 0)
        else:
            psi2_beta = kern.psi2(Z, X) * beta
        LmInv = dtrtri(Lm)
        A = LmInv.dot(psi2_beta.dot(LmInv.T))
    else:
        psi0 = kern.Kdiag(X)
        psi1 = kern.K(X, Z)
        tmp = psi1 * np.sqrt(beta)  # beta already broadcasts, so het and homoscedastic cases coincide
        tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
        A = tdot(tmp)

    # factor B
    B = np.eye(num_inducing) + A
    LB = jitchol(B)
    psi1Vf = np.dot(psi1.T, VVT_factor)

    # back-substitute C into psi1Vf
    tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
    _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
    tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
    Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    # data fit and derivative of L w.r.t. Kmm
    delit = tdot(_LBi_Lmi_psi1Vf)
    data_fit = np.trace(delit)
    DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
    if dL_dKmm is None:
        delit = -0.5 * DBi_plus_BiPBi
        delit += -0.5 * B * output_dim
        delit += output_dim * np.eye(num_inducing)
        # Compute dL_dKmm
        dL_dKmm = backsub_both_sides(Lm, delit)

    # derivatives of L w.r.t. psi
    dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(
        num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf,
        DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs)

    # log marginal likelihood
    log_marginal = _compute_log_marginal_likelihood(
        likelihood, num_data, output_dim, beta, het_noise,
        psi0, A, LB, trYYT, data_fit, Y)

    # noise derivatives
    dL_dR = _compute_dL_dR(
        likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi,
        Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y, VVT_factor)
    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR, Y_metadata)

    # put the gradients in the right places
    if uncertain_inputs:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dpsi0': dL_dpsi0,
                     'dL_dpsi1': dL_dpsi1,
                     'dL_dpsi2': dL_dpsi2,
                     'dL_dthetaL': dL_dthetaL}
    else:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dKdiag': dL_dpsi0,
                     'dL_dKnm': dL_dpsi1,
                     'dL_dthetaL': dL_dthetaL}

    # get sufficient things for posterior prediction
    # TODO: do we really want to do this in the loop?
    if VVT_factor.shape[1] == Y.shape[1]:
        woodbury_vector = Cpsi1Vf  # == Cpsi1V
    else:
        # fall back to recomputing the woodbury vector from Y directly
        # (not expected to be reached with VVT_factor = beta * Y)
        psi1V = np.dot(Y.T * beta, psi1).T
        tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
        tmp, _ = dpotrs(LB, tmp, lower=1)
        woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    Bi, _ = dpotri(LB, lower=1)
    symmetrify(Bi)
    Bi = -Bi
    diag.add(Bi, 1)

    woodbury_inv = backsub_both_sides(Lm, Bi)

    # construct a posterior object
    post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                     K=Kmm, mean=None, cov=None, K_chol=Lm)
    return post, log_marginal, grad_dict
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
    assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."

    num_inducing, _ = Z.shape
    num_data, output_dim = Y.shape

    # make sure the noise is not heteroscedastic
    beta = 1. / likelihood.gaussian_variance(Y_metadata)
    if beta.size > 1:
        raise NotImplementedError("no hetero noise with this implementation of DTC")

    Kmm = kern.K(Z)
    Knn = kern.Kdiag(X)
    Knm = kern.K(X, Z)
    U = Knm
    Uy = np.dot(U.T, Y)

    # factor Kmm
    Kmmi, L, Li, _ = pdinv(Kmm)

    # Compute A
    LiUTbeta = np.dot(Li, U.T) * np.sqrt(beta)
    A_ = tdot(LiUTbeta)
    trace_term = -0.5 * (np.sum(Knn) * beta - np.trace(A_))
    A = A_ + np.eye(num_inducing)

    # factor A
    LA = jitchol(A)

    # back-substitute to get b, P, v
    tmp, _ = dtrtrs(L, Uy, lower=1)
    b, _ = dtrtrs(LA, tmp * beta, lower=1)
    tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
    v, _ = dtrtrs(L, tmp, lower=1, trans=1)
    tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
    P = tdot(tmp.T)

    # compute log marginal
    log_marginal = -0.5 * num_data * output_dim * np.log(2 * np.pi) + \
                   -np.sum(np.log(np.diag(LA))) * output_dim + \
                   0.5 * num_data * output_dim * np.log(beta) + \
                   -0.5 * beta * np.sum(np.square(Y)) + \
                   0.5 * np.sum(np.square(b)) + \
                   trace_term

    # Compute dL_dKmm
    vvT_P = tdot(v.reshape(-1, 1)) + P
    LAL = Li.T.dot(A).dot(Li)
    dL_dK = Kmmi - 0.5 * (vvT_P + LAL)

    # Compute dL_dU
    vY = np.dot(v.reshape(-1, 1), Y.T)
    #dL_dU = vY - np.dot(vvT_P, U.T)
    dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
    dL_dU *= beta

    # compute dL_dR
    Uv = np.dot(U, v)
    dL_dR = 0.5 * (np.sum(U * np.dot(U, P), 1) - 1. / beta + np.sum(np.square(Y), 1)
                   - 2. * np.sum(Uv * Y, 1) + np.sum(np.square(Uv), 1)) * beta**2
    dL_dR -= beta * trace_term / num_data

    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)

    grad_dict = {'dL_dKmm': dL_dK,
                 'dL_dKdiag': np.zeros_like(Knn) - 0.5 * beta,
                 'dL_dKnm': dL_dU.T,
                 'dL_dthetaL': dL_dthetaL}

    # construct a posterior object
    post = Posterior(woodbury_inv=Kmmi - P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

    return post, log_marginal, grad_dict
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
    num_data, output_dim = Y.shape
    assert output_dim == 1, "ep in 1D only (for now!)"

    Kmm = kern.K(Z)
    Kmn = kern.K(Z, X)

    if self._ep_approximation is None:
        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = \
            self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
    else:
        mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation

    # see whether we're using variational uncertain inputs
    if isinstance(X, VariationalPosterior):
        uncertain_inputs = True
        psi0 = kern.psi0(Z, X)
        psi1 = Kmn.T  # kern.psi1(Z, X)
        psi2 = kern.psi2(Z, X)
    else:
        uncertain_inputs = False
        psi0 = kern.Kdiag(X)
        psi1 = Kmn.T  # kern.K(X, Z)
        psi2 = None

    _, output_dim = Y.shape

    # the EP site precisions take the place of a per-datum noise precision
    #beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
    beta = tau_tilde
    VVT_factor = beta[:, None] * mu_tilde[:, None]
    trYYT = self.get_trYYT(mu_tilde[:, None])

    # do the inference:
    het_noise = beta.size > 1
    num_inducing = Z.shape[0]
    num_data = Y.shape[0]

    # kernel computations, using BGPLVM notation
    Kmm = kern.K(Z).copy()
    diag.add(Kmm, self.const_jitter)
    Lm = jitchol(Kmm)

    # The rather complex computations of A
    if uncertain_inputs:
        if het_noise:
            psi2_beta = (psi2 * beta.flatten().reshape(num_data, 1, 1)).sum(0)
        else:
            psi2_beta = psi2.sum(0) * beta
        LmInv = dtrtri(Lm)
        A = LmInv.dot(psi2_beta.dot(LmInv.T))
    else:
        if het_noise:
            tmp = psi1 * np.sqrt(beta.reshape(num_data, 1))
        else:
            tmp = psi1 * np.sqrt(beta)
        tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
        A = tdot(tmp)

    # factor B
    B = np.eye(num_inducing) + A
    LB = jitchol(B)
    psi1Vf = np.dot(psi1.T, VVT_factor)

    # back-substitute C into psi1Vf
    tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
    _LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
    tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
    Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    # data fit and derivative of L w.r.t. Kmm
    delit = tdot(_LBi_Lmi_psi1Vf)
    data_fit = np.trace(delit)
    DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
    delit = -0.5 * DBi_plus_BiPBi
    delit += -0.5 * B * output_dim
    delit += output_dim * np.eye(num_inducing)

    # Compute dL_dKmm
    dL_dKmm = backsub_both_sides(Lm, delit)

    # derivatives of L w.r.t. psi
    dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(
        num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf,
        DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs)

    # log marginal likelihood
    log_marginal = _compute_log_marginal_likelihood(
        likelihood, num_data, output_dim, beta, het_noise,
        psi0, A, LB, trYYT, data_fit, VVT_factor)

    dL_dR = _compute_dL_dR(
        likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi,
        Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, mu_tilde[:, None])
    dL_dthetaL = 0  # likelihood.exact_inference_gradients(dL_dR, Y_metadata)

    # put the gradients in the right places
    if uncertain_inputs:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dpsi0': dL_dpsi0,
                     'dL_dpsi1': dL_dpsi1,
                     'dL_dpsi2': dL_dpsi2,
                     'dL_dthetaL': dL_dthetaL}
    else:
        grad_dict = {'dL_dKmm': dL_dKmm,
                     'dL_dKdiag': dL_dpsi0,
                     'dL_dKnm': dL_dpsi1,
                     'dL_dthetaL': dL_dthetaL}

    # get sufficient things for posterior prediction
    # TODO: do we really want to do this in the loop?
    if VVT_factor.shape[1] == Y.shape[1]:
        woodbury_vector = Cpsi1Vf  # == Cpsi1V
    else:
        # fall back to recomputing the woodbury vector from the EP pseudo-targets
        psi1V = np.dot(mu_tilde[:, None].T * beta, psi1).T
        tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
        tmp, _ = dpotrs(LB, tmp, lower=1)
        woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

    Bi, _ = dpotri(LB, lower=1)
    symmetrify(Bi)
    Bi = -Bi
    diag.add(Bi, 1)

    woodbury_inv = backsub_both_sides(Lm, Bi)

    # construct a posterior object
    post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
                     K=Kmm, mean=None, cov=None, K_chol=Lm)
    return post, log_marginal, grad_dict
def cross_validation(sequences, training_method, decoder):
    """
    Performs the 10-fold cross-validation
    Requires an array of dict sequences
    Requires the training function
    Requires a decoder object (Viterbi or Posterior)
    """
    # here we store the total_ac for each cross-validation
    vit_total_ac = np.array([.0] * len(sequences))
    post_total_ac = np.array([.0] * len(sequences))
    vit = Viterbi()
    post = Posterior()

    for i in range(len(sequences)):
        vit_total_scores = np.zeros([4])
        post_total_scores = np.zeros([4])

        # arrays with the sequences for training and for validation
        training_data_array = sequences[:]
        validation_data_array = [training_data_array.pop(i)]

        # merging the arrays into dictionaries
        training_data = merge(training_data_array)
        validation_data = merge(validation_data_array)

        # the training function returns a model
        model = training_method(training_data)

        # do viterbi and posterior prediction on fold i
        for key, sequence in validation_data.items():
            # the sequence from the file
            true_seq = sequence['Z']
            # the sequence decoded using viterbi or posterior and the trained model
            vit_pred_seq = vit.decode(model, sequence['X'])
            post_pred_seq = post.decode(model, sequence['X'])

            # print(key)
            # print("PREDICTED")
            # print(pred_seq)
            # print("TRUE")
            # print(true_seq)

            tp, fp, tn, fn = compare_tm_pred.count(true_seq, vit_pred_seq)
            vit_total_scores += np.array([tp, fp, tn, fn])
            tp, fp, tn, fn = compare_tm_pred.count(true_seq, post_pred_seq)
            post_total_scores += np.array([tp, fp, tn, fn])

            if VERBOSE:
                print(">" + key)
                compare_tm_pred.print_stats(tp, fp, tn, fn)
                print()

        vit_total_ac[i] = compare_tm_pred.compute_stats(*vit_total_scores)[3]
        post_total_ac[i] = compare_tm_pred.compute_stats(*post_total_scores)[3]
        #print total_ac

        if VERBOSE:
            print("Summary 10-fold cross validation over index %i :" % (i))
            # compare_tm_pred.print_stats(*total_scores)
            print()
            print()
            print()
            print("-------------------------------------------------------")
        if DEBUG:
            input("press any key to continue\n")

    print("Overall viterbi result mean: %s, variance: %s" % (np.mean(vit_total_ac), np.var(vit_total_ac)))
    print("Posterior mean: %s, variance %s" % (np.mean(post_total_ac), np.var(post_total_ac)))
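# The Posterior decoder used above is, by assumption, standard forward-backward (posterior)
# decoding: pick the individually most probable state at each position. A self-contained
# NumPy sketch of that algorithm for a generic discrete HMM (pi, A, B and the function name
# are illustrative; this is not the project's Model or Posterior class):
import numpy as np

def posterior_decode_sketch(pi, A, B, obs):
    """pi: (S,) initial probs, A: (S, S) transitions, B: (S, V) emissions, obs: symbol indices."""
    T, S = len(obs), len(pi)
    fwd = np.zeros((T, S))
    scale = np.zeros(T)
    # scaled forward pass
    fwd[0] = pi * B[:, obs[0]]
    scale[0] = fwd[0].sum()
    fwd[0] /= scale[0]
    for t in range(1, T):
        fwd[t] = (fwd[t - 1] @ A) * B[:, obs[t]]
        scale[t] = fwd[t].sum()
        fwd[t] /= scale[t]
    # scaled backward pass
    bwd = np.ones((T, S))
    for t in range(T - 2, -1, -1):
        bwd[t] = (A @ (B[:, obs[t + 1]] * bwd[t + 1])) / scale[t + 1]
    # per-position posteriors and the decoded state path
    gamma = fwd * bwd
    gamma /= gamma.sum(axis=1, keepdims=True)
    return gamma.argmax(axis=1)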
def fit(self, X, Y):
    if self.kernel is None or self.Basis is None:
        raise RuntimeError('you should call autoFit first')
    K = self.kernel(X, X)
    H = self.Basis(X)
    self.post = Posterior(K, Y, self.noise, H)
# outputs.to_project_1_sequences_file_from_posterior_decoding(sequences.get(), probs, 'posterior-decoding-sequences.txt')
# outputs.to_project_1_sequences_file(sequences.get(), probs, 'viterbi-sequences.txt')
# outputs.to_project_1_probs_file(sequences.get(), probs, 'viterbi-probs.txt')

if __name__ == '__main__':
    model = hmm.Model(KEYS)
    model.load(HMMFILE)
    sequences = sequences.Sequences(SEQUENCEFILE)

    # load decoders
    vit = Viterbi()
    post = Posterior()

    # viterbi
    probs = {}
    for key, sequence in sequences.get().items():
        probs[key] = vit.decode(model, sequence)
    outputs.to_project_2_viterbi(sequences.get(), probs, 'pred-test-sequences-project2-viterbi.txt')

    # posterior decoding
    probs = {}
    for key, value in sequences.get().items():
        sequence = {'Z': post.decode(model, value), 'X': value}
        log_joint = compute_hmm(model, sequence)
        probs[key] = (log_joint, sequence['Z'])
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
    num_inducing, _ = Z.shape
    num_data, output_dim = Y.shape

    # make sure the noise is not heteroscedastic
    sigma_n = likelihood.gaussian_variance(Y_metadata)
    if sigma_n.size > 1:
        raise NotImplementedError("no hetero noise with this implementation of FITC")

    Kmm = kern.K(Z)
    Knn = kern.Kdiag(X)
    Knm = kern.K(X, Z)
    U = Knm

    # factor Kmm
    diag.add(Kmm, self.const_jitter)
    Kmmi, L, Li, _ = pdinv(Kmm)

    # compute beta_star, the effective noise precision
    LiUT = np.dot(Li, U.T)
    sigma_star = Knn + sigma_n - np.sum(np.square(LiUT), 0)
    beta_star = 1. / sigma_star

    # Compute and factor A
    A = tdot(LiUT * np.sqrt(beta_star)) + np.eye(num_inducing)
    LA = jitchol(A)

    # back-substitute to get b, P, v
    URiy = np.dot(U.T * beta_star, Y)
    tmp, _ = dtrtrs(L, URiy, lower=1)
    b, _ = dtrtrs(LA, tmp, lower=1)
    tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
    v, _ = dtrtrs(L, tmp, lower=1, trans=1)
    tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
    P = tdot(tmp.T)

    # compute log marginal
    log_marginal = -0.5 * num_data * output_dim * np.log(2 * np.pi) + \
                   -np.sum(np.log(np.diag(LA))) * output_dim + \
                   0.5 * output_dim * np.sum(np.log(beta_star)) + \
                   -0.5 * np.sum(np.square(Y.T * np.sqrt(beta_star))) + \
                   0.5 * np.sum(np.square(b))

    # compute dL_dR
    Uv = np.dot(U, v)
    dL_dR = 0.5 * (np.sum(U * np.dot(U, P), 1) - 1. / beta_star + np.sum(np.square(Y), 1)
                   - 2. * np.sum(Uv * Y, 1) + np.sum(np.square(Uv), 1)) * beta_star**2

    # Compute dL_dKmm
    vvT_P = tdot(v.reshape(-1, 1)) + P
    dL_dK = 0.5 * (Kmmi - vvT_P)
    KiU = np.dot(Kmmi, U.T)
    dL_dK += np.dot(KiU * dL_dR, KiU.T)

    # Compute dL_dU
    vY = np.dot(v.reshape(-1, 1), Y.T)
    dL_dU = vY - np.dot(vvT_P, U.T)
    dL_dU *= beta_star
    dL_dU -= 2. * KiU * dL_dR

    dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)

    grad_dict = {'dL_dKmm': dL_dK,
                 'dL_dKdiag': dL_dR,
                 'dL_dKnm': dL_dU.T,
                 'dL_dthetaL': dL_dthetaL}

    # construct a posterior object
    post = Posterior(woodbury_inv=Kmmi - P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

    return post, log_marginal, grad_dict
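# The central FITC quantity above is the effective noise
#   sigma_star = diag(Knn) + sigma_n - diag(Knm Kmm^{-1} Kmn),
# i.e. the observation noise plus the diagonal of the Nystrom approximation error.
# A small NumPy sketch of just that computation, assuming an RBF kernel and using no GPy
# helpers (function and parameter names are illustrative):
import numpy as np

def fitc_effective_precision(X, Z, sigma_n, lengthscale=1.0, signal_var=1.0, jitter=1e-8):
    """beta_star = 1 / (diag(Knn) + sigma_n - diag(Knm Kmm^{-1} Kmn))."""
    def rbf(A, B):
        d2 = np.square(A[:, None, :] - B[None, :, :]).sum(-1)
        return signal_var * np.exp(-0.5 * d2 / lengthscale ** 2)

    Kmm = rbf(Z, Z) + jitter * np.eye(len(Z))
    Knm = rbf(X, Z)
    Knn_diag = np.full(len(X), signal_var)      # diagonal of Knn for a stationary kernel
    L = np.linalg.cholesky(Kmm)
    LiUT = np.linalg.solve(L, Knm.T)            # matches LiUT in the code above
    sigma_star = Knn_diag + sigma_n - np.square(LiUT).sum(0)
    return 1.0 / sigma_star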