def observe(self, population): lweights = np.array([s.lweight for s in population]) #print(lweights) lweights = lweights - logsumexp(lweights) #+ 1000 #print(lweights) indices = np.array([self.prop2idx[s.prop_obj] for s in population]) for i in range(len(lweights)): prop_idx = indices[i] self.num_samp[prop_idx] = self.num_samp[prop_idx] + 1 self.sum[prop_idx] = logsumexp((self.sum[prop_idx], lweights[i])) self.sqr_sum[prop_idx] = logsumexp((self.sqr_sum[prop_idx], 2*lweights[i])) lnum_samp = log(self.num_samp) self.var = exp(logsumexp([self.sum, self.sqr_sum - lnum_samp], 0) - lnum_samp) #self.var = exp(self.var - logsumexp(self.var)) if self.var.size > 1: tmp = self.var.sum() if tmp == 0 or np.isnan(tmp): prop_prob = np.array([1./self.var.size] * self.var.size) else: prop_prob = (self.var.sum() - self.var) prop_prob = prop_prob/prop_prob.sum()/2 + np.random.dirichlet(1 + self.num_samp)/2 else: prop_prob = np.array([1./self.var.size] * self.var.size) self.prop_dist = categorical(prop_prob)
def _get_predictive_likelihoods(k):
    """Return ``future - past`` log likelihoods for a horizon of ``k`` steps.

    Reads ``scaled_alphal``, ``trans_matrix``, ``cmaxes``, ``aBl`` and
    ``alphal`` from the enclosing scope.
    """
    # Push the scaled forward messages k steps through the transition matrix.
    k_step_transition = np.linalg.matrix_power(trans_matrix, k)
    log_propagated = np.log(scaled_alphal[:-k].dot(k_step_transition))

    # Undo the scaling (cmaxes) and add the log likelihoods k steps ahead.
    log_joint = log_propagated + cmaxes[:-k, None] + aBl[k:]
    log_future = logsumexp(log_joint, axis=1)

    log_past = logsumexp(alphal[:-k], axis=1)
    return log_future - log_past
def compute_forward_messages(optimizer, preorder_node_lst, gen_per_len, subtree_data_likelihoods):
    """Compute normalised log messages passed down each edge of a tree.

    Parameters
    ----------
    optimizer : object
        Must provide ``get_transition_matrix(n_generations)``.
    preorder_node_lst : iterable
        Nodes in pre-order.  Each node exposes ``oid``, ``parent_node``,
        ``edge_length`` and (on parents) ``child_nodes()``.
    gen_per_len : number
        Multiplier converting an edge length into a generation count.
    subtree_data_likelihoods : mapping
        ``{parent_oid: {child_oid: log-likelihood vector}}``.

    Returns
    -------
    dict
        ``{node_oid: normalised log vector}`` for every non-root node that
        received information from its parent or from a sibling subtree.
        Nodes without any such information are omitted.
    """
    node_likelihoods = {}
    for node in preorder_node_lst:
        parent = node.parent_node
        if parent is None:
            # The root has no incoming edge, so there is nothing to compute.
            continue

        n_generations = int(node.edge_length * gen_per_len)
        trans_matrix = numpy.log(optimizer.get_transition_matrix(n_generations)).transpose()

        have_data = False
        if parent.oid in node_likelihoods:
            # Message already computed for the parent (pre-order guarantees
            # parents are visited first).
            trans_matrix += node_likelihoods[parent.oid]
            have_data = True

        if parent.oid in subtree_data_likelihoods:
            sibling_data = subtree_data_likelihoods[parent.oid]
            for sibling in parent.child_nodes():
                if sibling.oid == node.oid:
                    continue
                if sibling.oid in sibling_data:
                    have_data = True
                    trans_matrix += sibling_data[sibling.oid]

        if have_data:
            tot_probs = logsumexp(trans_matrix, axis=1)
            node_likelihoods[node.oid] = tot_probs - logsumexp(tot_probs)
    return node_likelihoods
def magic_function(encoding, train_toks, classifier):
    """Score a two-class ('n' / 'v') classifier on ``train_toks``.

    Returns a 4-tuple ``(acc, loss, grad_n, grad_v)`` where ``acc`` is the
    fraction of tokens whose predicted tag equals the gold tag, ``loss`` is
    the negated mean of the per-token values collected in ``ll``, and the two
    gradients are differences against the values from ``classifier.getemp()``.

    NOTE(review): the source was recovered from a whitespace-collapsed line;
    the loop nesting below is the most natural reading -- confirm.
    """
    V, N, M = encoding.encode(train_toks)
    # Score matrices; only their diagonals are used for per-token scores below.
    Np = (np.dot(N, np.dot(classifier.weight_n(), M.transpose())))
    Vp = (np.dot(V, np.dot(classifier.weight_v(), M.transpose())))
    # Predicted tag per token: 'n' when the n-score is strictly larger.
    aclist = [(lambda x,y: 'n' if x > y else 'v')(x,y) for (x,y) in zip(np.diag(Np), np.diag(Vp))]
    total = 0
    correct = 0
    Z = np.exp(Np) + np.exp(Vp)
    # NOTE(review): nprob and vprob are computed from the same expression
    # (both use Vp); this looks like a copy-paste slip -- verify the intended
    # formula before relying on grad_n / grad_v.
    nprob = sm.logsumexp(Vp)/Z
    vprob = sm.logsumexp(Vp)/Z
    ll = []
    for (tok, tag) in train_toks:
        # Value appended is exp(score_tag) / (exp(score_n) + exp(score_v)),
        # i.e. a probability, not a log probability.
        if tag == 'n':
            ll.append(np.exp(np.log(np.exp(np.diag(Np)[total])) - np.log(np.exp(np.diag(Vp)[total]) + np.exp(np.diag(Np)[total]))))
        elif tag == 'v':
            ll.append(np.exp(np.log(np.exp(np.diag(Vp)[total])) - np.log(np.exp(np.diag(Vp)[total]) + np.exp(np.diag(Np)[total]))))
        if aclist[total] == tag:
            correct += 1
        total += 1
    # Raises ZeroDivisionError when train_toks is empty (total == 0).
    acc = float(correct)/total
    empn, empv = classifier.getemp()
    nfeat = np.dot(np.exp(nprob), empn)
    vfeat = np.dot(np.exp(vprob), empv)
    grad_n = (nfeat - empn)
    grad_v = (vfeat - empv)
    return acc, -float(sum(ll))/len(ll), grad_n, grad_v
def predictive_log_likelihood(self, X_pred, data_index=0, M=100):
    """Monte Carlo estimate of the predictive log likelihood of ``X_pred``.

    ``M`` latent-state trajectories are predicted from the data set at
    ``data_index`` and the emission log likelihood of ``X_pred`` is averaged
    over them in log space.

    :param X_pred: held-out observations; ``X_pred.shape[0]`` is the number
        of predicted time steps.
    :param data_index: index into ``self.data_list``.
    :param M: number of predicted trajectories.
    :return: ``(hll, std_hll)`` -- the estimate and a bootstrap standard
        deviation computed from 100 resamples.
    """
    n_steps = X_pred.shape[0]
    data = self.data_list[data_index]
    emission = self.emission_distn

    cond_mean = emission.conditional_mean(data)
    cond_cov = emission.conditional_cov(data, flat=True)
    z_preds = data["states"].predict_states(
        cond_mean, cond_cov, Tpred=n_steps, Npred=M)

    lls = [emission.log_likelihood({"z": z_preds[..., m], "x": X_pred})
           for m in range(M)]

    # log of the arithmetic mean of the likelihoods
    log_m = np.log(M)
    hll = logsumexp(lls) - log_m

    # Bootstrap error bars: resample the M values with replacement.
    resampled = np.random.choice(lls, size=(100, M), replace=True)
    std_hll = (logsumexp(resampled, axis=1) - log_m).std()
    return hll, std_hll
def transitioncounts(fwdlattice, bwdlattice, framelogprob, log_transmat):
    """Expected transition counts of an HMM from forward/backward lattices.

    Parameters
    ----------
    fwdlattice, bwdlattice, framelogprob : ndarray, shape (n_observations, n_components)
        Log forward messages, log backward messages and per-frame log
        observation likelihoods.
    log_transmat : ndarray, shape (n_components, n_components)
        Log transition matrix.

    Returns
    -------
    ndarray, shape (n_components, n_components)
        ``exp(logsumexp_t lneta[t])`` where ``lneta[t, i, j] = fwd[t, i] +
        log_transmat[i, j] + framelogprob[t+1, j] + bwd[t+1, j] - logprob``.

    Diagnostic values are printed to stdout, as before.
    """
    # scipy.misc.logsumexp was removed from SciPy; prefer scipy.special and
    # fall back only for very old installations.
    try:
        from scipy.special import logsumexp
    except ImportError:
        from scipy.misc import logsumexp

    n_observations, n_components = fwdlattice.shape
    logprob = logsumexp(fwdlattice[n_observations - 1, :])
    print('logprob %s' % (logprob,))

    # Broadcast to (T-1, K, K) instead of a triple Python loop.  Terms are
    # added in the same order as the original scalar expression.
    lneta = (fwdlattice[:-1, :, np.newaxis]
             + log_transmat[np.newaxis, :, :]
             + framelogprob[1:, np.newaxis, :]
             + bwdlattice[1:, np.newaxis, :]
             - logprob)

    print(framelogprob)
    print('fwdlattice[:, 0]')
    print(fwdlattice[:, 0])
    print('logtransmat[0,0]')
    print(log_transmat[0, 0])
    print('framelogprob')
    print(framelogprob[:, 0])
    print('bwdlattice[:, 0]')
    print(bwdlattice[:, 0])
    print('lneta{0,0}')
    print(lneta[:, 0, 0])
    return np.exp(logsumexp(lneta, 0))
def log_weighted_ave(arrs, weights):
    """Weighted average, computed in log space, along the first axis.

    ``arrs`` holds log-domain values stacked along axis 0; ``weights`` are
    non-log weights (they are normalised here).  Returns
    ``log(sum_i w_i * exp(arrs[i]))`` with ``w`` normalised to sum to one.
    """
    stacked = np.array(arrs)
    log_w = np.log(weights)
    log_w = log_w - logsumexp(log_w)
    # Append singleton axes so the weights broadcast against the trailing
    # dimensions of ``stacked``.
    trailing = (1,) * (len(stacked.shape) - 1)
    log_w = log_w.reshape(log_w.shape + trailing)
    return logsumexp(stacked + log_w, axis=0)
def sample_lpost_based(num_samples, initial_particles, proposal_method, population_size = 20):
    """Draw samples by repeated propose / importance-resample rounds.

    Each round picks ancestors, generates ``population_size`` proposals from
    them via ``proposal_method.gen_proposal``, and resamples the proposals
    according to their ``lweight``.  Rounds continue until at least
    ``num_samples`` samples have been collected, so the result may contain
    more than ``num_samples`` entries.

    Returns a pair of arrays: the ``sample`` and the ``lpost`` attribute of
    every collected particle.

    NOTE(review): indentation was lost in the recovered source; the body of
    the final ``for`` loop (in particular whether ``anc.append`` belongs to
    it) should be confirmed against the upstream version.
    """
    rval = []
    anc = proposal_method.process_initial_samples(initial_particles)
    # rval is empty here, so num_initial is always 0.
    num_initial = len(rval)
    while len(rval) - num_initial < num_samples:
        ancest_dist = np.array([a.lpost for a in anc])
        # Ancestors are drawn from a categorical over the normalised log
        # posterior values (i.e. proportional to exp(lpost), not uniformly).
        ancest_dist = categorical(ancest_dist - logsumexp(ancest_dist), p_in_logspace = True)
        pop = [proposal_method.gen_proposal(anc[ancest_dist.rvs()]) for _ in range(population_size)]
        prop_w = np.array([s.lweight for s in pop])
        prop_w = exp(prop_w - logsumexp(prop_w))
        # Importance Resampling
        while True:
            try:
                draws = np.random.multinomial(population_size, prop_w)
                break
            except ValueError:
                # multinomial rejects probabilities that sum above 1 through
                # rounding; renormalise and retry.
                prop_w /= prop_w.sum()
        for idx in range(len(draws)):
            rval.extend([pop[idx]] * draws[idx])
            anc.append(pop[idx])
    return (np.array([s.sample for s in rval]), np.array([s.lpost for s in rval]))
def e_step(x,N,d):
    """Hard-assign each of the first ``N`` rows of ``x`` to one of two components.

    For every point, ``Gaussian(x[i])`` returns a pair ``(t, w)``; the
    responsibility of component 0 is ``num1 / (num1 + num2)`` with
    ``num_k = exp(logsumexp(t[k], b=w[k]))``.  Returns a float array of
    labels: 1 where that responsibility is >= 0.5, otherwise 0.
    ``d`` is accepted but not used here.
    """
    global miu, p, sigma
    soft_p = np.zeros(N)
    for i in range(N):
        t, w = Gaussian(x[i])
        first = np.exp(logsumexp(t[0], b=w[0]))
        second = np.exp(logsumexp(t[1], b=w[1]))
        soft_p[i] = first / (first + second)
    # Threshold the soft assignments in one vectorised step.
    return np.where(soft_p >= 0.5, 1.0, 0.0)
def basis_log_like(beta):
    """Log likelihood of basis ``k`` as a function of ``beta``.

    ``beta[0]`` is added to every remaining entry before normalisation.
    Reads ``self``, ``logW``, ``k`` and ``Zbasis`` from the enclosing scope.
    """
    shifted = beta[0] + beta[1:]
    log_dx = np.log(self._dx)
    # Normalised log basis, scaled by the bin width.
    logBk = shifted - logsumexp(shifted) - log_dx
    logLams = logsumexp(logW[k, :]) + logBk + np.log(self._dt) + log_dx
    return np.sum(logLams * Zbasis[k] - np.exp(logLams))
def recalc_log_gt_posteriors(log_gt_priors, down, up, p_geom, read_counts_array, nalleles, allele_sizes, diploid=False, norm=False):
    """Compute log genotype posteriors under a geometric stutter model.

    Parameters
    ----------
    log_gt_priors : array-like
        Log prior per genotype (``nalleles`` entries, or ``nalleles**2`` when
        ``diploid``).
    down, up : float
        Probabilities of a downward / upward stutter; ``1 - down - up`` is
        the probability of no stutter.
    p_geom : float
        Parameter of the geometric distribution over stutter step sizes.
    read_counts_array : ndarray, shape (nsamples, nalleles)
        Read counts per sample and allele index.
    nalleles : int
    allele_sizes : sequence
        Size of the allele at each index; lower indices are treated as
        "down" and higher indices as "up" relative to the true allele.
    diploid : bool
        Enumerate ordered allele pairs instead of single alleles.  This
        branch reads the module-level ``DEBUG_HAPLOID`` and ``log_one_half``.
    norm : bool
        When true, return the summed per-sample log normaliser instead.

    Returns
    -------
    float or ndarray
        ``sum(logsumexp(LLs, axis=1))`` if ``norm``; otherwise the
        per-sample normalised log posteriors, same shape as ``LLs``.
    """
    stutter_dist = geom(p_geom)
    nsamples = read_counts_array.shape[0]
    log_down, log_eq, log_up = map(numpy.log, [down, 1-down-up, up])

    def step_log_probs(true_allele):
        # Log probability of reading each allele index given ``true_allele``.
        def log_step(x):
            return stutter_dist.logpmf(abs(allele_sizes[x] - allele_sizes[true_allele]))
        return numpy.hstack((
            [log_down + log_step(x) for x in range(0, true_allele)],
            [log_eq],
            [log_up + log_step(x) for x in range(true_allele + 1, nalleles)],
        ))

    if diploid:
        num_gts = nalleles**2
        LLs = numpy.zeros((nsamples, num_gts)) + log_gt_priors
        gtind = 0
        for a1 in range(nalleles):
            for a2 in range(nalleles):
                if a1 != a2 and DEBUG_HAPLOID:
                    # Heterozygous genotypes are disabled in this debug mode.
                    LLs[:, gtind] = -numpy.inf
                    gtind += 1
                    continue
                # Each read comes from either allele with probability 1/2.
                step_probs = numpy.logaddexp(step_log_probs(a1) + log_one_half,
                                             step_log_probs(a2) + log_one_half)
                LLs[:, gtind] += numpy.sum(read_counts_array*step_probs, axis=1)
                gtind += 1
    else:
        LLs = numpy.zeros((nsamples, nalleles)) + log_gt_priors
        for j in range(nalleles):
            LLs[:, j] += numpy.sum(read_counts_array*step_log_probs(j), axis=1)

    log_samp_totals = logsumexp(LLs, axis=1)
    if norm:
        return numpy.sum(log_samp_totals)
    return LLs - log_samp_totals[numpy.newaxis].T
def _whitened_logpdf(self, X, pool=None):
    """Log density of the weighted KDE mixture at ``X``.

    Every callable in ``self._kdes`` is evaluated at ``X`` (``pool`` is
    forwarded) and offset by the matching entry of ``self._logweights``.
    For one-dimensional ``X`` all terms are combined into a single value;
    otherwise the components are combined along axis 0.
    """
    component_terms = []
    for logweight, kde in zip(self._logweights, self._kdes):
        component_terms.append(logweight + kde(X, pool=pool))

    # axis=None collapses everything (single point); axis=0 keeps one value
    # per point.
    reduce_axis = None if len(X.shape) == 1 else 0
    return logsumexp(component_terms, axis=reduce_axis)
def hsmm_messages_backwards_log(
        trans_potentials, initial_state_potential,
        cumulative_obs_potentials, dur_potentials, dur_survival_potentials,
        betal, betastarl,
        left_censoring=False, right_censoring=True):
    """Backward HSMM message passing in log space.

    Parameters
    ----------
    trans_potentials : callable
        ``trans_potentials(t)`` gives the log transition potential at ``t``.
    initial_state_potential : array-like
        Log initial state potential.
    cumulative_obs_potentials : callable
        ``cumulative_obs_potentials(t)`` returns ``(cB, offset)``.
    dur_potentials, dur_survival_potentials : callable
        Log duration and duration-survival potentials at time ``t``.
    betal, betastarl : ndarray, shape (T, n_states)
        Output buffers, filled in place.
    left_censoring : bool
        Not supported; ``NotImplementedError`` is raised after the messages
        have been computed.
    right_censoring : bool
        Add the survival term for segments running past the end of the data.

    Returns
    -------
    (betal, betastarl, normalizer)
    """
    T, _ = betal.shape

    # logaddexp(-inf, -inf) triggers "invalid value" warnings.  errstate
    # restores the caller's floating-point error settings even when an
    # exception propagates (a bare seterr/seterr pair did not).
    with np.errstate(invalid='ignore'):
        betal[-1] = 0.
        for t in range(T - 1, -1, -1):
            cB, offset = cumulative_obs_potentials(t)
            dp = dur_potentials(t)
            betastarl[t] = logsumexp(
                betal[t:t + cB.shape[0]] + cB + dp, axis=0)
            betastarl[t] -= offset
            if right_censoring:
                np.logaddexp(
                    betastarl[t], cB[-1] - offset + dur_survival_potentials(t),
                    out=betastarl[t])
            # At t == 0 this writes betal[-1]; it is reset just below.
            betal[t - 1] = logsumexp(betastarl[t] + trans_potentials(t - 1), axis=1)
        betal[-1] = 0.  # overwritten on last iteration

        if left_censoring:
            raise NotImplementedError
        normalizer = logsumexp(initial_state_potential + betastarl[0])

    return betal, betastarl, normalizer
def predictStar(clfstar, clfgalaxy, X, Xerr, index):
    """Classify one object as star (1) or galaxy (0).

    The posterior ``P(star | X, Xerr)`` is formed in log space from the two
    classifiers' ``logprob_a(X, Xerr)`` outputs and the module-level priors
    ``PStar`` and ``PGalaxy``.

    Parameters
    ----------
    clfstar, clfgalaxy : objects exposing ``logprob_a(X, Xerr)``.
    X, Xerr : feature values and their errors.
    index : printed when ``Xerr`` contains NaN, to identify the object.

    Returns
    -------
    int
        1 when the star posterior is >= 0.5, else 0.

    Raises
    ------
    Exception
        If the posterior is NaN or falls outside [0, 1].
    """
    if np.any(np.isnan(Xerr)):
        # Single-argument call prints identically on Python 2 and 3.
        print(index)

    logcondstar = misc.logsumexp(clfstar.logprob_a(X, Xerr))
    logcondgal = misc.logsumexp(clfgalaxy.logprob_a(X, Xerr))

    # P(star|X) = P(star) p(X|star) / (P(star) p(X|star) + P(gal) p(X|gal))
    fraction = np.log(PStar) + logcondstar \
        - np.logaddexp(np.log(PStar) + logcondstar, np.log(PGalaxy) + logcondgal)
    fraction = np.exp(fraction)

    if np.isnan(fraction):
        raise Exception('Invalid Fractions Nan')
    if fraction > 1 or fraction < 0:
        raise Exception('Invalid Fraction Range: {0}'.format(fraction))

    return 1 if fraction >= 0.5 else 0
def smoother(self, next_state, filtered_state):
    """One backward smoothing step of the switching Kalman filter.

    Parameters
    ----------
    next_state : smoothed state at time t+1 (provides ``model(j)`` and ``M``).
    filtered_state : filtered state at time t (provides ``model(j)`` and ``M``).

    Returns
    -------
    SwitchingKalmanState
        Smoothed state at time t, with log model probabilities in ``M`` and
        one collapsed ``KalmanState`` per model.
    """
    gpb_ = self._init_gpb()
    state = SwitchingKalmanState(n_models=self.n_models)

    for k in range(self.n_models):
        kalman = KalmanFilter(model=self.models[k])
        for j in range(self.n_models):
            # Smoothing step
            (gpb_[k].m[:,j], gpb_[k].P[:,:,j]) = kalman._smoother(
                filtered_state.model(j), next_state.model(j), self.embeds[j][k])

    # Posterior Transition
    # p(s_t=j | s_t+1=k, y_1:T) \approx \propto p(s_t+1=k | s_t=j) * p(s_t=j | y_1:t)
    U = self.log_transmat.T + filtered_state.M
    U = U.T - logsumexp(U, axis=1)
    # p(s_t=j, s_t+1=k | y_1:T) = p(s_t=j | s_t+1=k, y_1:T) * p(s_t+1=k | y_1:T)
    M = U + next_state.M
    # p(s_t=j | y1:T) = \sum_k p(s_t=j, s_t+1=k | y_1:T)
    state.M = logsumexp(M, axis=1)
    # p(s_t+1=k | s_t=j, y_1:T) = p(s_t=j, s_t+1=k | y_1:T) / p(s_t=j | y_1:T)
    W = np.exp(M.T - state.M)  # WARNING: W is W.T in Murphy's paper

    # Collapse step
    for j in range(self.n_models):
        m, P = self._collapse(gpb_[j].m, gpb_[j].P, W[:,j], self.masks[j])
        state._states[j] = KalmanState(mean=m, covariance=P)

    return state
def hsmm_messages_forwards_log(
        trans_potential, initial_state_potential,
        reverse_cumulative_obs_potentials, reverse_dur_potentials,
        reverse_dur_survival_potentials, alphal, alphastarl,
        left_censoring=False, right_censoring=True):
    """Forward HSMM message passing in log space.

    Parameters
    ----------
    trans_potential : callable
        ``trans_potential(t)`` gives the log transition potential at ``t``.
    initial_state_potential : array-like
        Log initial state potential; copied into ``alphastarl[0]``.
    reverse_cumulative_obs_potentials, reverse_dur_potentials : callable
        Log potentials for segments ending at ``t``.
    reverse_dur_survival_potentials : callable
        Accepted for symmetry with the backward pass; not used here.
    alphal, alphastarl : ndarray, shape (T, n_states)
        Output buffers, filled in place.
    left_censoring : bool
        Not supported; raises ``NotImplementedError`` inside the time loop.
    right_censoring : bool
        When true the normalizer is not computed and ``None`` is returned.

    Returns
    -------
    (alphal, alphastarl, normalizer)
    """
    T, _ = alphal.shape

    def fill_alphal(t):
        # Sum over all segments that end exactly at time t.
        cB = reverse_cumulative_obs_potentials(t)
        alphal[t] = logsumexp(
            alphastarl[t+1-cB.shape[0]:t+1] + cB + reverse_dur_potentials(t),
            axis=0)

    alphastarl[0] = initial_state_potential
    for t in range(T - 1):
        fill_alphal(t)
        if left_censoring:
            raise NotImplementedError
        alphastarl[t+1] = logsumexp(
            alphal[t][:,na] + trans_potential(t), axis=0)
    # The last step has no successor, so only alphal is updated.
    fill_alphal(T - 1)

    if not right_censoring:
        normalizer = logsumexp(alphal[T - 1])
    else:
        normalizer = None  # TODO

    return alphal, alphastarl, normalizer
def logpdf(self, pts, pool=None):
    """Evaluate the logpdf of the KDE at `pts`.

    Each component in ``self._kdes`` is evaluated at ``pts`` (``pool`` is
    forwarded) and shifted by its entry in ``self._logweights``.  A
    one-dimensional ``pts`` yields a single value; otherwise the components
    are combined along axis 0.
    """
    weighted_components = []
    for logweight, kde in zip(self._logweights, self._kdes):
        weighted_components.append(logweight + kde(pts, pool=pool))

    single_point = len(pts.shape) == 1
    return logsumexp(weighted_components, axis=None if single_point else 0)
def filter(self, prev_state, observation):
    """One forward filtering step of the switching Kalman filter.

    Parameters
    ----------
    prev_state : filtered state at time t-1 (provides ``model(i)`` and ``M``).
    observation : observation at time t, passed to each Kalman update.

    Returns
    -------
    SwitchingKalmanState
        Filtered state at time t, with log model probabilities in ``M`` and
        one collapsed ``KalmanState`` per model.
    """
    gpb_ = self._init_gpb()
    state = SwitchingKalmanState(n_models=self.n_models)

    # L[i, j] is the third value returned by the Kalman update for the
    # transition from model i to model j.
    L = np.zeros((self.n_models, self.n_models))
    for j in range(self.n_models):
        kalman = KalmanFilter(model=self.models[j])
        for i in range(self.n_models):
            # Prediction step
            pred_state = kalman._filter_prediction(prev_state.model(i), self.embeds[i][j])
            # Update step
            (gpb_[j].m[:,i], gpb_[j].P[:,:,i], L[i,j]) = kalman._filter_update(pred_state, observation)

    # Posterior Transition
    # p(s_t-1=i, s_t=j | y_1:t) \propto L_t(i,j) * p(s_t=j | s_t-1=i) * p(s_t-1=i | y_1:t-1)
    M = L.T + self.log_transmat.T + prev_state.M
    M = M.T - logsumexp(M)
    # p(s_t=j | y_1:t) = \sum_i p(s_t-1=i, s_t=j | y_1:t)
    state.M = logsumexp(M, axis=0)
    # p(s_t-1=i | s_t=j, y_1:t) = p(s_t-1=i, s_t=j | y_1:t) / p(s_t=j | y_1:t)
    W = np.exp(M - state.M)

    # Collapse step
    for j in range(self.n_models):
        m, P = self._collapse(gpb_[j].m, gpb_[j].P, W[:,j], self.masks[j])
        state._states[j] = KalmanState(mean=m, covariance=P)

    return state
def test_mixture_weight_init():
    """Compare mean mixture log likelihood for uniform vs. random weights.

    Loads per-component log likelihood matrices from fixed NLTCS log files
    (one column per component), mixes the training matrix with uniform and
    with random normalised weights, and prints both mean log likelihoods.
    """
    train_m_file = 'nltcs_2015-01-29_18-39-06/train.m.log'
    valid_m_file = 'nltcs_2015-01-29_18-39-06/valid.m.log'
    test_m_file = 'nltcs_2015-01-29_18-39-06/test.m.log'

    logging.basicConfig(level=logging.DEBUG)

    train = dataset.csv_2_numpy(train_m_file, path='', type='float32')
    # valid / test are loaded but not used further in this function.
    valid = dataset.csv_2_numpy(valid_m_file, path='', type='float32')
    test = dataset.csv_2_numpy(test_m_file, path='', type='float32')

    k_components = train.shape[1]

    # Float ones: an integer array divided by its integer sum truncates to
    # all zeros under Python 2 division, which made log(weights) -inf.
    unif_weights = numpy.ones(k_components) / k_components

    rand_weights = numpy.random.rand(k_components)
    rand_weights = rand_weights / rand_weights.sum()

    unif_mixture = logsumexp(train + numpy.log(unif_weights), axis=1).mean()
    rand_mixture = logsumexp(train + numpy.log(rand_weights), axis=1).mean()

    print('UNIF W LL', unif_mixture)
    print('RAND W LL', rand_mixture)
def marginalise(self, parameter, *args, **kwargs):
    """Marginalise over the specified dimension in parameter space.

    Parameters
    ----------
    parameter : str
        Name of the parameter to sum out; its position in ``self.param``
        selects the axis.
    *args
        Only for ``mode='return'``: ``args[0]`` is the log posterior array to
        reduce and ``args[1]`` the matching list of parameter names.
    mode : {'internal', 'return'}, keyword only
        ``'internal'`` (default) updates ``self.posterior``, ``self.param``
        and ``self.par[parameter]['marginalised']`` in place and returns 0.
        ``'return'`` leaves the instance untouched and returns
        ``(posterior, param)``.  Any other value does nothing and returns
        ``None``.

    Raises
    ------
    IndexError
        If ``parameter`` is not present in ``self.param``.
    """
    # scipy.misc.logsumexp was removed from SciPy; prefer scipy.special.
    try:
        from scipy.special import logsumexp
    except ImportError:
        from scipy.misc import logsumexp

    mode = kwargs.get('mode', 'internal')

    # index to marginalise over
    m = np.argwhere(np.array(self.param) == parameter)[0, 0]

    # Sum PDF along one axis and output the result in the required manner
    if mode == 'internal':
        self.posterior = logsumexp(self.posterior, axis=m)
        print(self.posterior.shape)

        # Clean up the parameter array
        self.param = np.delete(np.array(self.param), m)
        self.par[parameter]['marginalised'] = True
        print('Marginalised over %s.' % parameter)
        return 0
    elif mode == 'return':
        posterior = logsumexp(args[0], axis=m)
        param = np.delete(np.array(args[1]), m)
        print('Marginalised over %s.' % parameter)
        return posterior, param
def up_tree_pass(self,X,nodes):
    '''
    Upward pass for one node of the HME tree.

    Calls ``self._prior(X)``, then adds each child's contribution to the
    matching column of ``self.responsibilities`` and stores the row-wise
    log-sum-exp in ``self.normaliser``.

    Parameters:
    -----------
    X: numpy array of size 'n x m'
       Explanatory variables

    nodes: list of size equal number of nodes in HME
       List with all nodes of HME

    Raises:
    -------
    ValueError if the children do not all share a single node type,
    TypeError if that type is neither "expert" nor "gate".
    '''
    self._prior(X)
    children = self.get_childrens(nodes)

    # every child must be of the same kind
    node_types = {child.node_type for child in children}
    if len(node_types) != 1:
        raise ValueError("Children nodes should have the same node type")

    for column, child in enumerate(children):
        kind = child.node_type
        if kind == "expert":
            contribution = child.weights
        elif kind == "gate":
            contribution = logsumexp(child.responsibilities, axis = 1)
        else:
            raise TypeError("Unidentified node type")
        self.responsibilities[:,column] += contribution

    # normaliser in log space, to prevent underflow
    self.normaliser = logsumexp(self.responsibilities, axis = 1)
def plot_pred_lls(results):
    """Bar-plot predictive log likelihoods relative to a homogeneous baseline.

    Relies on module-level ``K``, ``C_train``, ``T_train``, ``C_test``,
    ``T_test``, ``S_test`` and ``N_samples``.  Each entry of ``results`` is a
    5-tuple whose second element holds the predictive log likelihood samples;
    the first half (``N_samples // 2``) is discarded as burn-in.
    """
    # Baseline: one constant rate per process, estimated on the training set.
    homog_rates = np.zeros(K)
    for k in range(K):
        homog_rates[k] = (C_train==k).sum() / T_train

    homog_pll = 0
    for k in range(K):
        homog_pll += -T_test * homog_rates[k]
        homog_pll += (C_test==k).sum() * np.log(homog_rates[k])

    # Plot predictive log likelihoods relative to standard Poisson
    plt.figure()
    for i,result in enumerate(results):
        lls, plls, Weffs, Ps, Ls = result
        plls = plls[N_samples//2:]
        J = len(plls)

        # log of the mean likelihood, then per-event improvement over baseline
        avg_pll = -np.log(J) + logsumexp(plls)
        avg_pll = (avg_pll - homog_pll) / len(S_test)

        # Bootstrap standard deviation from 100 resamples.
        samples = np.random.choice(plls, size=(100, J), replace=True)
        pll_samples = logsumexp((samples - homog_pll)/len(S_test), axis=1) - np.log(J)
        std_hll = pll_samples.std()

        # Single formatted argument: same text on Python 2 and 3.
        print("PLL:  %s  +-  %s" % (avg_pll, std_hll))
        plt.bar(i, avg_pll)

    plt.show()
def compute_LL(sample_read_counts, motif_len, gt_freqs, stutter_model):
    """Log likelihood of the observed reads under a stutter model.

    Parameters
    ----------
    sample_read_counts : dict
        ``{sample: {read_size: count}}``.
    motif_len : int
        Repeat motif length; defines the reading frame of a read size.
    gt_freqs : mapping
        Genotype frequency per allele size (indexed by size).
    stutter_model : object
        Must provide ``get_log_stutter_size_prob(size)``.

    Returns
    -------
    Whatever ``calc_log_likelihood`` returns for the filtered data.

    Samples containing any read outside the most common frame are dropped.
    The normalised genotype priors and stutter probabilities are printed.
    """
    def frame_of(read):
        # Frame in 1..motif_len; a remainder of 0 maps to motif_len.
        remainder = read % motif_len
        return remainder if remainder > 0 else motif_len

    # Determine the most common frame across all samples
    frame_counts = collections.defaultdict(int)
    for sample_counts in sample_read_counts.values():
        for read, count in sample_counts.items():
            frame_counts[frame_of(read)] += count
    best_frame = sorted(frame_counts.items(), key=lambda x: x[1])[-1][0]

    # Filter any samples with out-of-frame reads
    valid_read_counts = [
        sample_counts for sample_counts in sample_read_counts.values()
        if all(frame_of(read) == best_frame for read in sample_counts)
    ]

    # Size of allele for each index, and the inverse lookup
    allele_sizes = sorted({read for sample_counts in valid_read_counts
                           for read in sample_counts})
    allele_indices = {size: idx for idx, size in enumerate(allele_sizes)}

    # Per-sample dictionaries keyed by allele index, plus the widest spread
    # of read sizes seen within any single sample.
    read_counts = []
    max_stutter = 0
    for sample_counts in valid_read_counts:
        sorted_sizes = sorted(sample_counts.keys())
        max_stutter = max(max_stutter, sorted_sizes[-1] - sorted_sizes[0])
        read_counts.append({allele_indices[size]: count
                            for size, count in sample_counts.items()})

    stutter_size = min(5, max_stutter)
    num_stutters = 2*stutter_size + 1

    log_stutter_probs = [stutter_model.get_log_stutter_size_prob(size)
                         for size in range(-stutter_size, stutter_size + 1)]
    log_stutter_probs = numpy.array(log_stutter_probs) - logsumexp(log_stutter_probs)

    log_gt_priors = [numpy.log(gt_freqs[size]) for size in allele_sizes]
    log_gt_priors = numpy.array(log_gt_priors) - logsumexp(log_gt_priors)

    print(numpy.exp(log_gt_priors))
    print(numpy.exp(log_stutter_probs))

    nalleles = len(allele_sizes)
    LL = calc_log_likelihood(log_gt_priors, log_stutter_probs, read_counts,
                             nalleles, num_stutters, allele_sizes)
    return LL
def get_segment_quality_end(self, end_index: int, call_state: int) -> float:
    """Calculates the complement of phred-scaled posterior probability that a site marks the end of a segment.
    This is done by directly summing the probability of the following complementary paths in the log space:

        ...         [end_index]                         [end_index+1]      ...
                    call_state                      =>  call_state
                    (any state except for call_state)   =>  (any state)

    Each path contributes
    ``alpha[t, i] + log_trans[t, i, j] + log_emission[t+1, j] + beta[t+1, j]``
    with ``t = end_index``, ``i`` the state at ``t`` and ``j`` the state at
    ``t+1``.

    Args:
        end_index: right breakpoint index of a segment
        call_state: segment call state index

    Returns:
        a phred-scaled probability
    """
    assert 0 <= end_index < self.num_sites

    all_other_states_list = self.leave_one_out_state_lists[call_state]

    if end_index == self.num_sites - 1:
        # Last site: there is no transition, only the marginal posterior.
        logp = logsumexp(self.log_posterior_prob_tc[self.num_sites - 1, all_other_states_list])
    else:
        next_index = end_index + 1
        # The backward message at next_index must be indexed by the state at
        # next_index (next_state), not by the state at end_index.
        complementary_paths_unnorm_logp = [(self.alpha_tc[end_index, end_state] +
                                            self.log_trans_tcc[end_index, end_state, next_state] +
                                            self.log_emission_tc[next_index, next_state] +
                                            self.beta_tc[next_index, next_state])
                                           for end_state in all_other_states_list
                                           for next_state in self.all_states_list]
        # Staying in call_state means the segment does not end here.
        complementary_paths_unnorm_logp.append((self.alpha_tc[end_index, call_state] +
                                                self.log_trans_tcc[end_index, call_state, call_state] +
                                                self.log_emission_tc[next_index, call_state] +
                                                self.beta_tc[next_index, call_state]))
        logp = logsumexp(np.asarray(complementary_paths_unnorm_logp)) - self.log_data_likelihood

    return logp_to_phred(logp)
def reference_backward(framelogprob, startprob, transmat):
    """Reference (loop-based) HMM backward pass in log space, with tracing.

    Parameters
    ----------
    framelogprob : ndarray, shape (n_observations, n_components)
        Per-frame log observation likelihoods.
    startprob : array-like
        Not used by the backward recursion; kept for a signature parallel to
        the forward pass.
    transmat : ndarray, shape (n_components, n_components)
        Transition probabilities (not logs).

    Returns
    -------
    ndarray, same shape as ``framelogprob``
        Log backward messages; the last row is zero.

    Intermediate values for state 0 are printed to stdout.
    """
    print('\n\n')
    log_transmat = np.log(transmat)
    n_observations, n_components = framelogprob.shape

    bwdlattice = np.zeros_like(framelogprob)
    work_buffer = np.zeros(n_components)

    # Base case: log(1) for every state at the final observation.
    bwdlattice[n_observations - 1, :] = 0.0

    for t in range(n_observations - 2, -1, -1):
        for i in range(n_components):
            for j in range(n_components):
                work_buffer[j] = log_transmat[i, j] + framelogprob[t + 1, j] + bwdlattice[t + 1, j]
                if i == 0:
                    print('log_transmat[i, %d]  %s' % (j, log_transmat[i, j]))
                    print('framelogprob[t + 1, %d] %s' % (j, framelogprob[t + 1, j]))
                    print('bwdlattice[t + 1, %d]  %s' % (j, bwdlattice[t + 1, j]))
            if i == 0:
                print('bwd[%d, %d] = logsumexp(%s) = %f' % (t, i, str(work_buffer), logsumexp(work_buffer)))
            bwdlattice[t, i] = logsumexp(work_buffer)
    return bwdlattice
def bayesFactors(model1, model2):
    """
    Computes the Bayes factor for two competing models.

    The computation is based on Newton & Raftery 1994 (Eq. 13).

    Parameters
    ----------
    model1 : PyAstronomy.funcFit.TraceAnalysis instance
        TraceAnalysis for the first model.
    model2 : PyAstronomy.funcFit.TraceAnalysis instance
        TraceAnalysis for the second model.

    Returns
    -------
    BF : float
        The Bayes factor BF (neither log10(BF) nor ln(BF)).
        Note that a small value means that the first model
        is favored, i.e., BF=p(M2)/p(M1).

    """
    def log_mean_inverse_likelihood(model):
        # deviance = -2 ln(L), so ln(L) = deviance / (-2).
        logp = model["deviance"] / (-2.)
        # ln( mean_i 1/L_i ) = logsumexp(-ln L_i) - ln(N)
        return sm.logsumexp(-logp) - numpy.log(len(logp))

    first = log_mean_inverse_likelihood(model1)
    second = log_mean_inverse_likelihood(model2)
    return numpy.exp(first - second)
def _calcgammamix(self,alpha,beta,observations):
    '''
    Calculates 'gamma_mix' in log space.

    Gamma_mix is a (TxNxK) numpy array, where gamma_mix[t][i][m] is the
    log of the probability of being in state 'i' at time 't' with mixture
    'm' given the full observation sequence.  ``alpha``, ``beta``,
    ``self.w`` and ``self.Bmix_map`` are combined additively, i.e. they are
    treated as log-domain quantities.
    '''
    n_steps = len(observations)
    gamma_mix = numpy.zeros((n_steps,self.n,self.m),dtype=self.precision)
    if not (self.n and self.m):
        # Nothing to fill; also avoids logsumexp over an empty list.
        return gamma_mix

    for t in range(n_steps):
        # State normaliser: identical for every (j, m) at this time step.
        alphabeta = logsumexp([alpha[t][jj] + beta[t][jj] for jj in range(self.n)])
        for j in range(self.n):
            comp1 = alpha[t][j] + beta[t][j] - alphabeta
            # Mixture normaliser: identical for every m of this state.
            bjk_sum = logsumexp([self.w[j][k] + self.Bmix_map[j][k][t]
                                 for k in range(self.m)])
            for m in range(self.m):
                comp2 = self.w[j][m] + self.Bmix_map[j][m][t] - bjk_sum
                gamma_mix[t][j][m] = comp1 + comp2

    return gamma_mix
def messages_backwards2(self):
    """Backward HSMM messages computed through the HMM embedding.

    This method is just for numerical testing.  Returns
    ``(betal, betastarl)``, both of shape ``(T, num_states)``.
    """
    Al = np.log(self.trans_matrix)
    T, num_states = self.T, self.num_states

    betal = np.zeros((T,num_states))
    betastarl = np.zeros((T,num_states))

    # Boundaries of each state's block of pseudo-states in the embedding.
    starts = cumsum(self.rs,strict=True)
    ends = cumsum(self.rs,strict=False)

    # foo: one row per state, holding its entry row inside its own block.
    foo = np.zeros((num_states,ends[-1]))
    for idx, row in enumerate(self.bwd_enter_rows):
        foo[idx,starts[idx]:ends[idx]] = row

    # bar: block-diagonal part of the embedded backward transition matrix.
    bar = np.zeros_like(self.hmm_bwd_trans_matrix)
    for start, end in zip(starts,ends):
        bar[start:end,start:end] = self.hmm_bwd_trans_matrix[start:end,start:end]

    # These logs do not change between time steps, so take them once.
    log_foo = np.log(foo)
    log_bar = np.log(bar)
    log_exit = np.log(1-self.ps)

    pmess = np.zeros(ends[-1])

    # betal[-1] is 0
    for t in range(T-1,-1,-1):
        pmess += self.hmm_aBl[t]
        betastarl[t] = logsumexp(log_foo + pmess, axis=1)
        # At t == 0 this writes betal[-1]; it is reset after the loop.
        betal[t-1] = logsumexp(Al + betastarl[t], axis=1)

        pmess = logsumexp(log_bar + pmess, axis=1)
        pmess[ends-1] = np.logaddexp(pmess[ends-1],betal[t-1] + log_exit)
    betal[-1] = 0.

    return betal, betastarl
def calculate_MI_bounded_discrete(X, Y, M_c, X_L, X_D):
    """Mutual information between two discrete columns under one latent state.

    ``X`` and ``Y`` are column indices.  Columns assigned to different views
    are treated as independent (0.0 is returned).  Otherwise the joint and
    marginal log probabilities of every value pair are obtained by summing
    over the view's clusters, and ``sum P(x,y) * (log P(x,y) - log P(x) -
    log P(y))`` is accumulated.  Non-positive results are reported as 0.0.
    ``X_D`` is accepted but not used.
    """
    def view_of(column):
        return X_L['column_partition']['assignments'][column]

    view_X = view_of(X)
    view_Y = view_of(Y)

    # Columns in different views are independent.
    if view_X != view_Y:
        return 0.0

    # Log CRP weight of every cluster in the shared view.
    view_state = X_L['view_state'][view_X]
    cluster_logps = su.determine_cluster_crp_logps(view_state)
    n_clusters = len(numpy.exp(cluster_logps))

    x_values = M_c['column_metadata'][X]['code_to_value'].values()
    y_values = M_c['column_metadata'][Y]['code_to_value'].values()

    # Component models of both columns, one pair per cluster.
    component_models_X = []
    component_models_Y = []
    for cluster_idx in range(n_clusters):
        models = su.create_cluster_model_from_X_L(M_c, X_L, view_X, cluster_idx)
        component_models_X.append(models[X])
        component_models_Y.append(models[Y])

    MI = 0.0
    for x in x_values:
        for y in y_values:
            log_joint = numpy.zeros(n_clusters)
            log_px = numpy.zeros(n_clusters)
            log_py = numpy.zeros(n_clusters)

            for j, (model_x, model_y) in enumerate(zip(component_models_X,
                                                        component_models_Y)):
                lx = model_x.calc_element_predictive_logp(x)
                ly = model_y.calc_element_predictive_logp(y)
                # P(x|c) P(y|c) P(c), P(x|c) P(c) and P(y|c) P(c) in logs
                log_joint[j] = lx + ly + cluster_logps[j]
                log_px[j] = lx + cluster_logps[j]
                log_py[j] = ly + cluster_logps[j]

            # Marginalise over clusters.
            Px = logsumexp(log_px)
            Py = logsumexp(log_py)
            Pxy = logsumexp(log_joint)

            MI += numpy.exp(Pxy)*(Pxy - (Px + Py))

    # ignore MI < 0
    if MI <= 0.0:
        MI = 0.0

    return MI
def ais(self, N_samples=100, B=1000, steps_per_B=1, verbose=True, full_output=False, callback=None):
    """
    Estimate the marginal likelihood with annealed importance sampling.

    Since Gibbs sampling as a function of temperature is implemented,
    we can use AIS to approximate the marginal likelihood of the model.

    :param N_samples: number of independent annealing runs.
    :param B: number of temperatures on a linear schedule from 0 to 1.
    :param steps_per_B: resampling sweeps performed at each temperature.
    :param verbose: write progress and each run's log weight to stdout.
    :param full_output: also return the array of log weights.
    :param callback: optional ``callback(self, m, b)`` invoked after the
        sweeps at each temperature.
    :return: ``(log_Z, std_log_Z)`` or ``(log_Z, std_log_Z, lw)``; the
        standard deviation comes from 100 bootstrap resamples.
    """
    # We use a linear schedule by default
    betas = np.linspace(0, 1, B)

    print("Estimating marginal likelihood with AIS")
    lw = np.zeros(N_samples)
    for m in progprint_xrange(N_samples):
        # Initialize the model with a draw from the prior
        self.initialize_from_prior()

        # Keep track of the log of the m-th weight
        # It starts at zero because the prior is assumed to be normalized
        lw[m] = 0.0

        # Sample the intermediate distributions
        for b in range(1, B):
            if verbose:
                sys.stdout.write("M: %d\tBeta: %.3f \r" % (m,betas[b]))
                sys.stdout.flush()

            # Compute the ratio of this sample under this distribution
            # and the previous distribution. The difference is added
            # to the log weight
            curr_lp = self.log_probability(temperature=betas[b])
            prev_lp = self.log_probability(temperature=betas[b-1])
            lw[m] += curr_lp - prev_lp

            # Sample the model at temperature betas[b]
            # Take some number of steps per beta in hopes that
            # the Markov chain will reach equilibrium.
            for _ in range(steps_per_B):
                self.collapsed_resample_model(temperature=betas[b])

            # Call the given callback
            if callback:
                callback(self, m, b)

        if verbose:
            print("")
            print("W: %f" % lw[m])

    # Compute the mean of the weights to get an estimate of the normalization constant
    log_Z = -np.log(N_samples) + logsumexp(lw)

    # Use bootstrap to compute standard error
    subsamples = np.random.choice(lw, size=(100, N_samples), replace=True)
    log_Z_subsamples = logsumexp(subsamples, axis=1) - np.log(N_samples)
    std_log_Z = log_Z_subsamples.std()

    if full_output:
        return log_Z, std_log_Z, lw
    else:
        return log_Z, std_log_Z
def ibis(actions, rewards, choices, idx_blocks, subj_idx, apply_rep_bias, apply_weber_decision_noise, curiosity_bias, show_progress, temperature, alpha_unchosen):
    """Trial-by-trial particle fit of a two-option Q-learning model.

    `nb_samples` parameter particles are kept in `samples`. Columns 0 and 1
    are the learning rates for the chosen and unchosen option, column 2 is
    the softmax parameter (used as 1/x when `temperature` is set, otherwise
    as 10**x). With `apply_weber_decision_noise` column 3 is the scaling
    `k_beta`; with `apply_rep_bias` or `curiosity_bias` the last column is
    the bias `eta`.

    After each trial every particle is re-weighted by the likelihood of the
    observed action and its Q-values are updated. When the effective sample
    size falls below `coefficient * nb_samples`, particles are resampled and
    each one is offered a proposal drawn from a Gaussian fitted to the
    weighted particles; the proposal is accepted or rejected on `log_ratio`.

    Parameters (as used below):
        actions       -- per-trial action, must contain 0 and 1 and never 2
        rewards       -- indexed as rewards[option, trial]
        choices       -- per-trial flag; only trials equal to 1 contribute
                         to the likelihood
        idx_blocks    -- per-trial flag; a truthy value resets the Q-values
                         to 0.5 and the previous action to -1
        subj_idx      -- not used in this function
        show_progress -- draw diagnostic matplotlib panels every 10 trials
        alpha_unchosen -- a value in [0, 1] pins column 1 to that value;
                         anything else means the column is sampled

    Returns:
        [samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
         marg_loglkd_l]

    NOTE(review): the source was whitespace-collapsed; the nesting below was
    reconstructed from data flow and should be checked against the original.
    """
    assert(2 not in actions); assert(0 in actions); assert(1 in actions)

    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    choices = np.asarray(choices, dtype = np.intc)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # sample initialisation: uniform draws, rescaled per column
    if (apply_rep_bias or curiosity_bias) and apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 4)
        if temperature:
            upp_bound_beta = np.sqrt(6)/(np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
        samples[:, 3] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
    elif apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 3)
        if temperature:
            upp_bound_beta = np.sqrt(6)/(np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
    elif apply_weber_decision_noise ==1 :
        if apply_rep_bias:
            samples = np.random.rand(nb_samples,5)
            if temperature:
                upp_bound_beta = np.sqrt(6)/(np.pi * 5)
            else:
                upp_bound_beta = 2.
            samples[:, 4] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
        else:
            samples = np.random.rand(nb_samples,4)
            if temperature:
                upp_bound_beta = np.sqrt(6)/(np.pi * 5)
            else:
                upp_bound_beta = 2.
        upp_bound_k = 10
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta # bound on the beta
        samples[:, 3] = np.random.rand(nb_samples) * upp_bound_k

    # A fixed unchosen learning rate removes column 1 from the move proposal.
    if alpha_unchosen >= 0 and alpha_unchosen <= 1:
        samples[:,1] = alpha_unchosen
        sample_alpha_u = False
    else:
        sample_alpha_u = True

    Q_samples = np.zeros([nb_samples, 2])
    prev_action = np.zeros(nb_samples) - 1

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param
    if apply_rep_bias and apply_weber_decision_noise:
        move_samples = np.zeros([nb_samples, 5])
    elif apply_rep_bias or curiosity_bias:
        move_samples = np.zeros([nb_samples, 4])
    elif apply_weber_decision_noise:
        move_samples = np.zeros([nb_samples, 4])
    else:
        move_samples = np.zeros([nb_samples, 3])
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    prev_action_move = np.zeros(nb_samples)
    mean_Q = np.zeros([T, 2])
    prediction_err = np.zeros(nb_samples)
    prediction_err[:] = -np.inf
    prediction_err_move = np.zeros(nb_samples)

    if show_progress : plt.figure(figsize=(15,9)); plt.suptitle("noiseless rl", fontsize=14); plt.ion()

    # loop over trials
    for t_idx in range(T):
        if (t_idx+1) % 10 == 0 : sys.stdout.write(' ' + str(t_idx+1) + ' '); print 'marg_loglkd ' + str(marg_loglkd);
        if (t_idx+1) % 100 == 0: print ('\n')

        # every particle must share the same previous action
        assert(len(np.unique(prev_action)) == 1)

        # update step: keep the pre-update log weights for the marginal likelihood increment
        weights_a[:] = log_weights
        if idx_blocks[t_idx]:
            Q_samples[:] = 0.5
            prev_action[:] = -1

        # loop over samples
        for n_idx in range(nb_samples):
            alpha_c = samples[n_idx, 0]
            alpha_u = samples[n_idx, 1]
            if temperature:
                beta = 1./samples[n_idx, 2]
            else:
                beta = 10**samples[n_idx, 2]
            if apply_rep_bias or curiosity_bias:
                eta = samples[n_idx, -1]
            if apply_weber_decision_noise:
                k_beta = samples[n_idx,3]

            # reweighting: `value` is the model probability of action 1
            if choices[t_idx] == 1 and prev_action[n_idx] != -1 and (apply_rep_bias==1 or curiosity_bias) and apply_weber_decision_noise == 0:
                if apply_rep_bias:
                    value = 1./(1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta))
                    loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]
                elif curiosity_bias:
                    # length of the current run of identical actions;
                    # falls back to t_idx when no different action is found
                    try:
                        count_samples = t_idx - 1 - np.where(actions[:t_idx] != actions[t_idx - 1])[0][-1]
                    except:
                        count_samples = t_idx
                    assert(count_samples > 0)
                    value = 1./(1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) + np.sign(prev_action[n_idx] - .5) * eta * count_samples))
                    loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 0:
                value = 1./(1. + np.exp(beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 0:
                # softmax parameter shrinks with the last absolute prediction error
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1./(1. + np.exp(beta_modified * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif choices[t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 1:
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1./(1. + np.exp(beta_modified * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) - np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx]) * (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            else:
                # trial does not contribute to the likelihood
                value = 1.
                loglkd[n_idx] = 0.

            if np.isnan(loglkd[n_idx]):
                print t_idx
                print n_idx
                print beta
                print value
                raise Exception

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]
            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # update step: the taken action's Q-value uses alpha_c; the other
            # option uses alpha_u and is left untouched under curiosity_bias
            if actions[t_idx] == 0:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 0] - rewards[0, t_idx])
                Q_samples[n_idx, 0] = (1 - alpha_c) * Q_samples[n_idx, 0] + alpha_c * rewards[0, t_idx]
                if not curiosity_bias:
                    Q_samples[n_idx, 1] = (1 - alpha_u) * Q_samples[n_idx, 1] + alpha_u * rewards[1, t_idx]
            else:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 1] - rewards[1, t_idx])
                if not curiosity_bias:
                    Q_samples[n_idx, 0] = (1 - alpha_u) * Q_samples[n_idx, 0] + alpha_u * rewards[0, t_idx]
                Q_samples[n_idx, 1] = (1 - alpha_c) * Q_samples[n_idx, 1] + alpha_c * rewards[1, t_idx]

        # incremental log marginal likelihood from the pre-update weights
        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        # effective sample size: (sum w)^2 / sum w^2, computed in log space
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess

        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            nb_acceptance = 0.
            # weighted mean and covariance of the particles define the proposal
            if not sample_alpha_u:
                samples_tmp = np.delete(samples, 1, axis=1)
                mu_p = np.sum(samples_tmp.T * weights_a, axis=1)
                Sigma_p = np.dot((samples_tmp - mu_p).T * weights_a, (samples_tmp - mu_p))
            else:
                mu_p = np.sum(samples.T * weights_a, axis=1)
                Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            idxTrajectories = uf.stratified_resampling(weights_a)

            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # redraw until the proposal lies inside the parameter bounds
                while True:
                    sample_cand = np.array(samples[idx_traj])
                    sample_p = multi_norm(mu_p, Sigma_p)
                    sample_p_copy = np.array(sample_p)
                    # re-insert the pinned alpha_unchosen into the proposal
                    if (not sample_alpha_u) and apply_rep_bias:
                        sample_p = np.array([sample_p[0], alpha_unchosen, sample_p[1], sample_p[2]])
                        sample_cand = np.delete(sample_cand, 1)
                    elif not sample_alpha_u:
                        sample_p = np.array([sample_p[0], alpha_unchosen, sample_p[1]])
                        sample_cand = np.delete(sample_cand, 1)

                    if not apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] >= 0 and sample_p[0] <= 1 and sample_p[1] >= 0 and sample_p[1] <= 1 and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta:
                            break
                    elif not apply_rep_bias and apply_weber_decision_noise:
                        if sample_p[0] >= 0 and sample_p[0] <= 1 and sample_p[1] >= 0 and sample_p[1] <= 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] <= upp_bound_k:
                            break
                    elif apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] >= 0 and sample_p[0] <= 1 and sample_p[1] >= 0 and sample_p[1] <= 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > -upp_bound_eta and sample_p[3] < upp_bound_eta:
                            break
                    else:
                        if sample_p[0] >= 0 and sample_p[0] <= 1 and sample_p[1] >= 0 and sample_p[1] <= 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] < upp_bound_k \
                                and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta:
                            break

                # likelihood of the proposal over the trials seen so far
                [loglkd_prop, Q_prop, prev_action_prop, prediction_err_prop] = get_loglikelihood(sample_p, rewards, actions, choices, idx_blocks, t_idx + 1, apply_rep_bias, apply_weber_decision_noise, curiosity_bias, temperature)
                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                            + get_logtruncnorm(sample_cand, mu_p, Sigma_p) - get_logtruncnorm(sample_p_copy, mu_p, Sigma_p)
                log_ratio = np.minimum(log_ratio, 0)

                if (np.log(np.random.rand()) < log_ratio):
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                    prediction_err_move[n_idx] = prediction_err_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]
                    prediction_err_move[n_idx] = prediction_err[idx_traj]

            print 'acceptance ratio %s'%str(nb_acceptance/nb_samples)
            assert(prev_action_prop == prev_action[0])
            acceptance_l.append(nb_acceptance/nb_samples)

            # move samples; weights are reset to uniform after resampling
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd
            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move
            prediction_err[:] = prediction_err_move

        if show_progress and t_idx%10==0 :
            weights_a[:] = uf.to_normalized_weights(log_weights)

            plt.subplot(3,2,1)
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2); plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q values')

            if apply_rep_bias == 1:
                mean_rep = np.sum(weights_a * samples[:,3])
                std_rep = np.sqrt(np.sum(weights_a * samples[:,3]**2) - mean_rep**2)
                plt.subplot(3,2,2)
                x = np.linspace(-2.,2.,5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g'); plt.hold(True)
                plt.plot([mean_rep, mean_rep], plt.gca().get_ylim(),'g', linewidth=2)
                plt.hold(False)
                plt.xlabel('trials')
                plt.ylabel('rep param')

            if temperature:
                mean_beta = np.sum(weights_a * 1./samples[:, 2])
                std_beta = np.sqrt(np.sum(weights_a * ((1./samples[:,2])**2)) - mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, 2])
                std_beta = np.sqrt(np.sum(weights_a * ((10**samples[:,2])**2)) - mean_beta**2)

            if apply_weber_decision_noise :
                mean_k = np.sum(weights_a * samples[:,3])
                std_k = np.sqrt(np.sum(weights_a * (samples[:,3]**2)) - mean_k**2)

            plt.subplot(3,2,3)
            x = np.linspace(0.01,200.,5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2); plt.hold(True)
            plt.plot([mean_beta, mean_beta], plt.gca().get_ylim(), 'g', linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            mean_alpha_1 = np.sum(weights_a * samples[:, 1])
            std_alpha_1 = np.sqrt(np.sum(weights_a * (samples[:, 1]**2)) - mean_alpha_1**2)
            plt.subplot(3,2,4)
            x = np.linspace(0.,1.,5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm', linewidth=2); plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm', linewidth=2)
            plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c', linewidth=2); plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c', linewidth=2)
            plt.hold(False)
            plt.xlabel('learning rate chosen (majenta) an unchosen (cyan)')
            plt.ylabel('pdf')

            plt.subplot(3,2,5)
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2); plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [nb_samples/2, nb_samples/2],'b--', linewidth=2)
            plt.axis([0, t_idx-1, 0, nb_samples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')

            # modified here add the plot for k
            if apply_weber_decision_noise : # added by Alex
                plt.subplot(3,2,6)
                x = np.linspace(0.01,10.,5000)
                plt.plot(x, norm.pdf(x, mean_k, std_k), 'k', linewidth=2); plt.hold(True)
                plt.plot([mean_k, mean_k], plt.gca().get_ylim(), 'k', linewidth=2)
                plt.hold(False)
                plt.xlabel('scaling parameter for softmax 1/[0 1]')
                plt.ylabel('pdf')

            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l]
def lnGauss_mixture(x, mix):
    """Weighted log-sum-exp of `lnGauss` over the rows of `mix`.

    `mix` is indexed as a 2-D array whose columns 0, 1, 2 are passed on as
    the weight, `mu` and `sigma` of each component (presumably Gaussian
    mixture parameters -- `lnGauss` is defined elsewhere).
    """
    weights, means, sigmas = mix[:, 0], mix[:, 1], mix[:, 2]
    component_terms = lnGauss(x, means, sigmas)
    return logsumexp(component_terms, b=weights)
def test_logsumexp():
    """Check that both importable `logsumexp` entry points agree."""
    # make sure logsumexp can be imported from either scipy.misc or
    # scipy.special
    sample = [0, 1]
    with suppress_warnings() as sup:
        # one of the two entry points emits this deprecation notice
        sup.filter(DeprecationWarning, "`logsumexp` is deprecated")
        actual = logsumexp(sample)
        expected = sc_logsumexp(sample)
        assert_allclose(actual, expected, atol=1e-16)
def _do_forward_pass(self, framelogprob):
    """Run the forward recursion and return ``(log_prob, fwdlattice)``.

    `framelogprob` is an (n_samples, n_components) array. `fwdlattice` has
    the same shape and is handed to `_hmmc._forward` (presumably filled in
    place); the returned log probability is the log-sum-exp of its last row.
    """
    n_samples, n_components = framelogprob.shape
    fwdlattice = np.zeros((n_samples, n_components))

    log_startprob = log_mask_zero(self.startprob_)
    log_transmat = log_mask_zero(self.transmat_)
    _hmmc._forward(n_samples, n_components,
                   log_startprob, log_transmat,
                   framelogprob, fwdlattice)

    log_prob = logsumexp(fwdlattice[-1])
    return log_prob, fwdlattice
all_df = pd.DataFrame() p = np.zeros((total_no_iters - 2, n_trial)) + 0.0 # initialize solution_XX = np.dstack( [i[lam_no][0][:total_no_iters, :].T for i in dat['solution']]) E_true = np.array([[ pdist(solution_XX[:, :i, j].T).min() for i in range(2, total_no_iters) ] for j in range(n_trial)]) alpha = 10.0 for it, trial in enumerate(range(n_trial)): for n, tr in enumerate(range(2, total_no_iters)): true_set = solution_XX[:, :tr, trial].T samples = np.random.uniform(size=(10000, dim)) samples = samples * (bounds[:, 1] - bounds[:, 0]) + bounds[:, 0] E_samp = (cdist(true_set[:-1], samples).min(axis=0)) log_prob = alpha * E_true[trial, n] - logsumexp(alpha * E_samp - np.log(10000)) p[n, it] = -log_prob for no_iters in range(5, total_no_iters + 1): mins_df = pd.DataFrame(index=np.arange(n_trial), columns=names) # no_iters = 20 true_lam = lam_no l_ego = get_Ls(names[true_lam], no_iters) # ax.set_title('$\Lambda^*={}$\n'.format(names[true_lam])) ## dat['solution'][true_lam, coor_or_ei, no_samples] solution_X = np.dstack( [i[true_lam][0][:no_iters, :].T for i in dat['solution']]) solution_y = np.dstack( [i[true_lam][1][:no_iters] for i in dat['solution']])
def expected_log_likelihood(self, x):
    """Mix the components' expected log likelihoods of `x`.

    Each component's `expected_log_likelihood(x)` is weighted by the softmax
    of `self.weights._alpha_mf` and the weighted terms are summed.
    """
    log_weights = self.weights._alpha_mf
    lognorm = logsumexp(log_weights)

    total = 0
    for log_w, component in zip(log_weights, self.components):
        total = total + np.exp(log_w - lognorm) * component.expected_log_likelihood(x)
    return total
def compute_predictive_ll(S_test, S_train, true_model=None, bfgs_model=None, sgd_models=None, gibbs_samples=None, vb_models=None, svi_models=None):
    """
    Compute the predictive log likelihood of ``S_test`` under each model
    that is supplied, plus a homogeneous-rate baseline fitted on ``S_train``.

    :param S_test:        held-out count matrix (time bins along axis 0)
    :param S_train:       training count matrix used for the baseline rate
    :param true_model:    optional model exposing ``heldout_log_likelihood``
    :param bfgs_model:    optional ``DiscreteTimeStandardHawkesModel``
    :param sgd_models:    optional list of models, one value per model
    :param gibbs_samples: optional list of samples; the second half is scored
    :param vb_models:     optional list of models; 100 mean-field draws each
    :param svi_models:    optional list of models; 1 mean-field draw each
    :return: dict keyed by 'homog', 'true', 'bfgs', 'sgd', 'gibbs', 'vb',
             'svi' (only the keys whose input was given)
    """
    plls = {}

    # Compute homogeneous pred ll
    T = S_train.shape[0]
    T_test = S_test.shape[0]
    lam_homog = S_train.sum(axis=0) / float(T)
    plls['homog'] = 0
    plls['homog'] += -gammaln(S_test+1).sum()
    plls['homog'] += (-lam_homog * T_test).sum()
    plls['homog'] += (S_test.sum(axis=0) * np.log(lam_homog)).sum()

    if true_model is not None:
        plls['true'] = true_model.heldout_log_likelihood(S_test)

    if bfgs_model is not None:
        assert isinstance(bfgs_model, DiscreteTimeStandardHawkesModel)
        plls['bfgs'] = bfgs_model.heldout_log_likelihood(S_test)

    if sgd_models is not None:
        assert isinstance(sgd_models, list)
        plls['sgd'] = np.zeros(len(sgd_models))
        for i, sgd_model in enumerate(sgd_models):
            # Store per model; assigning to plls['sgd'] itself would replace
            # the array with the last model's scalar.
            plls['sgd'][i] = sgd_model.heldout_log_likelihood(S_test)

    if gibbs_samples is not None:
        print("Computing pred ll for Gibbs")
        # Compute log(E[pred likelihood]) on second half of samples
        offset = len(gibbs_samples) // 2

        # Preconvolve with the Gibbs model's basis
        F_test = gibbs_samples[0].basis.convolve_with_basis(S_test)

        plls['gibbs'] = np.array(
            [s.heldout_log_likelihood(S_test, F=F_test)
             for s in gibbs_samples[offset:]])

    if vb_models is not None:
        print("Computing pred ll for VB")
        # Compute predictive likelihood over samples from VB model
        N_models = len(vb_models)
        N_samples = 100

        # Preconvolve with the VB model's basis
        F_test = vb_models[0].basis.convolve_with_basis(S_test)

        vb_plls = np.zeros((N_models, N_samples))
        for i, vb_model in enumerate(vb_models):
            for j in range(N_samples):
                vb_model.resample_from_mf()
                vb_plls[i,j] = vb_model.heldout_log_likelihood(S_test, F=F_test)

        # Compute the log of the average predicted likelihood
        plls['vb'] = -np.log(N_samples) + logsumexp(vb_plls, axis=1)

    if svi_models is not None:
        print("Computing predictive likelihood for SVI models")
        # Compute predictive likelihood over samples from the SVI model
        N_models = len(svi_models)
        N_samples = 1

        # Preconvolve with the SVI model's basis
        F_test = svi_models[0].basis.convolve_with_basis(S_test)

        svi_plls = np.zeros((N_models, N_samples))
        for i, svi_model in enumerate(svi_models):
            for j in range(N_samples):
                svi_model.resample_from_mf()
                svi_plls[i,j] = svi_model.heldout_log_likelihood(S_test, F=F_test)

        plls['svi'] = -np.log(N_samples) + logsumexp(svi_plls, axis=1)

    return plls
def __call__(self, pianoroll):
    """Score `pianoroll` with the ensemble.

    `self.evaluator` is called `self.ensemble_size` times and the results
    are combined as log(mean(exp(.))) along the ensemble axis.
    """
    member_lls = []
    for _ in range(self.ensemble_size):
        member_lls.append(self.evaluator(pianoroll))

    uniform_weight = 1. / len(member_lls)
    return logsumexp(member_lls, b=uniform_weight, axis=0)
def _get_assign_probs(self):
    """Normalise `self.log_assignment` along axis 1 and exponentiate.

    The result has the same shape as `log_assignment` and sums to one
    along axis 1.
    """
    log_assignment = self.log_assignment  # num_sublex x num_clusters x num_symbols
    log_normalizer = spm.logsumexp(log_assignment, axis=1)
    return np.exp(log_assignment - log_normalizer[:, np.newaxis, :])
def _update_log_like(self):
    """Refresh `self.log_like` as the log-sum-exp of `self.log_assignment` over axis 1."""
    reduce_axis = 1
    self.log_like = spm.logsumexp(self.log_assignment, axis=reduce_axis)
if r > A[current_state][current_state]: current_state ^= 1 rolls = np.array(rolls) - 1 die = np.array(die) K = 2 T = rolls.shape[0] Alpha = np.empty((K, T)) # Forward Alpha[:, 0] = np.log(0.5) + np.log(B[:, rolls[0]]) # Alpha[:, 0] = 0.5 * B[:,rolls[0]] for t in range(1, T): for k in range(K): Alpha[k, t] = np.log(B[k, rolls[t]]) + logsumexp( [Alpha[i, t - 1] + np.log(A[i, k]) for i in range(K)]) # Alpha[k,t] = B[k, rolls[t]] * np.sum([Alpha[i,t-1] * A[i, k] for i in range(K)]) normalized_alpha = Alpha[1, :] / np.sum(Alpha, axis=0) x = np.arange(0, N) plt.plot(x, normalized_alpha) plt.plot(x, die) plt.show() # Backward Beta = np.empty((K, T)) Beta[:, T - 1] = 1 for t in range(T - 2, 0, -1): for k in range(K): Beta[k, t] = logsumexp([ Beta[i, t + 1] + np.log(B[k, rolls[t + 1]]) + np.log(A[i, k])
def log_likelihood(self):
    """Return the cached normaliser, computing it on first use.

    The value is the sum, over rows of `self.data` that contain no NaN, of
    the log-sum-exp of the matching row of `self._compute_scores()`. It is
    stored in `self._normalizer` and reused until that attribute is reset
    to None.
    """
    if getattr(self, '_normalizer', None) is None:
        scores = self._compute_scores()
        complete_rows = ~np.isnan(self.data).any(1)
        row_terms = logsumexp(scores[complete_rows], axis=1)
        self._normalizer = row_terms.sum()
    return self._normalizer
def _psislw(lw, reff):
    """Pareto smoothed importance sampling (PSIS).

    Parameters
    ----------
    lw : array
        Array of size (n_samples, n_observations)
    reff : float
        relative MCMC efficiency, `effective_n / n`

    Returns
    -------
    lw_out : array
        Smoothed log weights, normalised so that each column's weights
        sum to one. `lw` itself is not modified.
    kss : array
        Pareto tail indices; ``inf`` for columns with too few tail samples
        to fit.
    """
    n, m = lw.shape

    lw_out = np.copy(lw, order='F')
    kss = np.empty(m)

    # precalculate constants
    # PSIS uses the min(n / 5, 3 * sqrt(n / reff)) largest weights as the
    # tail. (`n / 0.5` made the first bound 2n, which mis-sized the tail
    # for small n and indexed out of range for very small n.)
    cutoff_ind = -int(np.ceil(min(n / 5., 3 * (n / reff)**0.5))) - 1
    cutoffmin = np.log(np.finfo(float).tiny)
    k_min = 1. / 3

    # loop over sets of log weights; each x is a view into lw_out, so the
    # in-place updates below write straight into the output array
    for i, x in enumerate(lw_out.T):
        # improve numerical accuracy
        x -= np.max(x)
        # sort the array
        x_sort_ind = np.argsort(x)
        # divide log weights into body and right tail
        xcutoff = max(x[x_sort_ind[cutoff_ind]], cutoffmin)

        expxcutoff = np.exp(xcutoff)
        tailinds, = np.where(x > xcutoff)
        x2 = x[tailinds]
        n2 = len(x2)
        if n2 <= 4:
            # not enough tail samples for gpdfit
            k = np.inf
        else:
            # order of tail samples
            x2si = np.argsort(x2)
            # fit generalized Pareto distribution to the right tail samples
            x2 = np.exp(x2) - expxcutoff
            k, sigma = _gpdfit(x2[x2si])

        if k >= k_min and not np.isinf(k):
            # no smoothing if short tail or GPD fit failed
            # compute ordered statistic for the fit
            sti = np.arange(0.5, n2) / n2
            qq = _gpinv(sti, k, sigma)
            qq = np.log(qq + expxcutoff)
            # place the smoothed tail into the output array
            x[tailinds[x2si]] = qq
            # truncate smoothed values to the largest raw weight 0
            x[x > 0] = 0
        # renormalize weights
        x -= logsumexp(x)
        # store tail index k
        kss[i] = k

    return lw_out, kss
def pmf(self, state):
    """Softmax of ``self.Q_U(state) / self.temperature``."""
    scaled_values = self.Q_U(state) / self.temperature
    log_partition = logsumexp(scaled_values)
    return np.exp(scaled_values - log_partition)
def logadd(lp, lq):
    """Return log(exp(lp) + exp(lq)) via `logsumexp`."""
    log_terms = [lp, lq]
    return logsumexp(log_terms)
def loo(trace, model=None, pointwise=False, reff=None, progressbar=False):
    """Leave-one-out cross-validation estimated with PSIS.

    Follows Vehtari et al. (2015): the log weights come from Pareto-smoothed
    importance sampling of the pointwise log likelihood.

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    pointwise : bool
        If True the pointwise predictive accuracy is returned as well.
        Default False
    reff : float
        Relative MCMC efficiency, `effective_n / n`. When None it is set to
        1 for a single chain, otherwise to the mean effective sample size
        divided by the total number of draws.
    progressbar : bool
        Forwarded to the pointwise log-likelihood computation.

    Returns
    -------
    namedtuple with the following elements:
    loo: approximated Leave-one-out cross-validation
    loo_se: standard error of loo
    p_loo: effective number of parameters
    loo_i: array of pointwise predictive accuracy, only if pointwise True
    """
    model = modelcontext(model)

    if reff is None and trace.nchains == 1:
        reff = 1.
    elif reff is None:
        eff = pm.effective_n(trace)
        eff_ave = pm.stats.dict2pd(eff, 'eff').mean()
        samples = len(trace) * trace.nchains
        reff = eff_ave / samples

    log_py = _log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    # Smoothed log importance weights, then shifted by the log likelihood.
    lw, ks = _psislw(-log_py, reff)
    lw += log_py

    if np.any(ks > 0.7):
        warnings.warn(
            "Estimated shape parameter of Pareto distribution is greater "
            "than 0.7 for one or more samples. You should consider using a "
            "more robust model, this is because importance sampling is less "
            "likely to work well if the marginal posterior and LOO posterior "
            "are very different. This is more likely to happen with a "
            "non-robust model and highly influential observations.")

    loo_lppd_i = -2 * logsumexp(lw, axis=0)
    loo_lppd = loo_lppd_i.sum()
    loo_lppd_se = (len(loo_lppd_i) * np.var(loo_lppd_i))**0.5

    lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0]))
    p_loo = lppd + (0.5 * loo_lppd)

    if pointwise:
        fields = 'LOO, LOO_se, p_LOO, LOO_i'
        values = (loo_lppd, loo_lppd_se, p_loo, loo_lppd_i)
    else:
        fields = 'LOO, LOO_se, p_LOO'
        values = (loo_lppd, loo_lppd_se, p_loo)
    LOO_r = namedtuple('LOO_r', fields)
    return LOO_r(*values)
def loo(trace, model=None, pointwise=False, progressbar=False):
    """Calculates leave-one-out (LOO) cross-validation for out of sample predictive
    model fit, following Vehtari et al. (2015). Cross-validation is computed using
    Pareto-smoothed importance sampling (PSIS).

    Parameters
    ----------
    trace : result of MCMC run
    model : PyMC Model
        Optional model. Default None, taken from context.
    pointwise: bool
        if True the pointwise predictive accuracy will be returned.
        Default False
    progressbar: bool
        Whether or not to display a progress bar in the command line. The
        bar shows the percentage of completion, the evaluation speed, and
        the estimated time to completion

    Returns
    -------
    namedtuple with the following elements:
    loo: approximated Leave-one-out cross-validation
    loo_se: standard error of loo
    p_loo: effective number of parameters
    loo_i: an array of the pointwise predictive accuracy, only if pointwise True
    """
    model = modelcontext(model)

    log_py = _log_post_trace(trace, model, progressbar=progressbar)
    if log_py.size == 0:
        raise ValueError('The model does not contain observed values.')

    # Importance ratios
    r = np.exp(-log_py)
    r_sorted = np.sort(r, axis=0)

    # Extract largest 20% of importance ratios and fit generalized Pareto to each
    # (returns tuple with shape, location, scale)
    q80 = int(len(log_py) * 0.8)
    pareto_fit = np.apply_along_axis(
        lambda x: pareto.fit(x, floc=0), 0, r_sorted[q80:])

    if np.any(pareto_fit[0] > 0.7):
        warnings.warn(
            "Estimated shape parameter of Pareto distribution is greater "
            "than 0.7 for one or more samples. You should consider using a "
            "more robust model, this is because importance sampling is less "
            "likely to work well if the marginal posterior and LOO posterior "
            "are very different. This is more likely to happen with a "
            "non-robust model and highly influential observations.")
    elif np.any(pareto_fit[0] > 0.5):
        warnings.warn(
            "Estimated shape parameter of Pareto distribution is greater "
            "than 0.5 for one or more samples. This may indicate that the "
            "variance of the Pareto smoothed importance sampling estimate "
            "is very large.")

    # Calculate expected values of the order statistics of the fitted Pareto
    S = len(r_sorted)
    M = S - q80
    z = (np.arange(M) + 0.5) / M
    # A list, not map(): np.vstack needs a real sequence on Python 3.
    expvals = [pareto.ppf(z, fit[0], scale=fit[2]) for fit in pareto_fit.T]

    # Replace importance ratios with order statistics of fitted Pareto
    r_sorted[q80:] = np.vstack(expvals).T
    # Unsort ratios (within columns) before using them as weights
    # (loop names chosen so they do not shadow the ratio array `r`)
    r_new = np.array([col[np.argsort(order)]
                      for col, order in zip(r_sorted.T, np.argsort(r.T, axis=1))]).T

    # Truncate weights to guarantee finite variance
    w = np.minimum(r_new, r_new.mean(axis=0) * S**0.75)

    loo_lppd_i = - 2. * logsumexp(log_py, axis=0, b=w / np.sum(w, axis=0))
    loo_lppd_se = np.sqrt(len(loo_lppd_i) * np.var(loo_lppd_i))
    loo_lppd = np.sum(loo_lppd_i)

    lppd = np.sum(logsumexp(log_py, axis=0, b=1. / log_py.shape[0]))
    p_loo = lppd + (0.5 * loo_lppd)

    if pointwise:
        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO, LOO_i')
        return LOO_r(loo_lppd, loo_lppd_se, p_loo, loo_lppd_i)
    else:
        LOO_r = namedtuple('LOO_r', 'LOO, LOO_se, p_LOO')
        return LOO_r(loo_lppd, loo_lppd_se, p_loo)
def log_marginal_likelihood(self, X):
    """Calculate the log marginal likelihood of the data under the mixture.

    Arguments:
        X (structured_array, shape = (n_samples, 1)): Input data where each
            row is a sample stored as a tuple of data entries, read by
            position as (age, hr, edu-num, income, sex, workclass, marital,
            occup, relation, country). Categorical entries may be ``str``
            or UTF-8 ``bytes``.

    Returns:
        log_marginal_likelihood (float): sum over samples of
            log sum_z p(z) p(x | z). 0.0 for empty input.
    """
    def _as_text(value):
        # Structured arrays commonly hold byte strings, while the parameter
        # tables are keyed by str; without decoding, the income/sex
        # comparisons below never match and silently take the other branch.
        return value.decode("utf-8") if isinstance(value, bytes) else value

    n_components = self.n_components

    # Per-component Gaussian quantities do not depend on the sample, so
    # compute the mean, inverse covariance and log normaliser once.
    means, precisions, log_norms = [], [], []
    for z in range(n_components):
        cov = np.array([
            [self.gaussian_cov['age']['age'][z], self.gaussian_cov['age']['hr'][z]],
            [self.gaussian_cov['hr']['age'][z], self.gaussian_cov['hr']['hr'][z]],
        ])
        means.append(np.array(
            [self.gaussian_mean['age'][z], self.gaussian_mean['hr'][z]]).astype(float))
        precisions.append(np.linalg.inv(cov))
        log_norms.append(-0.5 * np.log(np.linalg.det(2 * np.pi * cov)))

    categorical_fields = ('workclass', 'marital', 'occup', 'relation', 'country')

    total = 0.0
    for x in X:
        xg = np.array([x[0], x[1]]).astype(float)
        xp = x[2]
        # Anything other than the first label falls into the second one.
        income = '<=50K' if _as_text(x[3]) == '<=50K' else '>50K'
        sex = 'Female' if _as_text(x[4]) == 'Female' else 'Male'
        categories = [_as_text(v) for v in (x[5], x[6], x[7], x[8], x[9])]
        log_xp_factorial = np.log(factorial(xp))

        log_joint = np.empty(n_components)
        for z in range(n_components):
            diff = xg - means[z]
            rate = self.poisson['edu-num'][z]

            # Gaussian exp term (explicit -0.5: `-1 / 2` is -1 on Python 2)
            value = -0.5 * diff.dot(precisions[z]).dot(diff)
            # Poisson exp term
            value += -rate
            # Gaussian log term
            value += log_norms[z]
            # Bernoulli terms (income, sex)
            value += np.log(self.bernoulli['income'][income][z])
            value += np.log(self.bernoulli['sex'][sex][z])
            # Multinoulli terms
            for field, category in zip(categorical_fields, categories):
                value += np.log(self.multinomial[field][category][z])
            # Poisson log term
            value += xp * np.log(rate) - log_xp_factorial
            # Mixing weight
            value += np.log(self.component[z])

            log_joint[z] = value

        total = total + logsumexp(log_joint)

    return total
def simple_test(X, X_valid, n_epochs, n_batch, init_lr, n_layers=5, vis_freq=100, n_samples=100, z_std=1.0):
    '''Train the hypernetwork on X and track validation log likelihood.

    z_std = 1.0 gives the correct answer but other values might be good for
    debugging and analysis purposes.

    X, X_valid -- 2-D arrays with the same number of columns
    n_epochs   -- number of passes over the training batches
    n_batch    -- batch size; int(N / n_batch) full batches are used per epoch
    init_lr    -- learning rate for epochs 0..200, scaled by 0.01 afterwards
    n_layers   -- number of layers passed to `ign.init_ign_LU`
    vis_freq   -- not used in this function
    n_samples  -- noise draws averaged for the validation log likelihood
    z_std      -- standard deviation of the noise fed to the hypernetwork

    Returns (phi, cost_hist, loglik_valid, grad_f).
    '''
    N, D = X.shape
    N_valid = X_valid.shape[0]
    assert (X_valid.shape == (N_valid, D))
    y_valid = np.zeros_like(X_valid)

    num_params = 2
    max_x = np.max(X)
    WL_init = 1.0

    layers = ign.init_ign_LU(n_layers, num_params, WL_val=WL_init)
    phi_shared = make_shared_dict(layers, '%d%s')

    ll_primary_f = lambda X, y, w: loglik_primary_f(X, y, w, max_x)
    logprior_f_corrected = lambda theta: logprior_f(theta, max_x)
    hypernet_f = lambda z, prelim: ign.network_T_and_J_LU(
        z[None, :], phi_shared, force_diag=prelim)[0][0, :]
    log_det_dtheta_dz_f = lambda z, prelim: T.sum(
        ign.network_T_and_J_LU(z[None, :], phi_shared, force_diag=prelim)[1])
    # list(...) so the trainer gets a real list on Python 3 as well.
    params_to_opt = list(phi_shared.values())
    R = ht.build_trainer(params_to_opt, N, ll_primary_f, logprior_f_corrected,
                         hypernet_f, log_det_dtheta_dz_f=log_det_dtheta_dz_f)
    trainer, get_err, test_loglik, _, grad_f = R

    batch_order = np.arange(int(N / n_batch))

    cost_hist = np.zeros(n_epochs)
    loglik_valid = np.zeros(n_epochs)
    for epoch in range(n_epochs):
        np.random.shuffle(batch_order)

        # The schedule depends only on the epoch, so set it once per epoch:
        # preliminary phase with the full rate, then 1% of it.
        if epoch <= 200:
            current_lr = init_lr
            prelim = True
        else:
            current_lr = init_lr * 0.01
            prelim = False

        cost = 0.0
        for ii in batch_order:
            x_batch = X[ii * n_batch:(ii + 1) * n_batch]
            y_batch = np.zeros_like(x_batch)
            z_noise = z_std * np.random.randn(num_params)
            batch_cost = trainer(x_batch, y_batch, z_noise, current_lr, prelim)
            cost += batch_cost
        cost /= len(batch_order)
        print(cost)
        cost_hist[epoch] = cost

        # Validation: log of the average likelihood over n_samples noise draws.
        loglik_valid_s = np.zeros((N_valid, n_samples))
        for ss in range(n_samples):
            z_noise = z_std * np.random.randn(num_params)
            loglik_valid_s[:, ss] = test_loglik(X_valid, y_valid, z_noise, False)
        loglik_valid_s_adj = loglik_valid_s - np.log(n_samples)
        loglik_valid[epoch] = np.mean(logsumexp(loglik_valid_s_adj, axis=1))
        print('valid %f' % loglik_valid[epoch])

    phi = make_unshared_dict(phi_shared)
    return phi, cost_hist, loglik_valid, grad_f
def mf_update_c(self, network, stepsize=1.0):
    """
    Update the block assignment probabilities one node at a time.
    This one involves a number of not-so-friendly expectations.

    For each node n1 an unnormalized log probability `lp` over the C blocks
    is accumulated from `mf_expected_log_m()` and, for every other node n2
    and block cn2 (weighted by `mf_m[n2, cn2]`), the Bernoulli negentropy of
    the edge and the expected log likelihood of its weight -- for the edge
    n1->n2 and, when n2 != n1, for the reverse edge n2->n1. Row n1 of
    `mf_m` is then moved toward the normalized result.

    Rows are updated in place, so later nodes see the already-updated rows
    of earlier nodes.

    :param network:  object providing `E_A`, indexed as E_A[n1, n2]
    :param stepsize: 1.0 replaces the row outright; smaller values blend
                     the new probabilities with the old row
    :return: None
    """
    E_A = network.E_A
    E_notA = 1 - network.E_A
    # Sample each assignment in order
    for n1 in xrange(self.N):
        # Compute unnormalized log probs of each connection
        lp = np.zeros(self.C)

        # Prior from m
        lp += self.mf_expected_log_m()

        # Iterate over possible block assignments
        for cn1 in xrange(self.C):

            # Likelihood from each edge in the network
            for n2 in xrange(self.N):
                for cn2 in xrange(self.C):
                    pcn2 = self.mf_m[n2, cn2]

                    # Edge n1 -> n2 under block pair (cn1, cn2)
                    p_pn1n2 = Beta(self.mf_tau1[cn1, cn2], self.mf_tau0[cn1, cn2])
                    E_ln_p_n1n2 = p_pn1n2.expected_log_p()
                    E_ln_notp_n1n2 = p_pn1n2.expected_log_notp()
                    lp[cn1] += pcn2 * Bernoulli().negentropy(E_x=E_A[n1, n2], E_notx=E_notA[n1, n2], E_ln_p=E_ln_p_n1n2, E_ln_notp=E_ln_notp_n1n2)

                    # Compute the expected log likelihood of the weights
                    # Compute E[ln p(W | A=1, c)]
                    lp[cn1] += E_A[n1, n2] * pcn2 * self._expected_log_likelihood_W(network, n1, cn1, n2, cn2)

                    # Now do the same thing for the reverse edge
                    # (skipped for n2 == n1 so the self edge is counted once)
                    if n2 != n1:
                        p_pn2n1 = Beta(self.mf_tau1[cn2, cn1], self.mf_tau0[cn2, cn1])
                        E_ln_p_n2n1 = p_pn2n1.expected_log_p()
                        E_ln_notp_n2n1 = p_pn2n1.expected_log_notp()
                        lp[cn1] += pcn2 * Bernoulli().negentropy(E_x=E_A[n2, n1], E_notx=E_notA[n2, n1], E_ln_p=E_ln_p_n2n1, E_ln_notp=E_ln_notp_n2n1)

                        lp[cn1] += E_A[n2, n1] * pcn2 * self._expected_log_likelihood_W(network, n2, cn2, n1, cn1)

        # Normalize the log probabilities to update mf_m
        Z = logsumexp(lp)
        mk_hat = np.exp(lp - Z)
        self.mf_m[n1, :] = (1.0 - stepsize) * self.mf_m[n1, :] + stepsize * mk_hat
def logavg(v):
    """Return the log of the arithmetic mean of ``exp(v)``.

    Computed as ``logsumexp(v) - log(len(v))``; ``len(v)`` is the length of
    the first axis of ``v``.
    """
    log_total = logsumexp(v)
    count = len(v)
    return log_total - np.log(count)
def var_dpmm_multinomial(X, alpha, base_dirichlet, T=50, n_iter=100,
                         Xtest=None):
    '''
    Run variational inference on a DP mixture model whose mixture
    components are multinomial distributions.

    X: observed data, (N,M) matrix, can be sparse
    alpha: concentration parameter
    base_dirichlet: base measure (Dirichlet (1,M) in this case)
    T: truncation level (number of variational components)
    n_iter: number of coordinate-ascent sweeps
    Xtest: accepted but not used by the current body

    Returns (gamma1, gamma2, tau, phi, ll, log_time, K_trace, Z_trace);
    the four lists hold one entry per sweep. A '.' is written to stdout
    at the start of every sweep.
    '''
    N, M = X.shape

    # q(z_n): random responsibilities, normalised over the T components
    phi = np.matrix(np.random.uniform(size=(T, N)))
    phi = np.divide(phi, np.sum(phi, axis=0))

    # Placeholders for q(V_t) and q(eta_t); overwritten on the first sweep
    # and returned unchanged only when n_iter is 0.
    gamma1 = np.matrix(np.zeros((T - 1, 1)))
    gamma2 = np.matrix(np.zeros((T - 1, 1)))
    tau = np.matrix(np.zeros((T, M)))

    ll = []
    log_time = []
    K_trace = []
    Z_trace = []

    start = time.time()
    for _ in range(n_iter):
        sys.stdout.write('.')
        sys.stdout.flush()

        # Beta parameters: own mass, and mass of all later components
        gamma1 = 1. + np.sum(phi[:T - 1, :], axis=1)
        tail_mass = np.cumsum(phi[:0:-1, :], axis=0)[::-1, :]
        gamma2 = alpha + np.sum(tail_mass, axis=1)

        # Dirichlet parameters (phi * X is a matrix product)
        tau = base_dirichlet + phi * X

        digamma_total = psi(gamma1 + gamma2)
        # E_q[log V_t], padded with 0 for the last component
        e_log_v = np.vstack((psi(gamma1) - digamma_total, 0.))
        # \sum_{i=1}^{t-1} E_q[log (1-V_i)]
        e_log_1mv = psi(gamma2) - digamma_total
        e_log_1mv_cum = np.cumsum(np.vstack((0., e_log_1mv)), axis=0)
        # E_q[eta_t]
        e_eta = psi(tau) - psi(np.sum(tau, axis=1))

        # Responsibilities, normalised per observation in log space
        log_resp = e_log_v + e_log_1mv_cum + e_eta * X.T
        log_resp = log_resp - logsumexp(log_resp, axis=0)
        phi = np.exp(log_resp)

        log_time.append(np.log(time.time() - start))

        # Hard assignments, used for the traces and the likelihood
        hard_z = np.argmax(phi, axis=0).A1
        K_trace.append(np.unique(hard_z).size)
        Z_trace.append(hard_z)
        ll.append(posterior_LL(X, alpha, tau, hard_z))

    return gamma1, gamma2, tau, phi, ll, log_time, K_trace, Z_trace
def normalize_over_prefix(df, log_prob='log_prob_target'):
    """Add per-trial normalised probabilities to ``df`` in place.

    Rows are grouped by the ``trial_name`` column. Within each group the
    values of column ``log_prob`` are exponentiated and divided by their
    sum (done in log space), and the result is written to the
    ``normalized_prob_target`` column. Nothing is returned.
    """
    for trial_name, sub_df in df.groupby('trial_name'):
        in_trial = df.trial_name == trial_name
        log_normalizer = spm.logsumexp(sub_df[log_prob])
        shifted = df.loc[in_trial, log_prob] - log_normalizer
        df.loc[in_trial, 'normalized_prob_target'] = np.exp(shifted)
def logpdf_gmm(x, ws, mus, covs):
    """Log density of a Gaussian mixture evaluated at ``x``.

    Each component contributes ``log(w) + logpdf_gauss(x, mu, cov)``; the
    contributions are combined with ``logsumexp`` across components
    (axis 0). ``ws``, ``mus`` and ``covs`` are zipped, so the shortest of
    the three determines how many components are used.
    """
    component_terms = []
    for weight, mean, cov in zip(ws, mus, covs):
        component_terms.append(np.log(weight) + logpdf_gauss(x, mean, cov))
    return logsumexp(component_terms, axis=0)
def _ars_compute_hulls(S, fS, domain):
    """Build the lower and upper piecewise-linear hulls for ARS.

    :param S: abscissae; at least three are required because ``fS[2]`` and
              ``fS[-3]`` are read unconditionally.
    :param fS: function values at the abscissae, indexed like ``S``.
    :param domain: pair ``(left, right)``; an infinite entry adds an
                   unbounded first / last segment to the upper hull.
    :return: ``(lowerHull, upperHull)``, two lists of ``Hull`` objects.
             Every hull has ``m``, ``b``, ``left`` and ``right``; upper
             hulls additionally carry ``lnpr`` (log mass from the closed
             form or ``_signed_lse``) and ``pr`` (mass normalised over the
             upper hull).
    :raises Exception: if any normalised mass is Inf or NaN.
    """
    # Lower hull: chords between consecutive abscissae.
    # If the domain of func is unbounded to the left or right, then the lower
    # hull takes on -inf to the left or right of the end points of S.
    lowerHull = []
    for li in range(len(S) - 1):
        m = (fS[li + 1] - fS[li]) / (S[li + 1] - S[li])
        b = fS[li] - m * S[li]
        lowerHull.append(_ars_new_hull(m, b, S[li], S[li + 1]))

    # Upper hull
    upperHull = []

    if np.isinf(domain[0]):
        # first line (from -infinity)
        m = (fS[1] - fS[0]) / (S[1] - S[0])
        b = fS[0] - m * S[0]
        # pro = np.exp(b)/m * ( np.exp(m*S[0]) - 0 ), integrating in from
        # -infinity; np.log(m) presumes a positive slope here.
        lnpr = b - np.log(m) + m * S[0]
        upperHull.append(_ars_new_hull(m, b, -np.inf, S[0], lnpr))

    # second line: chord through points 1 and 2, used on [S[0], S[1]]
    m = (fS[2] - fS[1]) / (S[2] - S[1])
    b = fS[1] - m * S[1]
    # pro = np.exp(b)/m * ( np.exp(m*S[1]) - np.exp(m*S[0]) )
    lnpr = _signed_lse(m, b, S[1], S[0])
    upperHull.append(_ars_new_hull(m, b, S[0], S[1], lnpr))

    # interior lines: there are two lines between each pair of abscissae
    for li in range(1, len(S) - 2):
        m1 = (fS[li] - fS[li - 1]) / (S[li] - S[li - 1])
        b1 = fS[li] - m1 * S[li]

        m2 = (fS[li + 2] - fS[li + 1]) / (S[li + 2] - S[li + 1])
        b2 = fS[li + 1] - m2 * S[li + 1]

        # Intersection of the two lines; force it strictly inside the
        # interval when it falls outside (seen when fS goes from a
        # reasonable to an unreasonable range).
        ix = (b1 - b2) / (m2 - m1)
        if not (ix >= S[li] and ix <= S[li + 1]):
            ix = np.clip(ix, S[li] + np.spacing(1),
                         S[li + 1] - np.spacing(1))

        # pro = np.exp(b1)/m1 * ( np.exp(m1*ix) - np.exp(m1*S[li]) )
        lnpr1 = _signed_lse(m1, b1, ix, S[li])
        upperHull.append(_ars_new_hull(m1, b1, S[li], ix, lnpr1))

        # pro = np.exp(b2)/m2 * ( np.exp(m2*S[li+1]) - np.exp(m2*ix) )
        lnpr2 = _signed_lse(m2, b2, S[li + 1], ix)
        upperHull.append(_ars_new_hull(m2, b2, ix, S[li + 1], lnpr2))

    # second to last line (m<0)
    m = (fS[-2] - fS[-3]) / (S[-2] - S[-3])
    b = fS[-2] - m * S[-2]
    # pro = np.exp(b)/m * ( np.exp(m*S[-1]) - np.exp(m*S[-2]) )
    lnpr = _signed_lse(m, b, S[-1], S[-2])
    upperHull.append(_ars_new_hull(m, b, S[-2], S[-1], lnpr))

    if np.isinf(domain[1]):
        # last line (to infinity)
        m = (fS[-1] - fS[-2]) / (S[-1] - S[-2])
        b = fS[-1] - m * S[-1]
        # pro = np.exp(b)/m * ( 0 - np.exp(m*S[-1]) )
        lnpr = b - np.log(np.abs(m)) + m * S[-1]
        upperHull.append(_ars_new_hull(m, b, S[-1], np.inf, lnpr))

    # Normalise the segment masses in log space.
    lnprs = np.array([h.lnpr for h in upperHull])
    lnZ = logsumexp(lnprs)
    prs = np.exp(lnprs - lnZ)
    for h, pr in zip(upperHull, prs):
        h.pr = pr

    if not np.all(np.isfinite(prs)):
        raise Exception("ARS prs contains Inf or NaN")

    return lowerHull, upperHull


def _ars_new_hull(m, b, left, right, lnpr=None):
    # Build one Hull segment y = m*x + b on [left, right]; lnpr is only set
    # when given (lower-hull segments carry no mass).
    h = Hull()
    h.m = m
    h.b = b
    if lnpr is not None:
        h.lnpr = lnpr
    h.left = left
    h.right = right
    return h
def _distn_form_heldout_log_likelihood(self, X, M=10):
    """
    We can analytically integrate out z (latent states) given omega.
    To estimate the heldout log likelihood of a data sequence, we
    Monte Carlo integrate over omega, where omega is drawn from the prior.

    :param X: data array of shape (T, K); K must equal ``self.K``.
    :param M: number of Monte Carlo samples for integrating out omega
    :return: ``(hll, std_hll)``: the log of the average per-sample
             likelihood, and the standard deviation of that quantity over
             100 bootstrap resamples of the M per-sample values.
    """
    T, K = X.shape
    assert K == self.K

    kappa = kappa_vec(X)
    N = N_vec(X)

    # Compute the data-specific normalization constant from the
    # augmented multinomial distribution
    Z_mul = (gammaln(N + 1) - gammaln(X[:, :-1] + 1)
             - gammaln(N - X[:, :-1] + 1)).sum()
    Z_mul += (-N * np.log(2.)).sum()

    # Monte carlo integrate wrt omega ~ PG(N, 0)
    import pypolyagamma as ppg
    hlls = np.zeros(M)
    diag_idx = np.arange(K - 1)
    for m in range(M):
        # Sample omega using the emission distribution's samplers;
        # pgdrawvpar fills `omega` in place.
        omega = np.zeros(N.size)
        ppg.pgdrawvpar(self.emission_distn.ppgs,
                       N.ravel(), np.zeros(N.size),
                       omega)
        omega = omega.reshape((T, K - 1))
        # Keep omega strictly positive so the divisions and logs below
        # stay finite.
        omega = np.clip(omega, 1e-8, np.inf)

        # Compute the normalization constant for this omega
        Z_omg = 0.5 * (kappa**2 / omega).sum()
        Z_omg += T * (K - 1) / 2. * np.log(2 * np.pi)
        Z_omg += -0.5 * np.log(omega).sum()  # 1/2 log det of Omega_t^{-1}

        # Exactly integrate out the latent states z using message passing.
        # The "data" handed to the states object is the normal potential
        # defined by the mean and diagonal covariance built below.
        states = MultinomialLDSStates(model=self, data=X)
        conditional_mean = kappa / omega - self.emission_distn.mu[None, :]
        # One diagonal covariance diag(1 / omega[t]) per time step, filled
        # for all t at once.
        conditional_cov = np.zeros((T, K - 1, K - 1))
        conditional_cov[:, diag_idx, diag_idx] = 1. / omega
        Z_lds = states.log_likelihood(conditional_mean, conditional_cov)

        # Sum them up to get the heldout log likelihood for this omega
        hlls[m] = Z_mul + Z_omg + Z_lds

    # Now take the log of the average to get the log likelihood
    hll = logsumexp(hlls) - np.log(M)

    # Use bootstrap to compute error bars
    samples = np.random.choice(hlls, size=(100, M), replace=True)
    hll_samples = logsumexp(samples, axis=1) - np.log(M)
    std_hll = hll_samples.std()

    return hll, std_hll
def messages_backwards_log(self):
    """Return the backward messages and cache the normalizer.

    Delegates to ``self._messages_backwards_log(trans_matrix, aBl)``,
    asserts the result contains no NaN, and stores
    ``logsumexp(log(pi_0) + betal[0] + aBl[0])`` in ``self._normalizer``.
    """
    log_beta = self._messages_backwards_log(self.trans_matrix, self.aBl)
    assert not np.isnan(log_beta).any()

    first_step = np.log(self.pi_0) + log_beta[0] + self.aBl[0]
    self._normalizer = logsumexp(first_step)
    return log_beta
def messages_forwards_log(self):
    """Return the forward messages and cache the normalizer.

    Delegates to ``self._messages_forwards_log(trans_matrix, pi_0, aBl)``,
    asserts the result contains no NaN, and stores the ``logsumexp`` of the
    last row of the messages in ``self._normalizer``.
    """
    log_alpha = self._messages_forwards_log(
        self.trans_matrix, self.pi_0, self.aBl)
    assert not np.any(np.isnan(log_alpha))

    final_step = log_alpha[-1]
    self._normalizer = logsumexp(final_step)
    return log_alpha
def log_expected_pll(plls):
    """Return ``logsumexp(plls) - log(len(plls))``.

    This is the log of the mean of ``exp(plls)``, evaluated in log space.
    """
    n_values = len(plls)
    log_total = logsumexp(plls)
    return -np.log(n_values) + log_total
def pmf(self, phi):
    """Return the softmax of ``self.value(phi) / self.temp``.

    The scaled values are normalised in log space with ``logsumexp`` before
    being exponentiated.
    """
    scaled = self.value(phi)/self.temp
    log_norm = logsumexp(scaled)
    return np.exp(scaled - log_norm)