def calc_log_proba_mod(peptide, domain, sequence):
    """
    Function which computes the log of the updated probability.
    For numerical stability, the sum of the logs is computed as
    log(exp(logA)+exp(logB)) which is just log(A+B)
    """
    ix = PDZ_Data.domain_names.index(domain.name)
    alpha = PDZ_Data.fp_interaction_matrix[peptide.name][ix]
    score = eval_score(domain, sequence, 0)
    z_1 = log_modified(score)
    z_2 = log_modified(-1.0*score)
    if alpha > 0:
        a = peptide.posterior_matrix[1,1]
        x = np.log(a) - z_1
        b = peptide.posterior_matrix[1,0]
        y = np.log(b) - z_2
        result = np.logaddexp(x,y)
    else:
        a = peptide.posterior_matrix[0,1]
        x = np.log(a) - z_1
        b = peptide.posterior_matrix[0,0]
        y = np.log(b) - z_2
        result = np.logaddexp(x,y)
    return result*-1.0
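# Illustration (standalone sketch, not part of the function above): combining
# two log-probabilities with np.logaddexp instead of exponentiating first.
# The values below are made up purely to show the underflow problem.
import numpy as np

log_a, log_b = -1000.0, -1001.0

# Naive combination underflows: np.exp(-1000.0) == 0.0 in float64, so the sum is -inf.
with np.errstate(divide='ignore'):
    naive = np.log(np.exp(log_a) + np.exp(log_b))

# logaddexp evaluates log(exp(log_a) + exp(log_b)) without leaving log space.
stable = np.logaddexp(log_a, log_b)

print(naive, stable)   # -inf  vs  approximately -999.69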
def messages_backwards(self): 'approximates duration tails at indices > trunc with geometric tails' aDl, aDsl, Al = self.aDl, self.aDsl, np.log(self.trans_matrix) trunc = self.trunc if self.trunc is not None else self.T T,state_dim = aDl.shape assert trunc > 1 aBl = self.aBl/self.temp if self.temp is not None else self.aBl hmm_betal = HMMStatesEigen._messages_backwards(self._get_hmm_transition_matrix(),aBl) assert not np.isnan(hmm_betal).any() betal = np.zeros((T,state_dim),dtype=np.float64) betastarl = np.zeros_like(betal) for t in xrange(T-1,-1,-1): np.logaddexp.reduce(betal[t:t+trunc] + self.cumulative_likelihoods(t,t+trunc) + aDl[:min(trunc,T-t)],axis=0, out=betastarl[t]) if t+trunc < T: np.logaddexp(betastarl[t], self.likelihood_block(t,t+trunc+1) + aDsl[trunc -1] + hmm_betal[t+trunc], out=betastarl[t]) if T-t < trunc and self.right_censoring: np.logaddexp(betastarl[t], self.likelihood_block(t,None) + aDsl[T-t -1], betastarl[t]) np.logaddexp.reduce(betastarl[t] + Al,axis=1,out=betal[t-1]) betal[-1] = 0. return betal, betastarl
def hsmm_messages_backwards_log( trans_potentials, initial_state_potential, cumulative_obs_potentials, dur_potentials, dur_survival_potentials, betal, betastarl, left_censoring=False, right_censoring=True): errs = np.seterr(invalid='ignore') # logaddexp(-inf,-inf) T, _ = betal.shape betal[-1] = 0. for t in xrange(T-1,-1,-1): cB, offset = cumulative_obs_potentials(t) dp = dur_potentials(t) np.logaddexp.reduce(betal[t:t+cB.shape[0]] + cB + dur_potentials(t), axis=0, out=betastarl[t]) betastarl[t] -= offset if right_censoring: np.logaddexp(betastarl[t], cB[-1] - offset + dur_survival_potentials(t), out=betastarl[t]) np.logaddexp.reduce(betastarl[t] + trans_potentials(t-1), axis=1, out=betal[t-1]) betal[-1] = 0. # overwritten on last iteration if not left_censoring: normalizer = np.logaddexp.reduce(initial_state_potential + betastarl[0]) else: raise NotImplementedError np.seterr(**errs) return betal, betastarl, normalizer
def softmax_loss2(props, lbls, mask=None): grdts = dict() err = 0 for name, prop in props.iteritems(): # make sure that it is the output of binary class assert(prop.shape[0]==2) print "original prop: ", prop # rebase the prop for numerical stability # mathimatically, this do not affect the softmax result! # http://ufldl.stanford.edu/tutorial/supervised/SoftmaxRegression/ # prop = prop - np.max(prop) propmax = np.max(prop, axis=0) prop[0,:,:,:] -= propmax prop[1,:,:,:] -= propmax log_softmax = np.empty(prop.shape, dtype=prop.dtype) log_softmax[0,:,:,:] = prop[0,:,:,:] - np.logaddexp( prop[0,:,:,:], prop[1,:,:,:] ) log_softmax[1,:,:,:] = prop[1,:,:,:] - np.logaddexp( prop[0,:,:,:], prop[1,:,:,:] ) prop = np.exp(log_softmax) props[name] = prop lbl = lbls[name] grdts[name] = prop - lbl err = err + np.sum( -lbl * log_softmax ) print "gradient: ", grdts[name] assert(not np.any(np.isnan(grdts[name]))) return (props, err, grdts)
def compute_score_vect(self, bin_n, mu_vect, sigmasq_vect, pi_vect):
    bin_edges = np.arange(bin_n + 1, dtype=np.float32) / (bin_n + 1)
    bin_centers = bin_edges[:-1] + 1./bin_n
    score_bins = np.zeros(bin_n)

    # compute the prob for each bin
    K = len(pi_vect)
    dp_per_comp_scores = np.zeros((bin_n, K), dtype=np.float32)
    for k in range(K):
        mu = mu_vect[k]
        sigmasq = sigmasq_vect[k]
        pi = pi_vect[k]
        dp_per_comp_scores[:, k] = irm.util.log_norm_dens(bin_centers, mu, sigmasq)
        dp_per_comp_scores[:, k] += np.log(pi)

    scores = dp_per_comp_scores[:, 0]
    for k in range(1, K):
        scores = np.logaddexp(scores, dp_per_comp_scores[:, k])

    # normalize
    score_total = scores[0]
    for i in range(1, bin_n):
        score_total = np.logaddexp(score_total, scores[i])
    scores -= score_total

    return scores
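# Illustration (standalone sketch): the two explicit loops above can also be
# written with np.logaddexp.reduce, which folds logaddexp along an axis.
# The component scores here are random placeholders.
import numpy as np

rng = np.random.default_rng(0)
dp_per_comp_scores = rng.normal(size=(10, 3))   # (bin_n, K) log(pi_k) + log N(x | k)

# log sum_k exp(scores[:, k]) for every bin, replacing the loop over k
scores = np.logaddexp.reduce(dp_per_comp_scores, axis=1)

# normalisation, replacing the loop over bins
scores -= np.logaddexp.reduce(scores)

print(np.exp(scores).sum())   # ~1.0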
def forward_backward(node_potentials,edge_potentials): H,N = node_potentials.shape forward = -1000.0 * np.ones([H,N],dtype=float) backward = -1000.0 * np.ones([H,N],dtype=float) forward[:,0] = np.log(node_potentials[:,0]) ## Forward loop for pos in xrange(1,N): for current_state in xrange(H): for prev_state in xrange(H): forward_v = forward[prev_state,pos-1] trans_v = np.log(edge_potentials[prev_state,current_state,pos-1]) logprob = forward_v + trans_v forward[current_state,pos] = np.logaddexp(forward[current_state,pos], logprob) forward[current_state,pos] += np.log(node_potentials[current_state,pos]) ## Backward loop backward[:,N-1] = 0.0 # log(1) = 0 for pos in xrange(N-2,-1,-1): for current_state in xrange(H): logprob = -1000.0 for next_state in xrange(H): back = backward[next_state,pos+1] trans = np.log(edge_potentials[current_state,next_state,pos]); observation = np.log(node_potentials[next_state,pos+1]); logprob = np.logaddexp(logprob, trans + observation + back); backward[current_state,pos] = logprob #sanity_check_forward_backward(forward,backward) #print forward, backward return np.exp(forward),np.exp(backward)
def ctc_loss(label, prob, remainder, seq_length, batch_size, num_gpu=1, big_num=1e10): label_ = [0, 0] prob[prob < 1 / big_num] = 1 / big_num log_prob = np.log(prob) l = len(label) for i in range(l): label_.append(int(label[i])) label_.append(0) l_ = 2 * l + 1 a = np.full((seq_length, l_ + 1), -big_num) a[0][1] = log_prob[remainder][0] a[0][2] = log_prob[remainder][label_[2]] for i in range(1, seq_length): row = i * int(batch_size / num_gpu) + remainder a[i][1] = a[i - 1][1] + log_prob[row][0] a[i][2] = np.logaddexp(a[i - 1][2], a[i - 1][1]) + log_prob[row][label_[2]] for j in range(3, l_ + 1): a[i][j] = np.logaddexp(a[i - 1][j], a[i - 1][j - 1]) if label_[j] != 0 and label_[j] != label_[j - 2]: a[i][j] = np.logaddexp(a[i][j], a[i - 1][j - 2]) a[i][j] += log_prob[row][label_[j]] return -np.logaddexp(a[seq_length - 1][l_], a[seq_length - 1][l_ - 1])
def compute_weights(data, Nlive):
    """Returns log_ev, log_wts for the log-likelihood samples in data,
    assumed to be a result of nested sampling with Nlive live points."""

    start_data = data[:-Nlive]
    end_data = data[-Nlive:]

    log_wts = zeros(data.shape[0])

    log_vol_factor = log1p(-1.0/Nlive)
    log_dvol = -1.0/Nlive

    log_vol = 0.0
    log_ev = -float('inf')
    for i, log_like in enumerate(start_data):
        # Volume associated with this likelihood = Vol/Nlive:
        log_this_vol = log_vol + log_dvol
        log_wts[i] = log_like + log_this_vol
        log_ev = logaddexp(log_ev, log_wts[i])
        log_vol += log_vol_factor

    avg_log_like_end = -float('inf')
    for i, log_l in enumerate(end_data):
        avg_log_like_end = logaddexp(avg_log_like_end, log_l)
    avg_log_like_end -= log(Nlive)

    # Each remaining live point contributes (Vol/Nlive)*like to the integral,
    # but has posterior weight Vol relative to the other samples.
    log_wts[-Nlive:] = log_vol + end_data

    log_ev = logaddexp(log_ev, avg_log_like_end + log_vol)

    log_wts -= log_ev

    return log_ev, log_wts
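# Illustration (standalone sketch): starting the accumulator at -inf is what
# makes the incremental logaddexp update above work, since
# logaddexp(-inf, x) == x. Toy log-weights, not real nested-sampling output.
import numpy as np

log_terms = np.array([-5.0, -3.0, -4.0])

log_ev = -np.inf
for lw in log_terms:
    log_ev = np.logaddexp(log_ev, lw)

# agrees with summing the probabilities directly (safe here, values are moderate)
assert np.isclose(log_ev, np.log(np.exp(log_terms).sum()))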
def test_transition_probabilities(hm): alpha = hm.forward() beta = hm.backward() gamma = hm.state_probs(alpha, beta) xi = hm.bw(alpha, beta) trans = hm.transition_probabilities(xi, gamma) iter_trans = [] for i in range(len(hm.hidden_states)): row = [] for j in range(len(hm.hidden_states)): num = np.NINF den = np.NINF for seq in range(len(hm.observations)): num_seq = xi[seq][0][i][j] den_seq = gamma[seq][0][i] for o in range(1, len(hm.observations[seq]) - 1): # xi is probability of probability # of being at state i at time t and # state j at time t+1 num_seq = np.logaddexp(num_seq, xi[seq][o][i][j]) # gamma is probability of being in # state i at time t den_seq = np.logaddexp(den_seq, gamma[seq][o][i]) # add the current sequence contribution to total num = np.logaddexp(num, num_seq) den = np.logaddexp(den, den_seq) row.append(np.exp(num - den)) iter_trans.append(row) assert iter_trans == approx(trans)
def equilibrium_concentrations(cls, DeltaG, Ptot, Ltot): """ Compute equilibrium concentrations for simple two-component association. Parameters ---------- DeltaG : float Reduced free energy of binding (in units of kT) Ptot : float or numpy array Total protein concentration summed over bound and unbound species, molarity. Ltot : float or numpy array Total ligand concentration summed over bound and unbound speciesl, molarity. Returns ------- P : float or numpy array with same dimensions as Ptot Free protein concentration, molarity. L : float or numpy array with same dimensions as Ptot Free ligand concentration, molarity. PL : float or numpy array with same dimensions as Ptot Bound complex concentration, molarity. """ # Original form: #Kd = np.exp(DeltaG) #sqrt_arg = (Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot #sqrt_arg[sqrt_arg < 0.0] = 0.0 #PL = 0.5 * ((Ptot + Ltot + Kd) - np.sqrt(sqrt_arg)); # complex concentration (M) # Numerically stable variant? logP = np.log(Ptot) logL = np.log(Ltot) logPLK = np.logaddexp(np.logaddexp(logP, logL), DeltaG) PLK = np.exp(logPLK); sqrt_arg = 1.0 - np.exp(np.log(4.0) + logP + logL - 2*logPLK); sqrt_arg[sqrt_arg < 0.0] = 0.0 # ensure always positive PL = 0.5 * PLK * (1.0 - np.sqrt(sqrt_arg)); # complex concentration (M) # Another variant #PL = 2*Ptot*Ltot / ((Ptot+Ltot+Kd) + np.sqrt((Ptot + Ltot + Kd)**2 - 4*Ptot*Ltot)); # complex concentration (M) # Yet another numerically stable variant? #logPLK = np.logaddexp(np.log(Ptot + Ltot), DeltaG); #PLK = np.exp(logPLK); #xy = np.exp(np.log(Ptot) + np.log(Ltot) - 2.0*logPLK); #chi = 1.0 - 4.0 * xy; #chi[chi < 0.0] = 0.0 # prevent square roots of negative numbers #PL = 0.5 * PLK * (1 - np.sqrt(chi)) # Ensure all concentrations are within limits, correcting cases where numerical issues cause problems. PL[PL < 0.0] = 0.0 # complex cannot have negative concentration #PL_max = np.minimum(Ptot, Ltot) #indices = np.where(PL > PL_max) #PL[indices] = PL_max[indices] # Compute remaining concentrations. P = Ptot - PL; # free protein concentration in sample cell after n injections (M) L = Ltot - PL; # free ligand concentration in sample cell after n injections (M) return [P, L, PL]
def compute_visible_llr(self, data): background = data.frames.background ball = data.frames.ball ball.present_ll_c[...] = numpy.logaddexp(ball.color_analysis.ll, self.occlusion_analyzer.occluded_lpr) ball.absent_ll_c[...] = data.background.q_estimation * numpy.logaddexp(background.color_analysis.ll, self.occlusion_analyzer.occluded_lpr) ball.present_llr[...] = numpy.logaddexp(ball.present_ll_c - ball.absent_ll_c + self.visible_lpr, ball.absent_ll_c)
def log_proba(self, kde, value, period=None):
    if period is None:
        return kde.score([[value]])
    else:
        values = kde.score_samples([[value], [value+period], [value-period]])
        total = np.logaddexp(values[0], values[1])
        total = np.logaddexp(total, values[2])
        return total
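# Illustration (standalone sketch): the periodic branch above generalises to any
# number of wrapped copies of the value. The data, bandwidth and n_wraps below
# are placeholders; only the logaddexp folding is the point.
import numpy as np
from sklearn.neighbors import KernelDensity

kde = KernelDensity(bandwidth=0.5).fit(np.random.rand(100, 1))

def log_proba_periodic(kde, value, period, n_wraps=1):
    # evaluate the log-density at the value and its wrapped copies,
    # then combine them in log space
    shifts = [value + k * period for k in range(-n_wraps, n_wraps + 1)]
    log_dens = kde.score_samples(np.asarray(shifts).reshape(-1, 1))
    return np.logaddexp.reduce(log_dens)

print(log_proba_periodic(kde, 0.1, period=1.0))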
def pixel_space_information_gain(self, baseline, gold_standard, stimulus, eps=1e-20):
    log_p_gold = gold_standard.log_density(stimulus)
    log_p_baseline = baseline.log_density(stimulus)
    log_p_model = self.log_density(stimulus)
    p_gold = np.exp(log_p_gold)
    p_gold[p_gold == 0] = p_gold[p_gold > 0].min()
    ig = (p_gold)*(np.logaddexp(log_p_model, np.log(eps)) - np.logaddexp(log_p_baseline, np.log(eps)))
    return ig
def compute_sig_PPsi ( self ,layers , y_possible , mode = "None" ): #print self.Prob[layers] update_Psi_o = np.zeros(self.w_o.shape) update_Psi_t = np.zeros(self.w_t.shape) update_Psi = np.zeros(self.w.shape) CRF_Prob = 0 if mode == "Start" : log_current_Prob = ( np.dot(self.w_o[y_possible] , self.x[layers]/(self.SEQ_LENGTH) ) + self.w_t[self.y_class][y_possible]/self.SEQ_LENGTH ) update_Psi_o[y_possible] = (log_current_Prob) + np.log(self.x[layers])- np.log(self.SEQ_LENGTH) update_Psi_t[self.y_class][y_possible] = (log_current_Prob) - np.log(self.SEQ_LENGTH) CRF_Prob = log_current_Prob elif mode == "End" : log_current_Prob = ( self.w_t.T[self.y_class][:-1] ) update_Psi = log_current_Prob + self.CRF_Psi[layers][:] update_Psi_t.T[self.y_class] = (log_current_Prob) - (self.SEQ_LENGTH) CRF_Prob = log_current_Prob + (self.CRF_Prob[layers][:]) else: log_current_Prob = ( np.dot(self.w_o[y_possible] , self.x[layers]/(self.SEQ_LENGTH) ) + self.w_t.T[y_possible][:-1]) update_Psi = log_current_Prob + self.CRF_Psi[layers][:] update_Psi_t.T[y_possible][:] = (log_current_Prob) - np.log(self.SEQ_LENGTH) update_Psi_o[y_possible] = log_current_Prob + np.log(self.x[layers]) - np.log(self.SEQ_LENGTH) CRF_Prob = log_current_Prob + self.CRF_Prob[layers][:] ''' for y_last in range(0,self.y_class): if mode == "Start" : log_current_Prob = ( np.dot(self.w_o[y_possible] , self.x[layers]/(self.SEQ_LENGTH) ) + self.w_t[self.y_class][y_possible]/self.SEQ_LENGTH ) update_Psi_o[y_possible] = (log_current_Prob) + np.log(self.x[layers])- np.log(self.SEQ_LENGTH) update_Psi_t[self.y_class][y_possible] = (log_current_Prob) - np.log(self.SEQ_LENGTH) CRF_Prob = log_current_Prob elif mode == "End" : log_current_Prob = ( self.w_t[y_last][self.y_class]/self.SEQ_LENGTH ) update_Psi = np.logaddexp ( update_Psi , log_current_Prob + self.CRF_Psi[layers][y_last] ) update_Psi_t[y_last][y_possible] = np.logaddexp( update_Psi_t[y_last][y_possible] , (log_current_Prob) - (self.SEQ_LENGTH) ) CRF_Prob = np.logaddexp( CRF_Prob , log_current_Prob + (self.CRF_Prob[layers][y_last]) ) else: log_current_Prob = ( np.dot(self.w_o[y_possible] , self.x[layers]/(self.SEQ_LENGTH) ) + self.w_t[y_last][y_possible]/self.SEQ_LENGTH ) update_Psi = np.logaddexp ( update_Psi , log_current_Prob + self.CRF_Psi[layers][y_last]) update_Psi_t[y_last][y_possible] = np.logaddexp ( update_Psi_t[y_last][y_possible] , (log_current_Prob) - np.log(self.SEQ_LENGTH) ) update_Psi_o[y_possible] = np.logaddexp ( update_Psi_o[y_possible] , log_current_Prob + np.log(self.x[layers]) - np.log(self.SEQ_LENGTH) ) CRF_Prob = np.logaddexp( CRF_Prob , log_current_Prob + self.CRF_Prob[layers][y_last] ) ''' update_Psi = np.hstack( ( np.hstack(update_Psi_o) , np.hstack(update_Psi_t) ) ) if mode != "Start": a = update_Psi[0] b = CRF_Prob[0] for idx in range(1 , self.y_class): a = np.logaddexp( a , update_Psi[idx]) b = np.logaddexp( b , CRD_Prob[idx]) else: a = update_Psi b = CRF_Prob self.CRF_Psi[layers+1][y_possible] = a #update_Psi #print "update_Psi = " , update_Psi self.CRF_Prob[layers+1][y_possible] = b #CRF_Prob
def loglik(self, a):
    if not np.issubdtype(a.dtype, int):
        raise RuntimeError('a must be an integer array')
    if not np.all((a==0) + (a==1)):
        raise RuntimeError('a must be a binary array')
    log_p = -np.logaddexp(0., -self.odds)
    log_1_minus_p = -np.logaddexp(0., self.odds)
    return a * log_p + (1-a) * log_1_minus_p
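# Illustration (standalone sketch): the two quantities above are the standard
# stable identities log(sigmoid(odds)) = -logaddexp(0, -odds) and
# log(1 - sigmoid(odds)) = -logaddexp(0, odds). Quick numerical check:
import numpy as np

odds = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
p = 1.0 / (1.0 + np.exp(-odds))

assert np.allclose(np.log(p), -np.logaddexp(0., -odds))
assert np.allclose(np.log1p(-p), -np.logaddexp(0., odds))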
def _logaddexp(x1, x2, out=None):
    """Fix np.logaddexp in numpy < 1.4 when x1 == x2 == -np.inf."""
    if out is not None:
        result = np.logaddexp(x1, x2, out=out)
    else:
        result = np.logaddexp(x1, x2)
    result[np.logical_and(x1 == -np.inf, x2 == -np.inf)] = -np.inf
    return result
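# Note (not part of the original helper): on any NumPy new enough to matter
# today (>= 1.4, as the docstring says) logaddexp already handles this case,
# so the masking is only needed on very old installations.
import numpy as np

print(np.logaddexp(-np.inf, -np.inf))   # -inf, i.e. log(0 + 0)
print(np.logaddexp(-np.inf, 0.0))       # 0.0: adding a zero probability is a no-op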
def marginal_dist_n(self, n, log_object_prior=None):
    log_object_prior = self._domain.log_object_prior(log_object_prior)
    marginal_dist = None
    for logP, lexicon in zip(*self.weighted_lexicons()):
        dist = self._domain.dist_n(n, lexicon, log_object_prior)
        if marginal_dist is None:
            marginal_dist = np.empty(dist.shape)
            marginal_dist[:] = -np.inf
        np.logaddexp(marginal_dist, logP + dist, out=marginal_dist)
    return marginal_dist
def __call__(self, h_word, c_word, h_tag, c_tag, h_pos=0, c_pos=0):
    """
    Given a potential dependency, give score
    """
    val = logaddexp(self.word_score(h_word, c_word), self.tag_score(h_tag, c_tag))
    val = logaddexp(val, self.dist_score(h_pos, c_pos))
    return val
def test_nan(self):
    err = np.seterr(invalid="ignore")
    try:
        assert np.isnan(np.logaddexp(np.nan, np.inf))
        assert np.isnan(np.logaddexp(np.inf, np.nan))
        assert np.isnan(np.logaddexp(np.nan, 0))
        assert np.isnan(np.logaddexp(0, np.nan))
        assert np.isnan(np.logaddexp(np.nan, np.nan))
    finally:
        np.seterr(**err)
def logsumexp(array):
    """
    Recursive algorithm that sums numbers using the log of the exponentials of
    the input array. This is used because the probabilities can be very small
    in the likelihoods. Unfortunately it's pretty slow.
    """
    if len(array) == 2:
        return np.logaddexp(array[0], array[1])
    else:
        return np.logaddexp(array[0], logsumexp(array[1:]))
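# Illustration (standalone sketch): the recursion above makes one Python call
# per element and can hit the recursion limit on long arrays.
# np.logaddexp.reduce (or scipy.special.logsumexp, if SciPy is available)
# gives the same result iteratively.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

values = np.random.randn(1000) - 500.0   # very small probabilities in log space

assert np.isclose(np.logaddexp.reduce(values), scipy_logsumexp(values))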
def _sum_log_array(self, a):
    """Sum the log probabilities in an array.

    @param a: array logs
    @type a: array of float

    @return: log(exp(a[0]) + exp(a[1]) + ... + exp(a[n]))
    @rtype: float
    """
    m = array([-inf])
    for element in a[nonzero(a != -inf)]:
        logaddexp(m, element, m)
    return m[0]
def bs(x, q, q1fun, q2fun=None, means=None, sigmas=None, n2=100, niter=5, return_all=False, guess=1): """ Parameters ---------- x : samples from the density function ``q1fun`` q : log-density function values corresponding to ``x`` q1fun : density function q2fun : density function (optional) means : multivariate normal means (optional) sigmas : multivariate normal sigmas (optional) n2 : number of samples to draw niter : number of iterations return_all : guess : """ if not q2fun: means = means if means is not None else np.mean(x, axis=0) sigmas = sigmas if sigmas is not None else 2*np.std(x, axis=0) q2fun = MVN(means, sigmas) x1s = np.asarray(x) x2s = q2fun.rvs(n2) lq11 = np.asarray(q).ravel() lq12 = np.array([q1fun(x2) for x2 in x2s]).ravel() lq22 = np.array([q2fun(x2) for x2 in x2s]) lq21 = np.array([q2fun(x1) for x1 in x1s]) n1, ln1 = lq11.size, m.log(lq11.size) n2, ln2 = n2, m.log(n2) ls1 = m.log(n1/float(n1+n2)) ls2 = m.log(n2/float(n1+n2)) ll1 = lq11 - lq21 ll2 = lq12 - lq22 lr = np.ones(niter)*guess for i in range(1,niter): A = ll2 - np.logaddexp(ls1+ll2, ls2+lr[i-1]) Am = A.max() A = np.log(np.sum(np.exp(A-Am))) + Am - np.log(n2) B = 0.0 - np.logaddexp(ls1+ll1, ls2+lr[i-1]) Bm = B.max() B = np.log(np.sum(np.exp(B-Bm)))+ Bm - np.log(n1) lr[i] = A-B return lr if return_all else lr[-1]
def add_outside_chart_items(parent_inside_item, children_inside_items): global inside, outside # first, make sure the parent item has been added to the outside chart. # if it were not added, this suggests one of two things: # 1) there's a bug. # 2) even though this item was created by the inside algorithm, it didn't participate in any complete parses # which is why the outside chart does not contain it. to save space and time, we will not create such items if parent_inside_item not in outside: return # add the left (or only) child item to the outside chart if children_inside_items[0] not in outside: outside[children_inside_items[0]] = NEGINF # sealing rules require special processing if len(children_inside_items) == 1: # first, determine stop cost direction = RIGHT if children_inside_items[0].nonterminal.type == NOT_SEALED and \ parent_inside_item.nonterminal.type == HALF_SEALED \ else LEFT adjacency = ADJ if children_inside_items[0].nonterminal.fertility == 0 else NOT_ADJ stop_cost = stop_params[parent_inside_item.nonterminal.pos, direction, adjacency][STOP] # then, update the outside score of this child outside[children_inside_items[0]] = logaddexp( outside[children_inside_items[0]], outside[parent_inside_item] + stop_cost ) # now, binary rules elif len(children_inside_items) == 2: # add the right child item to the outside chart if children_inside_items[1] not in outside: outside[children_inside_items[1]] = NEGINF # first, determine the cost of not stopping direction = LEFT if children_inside_items[0].nonterminal.type == SEALED else RIGHT adjacency = ADJ if direction == LEFT and children_inside_items[1].nonterminal.fertility == 0 or \ direction == RIGHT and children_inside_items[0].nonterminal.fertility == 0 \ else NOT_ADJ head_terminal = parent_inside_item.nonterminal.pos dependent_terminal = children_inside_items[0].nonterminal.pos if direction == LEFT else children_inside_items[1].nonterminal.pos no_stop_cost = stop_params[(head_terminal, direction, adjacency)][NO_STOP] prod_cost = prod_params[(head_terminal, direction)][dependent_terminal] # now, update the outside score of first child outside[children_inside_items[0]] = logaddexp( outside[children_inside_items[0]], outside[parent_inside_item] + \ inside[children_inside_items[1]] + \ no_stop_cost + prod_cost) outside[children_inside_items[1]] = logaddexp( outside[children_inside_items[1]], outside[parent_inside_item] + \ inside[children_inside_items[0]] + \ no_stop_cost + prod_cost) else: assert False
def fast_js_with(self, p):
    log_p = np.array([p.log_likelihood(ngram) for ngram in p.unique_ngrams()])
    log_q = np.array([self.log_likelihood(ngram) for ngram in p.unique_ngrams()])
    log_m = np.logaddexp(log_p - np.log(2), log_q - np.log(2))
    kl_p_m = np.sum(np.exp(log_p) * (log_p - log_m))

    log_p = np.array([p.log_likelihood(ngram) for ngram in self.unique_ngrams()])
    log_q = np.array([self.log_likelihood(ngram) for ngram in self.unique_ngrams()])
    log_m = np.logaddexp(log_p - np.log(2), log_q - np.log(2))
    kl_q_m = np.sum(np.exp(log_q) * (log_q - log_m))

    return 0.5*(kl_p_m + kl_q_m) / np.log(2)
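# Illustration (standalone sketch): the mixture m = (p + q) / 2 is built in log
# space as logaddexp(log_p - log 2, log_q - log 2), exactly as above. The same
# computation for two plain probability vectors (made-up numbers):
import numpy as np

def js_divergence(p, q):
    """Jensen-Shannon divergence in bits between two discrete distributions."""
    log_p, log_q = np.log(p), np.log(q)
    log_m = np.logaddexp(log_p - np.log(2), log_q - np.log(2))
    kl_p_m = np.sum(p * (log_p - log_m))
    kl_q_m = np.sum(q * (log_q - log_m))
    return 0.5 * (kl_p_m + kl_q_m) / np.log(2)

print(js_divergence(np.array([0.5, 0.3, 0.2]), np.array([0.2, 0.3, 0.5])))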
def backward_probs(num_segments, num_pops, x_labels, mu_k, mu_k_y, sigma, theta, e_y_x, labels, labels_as_nums): ''' forward algorithm ''' # initialization bkd_individual = np.zeros([num_segments, num_pops*num_pops]) # initialize bkd matrix to zeros bkd_global = np.zeros([num_segments, num_pops*num_pops]) # initialize bkd matrix to zeros for individual in x_labels: bkd_individual = np.zeros([num_segments, num_pops*num_pops]) # reset individual h0 = individual[0] h1 = individual[1] # initial distribution for label_h0 in labels: for label_h1 in labels: # P(y1) p_y1 = mu_k[label_h0] * mu_k[label_h1] # P(x1|y1) p_y1x1 = mu_k_y[(h0[len(h0)-1], label_h0)] * mu_k_y[(h1[len(h0)-1], label_h1)] bkd_individual[len(h0)-1][labels_as_nums[(label_h0,label_h1)]] = np.logaddexp(p_y1, p_y1x1) bkd_global[len(h0)-1][labels_as_nums[(label_h0,label_h1)]] += np.logaddexp(p_y1, p_y1x1) # windows 2 - S for i in xrange(len(h0)-2, -1,-1): if h0[i+1] == h1[i+1]: # unknown if switch happened st = 0 else: if h0[i+1] == h1[i] and h1[i+1] == h0[i]: st = 1 else: st = 0 for label_minus1_h0 in labels: for label_h0 in labels: for label_minus1_h1 in labels: for label_h1 in labels: p_switch = switch_error(sigma, st) p_transition = transition_prob(label_h0, label_minus1_h0, label_h1, label_minus1_h1, st, theta, mu_k) p_emission = emission_prob(h0[i], h0[i+1], label_h0, label_minus1_h0, h1[i], h1[i+1], label_h1, label_minus1_h1, st, e_y_x, mu_k_y) # logadd = np.logaddexp(p_transition, p_emission) prob_i = np.logaddexp(p_switch, np.logaddexp(p_transition, p_emission)) bkd_individual[i][labels_as_nums[(label_h0, label_h1)]] = prob_i bkd_global[i][labels_as_nums[(label_h0, label_h1)]] += prob_i # normalize for row in range(0, bkd_global.shape[0]): sum_row = bkd_global[row].sum() bkd_global[row] = bkd_global[row] / sum_row return bkd_global
def _update_terminal_region(self, tree, terminal_regions, leaf, X, y, residual, pred, sample_weight, leaf_indices): """Making one Newton step""" # terminal_region = numpy.where(terminal_regions == leaf)[0] terminal_region = leaf_indices y = y.take(terminal_region, axis=0) y_signed = 2. * y - 1 pred = pred.take(terminal_region, axis=0) sample_weight = sample_weight.take(terminal_region) argument = -y_signed * pred - self.shift n_gradient = numpy.sum(sample_weight * y_signed * expit(argument)) laplacian = numpy.sum(sample_weight / numpy.logaddexp(0., argument) / numpy.logaddexp(0., -argument)) tree.value[leaf, 0, 0] = n_gradient / laplacian
def as_merge(cls, node_left, node_right): """ Create a node from two other nodes Parameters ---------- node_left : Node the Node on the left node_right : Node The Node on the right """ crp_alpha = node_left.crp_alpha data_model = node_left.data_model data = np.vstack((node_left.data, node_right.data)) data_uncerts = np.vstack((node_left.data_uncerts, node_right.data_uncerts)) indexes = node_left.indexes + node_right.indexes indexes.sort() nk = node_left.nk + node_right.nk log_dk = logaddexp(math.log(crp_alpha) + math.lgamma(nk), node_left.log_dk + node_right.log_dk) log_pi = -math.log1p(math.exp(node_left.log_dk + node_right.log_dk - math.log(crp_alpha) - math.lgamma(nk))) # combine sum_dicts sum_dict = {} for key in node_left.sum_dict: if key in node_right.sum_dict: sum_dict[key] = (node_left.sum_dict[key] + node_right.sum_dict[key]) # Calculate log_rk - the log probability of the merge logp, sum_dict = data_model.log_marginal_likelihood(data, data_uncerts, **sum_dict) numer = log_pi + logp neg_pi = math.log(-math.expm1(log_pi)) log_ml = logaddexp(numer, neg_pi+node_left.log_ml+node_right.log_ml) log_rk = numer-log_ml if log_pi == 0: raise RuntimeError('Precision error') return cls(data, data_uncerts, data_model, crp_alpha, log_dk, log_pi, log_ml, logp, sum_dict, log_rk, node_left, node_right, nk, indexes)
def estimate_transition_probabilities(self, gamma, xi): trans_new = np.empty_like(self.transitionMatrix) # trans_new[:] = -np.inf # local_xi = zi[:T - 1] # local_gamma = gamma[:T - 1] for from_, to in product(range(self.nStates), range(self.nStates)): sum_xi = -np.inf sum_gamma = -np.inf for t, gamma_ in enumerate(gamma[:-1]): # print "State{} --> State{}".format(from_, to) sum_xi = np.logaddexp(sum_xi, xi[t, from_, to]) sum_gamma = np.logaddexp(sum_gamma, gamma_[from_]) trans_new[from_, to] = sum_xi - sum_gamma return trans_new
def lnprob(p):
    '''
    posterior prob when there are outliers
    has two mixtures: reds and blues
    '''
    m1, m2, b1, b2, lnf1, lnf2, Q, M1, M2, lnV1, lnV2 = p

    # First check the prior.
    lp = lnprior(p)
    if not np.isfinite(lp):
        return -np.inf, None

    # Compute the vector of foreground likelihoods and include the q prior.
    ll_fg = lnlike_fg(p)
    arg1 = ll_fg + np.log(Q)

    # Compute the vector of background likelihoods and include the q prior.
    ll_bg = lnlike_bg(p)
    arg2 = ll_bg + np.log(1.0 - Q)

    # Combine these using log-add-exp for numerical stability.
    ll = np.sum(np.logaddexp(arg1, arg2))

    # We're using emcee's "blobs" feature in order to keep track of the
    # foreground and background likelihoods for reasons that will become
    # clear soon.
    return lp + ll, (arg1, arg2)
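# Illustration (standalone sketch): the (arg1, arg2) blobs returned above give
# the per-point foreground membership probability as
# exp(arg1 - logaddexp(arg1, arg2)), which is how the blobs are combined in the
# posterior-probability loop further down. Made-up values:
import numpy as np

arg1 = np.array([-10.0, -2.0, -0.5])   # log(Q) + log-likelihood under the foreground
arg2 = np.array([-1.0, -2.0, -5.0])    # log(1 - Q) + log-likelihood under the background

post_fg = np.exp(arg1 - np.logaddexp(arg1, arg2))
print(post_fg)   # close to 0, exactly 0.5, close to 1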
def fgmc(log_fg_ratios, mu_log_vt, sigma_log_vt, Rf, maxfg): ''' Function to fit the likelihood Fixme ''' Lb = np.random.uniform(0., maxfg, len(Rf)) pquit = 0 while pquit < 0.1: # quit when the posterior on Lf is very close to its prior nsamp = len(Lb) Rf_sel = np.random.choice(Rf, nsamp) vt = np.random.lognormal(mu_log_vt, sigma_log_vt, len(Rf_sel)) Lf = Rf_sel * vt log_Lf, log_Lb = log(Lf), log(Lb) plR = 0 for lfr in log_fg_ratios: plR += np.logaddexp(lfr + log_Lf, log_Lb) plR -= (Lf + Lb) plRn = plR - max(plR) idx = np.exp(plRn) > np.random.random(len(plRn)) pquit = ss.stats.ks_2samp(Lb, Lb[idx])[1] Lb = Lb[idx] return Rf_sel[idx], Lf[idx], Lb
def time_sync_decoding(self, h: torch.Tensor, encoded_lengths: torch.Tensor) -> List[Hypothesis]: """Time synchronous beam search implementation. Based on https://ieeexplore.ieee.org/document/9053040 Args: h: Encoded speech features (1, T_max, D_enc) Returns: nbest_hyps: N-best decoding results """ # Precompute some constants for blank position ids = list(range(self.vocab_size + 1)) ids.remove(self.blank) # Used when blank token is first vs last token if self.blank == 0: index_incr = 1 else: index_incr = 0 # prepare the batched beam states beam = min(self.beam_size, self.vocab_size) beam_state = self.decoder.initialize_state( torch.zeros(beam, device=h.device, dtype=h.dtype)) # [L, B, H], [L, B, H] (for LSTMs) # Initialize first hypothesis for the beam (blank) B = [ Hypothesis( y_sequence=[self.blank], score=0.0, dec_state=self.decoder.batch_select_state(beam_state, 0), timestep=[-1], length=0, ) ] cache = {} for i in range(int(encoded_lengths)): hi = h[:, i:i + 1, :] # Update caches A = [] C = B h_enc = hi # For a limited number of symmetric expansions per timestep "i" for v in range(self.tsd_max_symmetric_expansion_per_step): D = [] # Decode a batch of beam states and scores beam_y, beam_state, beam_lm_tokens = self.decoder.batch_score_hypothesis( C, cache, beam_state) # Extract the log probabilities and the predicted tokens beam_logp = torch.log_softmax(self.joint.joint(h_enc, beam_y), dim=-1) # [B, 1, 1, V + 1] beam_logp = beam_logp[:, 0, 0, :] # [B, V + 1] beam_topk = beam_logp[:, ids].topk(beam, dim=-1) seq_A = [h.y_sequence for h in A] for j, hyp in enumerate(C): # create a new hypothesis in A if hyp.y_sequence not in seq_A: # If the sequence is not in seq_A, add it as the blank token # In this step, we dont add a token but simply update score A.append( Hypothesis( score=(hyp.score + float(beam_logp[j, self.blank])), y_sequence=hyp.y_sequence[:], dec_state=hyp.dec_state, lm_state=hyp.lm_state, timestep=hyp.timestep[:], length=encoded_lengths, )) else: # merge the existing blank hypothesis score with current score. dict_pos = seq_A.index(hyp.y_sequence) A[dict_pos].score = np.logaddexp( A[dict_pos].score, (hyp.score + float(beam_logp[j, self.blank]))) if v < self.tsd_max_symmetric_expansion_per_step: for j, hyp in enumerate(C): # for each current hypothesis j # extract the top token score and top token id for the jth hypothesis for logp, k in zip(beam_topk[0][j], beam_topk[1][j] + index_incr): # create new hypothesis and store in D # Note: This loop does *not* include the blank token! new_hyp = Hypothesis( score=(hyp.score + float(logp)), y_sequence=(hyp.y_sequence + [int(k)]), dec_state=self.decoder.batch_select_state( beam_state, j), lm_state=hyp.lm_state, timestep=hyp.timestep[:] + [i], length=encoded_lengths, ) D.append(new_hyp) # Prune beam C = sorted(D, key=lambda x: x.score, reverse=True)[:beam] # Prune beam B = sorted(A, key=lambda x: x.score, reverse=True)[:beam] return self.sort_nbest(B)
def passing_node(self, rootid, node, rootids, parallel_values): """Accumulate node to the integration. Breadth-first removed `node` and nodes active next to node (`parallel_nodes`). rootid and rootids are needed to identify which bootstrap instance should accumulate. Parameters ---------- rootid: TreeNode root node this `node` is from. node: TreeNode node being processed. rootids: array of ints for each parallel node, which root it belongs to. parallel_nodes: array of TreeNodes parallel nodes passing `node`. """ # node is being consumed # we have parallel arcs to parallel_nodes assert not isinstance(rootid, float) nchildren = len(node.children) Li = node.value # in which bootstraps is rootid? active = self.rootids[:, rootid] # how many live points does each bootstrap have? nlive = self.rootids[:, rootids].sum(axis=1) nlive0 = nlive[0] if nchildren >= 1: # one arc terminates, another is spawned # weight is the size of the slice off the volume if self.random: randompoint = np.random.beta(1, nlive, size=self.ncounters) logleft = log(randompoint) logright = log1p(-randompoint) logleft[0] = log1p(-exp(-1. / nlive0)) logright[0] = -1. / nlive0 else: logleft = log1p(-exp(-1. / nlive)) logright = -1. / nlive logwidth = logleft + self.all_logVolremaining logwidth[~active] = -np.inf wi = logwidth[active] + Li self.logweights.append(logwidth) self.istail.append(False) # print("updating continuation...", Li) assert active[0], (active, rootid) logZ = self.all_logZ[active] logZnew = logaddexp(logZ, wi) H = exp(wi - logZnew) * Li + exp(logZ - logZnew) * ( self.all_H[active] + logZ) - logZnew first_setting = np.isnan(H) # print() # print("Hnext:", H[0], first_setting[0]) assert np.isfinite( H[~first_setting]).all(), (first_setting, self.all_H[active][~first_setting], H, wi, logZnew, Li, logZ) self.all_logZ[active] = np.where(first_setting, wi, logZnew) # print("logZ:", self.all_logZ[0]) if first_setting[0]: assert np.all(np.isfinite(Li - wi)), (Li, wi) else: assert np.isfinite(self.all_H[0]), self.all_H[0] assert np.isfinite(H[0]), (first_setting[0], H[0], self.all_H[0], wi[0], logZnew[0], Li, logZ[0]) self.all_H[active] = np.where(first_setting, -logwidth[active], H) # print("H:", self.all_H) assert np.isfinite(self.all_H[active]).all(), (self.all_H[active], first_setting[0], H[0], self.all_H[0], wi[0], logZnew[0], Li, logZ[0]) # assert np.all(np.isfinite(self.all_H[active])), (H, self.all_H[active], wi, logZnew, Li, logZ) self.logZ = self.all_logZ[0] assert np.all(np.isfinite( self.all_logZ[active])), (self.all_logZ[active]) # self.Lmax = max((n.value for n in parallel_nodes)) # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=%.1f logZremain=%.1f" % ( # Li, nlive[0], self.logVolremaining, wi[0], self.logZ, logZremain)) # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=%.1f logZremain=%.1f" % ( # Li, nlive[0], self.all_logVolremaining[0], (logwidth + Li)[0], self.all_logZ[0], logZremain)) # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=<%.1f logZremain=%.1f" % ( # Li, nlive[1], self.all_logVolremaining[1], (logwidth + Li)[1], self.all_logZ[1], logZremain)) if self.all_H[0] > 0: # TODO: this needs to change if nlive varies self.logZerr = (self.all_H[0] / nlive0)**0.5 # assert np.all(np.isfinite(self.logZerr)), (self.logZerr, self.all_H[0], nlive) # volume is reduced by exp(-1/N) self.all_logVolremaining[active] += logright[active] self.logVolremaining = self.all_logVolremaining[0] else: # contracting! 
# print("contracting...", Li) # weight is simply volume / Nlive logwidth = -np.inf * np.ones(self.ncounters) logwidth[active] = self.all_logVolremaining[active] - log( nlive[active]) wi = logwidth + Li self.logweights.append(logwidth) self.istail.append(True) self.all_logZ[active] = logaddexp(self.all_logZ[active], wi[active]) self.logZ = self.all_logZ[0] # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=%.1f" % (Li, nlive, self.logVolremaining, wi, self.logZ)) # the volume shrinks by (N - 1) / N # self.logVolremaining += log(1 - exp(-1. / nlive)) # if nlive = 1, we are removing the last point, so remaining # volume is zero (leads to log of -inf, as expected) with np.errstate(divide='ignore'): self.all_logVolremaining[active] += log1p(-1.0 / nlive[active]) self.logVolremaining = self.all_logVolremaining[0] V = self.all_logVolremaining - log(nlive0) Lmax = np.max(parallel_values) self.all_logZremain = V + log(np.sum( exp(parallel_values - Lmax))) + Lmax self.logZremainMax = self.all_logZremain.max() self.logZremain = self.all_logZremain[0] with np.errstate(over='ignore', under='ignore'): self.remainder_ratio = exp(self.logZremain - self.logZ) self.remainder_fraction = 1.0 / (1 + exp(self.logZ - self.logZremain))
def sigmoid(x):
    return np.exp(-np.logaddexp(0, -x))
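# Illustration (standalone sketch): exp(-logaddexp(0, -x)) equals 1/(1 + exp(-x))
# but never exponentiates a large positive number. Comparison at extreme inputs
# (values chosen only to trigger the overflow in the naive form):
import numpy as np

x = np.array([-1000.0, 0.0, 1000.0])

with np.errstate(over='ignore'):
    naive = 1.0 / (1.0 + np.exp(-x))    # np.exp(1000.0) overflows to inf

stable = np.exp(-np.logaddexp(0, -x))   # 0.0, 0.5, 1.0 with no overflow

print(naive, stable)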
def loss_func(x):
    # np.logaddexp requires two arguments; log(1 + exp(-x)) is assumed here
    return np.logaddexp(0, -x)
def combine_results(saved_logl, saved_nodeids, pointpile, main_iterator, mpi_comm=None): """Combine a sequence of likelihoods and nodes into a summary dictionary.""" assert np.shape(main_iterator.logweights) == ( len(saved_logl), len(main_iterator.all_logZ)), (np.shape(main_iterator.logweights), np.shape(saved_logl), np.shape(main_iterator.all_logZ)) saved_logl = np.array(saved_logl) saved_u = pointpile.getu(saved_nodeids) saved_v = pointpile.getp(saved_nodeids) saved_logwt = np.array(main_iterator.logweights) saved_logwt0 = saved_logwt[:, 0] saved_logwt_bs = saved_logwt[:, 1:] logZ_bs = main_iterator.all_logZ[1:] assert len(saved_logwt_bs) == len(saved_nodeids), (saved_logwt_bs.shape, len(saved_nodeids)) if mpi_comm is not None: # spread logZ_bs, saved_logwt_bs recv_saved_logwt_bs = mpi_comm.gather(saved_logwt_bs, root=0) recv_saved_logwt_bs = mpi_comm.bcast(recv_saved_logwt_bs, root=0) saved_logwt_bs = np.concatenate(recv_saved_logwt_bs, axis=1) recv_logZ_bs = mpi_comm.gather(logZ_bs, root=0) recv_logZ_bs = mpi_comm.bcast(recv_logZ_bs, root=0) logZ_bs = np.concatenate(recv_logZ_bs) saved_wt_bs = exp(saved_logwt_bs + saved_logl.reshape((-1, 1)) - logZ_bs) saved_wt0 = exp(saved_logwt0 + saved_logl - main_iterator.all_logZ[0]) # compute fraction in tail w = saved_wt0 / saved_wt0.sum() assert np.isclose(w.sum() - 1, 0), w.sum() ess = len(w) / (1.0 + ((len(w) * w - 1)**2).sum() / len(w)) tail_fraction = w[np.asarray(main_iterator.istail)].sum() if tail_fraction != 0: logzerr_tail = logaddexp( log(tail_fraction) + main_iterator.logZ, main_iterator.logZ) - main_iterator.logZ else: logzerr_tail = 0 logzerr_bs = (logZ_bs - main_iterator.logZ).max() logzerr_total = (logzerr_tail**2 + logzerr_bs**2)**0.5 samples = resample_equal(saved_v, w) j = saved_logl.argmax() results = dict( niter=len(saved_logl), logz=main_iterator.logZ, logzerr=logzerr_total, logz_bs=logZ_bs.mean(), logz_single=main_iterator.logZ, logzerr_tail=logzerr_tail, logzerr_bs=logzerr_bs, ess=ess, H=main_iterator.all_H[0], Herr=main_iterator.all_H.std(), posterior=dict( mean=samples.mean(axis=0).tolist(), stdev=samples.std(axis=0).tolist(), median=np.percentile(samples, 50, axis=0).tolist(), errlo=np.percentile(samples, 15.8655, axis=0).tolist(), errup=np.percentile(samples, 84.1345, axis=0).tolist(), ), weighted_samples=dict(upoints=saved_u, points=saved_v, weights=saved_wt0, logw=saved_logwt0, bootstrapped_weights=saved_wt_bs, logl=saved_logl), samples=samples, maximum_likelihood=dict( logl=saved_logl[j], point=saved_v[j, :].tolist(), point_untransformed=saved_u[j, :].tolist(), ), ) return results
def passing_node(self, node, parallel_nodes): """Accumulate node to the integration. Parameters ----------- node: TreeNode breadth-first removed node parallel_nodes: list nodes active next to node """ # node is being consumed # we have parallel arcs to parallel_nodes nchildren = len(node.children) Li = node.value nlive = len(parallel_nodes) if nchildren >= 1: # one arc terminates, another is spawned # weight is the size of the slice off the volume logleft = log1p(-exp(-1. / nlive)) logright = -1. / nlive if self.random: randompoint = np.random.beta(1, nlive) logleft = log(randompoint) logright = log1p(-randompoint) logwidth = logleft + self.logVolremaining wi = logwidth + Li self.logweights.append(logwidth) if math.isinf(self.logZ): self.logZ = wi self.H = Li - self.logZ else: logZnew = logaddexp(self.logZ, wi) self.H = exp(wi - logZnew) * Li + exp(self.logZ - logZnew) * ( self.H + self.logZ) - logZnew assert np.all(np.isfinite(self.H)), (self.H, wi, logZnew, Li, self.logZ) self.logZ = logZnew # print(self.H) # self.Lmax = max(node.value, self.Lmax) # self.Lmax = max((n.value for n in parallel_nodes)) # logZremain = parallel_nodes.max() + self.logVolremaining # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=%.1f logZremain=%.1f" % (Li, nlive, self.logVolremaining, wi, self.logZ, logZremain)) # volume is reduced by exp(-1/N) self.logVolremaining += logright # TODO: this needs to change if nlive varies self.logZerr = (self.H / nlive)**0.5 assert np.all(np.isfinite(self.logZerr)), (self.H, nlive) else: # contracting! # weight is simply volume / Nlive logwidth = self.logVolremaining - log(nlive) wi = logwidth + Li self.logweights.append(logwidth) self.logZ = logaddexp(self.logZ, wi) # print("L=%.1f N=%d V=%.2e logw=%.2e logZ=%.1f" % (Li, nlive, self.logVolremaining, wi, self.logZ)) # the volume shrinks by (N - 1) / N # self.logVolremaining += log(1 - exp(-1. / nlive)) # if nlive = 1, we are removing the last point, so remaining # volume is zero (leads to log of -inf, as expected) with np.errstate(divide='ignore'): self.logVolremaining += log1p(-1.0 / nlive)
def RDP_depend_pate_gaussian(params, alpha): """ Return the data-dependent RDP of GNMAX (proposed in PATE2) Bounds RDP from above of GNMax given an upper bound on q (Theorem 6). Args: logq: Natural logarithm of the probability of a non-argmax outcome. sigma: Standard deviation of Gaussian noise. orders: An array_like list of Renyi orders. Returns: Upper bound on RPD for all orders. A scalar if orders is a scalar. Raises: ValueError: If the input is malformed. """ logq = params['logq'] sigma = params['sigma'] if alpha == 1: p = np.exp(logq) w = (2 * p - 1) * (logq - _log1mexp(logq)) return w if logq > 0 or sigma < 0 or np.any(alpha < 1): # not defined for alpha=1 raise ValueError("Inputs are malformed.") if np.isneginf(logq): # If the mechanism's output is fixed, it has 0-DP. print('isneginf', logq) if np.isscalar(alpha): return 0. else: return np.full_like(alpha, 0., dtype=np.float) variance = sigma**2 # Use two different higher orders: mu_hi1 and mu_hi2 computed according to # Proposition 10. mu_hi2 = math.sqrt(variance * -logq) mu_hi1 = mu_hi2 + 1 orders_vec = np.atleast_1d(alpha) ret = orders_vec / variance # baseline: data-independent bound # Filter out entries where data-dependent bound does not apply. mask = np.logical_and(mu_hi1 > orders_vec, mu_hi2 > 1) rdp_hi1 = mu_hi1 / variance rdp_hi2 = mu_hi2 / variance log_a2 = (mu_hi2 - 1) * rdp_hi2 # Make sure q is in the increasing wrt q range and A is positive. if (np.any(mask) and logq <= log_a2 - mu_hi2 * (math.log(1 + 1 / (mu_hi1 - 1)) + math.log(1 + 1 / (mu_hi2 - 1))) and -logq > rdp_hi2): # Use log1p(x) = log(1 + x) to avoid catastrophic cancellations when x ~ 0. log1q = _log1mexp(logq) # log1q = log(1-q) log_a = (alpha - 1) * (log1q - _log1mexp( (logq + rdp_hi2) * (1 - 1 / mu_hi2))) log_b = (alpha - 1) * (rdp_hi1 - logq / (mu_hi1 - 1)) # Use logaddexp(x, y) = log(e^x + e^y) to avoid overflow for large x, y. log_s1 = utils.stable_logsumexp_two(log1q + log_a, logq + log_b) log_s = np.logaddexp(log1q + log_a, logq + log_b) ret[mask] = np.minimum(ret, log_s / (alpha - 1))[mask] # print('alpha ={} mask {}'.format(alpha,ret)) if ret[mask] < 0: print('negative ret', ret) print('log_s1 ={} log_s = {}'.format(log_s1, log_s)) print('alpha = {} mu_hi1 ={}'.format(alpha, mu_hi1)) print('log1q = {} log_a = {} log_b={} log_s = {}'.format( log1q, log_a, log_b, log_s)) ret[mask] = 1. / (sigma**2) * alpha # print('replace ret with', ret) assert np.all(ret >= 0) if np.isscalar(alpha): return np.asscalar(ret) else: return ret
def __call__(self, hyp, cs, r_prev, new_chunk=False): """Compute CTC prefix scores for next labels. Args: hyp (List): prefix label sequence cs (np.ndarray): array of next labels. A tensor of size `[beam_width]` r_prev (np.ndarray): previous CTC state `[T, 2]` Returns: ctc_scores (np.ndarray): `[beam_width]` ctc_states (np.ndarray): `[beam_width, T, 2]` """ beam_width = len(cs) # initialize CTC states ylen = len(hyp) - 1 # ignore sos # new CTC states are prepared as a frame x (n or b) x n_labels tensor # that corresponds to r_t^n(h) and r_t^b(h). r = np.ndarray((self.xlen, 2, beam_width), dtype=np.float32) xs = self.log_probs[:, cs] if ylen == 0: r[0, 0] = xs[0] r[0, 1] = self.log0 else: r[ylen - 1] = self.log0 # Initialize CTC state for the new chunk if new_chunk and self.xlen_prev > 0: xlen_prev = r_prev.shape[0] r_new = np.full((self.xlen - xlen_prev, 2), self.log0, dtype=np.float32) r_new[0, 1] = r_prev[xlen_prev - 1, 1] + self.log_probs[xlen_prev, self.blank] for i in range(xlen_prev + 1, self.xlen): r_new[i - xlen_prev, 1] = r_new[i - xlen_prev - 1, 1] + self.log_probs[i, self.blank] r_prev = np.concatenate([r_prev, r_new], axis=0) # prepare forward probabilities for the last label r_sum = np.logaddexp(r_prev[:, 0], r_prev[:, 1]) # log(r_t^n(g) + r_t^b(g)) last = hyp[-1] if ylen > 0 and last in cs: log_phi = np.ndarray((self.xlen, beam_width), dtype=np.float32) for k in range(beam_width): log_phi[:, k] = r_sum if cs[k] != last else r_prev[:, 1] else: log_phi = r_sum # `[T]` # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)), # and log prefix probabilities log(psi) start = max(ylen, 1) log_psi = r[start - 1, 0] for t in range(start, self.xlen): # non-blank r[t, 0] = np.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t] # blank r[t, 1] = np.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.log_probs[t, self.blank] log_psi = np.logaddexp(log_psi, log_phi[t - 1] + xs[t]) # get P(...eos|X) that ends with the prefix itself eos_pos = np.where(cs == self.eos)[0] if len(eos_pos) > 0: log_psi[eos_pos] = r_sum[-1] # log(r_T^n(g) + r_T^b(g)) # return the log prefix probability and CTC states, where the label axis # of the CTC states is moved to the first axis to slice it easily return log_psi, np.rollaxis(r, 2)
def forward(self, x):
    """
    Implementation of softplus. Overflow avoided by use of the logaddexp
    function. self._lower is added before returning.
    """
    return np.logaddexp(0, x) + self._lower
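# Illustration (standalone sketch): np.logaddexp(0, x) is the numerically safe
# softplus, behaving like exp(x) for very negative x and like x for large
# positive x, where np.log(1 + np.exp(x)) would overflow. Illustrative values:
import numpy as np

x = np.array([-50.0, 0.0, 50.0, 1000.0])

print(np.logaddexp(0, x))            # ~exp(-50), log(2), ~50, 1000

with np.errstate(over='ignore'):
    print(np.log(1.0 + np.exp(x)))   # inf at x = 1000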
def __call__(self, y, pred):
    """Compute the deviance (= 2 * negative log-likelihood)."""
    # logaddexp(0, v) == log(1.0 + exp(v))
    pred = pred.ravel()
    return -2.0 * np.mean((y * pred) - np.logaddexp(0.0, pred))
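# Illustration (standalone sketch): this works because
# y*pred - logaddexp(0, pred) == y*log(p) + (1-y)*log(1-p) with p = sigmoid(pred),
# so the deviance never forms p explicitly. Check on made-up data:
import numpy as np
from scipy.special import expit

y = np.array([0., 1., 1., 0.])
pred = np.array([-2.0, 0.5, 3.0, 1.0])   # raw scores (log-odds)

p = expit(pred)
direct = -2.0 * np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
stable = -2.0 * np.mean(y * pred - np.logaddexp(0.0, pred))

assert np.isclose(direct, stable)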
def refine_probs(self): """ refine_probs() Improve the estimated probabilities used by working with the full set of data allocated to each node, rather than just the initial sub-set used to create/split nodes. """ # travel up from leaves improving log_rk etc. for level_it in range(len(self.assignments) - 1, -1, -1): # print(level_it, self.nodes[level_it].keys()) for node_it in self.nodes[level_it]: node = self.nodes[level_it][node_it] if node.tree_terminated: if node.nk > 1: # log_rk, etc are accurate node.log_dk = node.true_bhc.root_node.log_dk node.log_pi = node.true_bhc.root_node.log_pi node.logp = node.true_bhc.root_node.logp node.log_ml = node.true_bhc.root_node.log_ml node.log_rk = node.true_bhc.root_node.log_rk else: node.log_dk = self.crp_alpha node.log_pi = 0. node.logp = self.data_model.log_marginal_likelihood( node.data) node.log_ml = node.logp node.log_rk = 0. elif node.truncation_terminated: node.log_dk = (math.log(self.crp_alpha) + math.lgamma(node.nk)) node.log_pi = 0. node.logp = self.data_model.log_marginal_likelihood( node.data) node.log_ml = node.logp node.log_rk = 0. else: left_child = self.nodes[level_it + 1][node_it * 2] right_child = self.nodes[level_it + 1][node_it * 2 + 1] node.log_dk = np.logaddexp( math.log(self.crp_alpha) + math.lgamma(node.nk), left_child.log_dk + right_child.log_dk) node.log_pi = -math.log1p( math.exp(left_child.log_dk + right_child.log_dk - math.log(self.crp_alpha) - math.lgamma(node.nk))) neg_pi = math.log(-math.expm1(node.log_pi)) node.logp = self.data_model.log_marginal_likelihood( node.data) node.log_ml = np.logaddexp( node.log_pi + node.logp, neg_pi + left_child.log_ml + right_child.log_ml) node.log_rk = node.log_pi + node.logp - node.log_ml # travel down from top improving for level_it in range(1, len(self.assignments)): for node_it in self.nodes[level_it]: node = self.nodes[level_it][node_it] parent_node = self.nodes[level_it - 1][int(node_it / 2)] node.prev_wk = (parent_node.prev_wk * (1 - math.exp(parent_node.log_rk)))
def sigmoid(x):
    if x >= 0:
        return math.exp(-np.logaddexp(0, -x))
    else:
        return math.exp(x - np.logaddexp(x, 0))
def expectation_step2(self, seqs, no_emissions=False): num_states = self.num_states num_emissions = self.num_emissions expected_transitions = np.zeros((num_states, num_states)) expected_emissions = np.zeros((num_states, num_emissions)) expected_start_count = np.zeros(num_states) if self.laplace: expected_transitions += self.laplace expected_emissions += self.laplace expected_start_count += self.laplace expected_emissions = np.log(expected_emissions) log_emissions = np.log(self.emissions.T) log_transitions = np.log(self.transitions) log_start_prob = np.log(self.start_prob) score = 0 import time for seq in seqs: tic = time.clock() a2, b2 = self.hmm.forward_backward(seq) print(time.clock() - tic) tic = time.clock() fwa, bwa = self.log_fwa_bwa(seq) print(time.clock() - tic) seq_score = logsumexp(fwa[-1]) score += seq_score print(time.clock() - tic) # emissions estimation b = fwa + bwa b -= seq_score print(time.clock() - tic) # transitions estimation # tic = time.clock() temp = np.zeros((len(seq) - 1, num_states, num_states)) for l in range(len(seq) - 1): np.add.outer(fwa[l], bwa[l + 1] + log_emissions[seq[l + 1]], out=temp[l]) temp[l] += log_transitions print(time.clock() - tic) a = logsumexp(temp, 0) a -= seq_score a = np.exp(a) print(time.clock() - tic) # start estimation expected_start_count += np.exp(log_start_prob + log_emissions[seq[0]] + bwa[1] - seq_score) print(time.clock() - tic) if len(seq) == 1: if not no_emissions: expected_emissions[:, seq[0]] = np.logaddexp( expected_emissions[:, seq[0]], b[0]) else: expected_transitions += a[:num_states, :num_states] if not no_emissions: for i, l in enumerate(seq): expected_emissions[:, l] = np.logaddexp( expected_emissions[:, l], b[i]) if no_emissions: expected_emissions = self.emissions else: expected_emissions = np.exp(expected_emissions) return expected_transitions, expected_emissions, expected_start_count, score
def _npSoftplus(self, np_features):
    np_features = np.asarray(np_features)
    zero = np.asarray(0).astype(np_features.dtype)
    return np.logaddexp(zero, np_features)
def ctc_beam_decode_chunk(self, log_probs_seq, cutoff_prob=0.99, cutoff_top_n=10, beam_size=20, **kwargs): ## extend prefix in loop for logit in log_probs_seq: # turn logit to prob if not len(logit) == len(self.vocabulary): raise ValueError( "The shape of prob_seq does not match with the " "shape of the vocabulary.") # prefix_set_next: the set containing candidate prefixes # probs_b_cur: prefixes' probability ending with blank in current step # probs_nb_cur: prefixes' probability ending with non-blank in current step prefix_set_next, log_probs_b_cur, log_probs_nb_cur = {}, {}, {} log_prob_idx = self._get_pruned_log_probs(logit, cutoff_prob, cutoff_top_n) for l in self.prefix_set_prev: if l not in prefix_set_next: log_probs_b_cur[l], log_probs_nb_cur[l] = -float( "INF"), -float("INF") _, org_state, org_word, org_count = self.prefix_set_prev[l] # extend prefix by travering prob_idx for c, log_prob_c in log_prob_idx: cur_prefix_state, cur_last_word, cur_word_count = ( org_state, org_word, org_count) if c == self.blank_id: log_probs_b_cur[l] = np.logaddexp(log_probs_b_cur[l], \ log_prob_c + np.logaddexp(self.log_probs_b_prev[l],\ self.log_probs_nb_prev[l])) else: last_char = l[-1] new_char = self.vocabulary[c] l_plus = l + new_char if l_plus not in prefix_set_next: log_probs_b_cur[l_plus], log_probs_nb_cur[ l_plus] = -float("INF"), -float("INF") if new_char == last_char: log_probs_nb_cur[l_plus] = np.logaddexp( log_probs_nb_cur[l_plus], log_prob_c + self.log_probs_b_prev[l]) log_probs_nb_cur[l] = np.logaddexp( log_probs_nb_cur[l], log_prob_c + self.log_probs_nb_prev[l]) elif new_char == ' ' or self.char_based_lm: if cur_last_word != '': #print("**"+cur_last_word+"**") cur_word_count = cur_word_count + 1 if (self.ext_scoring_func is None) or (len(l) == 1): log_score = 0.0 else: log_score, cur_prefix_state = \ self.ext_scoring_func(cur_last_word, cur_prefix_state, cur_word_count, log=True) if not self.char_based_lm: cur_last_word = '' log_probs_nb_cur[l_plus] = np.logaddexp( log_probs_nb_cur[l_plus], log_score + log_prob_c + np.logaddexp(self.log_probs_b_prev[l], self.log_probs_nb_prev[l])) else: cur_last_word = cur_last_word + new_char log_probs_nb_cur[l_plus] = np.logaddexp( log_probs_nb_cur[l_plus], log_prob_c + np.logaddexp(self.log_probs_b_prev[l], self.log_probs_nb_prev[l])) if self.char_based_lm: cur_last_word = new_char # add l_plus into prefix_set_next prefix_set_next[l_plus] = (np.logaddexp( log_probs_nb_cur[l_plus], log_probs_b_cur[l_plus]), cur_prefix_state, cur_last_word, cur_word_count) # add l into prefix_set_next prefix_set_next[l] = (np.logaddexp(log_probs_b_cur[l], log_probs_nb_cur[l]), org_state, org_word, org_count) # update probs self.log_probs_b_prev, self.log_probs_nb_prev = log_probs_b_cur, log_probs_nb_cur ## store top beam_size prefixes self.prefix_set_prev = sorted(prefix_set_next.items(), key=lambda asd: asd[1][0], reverse=True) if beam_size < len(self.prefix_set_prev): self.prefix_set_prev = self.prefix_set_prev[:beam_size] self.prefix_set_prev = dict(self.prefix_set_prev) beam_result = [] for seq, cur_total_state in self.prefix_set_prev.items(): log_prob, state, last_word, word_count = cur_total_state if log_prob > float("-INF") and len(seq) > 1: # score last word by external scorer if (self.ext_scoring_func is not None) and (last_word != ' '): if last_word != '': word_count += 1 log_prob, _ = self.ext_scoring_func(last_word, state, word_count, log=True) beam_result.append((log_prob, seq.lstrip())) else: beam_result.append((float("-INF"), '')) ## output top 
beam_size decoding results beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True) return beam_result[0][1]
def combine_results(saved_logl, saved_nodeids, pointpile, main_iterator, mpi_comm=None): """Combine a sequence of likelihoods and nodes into a summary dictionary. Parameters ---------- saved_logl: list of floats loglikelihoods of dead points saved_nodeids: list of ints indices of dead points pointpile: :pyclass:PointPile Point pile. main_iterator: :pyclass:BreadthFirstIterator iterator used mpi_comm: MPI communicator object, or None if MPI is not used. Returns -------- results: dict All information of the run. Important keys: Number of nested sampling iterations (niter), Evidence estimate (logz), Effective Sample Size (ess), H (information gain), weighted samples (weighted_samples), equally weighted samples (samples), best-fit point information (maximum_likelihood), posterior summaries (posterior). The rank order test score (insertion_order_MWW_test) is included if the iterator has it. """ assert np.shape(main_iterator.logweights) == ( len(saved_logl), len(main_iterator.all_logZ)), (np.shape(main_iterator.logweights), np.shape(saved_logl), np.shape(main_iterator.all_logZ)) saved_logl = np.array(saved_logl) saved_u = pointpile.getu(saved_nodeids) saved_v = pointpile.getp(saved_nodeids) saved_logwt = np.array(main_iterator.logweights) saved_logwt0 = saved_logwt[:, 0] saved_logwt_bs = saved_logwt[:, 1:] logZ_bs = main_iterator.all_logZ[1:] assert len(saved_logwt_bs) == len(saved_nodeids), (saved_logwt_bs.shape, len(saved_nodeids)) if mpi_comm is not None: # spread logZ_bs, saved_logwt_bs recv_saved_logwt_bs = mpi_comm.gather(saved_logwt_bs, root=0) recv_saved_logwt_bs = mpi_comm.bcast(recv_saved_logwt_bs, root=0) saved_logwt_bs = np.concatenate(recv_saved_logwt_bs, axis=1) recv_logZ_bs = mpi_comm.gather(logZ_bs, root=0) recv_logZ_bs = mpi_comm.bcast(recv_logZ_bs, root=0) logZ_bs = np.concatenate(recv_logZ_bs) saved_wt_bs = exp(saved_logwt_bs + saved_logl.reshape((-1, 1)) - logZ_bs) saved_wt0 = exp(saved_logwt0 + saved_logl - main_iterator.all_logZ[0]) # compute fraction in tail w = saved_wt0 / saved_wt0.sum() assert np.isclose(w.sum() - 1, 0), w.sum() ess = len(w) / (1.0 + ((len(w) * w - 1)**2).sum() / len(w)) tail_fraction = w[np.asarray(main_iterator.istail)].sum() if tail_fraction != 0: logzerr_tail = logaddexp( log(tail_fraction) + main_iterator.logZ, main_iterator.logZ) - main_iterator.logZ else: logzerr_tail = 0 logzerr_bs = (logZ_bs - main_iterator.logZ).max() logzerr_total = (logzerr_tail**2 + logzerr_bs**2)**0.5 samples = resample_equal(saved_v, w) ndim = saved_u.shape[1] information_gain_bits = [] for i in range(ndim): H, _ = np.histogram(saved_u[:, i], weights=saved_wt0, density=True, bins=np.linspace(0, 1, 40)) information_gain_bits.append( float((np.log2(1 / ((H + 0.001) * 40)) / 40).sum())) j = saved_logl.argmax() results = dict( niter=len(saved_logl), logz=main_iterator.logZ, logzerr=logzerr_total, logz_bs=logZ_bs.mean(), logz_single=main_iterator.logZ, logzerr_tail=logzerr_tail, logzerr_bs=logzerr_bs, ess=ess, H=main_iterator.all_H[0], Herr=main_iterator.all_H.std(), posterior=dict( mean=samples.mean(axis=0).tolist(), stdev=samples.std(axis=0).tolist(), median=np.percentile(samples, 50, axis=0).tolist(), errlo=np.percentile(samples, 15.8655, axis=0).tolist(), errup=np.percentile(samples, 84.1345, axis=0).tolist(), information_gain_bits=information_gain_bits, ), weighted_samples=dict(upoints=saved_u, points=saved_v, weights=saved_wt0, logw=saved_logwt0, bootstrapped_weights=saved_wt_bs, logl=saved_logl), samples=samples, maximum_likelihood=dict( logl=saved_logl[j], 
point=saved_v[j, :].tolist(), point_untransformed=saved_u[j, :].tolist(), ), ) if getattr(main_iterator, 'check_insertion_order', False): results['insertion_order_MWW_test'] = dict( independent_iterations=main_iterator.insertion_order_runlength, converged=main_iterator.insertion_order_converged, ) return results
def reference_sigmoid(x):
    return np.exp(-np.logaddexp(0, -x))
def ctc_beam_search_decoder(log_probs_seq, lm_scorer=None, beam_size=100, blank=0, cutoff_prob=1.0, cutoff_top_n=None): """ Performs prefix beam search on the output of a CTC network. Args: log_probs_seq (tensor): The log probabilities. Should be a 2D array (timesteps x alphabet_size) lm_scorer (func): Language model function. Should take as input a string and output a probability. beam_size (int): The beam width. Will keep the `beam_size` most likely candidates at each timestep. blank (int): Blank label index cutoff_prob: Cutoff probability for pruning. Defaults to `1.0`, meaning no pruning cutoff_top_n: Cutoff number for pruning. Retruns: string: The decoded CTC output. """ T, V = log_probs_seq.shape log_cutoff_prob = math.log(cutoff_prob) cutoff_top_n = min(cutoff_top_n, V) if cutoff_top_n else V beams = Beams(is_valid=lm_scorer.is_valid if lm_scorer else None) for t in range(T): log_probs = log_probs_seq[t] curr_beams = list(beams.sort()) # A default dictionary to store the next step candidates. num_prefixes = len(curr_beams) min_cutoff = -float('inf') full_beam = False if lm_scorer: # score - beta # it will not insert a new word or character min_cutoff = curr_beams[-1][-1].score_ctc + log_probs[blank] full_beam = num_prefixes == beam_size # Prunning step pruned_indexes = torch.arange(len(log_probs)).tolist() if log_cutoff_prob < 0.0 or cutoff_top_n < V: idxs = torch.argsort(log_probs, descending=True) n_idxs = min( (logcumsumexp(log_probs[idxs], 0) <= log_cutoff_prob).sum(), cutoff_top_n, V) pruned_indexes = idxs[:n_idxs].tolist() for token_index in pruned_indexes: p = log_probs[token_index].item() # The variables p_b and p_nb are respectively the # probabilities for the prefix given that it ends in a # blank and does not end in a blank at this time step. for prefix, beam in curr_beams: p_b, p_nb = beam.p_b, beam.p_nb if full_beam and p + beam.score_ctc < min_cutoff: break # If we propose a blank the prefix doesn't change. Only the probability of ending # in blank gets updated. if token_index == blank: beam.n_p_b = np.logaddexp(beam.n_p_b, beam.score_ctc + p) continue # Extend the prefix by the new character s and add it to the beam[' Only'] the # probability of not ending in blank gets updated. last_token_index = prefix[-1] if prefix else None if token_index == last_token_index: # If s is repeated at the end we also update the unchanged prefix. This is the # merging case. beam.n_p_nb = np.logaddexp(beam.n_p_nb, p_nb + p) n_prefix = prefix + (token_index, ) # Must update state for prefix search n_beam = beams.getitem(n_prefix, p=p, previous_beam=beam) if not n_beam: continue n_p_nb = n_beam.n_p_nb if token_index == last_token_index and p_b > -float('inf'): # We don't include the previous probability of not ending in blank (p_nb) # if s is repeated at the end. The CTC algorithm merges characters not # separated by a blank. n_p_nb = np.logaddexp(n_p_nb, p_b + p) elif token_index != last_token_index: n_p_nb = np.logaddexp(n_p_nb, beam.score_ctc + p) if lm_scorer: # LM scorer has access and updates the state variabl p_lm = lm_scorer(n_prefix, n_beam.state) n_beam.score_lm = beam.score_lm + p_lm n_beam.n_p_nb = n_p_nb # Update the probabilities beams.step() # Trim the beam before moving on to the next time-step. 
beams.topk_(beam_size) # Score the end-of-sentence token with the LM if lm_scorer: for prefix, beam in beams.items(): if prefix: p_lm = lm_scorer(prefix, beam.state, eos=True) beam.score_lm += p_lm beam.score = beam.score_ctc + beam.score_lm # Return the top `beam_size` candidates as (negative log-probability, prefix, timesteps). # The commented-out variant below would return the CTC-only scores, without LM scoring: # return [(-beam['score_ctc'], p, beam['timesteps']) for p, beam in beams.sort()] return [(-beam.score, p, beam.timesteps) for p, beam in beams.sort()]
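For readers new to prefix beam search, here is a self-contained toy version of the same bookkeeping with no pruning, no LM and no cutoffs, exhaustively tracking every prefix; it is only a sketch to make the blank / non-blank updates and the np.logaddexp merging explicit, and all names in it are made up rather than taken from the decoder above. Summing exp over all returned scores gives 1.0 when each frame's probabilities sum to 1, which is a handy sanity check.

import numpy as np

NEG_INF = -np.inf

def toy_prefix_search(log_probs, blank=0):
    """Exhaustive CTC prefix search over a (T, V) matrix of per-frame log-probs.
    For every prefix it tracks the log-probability of ending in a blank (p_b)
    and in a non-blank (p_nb), merging duplicate paths with np.logaddexp."""
    T, V = log_probs.shape
    beams = {(): (0.0, NEG_INF)}                     # empty prefix "ends in blank" with prob 1
    for t in range(T):
        new_beams = {}
        def add(prefix, b, nb):
            old_b, old_nb = new_beams.get(prefix, (NEG_INF, NEG_INF))
            new_beams[prefix] = (np.logaddexp(old_b, b), np.logaddexp(old_nb, nb))
        for prefix, (p_b, p_nb) in beams.items():
            total = np.logaddexp(p_b, p_nb)
            for s in range(V):
                p = log_probs[t, s]
                if s == blank:                       # blank: prefix unchanged, now ends in blank
                    add(prefix, total + p, NEG_INF)
                elif prefix and s == prefix[-1]:     # repeat of the last emitted symbol
                    add(prefix, NEG_INF, p_nb + p)           # collapses into the same prefix
                    add(prefix + (s,), NEG_INF, p_b + p)     # extends only across a blank
                else:                                # a genuinely new symbol
                    add(prefix + (s,), NEG_INF, total + p)
        beams = new_beams
    return sorted(((pre, np.logaddexp(b, nb)) for pre, (b, nb) in beams.items()),
                  key=lambda kv: kv[1], reverse=True)

frame_probs = np.array([[0.6, 0.3, 0.1],             # columns: blank, 'a', 'b'
                        [0.5, 0.4, 0.1],
                        [0.7, 0.2, 0.1]])
for prefix, score in toy_prefix_search(np.log(frame_probs))[:3]:
    print(prefix, score)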
sampler.reset() sampler.run_mcmc(pos, 1500); labels = ["$m$", "$b$", "$Q$", "$M$", r"$\ln V$"] truths = true_params + [true_frac, true_outliers[0], np.log(true_outliers[1])] corner.corner(sampler.flatchain, bins=35, labels=labels, truths=truths) plt.show() # Compute the posterior membership (foreground) probability of each point. norm = 0.0 post_prob = np.zeros(len(x)) for i in range(sampler.chain.shape[1]): for j in range(sampler.chain.shape[0]): ll_fg, ll_bg = sampler.blobs[i][j] post_prob += np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg)) norm += 1 post_prob /= norm fig1, ax1 = plt.subplots() # Plot the data points. ax1.errorbar(x, y, yerr=yerr, fmt=",k", ms=0, capsize=0, lw=1, zorder=999) # Plot the (true) outliers. ax1.scatter(x[m_bkg], y[m_bkg], marker="s", s=22, c=post_prob[m_bkg], cmap="gray_r", vmin=0, vmax=1, zorder=1000) # Plot the (true) good points. ax1.scatter(x[~m_bkg], y[~m_bkg], marker="o", s=22, c=post_prob[~m_bkg], cmap="gray_r", vmin=0, vmax=1, zorder=1000) # Plot the true line. ax1.plot(x0, y0, color="k", lw=1.5) ax1.set_xlabel("$x$")
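The np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg)) term above is the standard log-space way to form a two-component responsibility, exp(ll_fg) / (exp(ll_fg) + exp(ll_bg)), without underflow; a tiny illustration with made-up log-likelihoods:

import numpy as np

ll_fg, ll_bg = -1200.0, -1210.0                      # hypothetical per-sample log-likelihoods
p_fg = np.exp(ll_fg - np.logaddexp(ll_fg, ll_bg))    # foreground membership probability
print(p_fg)                                          # ~0.99995; the direct ratio
                                                     # exp(-1200)/(exp(-1200)+exp(-1210)) is 0/0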
def compute_log_partition(self, ax, bx): a = ax + self.a b = bx + self.b A = 0.5 * (b**2 / a - self.b**2 / self.a + np.log(self.a / a)) logZ = np.sum(np.logaddexp(np.log(1 - self.rho), np.log(self.rho) + A)) return logZ
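Written out, the np.logaddexp call above evaluates log((1 - rho) + rho * exp(A)) elementwise without ever forming exp(A); a short illustration with made-up numbers:

import numpy as np

rho, A = 0.05, 800.0                                     # exp(800) overflows in float64
stable = np.logaddexp(np.log(1 - rho), np.log(rho) + A)  # log((1 - rho) + rho * exp(A))
print(stable)                                            # ~797.0, i.e. A + log(rho)
with np.errstate(over='ignore'):
    print(np.log((1 - rho) + rho * np.exp(A)))           # inf: the direct form overflows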
def _beam_search(self, hyps, helper, scores_ctc, lm, lp_weight): beam_width = helper.beam_width lm_weight = helper.lm_weight merge_prob = True # scores_ctc: `[T, vocab]` for t in range(scores_ctc.size(0)): # Pick up the top-k scores _, topk_ids = torch.topk( scores_ctc[t, 1:], # exclude blank k=min(beam_width, self.vocab), dim=-1, largest=True, sorted=True) topk_ids += 1 # index:0 is for blank # batchify all hypotheses (not in the cache, non-blank) for LM batch_hyps = [beam for beam in hyps if beam['update_lm']] if len(batch_hyps) > 0: ys = scores_ctc.new_zeros((len(batch_hyps), 1), dtype=torch.int64) for i, beam in enumerate(batch_hyps): ys[i] = beam['hyp'][-1] # Update LM states for shallow fusion _, lmstates, scores_lm = helper.update_rnnlm_state_batch( lm, batch_hyps, ys) hyp_ids_strs = [beam['hyp_ids_str'] for beam in hyps] for i, beam in enumerate(batch_hyps): lmstate = { 'hxs': lmstates['hxs'][:, i:i + 1], 'cxs': lmstates['cxs'][:, i:i + 1] } if lmstates is not None else None index = hyp_ids_strs.index(beam['hyp_ids_str']) hyps[index]['lmstate'] = lmstate if lm is not None: hyps[index]['next_scores_lm'] = scores_lm[i:i + 1] else: hyps[index]['next_scores_lm'] = None assert hyps[index]['update_lm'] hyps[index]['update_lm'] = False # register to cache self.state_cache[beam['hyp_ids_str']] = { 'next_scores_lm': hyps[index]['next_scores_lm'], 'lmstate': lmstate, } new_hyps = [] for j, beam in enumerate(hyps): p_b = beam['p_b'] p_nb = beam['p_nb'] total_score_lm = beam['score_lm'] # case 1. hyp is not extended new_p_b = np.logaddexp(p_b + scores_ctc[t, self.blank].item(), p_nb + scores_ctc[t, self.blank].item()) if len(beam['hyp'][1:]) > 0: new_p_nb = p_nb + scores_ctc[t, beam['hyp'][-1]].item() else: new_p_nb = LOG_0 total_score_ctc = np.logaddexp(new_p_b, new_p_nb) total_score_lp = len(beam['hyp'][1:]) * lp_weight total_score = total_score_ctc + total_score_lp + total_score_lm * lm_weight new_hyps.append({ 'hyp': beam['hyp'][:], 'hyp_ids_str': beam['hyp_ids_str'], 'score': total_score, 'p_b': new_p_b, 'p_nb': new_p_nb, 'score_ctc': total_score_ctc, 'score_lm': total_score_lm, 'score_lp': total_score_lp, 'next_scores_lm': beam['next_scores_lm'], 'lmstate': beam['lmstate'], 'update_lm': False }) # case 2.
hyp is extended new_p_b = LOG_0 for k in range(beam_width): idx = topk_ids[k].item() p_t = scores_ctc[t, idx].item() c_prev = beam['hyp'][-1] if len(beam['hyp']) > 1 else None if idx == c_prev: new_p_nb = p_b + p_t # TODO(hirofumi): apply character LM here else: new_p_nb = np.logaddexp(p_b + p_t, p_nb + p_t) # TODO(hirofumi): apply character LM here if idx == self.space: pass # TODO(hirofumi): apply word LM here total_score_ctc = np.logaddexp(new_p_b, new_p_nb) total_score_lp = (len(beam['hyp'][1:]) + 1) * lp_weight total_score = total_score_ctc + total_score_lp if lm is not None: total_score_lm += beam['next_scores_lm'][0, 0, idx].item() total_score += total_score_lm * lm_weight hyp_ids = beam['hyp'] + [idx] hyp_ids_str = ' '.join(list(map(str, hyp_ids))) exist_cache = hyp_ids_str in self.state_cache.keys() if exist_cache: # from cache scores_lm = self.state_cache[hyp_ids_str][ 'next_scores_lm'] lmstate = self.state_cache[hyp_ids_str]['lmstate'] else: # LM will be updated later scores_lm = None lmstate = beam['lmstate'] new_hyps.append({ 'hyp': hyp_ids, 'hyp_ids_str': hyp_ids_str, 'score': total_score, 'p_b': new_p_b, 'p_nb': new_p_nb, 'score_ctc': total_score_ctc, 'score_lm': total_score_lm, 'score_lp': total_score_lp, 'next_scores_lm': scores_lm, 'lmstate': lmstate, 'update_lm': not exist_cache }) # Pruning new_hyps = sorted(new_hyps, key=lambda x: x['score'], reverse=True) new_hyps = helper.merge_ctc_path(new_hyps, merge_prob) hyps = new_hyps[:beam_width] return hyps, new_hyps
a + b # %% M = np.ones((3, 2)) a = np.arange(3) # %% # M + a # %% a[:, np.newaxis].shape M + a[:, np.newaxis] # %% np.logaddexp(M, a[:, np.newaxis]) # %% X = np.random.random((10, 3)) # %% Xmean = X.mean(0) Xmean # %% X_centered = X - Xmean # %% X_centered.mean(0) # %%
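(Added cell, reusing M and a from above.) np.logaddexp broadcasts like any other binary ufunc, and its reduce method gives a numerically stable log-sum-exp along an axis:

# %%
np.logaddexp(M, a[:, np.newaxis]).shape        # (3, 2), same broadcast shape as M + a[:, np.newaxis]
# %%
np.logaddexp.reduce(M + a[:, np.newaxis], axis=0)   # log(sum_i exp(M[i, j] + a[i])) for each column j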
def test_nan(self): assert_(np.isnan(np.logaddexp(np.nan, np.inf))) assert_(np.isnan(np.logaddexp(np.inf, np.nan))) assert_(np.isnan(np.logaddexp(np.nan, 0))) assert_(np.isnan(np.logaddexp(0, np.nan))) assert_(np.isnan(np.logaddexp(np.nan, np.nan)))
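Alongside the NaN propagation tested above, the infinities behave exactly the way the beam-search snippets rely on when they seed scores with LOG_0 = -inf (zero probability); a quick check:

import numpy as np

LOG_0 = -np.inf
print(np.logaddexp(LOG_0, -4.2))     # -4.2: adding probability zero is a no-op
print(np.logaddexp(LOG_0, LOG_0))    # -inf: the sum of two zero probabilities is still zero
print(np.logaddexp(np.inf, 3.0))     # inf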
def softPlus(x): return np.logaddexp(0, x)
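The logaddexp form is the overflow-safe way to write softplus, log(1 + exp(x)); a brief comparison with the naive expression (illustration only):

import numpy as np

x = np.array([-50.0, 0.0, 50.0, 800.0])
with np.errstate(over='ignore'):
    naive = np.log(1.0 + np.exp(x))      # exp(800) overflows, so the last entry becomes inf
print(np.logaddexp(0, x))                # [~1.9e-22, 0.693..., 50.0, 800.0]
print(naive)                             # [~1.9e-22, 0.693..., 50.0, inf]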
def numeric(self, values): """Evaluates e^x elementwise, adds 1, and takes the log. """ return np.logaddexp(0, values[0])
def __call__(self, y, pred): """Compute the deviance (= negative log-likelihood). """ # logaddexp(0, v) == log(1.0 + exp(v)) pred = pred.ravel() return np.sum(np.logaddexp(0.0, -2 * y * pred)) / y.shape[0]
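For reference, np.logaddexp(0, -2 * y * pred) is exactly -log(sigmoid(2 * y * pred)), so the deviance above is the mean negative log-likelihood on the margin y * pred. The snippet does not state its label convention; the values below are made up purely to verify that identity numerically.

import numpy as np

y = np.array([1.0, -1.0, 1.0])                   # assumed {-1, +1} labels, made up for the check
pred = np.array([0.3, -1.2, -2.0])
per_sample = np.logaddexp(0.0, -2 * y * pred)
sig = 1.0 / (1.0 + np.exp(-2 * y * pred))        # direct sigmoid is safe at these moderate values
print(np.allclose(per_sample, -np.log(sig)))     # True: the loss is -log sigmoid(2 * y * pred)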
def log_prob(self): y, x = self.data, self.mock.get() return -np.logaddexp(np.zeros(len(x)), self.alpha * (x - y)).sum()
def time_sync_decoding(self, enc_out: torch.Tensor) -> List[Hypothesis]: """Time synchronous beam search implementation. Based on https://ieeexplore.ieee.org/document/9053040 Args: enc_out: Encoder output sequence. (T, D) Returns: nbest_hyps: N-best hypothesis. """ beam = min(self.beam_size, self.vocab_size) beam_state = self.decoder.init_state(beam) B = [ Hypothesis( yseq=[self.blank_id], score=0.0, dec_state=self.decoder.select_state(beam_state, 0), ) ] cache = {} if self.use_lm: B[0].lm_state = self.lm.zero_state() for enc_out_t in enc_out: A = [] C = B enc_out_t = enc_out_t.unsqueeze(0) for v in range(self.max_sym_exp): D = [] beam_dec_out, beam_state, beam_lm_tokens = self.decoder.batch_score( C, beam_state, cache, self.use_lm, ) beam_logp = torch.log_softmax( self.joint_network(enc_out_t, beam_dec_out), dim=-1, ) beam_topk = beam_logp[:, 1:].topk(beam, dim=-1) seq_A = [h.yseq for h in A] for i, hyp in enumerate(C): if hyp.yseq not in seq_A: A.append( Hypothesis( score=(hyp.score + float(beam_logp[i, 0])), yseq=hyp.yseq[:], dec_state=hyp.dec_state, lm_state=hyp.lm_state, )) else: dict_pos = seq_A.index(hyp.yseq) A[dict_pos].score = np.logaddexp( A[dict_pos].score, (hyp.score + float(beam_logp[i, 0]))) if v < (self.max_sym_exp - 1): if self.use_lm: beam_lm_scores, beam_lm_states = self.lm.batch_score( beam_lm_tokens, [c.lm_state for c in C], None) for i, hyp in enumerate(C): for logp, k in zip(beam_topk[0][i], beam_topk[1][i] + 1): new_hyp = Hypothesis( score=(hyp.score + float(logp)), yseq=(hyp.yseq + [int(k)]), dec_state=self.decoder.select_state( beam_state, i), lm_state=hyp.lm_state, ) if self.use_lm: new_hyp.score += self.lm_weight * beam_lm_scores[ i, k] new_hyp.lm_state = beam_lm_states[i] D.append(new_hyp) C = sorted(D, key=lambda x: x.score, reverse=True)[:beam] B = sorted(A, key=lambda x: x.score, reverse=True)[:beam] return self.sort_nbest(B)
def smooth_f(x, a=1): return np.abs((x + a * np.logaddexp(x / a, -x / a)) / 2.0)
def offline_changepoint_detection(data, prior_function, log_likelihood_class, truncate: int = -40): """ Compute the likelihood of changepoints on data. Parameters: data -- the time series data prior_function -- function mapping a segment length t to the log prior probability of that length log_likelihood_class -- object exposing pdf(data, t=..., s=...), the log-likelihood of data[t:s] truncate -- the cutoff probability 10^truncate below which the computation for a changepoint is stopped Outputs: P -- the log-likelihood of a data sequence [t, s], given there is no changepoint between t and s Q -- the log-likelihood of data Pcp -- the log-likelihood that the i-th changepoint is at time step t. To actually get the probability of a changepoint at time step t, sum the probabilities over i. """ # Set up the placeholders for each parameter n = len(data) Q = np.zeros((n, )) g = np.zeros((n, )) G = np.zeros((n, )) P = np.ones((n, n)) * -np.inf # save everything in log representation for t in range(n): g[t] = prior_function(t) if t == 0: G[t] = g[t] else: G[t] = np.logaddexp(G[t - 1], g[t]) P[n - 1, n - 1] = log_likelihood_class.pdf(data, t=n - 1, s=n) Q[n - 1] = P[n - 1, n - 1] for t in reversed(range(n - 1)): P_next_cp = -np.inf # == log(0) for s in range(t, n - 1): P[t, s] = log_likelihood_class.pdf(data, t=t, s=s + 1) # compute recursion summand = P[t, s] + Q[s + 1] + g[s + 1 - t] P_next_cp = np.logaddexp(P_next_cp, summand) # truncate sum to become approx. linear in time (see # Fearnhead, 2006, eq. (3)) if summand - P_next_cp < truncate: break P[t, n - 1] = log_likelihood_class.pdf(data, t=t, s=n) # (1 - G) is numerically stable until G becomes numerically 1 if G[n - 1 - t] < -1e-15: # exp(-1e-15) = .99999... antiG = np.log(1 - np.exp(G[n - 1 - t])) else: # (1 - G) is approx. -log(G) for G close to 1 antiG = np.log(-G[n - 1 - t]) Q[t] = np.logaddexp(P_next_cp, P[t, n - 1] + antiG) Pcp = np.ones((n - 1, n - 1)) * -np.inf for t in range(n - 1): Pcp[0, t] = P[0, t] + Q[t + 1] + g[t] - Q[0] if np.isnan(Pcp[0, t]): Pcp[0, t] = -np.inf for j in range(1, n - 1): for t in range(j, n - 1): tmp_cond = (Pcp[j - 1, j - 1:t] + P[j:t + 1, t] + Q[t + 1] + g[0:t - j + 1] - Q[j:t + 1]) Pcp[j, t] = logsumexp(tmp_cond.astype(np.float32)) if np.isnan(Pcp[j, t]): Pcp[j, t] = -np.inf return Q, P, Pcp
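A minimal usage sketch (not from the original source): it assumes the function above is in scope, and wires in a hypothetical constant prior and a deliberately crude plug-in Gaussian segment likelihood just to show the expected interfaces; a real application would use a proper marginal likelihood.

import numpy as np
from scipy.special import logsumexp   # the snippet above presumably relies on scipy's logsumexp

class PlugInGaussian:
    """Hypothetical toy likelihood: iid N(mean, 1) with the segment mean plugged in.
    Only meant to illustrate the pdf(data, t=..., s=...) interface used above."""
    def pdf(self, data, t, s):
        seg = np.asarray(data[t:s], dtype=float)
        return float(-0.5 * np.sum((seg - seg.mean()) ** 2)
                     - 0.5 * len(seg) * np.log(2 * np.pi))

rng = np.random.default_rng(0)
data = np.concatenate([rng.normal(0.0, 1.0, 60), rng.normal(3.0, 1.0, 60)])
const_prior = lambda t: np.log(1.0 / 250.0)      # flat log prior over segment lengths

Q, P, Pcp = offline_changepoint_detection(data, const_prior, PlugInGaussian(), truncate=-40)
cp_prob = np.exp(Pcp).sum(axis=0)                # P(changepoint at t), summed over its order
print(int(np.argmax(cp_prob)))                   # usually lands near the true change at index 60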