def get_model1_forward(theta, obs_id, fc):
    """Score one trellis under a uniform alignment prior and collect counts.

    For every target position in ``trellis[obs_id]`` the emission log-scores
    of all candidate links are combined (via ``utils.logadd``) with a uniform
    log-prior ``log(1 / number_of_candidates)``; the per-position results are
    summed into the returned score.  The candidate with the highest emission
    score is stored as that position's back-pointer.

    Args:
        theta: Model parameters, forwarded to ``get_decision_given_context``.
        obs_id: Key selecting the entry in the module-level ``trellis``,
            ``source`` and ``target`` containers.
        fc: Mapping from ``(E_TYPE, target_token, source_token)`` to a
            log-domain fractional count, updated in place.  Pass ``None`` to
            skip the count update.

    Returns:
        A tuple ``(back_pointers, score, fc)`` where ``back_pointers`` is the
        list of ``(t_idx, best_s_idx)`` pairs with its last entry dropped,
        ``score`` is the summed log-score and ``fc`` is the object passed in.
    """
    global source, target, trellis
    lattice = trellis[obs_id]
    neg_inf = float('-inf')
    best_links = [-1] * len(lattice)
    total_score = 0.0

    for t_idx in lattice:
        t_tok = target[obs_id][t_idx]
        candidates = lattice[t_idx]
        emission_total = neg_inf
        joint_total = neg_inf
        best_e = neg_inf
        best_s_idx = None

        for _, s_idx in candidates:
            s_tok = NULL if s_idx is NULL else source[obs_id][s_idx]
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            emission_total = utils.logadd(emission_total, e)
            # Uniform prior over the candidate links of this target position.
            q = log(1.0 / len(candidates))
            joint_total = utils.logadd(joint_total, e + q)
            if e > best_e:
                best_e, best_s_idx = e, s_idx

        best_links[t_idx] = (t_idx, best_s_idx)
        total_score += joint_total

        if fc is None:
            continue

        # Update fractional counts.  The prior is uniform, so each link's
        # log-posterior is its emission score minus the emission log-sum.
        for _, s_idx in candidates:
            s_tok = NULL if s_idx is NULL else source[obs_id][s_idx]
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            log_posterior = e - emission_total
            event = (E_TYPE, t_tok, s_tok)
            fc[event] = utils.logadd(log_posterior, fc.get(event, neg_inf))

    return best_links[:-1], total_score, fc
def accumulate_fc(self, type, alpha, beta, d, S, c=None, q=None, e=None, fc=None):
    """Log-add one posterior count for the event ``(type, d, c)``.

    Args:
        type: ``T_TYPE`` or ``E_TYPE``; selects how the update is computed.
        alpha: Forward log-score for the event.
        beta: Backward log-score for the event.
        d: Decision part of the event key.
        S: Log-normaliser subtracted from the update.
        c: Context part of the event key.
        q: Transition log-score; only read for ``T_TYPE``.
        e: Emission log-score; only read for ``T_TYPE`` (for ``E_TYPE`` the
            emission is expected to be included in ``alpha`` already).
        fc: Count table to update.  When ``None`` the update goes to
            ``self.fractional_counts`` instead.

    Returns:
        ``fc`` as passed in (so ``None`` when the instance table was used).

    Raises:
        ValueError: If ``type`` is neither ``T_TYPE`` nor ``E_TYPE``.
    """
    if type == T_TYPE:
        event = (T_TYPE, d, c)
        update = alpha + q + e + beta - S
    elif type == E_TYPE:
        event = (E_TYPE, d, c)
        update = alpha + beta - S  # the emission should be included in alpha
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError("Wrong type: %r" % (type,))

    counts = self.fractional_counts if fc is None else fc
    counts[event] = utils.logadd(update, counts.get(event, float('-inf')))
    return fc
def accumulate_fc(type, alpha, beta, d, S, c=None, k=None, q=None, e=None, fc=None):
    """Log-add one posterior count for the event ``(type, d, c)`` into ``fc``.

    Args:
        type: ``T_TYPE`` or ``E_TYPE``; selects how the update is computed.
        alpha: Forward log-score for the event.
        beta: Backward log-score for the event.
        d: Decision part of the event key.
        S: Log-normaliser subtracted from the update.
        c: Context part of the event key.
        k: Unused; kept so existing callers' keyword arguments still work.
        q: Transition log-score; only read for ``T_TYPE``.
        e: Emission log-score; only read for ``T_TYPE`` (for ``E_TYPE`` the
            emission is expected to be included in ``alpha`` already).
        fc: Count table (mapping with ``get`` and item assignment), updated
            in place.  NOTE(review): despite the ``None`` default a real
            mapping is required; ``None`` fails on ``fc.get``.

    Returns:
        The same ``fc`` object.

    Raises:
        ValueError: If ``type`` is neither ``T_TYPE`` nor ``E_TYPE``.
    """
    if type == T_TYPE:
        event = (T_TYPE, d, c)
        update = alpha + q + e + beta - S
    elif type == E_TYPE:
        event = (E_TYPE, d, c)
        update = alpha + beta - S  # the emission should be included in alpha
    else:
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError("Wrong type: %r" % (type,))

    fc[event] = utils.logadd(update, fc.get(event, float('-inf')))
    return fc
def get_backwards(self, theta, obs_seq, alpha_pi, fc=None):
    """Run the backward pass over ``obs_seq`` and accumulate posterior counts.

    Starting from ``(n, END_SYM)`` with log-score 0.0, the backward table is
    filled from the last position towards the first.  For every state ``v``
    at position ``k`` an emission count is accumulated, and for every
    predecessor state ``u`` at ``k - 1`` a transition count is accumulated,
    all through ``self.accumulate_fc``.

    Args:
        theta: Model parameters, forwarded to
            ``self.get_decision_given_context``.
        obs_seq: Observation sequence; its last index is treated as the end
            position.
        alpha_pi: Forward table keyed by ``(position, state)``; must contain
            ``(n, END_SYM)`` and every state visited here.
        fc: Optional count table handed to ``self.accumulate_fc`` and
            replaced by whatever that call returns.

    Returns:
        ``(S, beta_pi)`` when the count table is ``None`` after the pass,
        otherwise ``(S, beta_pi, fc)``.  ``S`` is ``alpha_pi[(n, END_SYM)]``
        and ``beta_pi`` maps ``(position, state)`` to backward log-scores
        that include the emission score.
    """
    n = len(obs_seq) - 1  # index of last word
    beta_pi = {(n, END_SYM): 0.0}
    S = alpha_pi[(n, END_SYM)]  # from line 13 in pseudo code
    fc = self.accumulate_fc(type=E_TYPE, alpha=0.0, beta=S, e=0.0, S=S,
                            d=START_SYM, c=START_SYM, fc=fc)
    for k in range(n, 0, -1):
        for v in self.get_possible_states(obs_seq[k]):
            # p(obs[k]|v)
            e = self.get_decision_given_context(theta=theta, type=E_TYPE,
                                                decision=obs_seq[k], context=v)
            # beta_pi[(k, v)] is never written inside the predecessor loop
            # (only position k - 1 is), so one lookup serves every use below.
            pb = beta_pi[(k, v)]
            fc = self.accumulate_fc(type=E_TYPE, alpha=alpha_pi[(k, v)], beta=pb,
                                    e=e, d=obs_seq[k], c=v, S=S, fc=fc)
            for u in self.get_possible_states(obs_seq[k - 1]):
                # p(v|u)
                q = self.get_decision_given_context(type=T_TYPE, decision=v,
                                                    context=u, theta=theta)
                fc = self.accumulate_fc(type=T_TYPE, alpha=alpha_pi[k - 1, u], beta=pb,
                                        q=q, e=e, d=v, c=u, S=S, fc=fc)
                beta_p = pb + q + e  # The beta includes the emission probability
                prev_key = (k - 1, u)
                if prev_key in beta_pi:  # implements lines 16
                    beta_pi[prev_key] = utils.logadd(beta_pi[prev_key], beta_p)
                else:
                    beta_pi[prev_key] = beta_p
    if fc is None:
        return S, beta_pi
    return S, beta_pi, fc
def batch_accumilate_likelihood(result):
    """Fold one result triple into the module-level running totals.

    Args:
        result: Indexable with three entries: ``result[0]`` is added to
            ``data_likelihood``, ``result[1]`` is a mapping of events to
            log-domain counts that is log-added into ``fractional_counts``,
            and ``result[2]`` is added to ``emp_feat``.
    """
    global data_likelihood, fractional_counts, emp_feat
    data_likelihood += result[0]
    partial_counts = result[1]
    emp_feat += result[2]

    neg_inf = float('-inf')
    for event, log_count in partial_counts.items():
        so_far = fractional_counts.get(event, neg_inf)
        fractional_counts[event] = utils.logadd(log_count, so_far)
def get_best_seq(theta, obs_id):
    """Pick the best link per target position and score the trellis.

    For every target position in ``trellis[obs_id]`` the candidate with the
    highest emission log-score becomes the back-pointer, and the emission
    scores combined with a uniform log-prior ``log(1 / number_of_candidates)``
    are log-summed and added to the returned score.

    Args:
        theta: Model parameters, forwarded to ``get_decision_given_context``.
        obs_id: Key selecting the entry in the module-level ``trellis``,
            ``source`` and ``target`` containers.

    Returns:
        A tuple ``(back_pointers, score)`` where ``back_pointers`` is the list
        of ``(t_idx, best_s_idx)`` pairs with its last entry dropped and
        ``score`` is the summed log-score.
    """
    global source, target, trellis
    obs = trellis[obs_id]
    max_bt = [-1] * len(obs)
    p_st = 0.0
    for t_idx in obs:
        t_tok = target[obs_id][t_idx]
        links = obs[t_idx]
        max_e = float('-inf')
        max_s_idx = None
        sum_sj = float('-inf')
        for _, s_idx in links:
            s_tok = source[obs_id][s_idx] if s_idx is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            # Uniform prior over the candidate links of this target position.
            q = log(1.0 / len(links))
            sum_sj = utils.logadd(sum_sj, e + q)
            if e > max_e:
                max_e = e
                max_s_idx = s_idx
        max_bt[t_idx] = (t_idx, max_s_idx)
        p_st += sum_sj
    return max_bt[:-1], p_st
def get_backwards(theta, obs_id, alpha_pi, fc=None):
    """Run the backward pass over one trellis and accumulate posterior counts.

    States are ``(target_index, source_index)`` pairs taken from
    ``trellis[obs_id]``.  Starting from the first state at the last position
    with log-score 0.0, the backward table is filled towards position 0.
    When ``model_type == HMM_MODEL`` the transition score comes from
    ``get_decision_given_context``; otherwise a uniform log-prior
    ``log(1 / len(obs[k]))`` is used and no transition count is accumulated.

    Args:
        theta: Model parameters, forwarded to ``get_decision_given_context``.
        obs_id: Key selecting the entry in the module-level ``trellis``,
            ``source`` and ``target`` containers.
        alpha_pi: Forward table keyed by ``(position, state)``.
        fc: Optional count table updated through ``accumulate_fc``.  When
            ``None`` no counts are accumulated.

    Returns:
        ``(S, beta_pi)`` when ``fc`` is ``None``, otherwise
        ``(S, beta_pi, fc)``.  ``S`` is the forward score of the end state
        and ``beta_pi`` maps ``(position, state)`` to backward log-scores
        that include the emission score.
    """
    global max_jump_width, trellis, source, target
    obs = trellis[obs_id]
    src = source[obs_id]
    tar = target[obs_id]
    n = len(obs) - 1  # index of last word
    end_state = obs[n][0]
    beta_pi = {(n, end_state): 0.0}
    S = alpha_pi[(n, end_state)]  # from line 13 in pseudo code
    # accumulate_fc dereferences fc, so only call it when a table was given;
    # this keeps the fc=None default (and the two-value return) usable.
    collect = fc is not None
    if collect:
        fc = accumulate_fc(type=E_TYPE, alpha=0.0, beta=S, e=0.0, S=S,
                           d=BOUNDARY_START, c=BOUNDARY_START, fc=fc)
    for k in range(n, 0, -1):
        for v in obs[k]:
            tk, aj = v
            t_tok = tar[tk]
            s_tok = src[aj] if aj is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            # beta_pi[(k, v)] is never written inside the predecessor loop
            # (only position k - 1 is), so one lookup serves every use below.
            pb = beta_pi[(k, v)]
            if collect:
                fc = accumulate_fc(type=E_TYPE, alpha=alpha_pi[(k, v)], beta=pb,
                                   e=e, S=S, d=t_tok, c=s_tok, fc=fc)
            for u in obs[k - 1]:
                _, aj_1 = u
                context = aj_1
                if model_type == HMM_MODEL:
                    q = get_decision_given_context(theta, T_TYPE, decision=aj, context=context)
                    if collect:
                        fc = accumulate_fc(type=T_TYPE, alpha=alpha_pi[k - 1, u], beta=pb,
                                           q=q, e=e, d=aj, c=context, S=S, fc=fc)
                else:
                    # Uniform prior over the states at position k.
                    q = log(1.0 / len(obs[k]))
                beta_p = pb + q + e  # The beta includes the emission probability
                prev_key = (k - 1, u)
                if prev_key in beta_pi:  # implements lines 16
                    beta_pi[prev_key] = utils.logadd(beta_pi[prev_key], beta_p)
                else:
                    beta_pi[prev_key] = beta_p
    if fc is None:
        return S, beta_pi
    return S, beta_pi, fc
def _alignment_log_prior(t_idx, s_idx, t_tok, s_tok, m, n):
    """Return the alignment log-prior ``q`` for linking ``t_idx`` to ``s_idx``."""
    if t_tok == BOUNDARY_START or t_tok == BOUNDARY_END:
        return 0.0
    if s_tok == NULL:
        return np.log(Po)
    az = compute_z(t_idx, m - 2, n - 1, diagonal_tension) / (1 - Po)
    return np.log(unnormalized_prob(t_idx, s_idx, m - 2, n - 1, diagonal_tension) / az)


def get_model1_forward(theta, obs_id, fc=None, ef=None):
    """Score one trellis under a position-dependent alignment prior.

    For every target position in ``trellis[obs_id]`` each candidate link gets
    an emission log-score ``e`` and an alignment log-prior ``q``: 0.0 for
    boundary target tokens, ``log(Po)`` for the NULL source token, and
    otherwise a value derived from ``unnormalized_prob`` and ``compute_z``
    with ``diagonal_tension`` (presumably a fast_align-style diagonal prior;
    confirm against those helpers).  The per-position log-sums of ``e + q``
    are added up into the returned score, and the candidate with the highest
    emission score becomes the back-pointer.

    Args:
        theta: Model parameters, forwarded to ``get_decision_given_context``.
        obs_id: Key selecting the entry in the module-level ``trellis``,
            ``source`` and ``target`` containers.
        fc: Mapping from ``(E_TYPE, target_token, source_token)`` to a
            log-domain fractional count, updated in place.  ``None`` skips
            the count and ``ef`` updates.
        ef: Running total to which ``exp(posterior) * h(...)`` is added for
            every link.  When ``fc`` is given and ``ef`` is ``None`` the
            total starts at 0.0.

    Returns:
        ``(back_pointers, score, fc, ef)`` where ``back_pointers`` is the list
        of ``(t_idx, best_s_idx)`` pairs with its last entry dropped.
    """
    global source, target, trellis, diagonal_tension
    obs = trellis[obs_id]
    m = len(obs)
    max_bt = [-1] * len(obs)
    p_st = 0.0
    if fc is not None and ef is None:
        # `ef += ...` below would fail on the None default.
        ef = 0.0
    for t_idx in obs:
        t_tok = target[obs_id][t_idx]
        links = obs[t_idx]
        n = len(links)
        max_e = float('-inf')
        max_s_idx = None
        sum_sj = float('-inf')
        for _, s_idx in links:
            s_tok = source[obs_id][s_idx] if s_idx is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            q = _alignment_log_prior(t_idx, s_idx, t_tok, s_tok, m, n)
            sum_sj = utils.logadd(sum_sj, e + q)
            if e > max_e:
                max_e = e
                max_s_idx = s_idx
        max_bt[t_idx] = (t_idx, max_s_idx)
        p_st += sum_sj
        if p_st == float('inf'):
            # NOTE(review): interactive debugger trap left in place to keep
            # behaviour unchanged; it blocks non-interactive runs and should
            # probably become an exception or a log message.
            pdb.set_trace()
        if fc is None:
            continue
        # update fractional counts
        for _, s_idx in links:
            s_tok = source[obs_id][s_idx] if s_idx is not NULL else NULL
            e = get_decision_given_context(theta, E_TYPE, decision=t_tok, context=s_tok)
            q = _alignment_log_prior(t_idx, s_idx, t_tok, s_tok, m, n)
            if t_tok == BOUNDARY_START or t_tok == BOUNDARY_END or s_tok == NULL:
                hijmn = 0.0
            else:
                hijmn = h(t_idx, s_idx, m - 2, n - 1)
            # Log-posterior of this link: joint score over the position's sum.
            p_ai = e + q - sum_sj  # TODO: times h(j',i,m,n)
            event = (E_TYPE, t_tok, s_tok)
            fc[event] = utils.logadd(p_ai, fc.get(event, float('-inf')))
            ef += (exp(p_ai) * hijmn)
    return max_bt[:-1], p_st, fc, ef