def run_forward(self, initial_scores, transition_scores, final_scores, emission_scores, length, N):
    """Compute the forward (alpha) trellis in log space.

    :param initial_scores: log scores for the initial states (N,)
    :param transition_scores: log transition scores (length-1, N, N)
    :param final_scores: log scores for the final states (N,)
    :param emission_scores: log emission scores (length, N)
    :param length: sequence length
    :param N: number of states
    :return: (log likelihood, forward trellis of shape (length, N))
    """
    forward = np.zeros([length, N], 'f') + logzero()
    # initialization: start probability times first emission (sums in log space)
    forward[0, :] = emission_scores[0, :] + initial_scores
    for pos in range(1, length):
        # optionally sparsify the previous column before propagating it
        prev = forward[pos - 1, :]
        if self.approximate:
            prev = project_kbest(prev)
        # logsum over predecessor states for each current state
        for state in range(N):
            forward[pos, state] = sselogsum(prev + transition_scores[pos - 1, state, :])
        forward[pos, :] += emission_scores[pos, :]
    # termination: fold in the final-state scores
    last = forward[length - 1, :]
    if self.approximate:
        last = project_kbest(last)
    ll = sselogsum(last + final_scores)
    return ll, forward
def get_loss(self, N, forward, backward, ll, norm_emission_counts):
    """Compute the loss in log likelihood for each candidate state merge.

    Each merge collapses the pair of split states (2i, 2i+1) back into one
    state; the loss is the summed per-position drop in log likelihood
    relative to the unmerged model.

    :param N: number of split states (even; there are N // 2 candidate merges)
    :param forward: forward trellis, shape (length, N)
    :param backward: backward trellis, shape (length, N)
    :param ll: log likelihood of the current sequence
    :param norm_emission_counts: normalized emission counts (log weights,
        accumulated from state posteriors over all sequences) used to weight
        the backward merge (Petrov 2009, p. 89)
    :return: array of N // 2 losses, one per candidate merge
    """
    assert forward.shape[1] == N
    assert backward.shape[1] == N
    # fix: N / 2 is a float under Python 3 and np.zeros rejects float sizes
    n_merges = N // 2
    loss_seq = np.zeros(n_merges)
    for i in range(n_merges):
        # candidate merge covers the adjacent split pair (i_merge, i_merge + 1)
        i_merge = 2 * i
        # merge forward columns: logsum the two split states at each position
        forward_pair = forward[:, i_merge:i_merge + 2]
        sum_split_forward = np.zeros((forward.shape[0], 1), 'f') + logzero()
        for row_n, row in enumerate(forward_pair):
            sum_split_forward[row_n] = sselogsum(row)
        forward_merge = np.hstack(
            (forward[:, :i_merge], sum_split_forward, forward[:, i_merge + 2:]))
        # weighted merge of backward columns; weights are the normalized
        # emission counts (Petrov 2009, p. 89).
        # fix: build a NEW array here — the original `+=` on the slice was an
        # in-place update of a view, silently mutating the caller's backward
        # trellis and contaminating later merge candidates.
        backward_pair = (backward[:, i_merge:i_merge + 2]
                         + norm_emission_counts[i_merge:i_merge + 2])
        sum_split_backward = np.zeros((backward.shape[0], 1), 'f') + logzero()
        for row_n, row in enumerate(backward_pair):
            sum_split_backward[row_n] = sselogsum(row)
        backward_merge = np.hstack(
            (backward[:, :i_merge], sum_split_backward, backward[:, i_merge + 2:]))
        # per-position likelihood of the merged model: logsum over states of
        # forward * backward
        ll_merged_positions = np.zeros(forward_merge.shape[0], 'f') + logzero()
        fb_merge = forward_merge + backward_merge
        for row_n, row in enumerate(fb_merge):
            ll_merged_positions[row_n] = sselogsum(row)
        # loss = total drop in log likelihood across all positions
        loss_seq[i] = (ll_merged_positions - ll).sum()
    return loss_seq
def pass_msg_down(self, sender, tree, N, receiver):
    """Send the downward message along the sender->receiver edge.

    The message is, for each receiver state, the logsum over sender states of:
    the downward message from the sender's parent, the upward messages from
    the sender's other children, the sender's node potential, and the
    sender-receiver edge potential. For the root the computation reduces to
    edge potential plus child messages.

    :param sender: parent node
    :param receiver: child node
    """
    curr_edge = tree.get_edge_by_nodes(sender, receiver)
    # upward messages from the sender's other children; sums to 0 when there
    # are no such children
    product_child = sum(tree.get_edge_by_nodes(sender, c).up_msg
                        for c in sender.get_children() if c != receiver)
    if sender.is_root():
        curr_edge.down_msg = curr_edge.potentials + product_child
        return
    prev_edge = tree.get_edge_by_nodes(sender.get_parent(), sender)
    incoming = prev_edge.down_msg + product_child
    if self.approximate:
        incoming = project_kbest(incoming)
    incoming = incoming + sender.potentials
    down_msg = np.zeros(N, 'f') + logzero()
    for state in range(N):  # TODO optimize
        down_msg[state] = sselogsum(incoming + curr_edge.potentials[:, state])
    curr_edge.down_msg = down_msg
def pass_msg_down(self, sender, tree, N, receiver):
    """Compute the downward message on the edge from sender to receiver.

    For each receiver state, logsum over the sender's states of the parent's
    downward message, the upward messages from the sender's other children,
    the sender node potential and the edge potential. When the sender is the
    root there is no parent message or node potential to fold in.

    :param sender: parent node
    :param receiver: child node
    """
    edge = tree.get_edge_by_nodes(sender, receiver)
    siblings = [c for c in sender.get_children() if c != receiver]
    # 0 when the sender has no other children
    child_product = sum(tree.get_edge_by_nodes(sender, c).up_msg for c in siblings)
    if sender.is_root():
        edge.down_msg = edge.potentials + child_product
    else:
        parent_edge = tree.get_edge_by_nodes(sender.get_parent(), sender)
        combined = parent_edge.down_msg + child_product
        if self.approximate:
            combined = project_kbest(combined)
        combined = combined + sender.potentials
        # TODO optimize: vectorize the per-state logsum
        msg = np.zeros(N, 'f') + logzero()
        for s in range(N):
            msg[s] = sselogsum(combined + edge.potentials[:, s])
        edge.down_msg = msg
def run_backward(self, initial_scores, transition_scores, final_scores, emission_scores, length, N):
    """Compute the backward (beta) trellis in log space.

    :param initial_scores: log scores for the initial states (N,)
    :param transition_scores: log transition scores (length-1, N, N)
    :param final_scores: log scores for the final states (N,)
    :param emission_scores: log emission scores (length, N)
    :param length: sequence length
    :param N: number of states
    :return: (log likelihood, backward trellis of shape (length, N))
    """
    backward = np.zeros([length, N], 'f') + logzero()
    # initialization: last position carries only the final-state scores
    backward[length - 1, :] = final_scores
    for pos in range(length - 2, -1, -1):
        # fold next position's backward scores with its emission, optionally
        # keeping only the k best entries
        nxt = backward[pos + 1, :] + emission_scores[pos + 1, :]
        if self.approximate:
            nxt = project_kbest(nxt)
        for state in range(N):
            backward[pos, state] = sselogsum(nxt + transition_scores[pos, :, state])
    # termination: combine with the first emission and the initial scores
    start = backward[0, :] + emission_scores[0, :]
    if self.approximate:
        start = project_kbest(start)
    ll = sselogsum(start + initial_scores)
    return ll, backward
def run_backward(self, initial_scores, transition_scores, final_scores, emission_scores, length, N):
    """Backward recursion over the trellis, all scores in log space.

    :param initial_scores: initial-state log scores (N,)
    :param transition_scores: transition log scores (length-1, N, N)
    :param final_scores: final-state log scores (N,)
    :param emission_scores: emission log scores (length, N)
    :param length: number of positions
    :param N: number of states
    :return: tuple (log likelihood, (length, N) backward trellis)
    """
    beta = np.zeros([length, N], 'f') + logzero()
    beta[length - 1, :] = final_scores
    for pos in reversed(range(length - 1)):
        successor = beta[pos + 1, :] + emission_scores[pos + 1, :]
        # k-best projection sparsifies the successor column when approximating
        successor = project_kbest(successor) if self.approximate else successor
        for s in range(N):
            beta[pos, s] = sselogsum(successor + transition_scores[pos, :, s])
    head = beta[0, :] + emission_scores[0, :]
    head = project_kbest(head) if self.approximate else head
    return sselogsum(head + initial_scores), beta
def pass_msg_up(self, tree, sender, receiver, N):
    """Send the upward message along the receiver<-sender edge.

    The message is, for each receiver state, the logsum over sender states of
    the sender's upward belief combined with the edge potential.

    :param sender: child node
    :param receiver: parent node
    """
    # edge on which the message is stored
    curr_edge = tree.get_edge_by_nodes(receiver, sender)
    if receiver.is_root():
        # root edge potential is only N x 1, so the message collapses to a scalar
        curr_edge.up_msg = sselogsum(sender.up_belief + curr_edge.potentials)
        return
    up_msg = np.zeros(N, 'f') + logzero()
    for state in range(N):  # vectorize!
        up_msg[state] = sselogsum(sender.up_belief + curr_edge.potentials[state, :])
    curr_edge.up_msg = up_msg
def pass_msg_up(self, tree, sender, receiver, N):
    """Compute the upward message from sender (child) to receiver (parent).

    For each parent state, logsum over the child's states of the child's
    upward belief plus the edge potential.

    :param sender: child node
    :param receiver: parent node
    """
    edge = tree.get_edge_by_nodes(receiver, sender)
    if receiver.is_root():
        # at the root the edge potential is a vector (N x 1), so the
        # upward message reduces to a single logsum
        edge.up_msg = sselogsum(sender.up_belief + edge.potentials)
    else:
        msg = np.zeros(N, 'f') + logzero()
        # vectorize!
        for s in range(N):
            msg[s] = sselogsum(sender.up_belief + edge.potentials[s, :])
        edge.up_msg = msg
def run_forward(self, initial_scores, transition_scores, final_scores, emission_scores, length, N):
    """Forward (alpha) recursion over the trellis; all scores are logs.

    :param initial_scores: initial-state log scores (N,)
    :param transition_scores: transition log scores (length-1, N, N)
    :param final_scores: final-state log scores (N,)
    :param emission_scores: emission log scores (length, N)
    :param length: number of positions
    :param N: number of states
    :return: tuple (log likelihood, (length, N) forward trellis)
    """
    alpha = np.zeros([length, N], 'f') + logzero()
    alpha[0, :] = initial_scores + emission_scores[0, :]
    for pos in range(1, length):
        # previous column, optionally pruned to its k best entries
        prior = project_kbest(alpha[pos - 1, :]) if self.approximate else alpha[pos - 1, :]
        for s in range(N):
            # logsum over predecessor states into state s
            alpha[pos, s] = sselogsum(prior + transition_scores[pos - 1, s, :])
        alpha[pos, :] += emission_scores[pos, :]
    tail = project_kbest(alpha[length - 1, :]) if self.approximate else alpha[length - 1, :]
    return sselogsum(tail + final_scores), alpha
def project_kbest(v, k_prop=1 / 8):
    """Keep only the k largest coefficients of v; set the rest to logzero.

    A form of regularization following Grave et al. 2013 (for a 128-state
    model they set k=16), so k should be roughly 1/8 of the state size.
    The result is rescaled so its log-sum matches the original vector's.
    TODO: use a different data structure (sparse array) to bring speed again.

    :param v: log-score vector (1-D numpy array)
    :param k_prop: proportion of states to keep
    :return: vector of the same shape with all but the k largest entries zeroed
        out (in log space) and the kept mass rescaled
    """
    assert isinstance(v, np.ndarray)
    # fix: k_prop * size is a float under Python 3 and a float slice index
    # raises TypeError; also clamp to >= 1, since k == 0 would make the
    # slice [-0:] keep EVERY element instead of none
    k = max(1, int(k_prop * v.shape[0]))
    k_largest = v.argsort()[-k:]
    v_approx = np.zeros(v.shape[0], 'f') + logzero()
    v_approx[k_largest] = v[k_largest]
    # preserve the total (log-sum) mass of the original vector
    return rescale_projected(v_approx, sselogsum(v))
def rescale_projected(v, total):
    """Shift v (log scores) so that its log-sum equals total."""
    current_mass = sselogsum(v)
    return v + (total - current_mass)