def run_em_estep(self, batch_pos, batch_lan, batch_sen, epoch):
    """Run the EM E-step on one batch and return the batch likelihood.

    Scores come from ``self.evaluate_batch_score``; a root position is then
    prepended to every sentence, the inside and outside tables are computed
    with ``eisner_for_dmv``, and the tables are handed to
    ``self.update_pseudo_count`` together with ``self.trans_counter``,
    ``self.root_counter`` and ``self.decision_counter``.

    Args:
        batch_pos: Iterable of per-sentence tag-id sequences.
        batch_lan: Forwarded unchanged to ``self.update_pseudo_count``.
        batch_sen: Forwarded to ``self.evaluate_batch_score`` and
            ``self.update_pseudo_count``.
        epoch: Forwarded to ``self.evaluate_batch_score``.

    Returns:
        Whatever ``self.update_pseudo_count`` returns (named
        ``batch_likelihood`` by this method).
    """
    arc_scores, root_scores, decision_scores = self.evaluate_batch_score(
        batch_pos, batch_sen, self.sentence_trans_param, epoch)
    # log p, laid out as (batch_size, sentence_length, sentence_length, v_c_num)
    arc_scores = np.array(arc_scores)
    root_scores = np.array(root_scores)
    decision_scores = np.array(decision_scores)

    if self.function_mask:
        arc_scores = self.function_to_mask(arc_scores, batch_pos)

    batch_size, sentence_length, _, v_c_num = arc_scores.shape
    _, _, _, v_d_num, _ = decision_scores.shape
    padded_length = sentence_length + 1

    # Prepend an all -inf slice on axis 1 and then on axis 2 so that index 0
    # becomes the extra (root) position expected by eisner_for_dmv.
    leading_row = np.full(
        (batch_size, 1, sentence_length, v_c_num), -np.inf)
    arc_scores = np.concatenate((leading_row, arc_scores), axis=1)
    leading_col = np.full(
        (batch_size, padded_length, 1, v_c_num), -np.inf)
    arc_scores = np.concatenate((leading_col, arc_scores), axis=2)

    # Root attachment scores go into row 0 for valence indices 0 and 1 only.
    for valence in (0, 1):
        arc_scores[:, 0, 1:, valence] = root_scores

    # The root position gets all-zero decision scores
    # (presumably log 1, i.e. no decision cost -- TODO confirm).
    root_decisions = np.zeros((batch_size, 1, 2, v_d_num, 2))
    decision_scores = np.concatenate(
        (root_decisions, decision_scores), axis=1)

    inside_arc_scores = arc_scores.reshape(
        batch_size, padded_length, padded_length, 1, 1, self.cvalency)
    inside_decision_scores = decision_scores.reshape(
        batch_size, padded_length, 1, 2, self.dvalency, 2)

    inside_tables = eisner_for_dmv.batch_inside(
        inside_arc_scores, inside_decision_scores,
        self.dvalency, self.cvalency)
    inside_complete, inside_incomplete, sentence_prob = inside_tables

    outside_tables = eisner_for_dmv.batch_outside(
        inside_complete, inside_incomplete,
        inside_arc_scores, inside_decision_scores,
        self.dvalency, self.cvalency)
    outside_complete, outside_incomplete = outside_tables

    # Every tag sequence is prefixed with a root id equal to the size of the
    # first axis of self.trans_counter.
    pos_with_root = np.array(
        [[self.trans_counter.shape[0]] + list(tags) for tags in batch_pos])

    return self.update_pseudo_count(
        inside_incomplete, inside_complete, sentence_prob,
        outside_incomplete, outside_complete,
        self.trans_counter, self.root_counter, self.decision_counter,
        pos_with_root, batch_sen, batch_lan)
def run_em_estep(self, batch_pos, batch_lan, batch_sen, trans_counter,
                 decision_counter):
    """Run the EM E-step on one batch and return its likelihood values.

    Arc and decision scores are obtained from ``self.evaluate_batch_score``,
    position 0 is forbidden as a child, the inside and outside tables are
    computed with ``eisner_for_dmv``, and everything is passed on to
    ``self.update_pseudo_count``.

    Args:
        batch_pos: Tag sequences of the batch; also used for function-tag
            masking when ``self.function_mask`` is truthy.
        batch_lan: Forwarded unchanged to ``self.update_pseudo_count``.
        batch_sen: Forwarded to ``self.evaluate_batch_score`` and
            ``self.update_pseudo_count``.
        trans_counter: Forwarded to ``self.update_pseudo_count``
            (presumably updated in place -- verify against that method).
        decision_counter: Forwarded to ``self.update_pseudo_count``
            (presumably updated in place -- verify against that method).

    Returns:
        The ``(batch_likelihood, en_like)`` pair produced by
        ``self.update_pseudo_count``.
    """
    # Score every possible dependency arc and every decision.
    arc_scores, decision_scores = self.evaluate_batch_score(
        batch_pos, batch_sen, self.language_map, self.languages, None)
    arc_scores = np.array(arc_scores)
    decision_scores = np.array(decision_scores)

    # The root (index 0 on axis 2) can never be selected as a child.
    arc_scores[:, :, 0, :] = -np.inf

    # Optionally mask function tags.
    if self.function_mask:
        arc_scores = self.function_to_mask(arc_scores, batch_pos)

    batch_size, sentence_length, _, _ = arc_scores.shape
    inside_arc_scores = arc_scores.reshape(
        batch_size, sentence_length, sentence_length, 1, 1, self.cvalency)
    inside_decision_scores = decision_scores.reshape(
        batch_size, sentence_length, 1, 2, self.dvalency, 2)

    # Inside pass followed by the outside pass.
    inside_tables = eisner_for_dmv.batch_inside(
        inside_arc_scores, inside_decision_scores,
        self.dvalency, self.cvalency)
    inside_complete, inside_incomplete, sentence_prob = inside_tables

    outside_tables = eisner_for_dmv.batch_outside(
        inside_complete, inside_incomplete,
        inside_arc_scores, inside_decision_scores,
        self.dvalency, self.cvalency)
    outside_complete, outside_incomplete = outside_tables

    # Hand the tables and the caller-supplied counters to the count update.
    batch_likelihood, en_like = self.update_pseudo_count(
        inside_incomplete, inside_complete, sentence_prob,
        outside_incomplete, outside_complete,
        trans_counter, decision_counter,
        batch_pos, batch_sen, batch_lan)
    return batch_likelihood, en_like
def run_em_estep(self, batch_pos, batch_words, batch_sen, trans_counter,
                 decision_counter, lex_counter):
    """Run the EM E-step on one batch and return the batch likelihood.

    Scores come from ``self.evaluate_batch_score``, disallowed entries are
    set to ``-inf``, the inside and outside tables are computed with
    ``eisner_for_dmv``, and the tables plus counters are passed to
    ``self.update_pseudo_count``. Afterwards ``trans_counter`` and
    ``lex_counter`` are stored on ``self``.

    Args:
        batch_pos: Tag sequences of the batch.
        batch_words: Word sequences of the batch.
        batch_sen: Not referenced by this method.
        trans_counter: Passed to ``self.update_pseudo_count`` and then
            stored as ``self.trans_counter``.
        decision_counter: Passed to ``self.update_pseudo_count``.
        lex_counter: Passed to ``self.update_pseudo_count`` and then
            stored as ``self.lex_counter``.

    Returns:
        Whatever ``self.update_pseudo_count`` returns (named
        ``batch_likelihood`` by this method).
    """
    arc_scores, decision_scores = self.evaluate_batch_score(
        batch_words, batch_pos)
    arc_scores = np.array(arc_scores)
    decision_scores = np.array(decision_scores)

    # Index 0 on axis 2 is never allowed
    # (presumably the root, which cannot be a child -- TODO confirm).
    arc_scores[:, :, 0, :, :, :] = -np.inf
    if self.tag_num > 1:
        # With more than one tag, index 0 on axis 1 may only use index 0 on
        # axis 3 (presumably the root has a single sub-tag -- TODO confirm).
        arc_scores[:, 0, :, 1:, :, :] = -np.inf

    if self.function_mask:
        arc_scores = self.function_to_mask(arc_scores, batch_pos)

    inside_tables = eisner_for_dmv.batch_inside(
        arc_scores, decision_scores, self.dvalency, self.cvalency)
    inside_complete, inside_incomplete, sentence_prob = inside_tables

    outside_tables = eisner_for_dmv.batch_outside(
        inside_complete, inside_incomplete,
        arc_scores, decision_scores,
        self.dvalency, self.cvalency)
    outside_complete, outside_incomplete = outside_tables

    batch_likelihood = self.update_pseudo_count(
        inside_incomplete, inside_complete, sentence_prob,
        outside_incomplete, outside_complete,
        trans_counter, decision_counter, lex_counter,
        batch_pos, batch_words)

    # NOTE(review): decision_counter is not stored back on self here, unlike
    # the other two counters -- confirm this is intentional.
    self.trans_counter = trans_counter
    self.lex_counter = lex_counter
    return batch_likelihood