def viterbi_decode(self, h: FloatTensor, mask: BoolTensor) -> torch.LongTensor:
    """
    decode labels using viterbi algorithm
    :param h: hidden matrix (batch_size, seq_len, num_labels)
    :param mask: mask tensor of each sequence
                 in mini batch (batch_size, seq_len)
    :return: labels of each sequence in mini batch,
             padded with 0 up to seq_len
    """
    batch_size, seq_len, _ = h.size()

    # prepare the sequence lengths in each sequence
    seq_lens = mask.long().sum(dim=1)

    # In mini batch, prepare the score
    # from the start sequence to the first label
    score = [self.start_trans.data + h[:, 0]]
    path = []

    for t in range(1, seq_len):
        # extract the score of previous sequence
        # (batch_size, num_labels, 1)
        previous_score = score[t - 1].view(batch_size, -1, 1)

        # extract the score of hidden matrix of sequence
        # (batch_size, 1, num_labels)
        h_t = h[:, t].view(batch_size, 1, -1)

        # extract the score in transition
        # from label of t-1 sequence to label of sequence of t
        # self.trans_matrix has the score of the transition
        # from sequence A to sequence B
        # (batch_size, num_labels, num_labels)
        score_t = previous_score + self.trans_matrix + h_t

        # keep the maximum value
        # and point where maximum value of each sequence
        # (batch_size, num_labels)
        best_score, best_path = score_t.max(1)
        score.append(best_score)
        path.append(best_path)

    # predict labels of mini batch
    # and pad each path with 0 up to the maximum sequence length
    best_paths = []
    for i in range(batch_size):
        best_path = self._viterbi_compute_best_path(i, seq_lens, score, path)
        pad_path = best_path + [0] * (seq_len - len(best_path))
        best_paths.append(pad_path)

    return torch.LongTensor(best_paths)
def viterbi_decode(
    self, h: torch.FloatTensor, mask: torch.BoolTensor
) -> List[List[int]]:
    """
    decode labels using viterbi algorithm
    :param h: hidden matrix (batch_size, seq_len, num_labels)
    :param mask: mask tensor of each sequence
                 in mini batch (batch_size, seq_len)
    :return: labels of each sequence in mini batch
    """
    batch_size, seq_len, _ = h.size()

    # prepare the sequence lengths in each sequence
    seq_lens = mask.long().sum(dim=1)

    # In mini batch, prepare the score
    # from the start sequence to the first label
    score = [self.start_trans.data + h[:, 0]]
    path = []

    for t in range(1, seq_len):
        # extract the score of previous sequence
        # (batch_size, num_labels, 1)
        previous_score = score[t - 1].view(batch_size, -1, 1)

        # extract the score of hidden matrix of sequence
        # (batch_size, 1, num_labels)
        h_t = h[:, t].view(batch_size, 1, -1)

        # extract the score in transition
        # from label of t-1 sequence to label of sequence of t
        # self.trans_matrix has the score of the transition
        # from sequence A to sequence B
        # (batch_size, num_labels, num_labels)
        score_t = previous_score + self.trans_matrix + h_t

        # keep the maximum value
        # and point where maximum value of each sequence
        # (batch_size, num_labels)
        best_score, best_path = score_t.max(1)
        score.append(best_score)
        path.append(best_path)

    # predict labels of mini batch
    best_paths = [
        self._viterbi_compute_best_path(i, seq_lens, score, path)
        for i in range(batch_size)
    ]

    return best_paths
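# A minimal standalone sketch of the same forward Viterbi recursion on a toy,
# unbatched model. All numbers and names below are illustrative assumptions,
# not part of the classes above; end transitions are omitted for brevity.
import torch

num_labels, seq_len = 3, 4
start_trans = torch.randn(num_labels)
trans = torch.randn(num_labels, num_labels)   # trans[i, j]: score of label i -> j
emissions = torch.randn(seq_len, num_labels)  # one sequence, no batch dimension

score = start_trans + emissions[0]
history = []
for t in range(1, seq_len):
    # combined[i, j]: best score ending in label i at t-1,
    # plus the transition i -> j, plus the emission of label j at t
    combined = score.unsqueeze(1) + trans + emissions[t].unsqueeze(0)
    score, best_prev = combined.max(dim=0)
    history.append(best_prev)

# backtrack from the best final label to recover the best path
best_path = [int(score.argmax())]
for best_prev in reversed(history):
    best_path.append(int(best_prev[best_path[-1]]))
best_path.reverse()
print(best_path)  # e.g. [2, 0, 1, 0]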
def _compute_numerator_log_likelihood(
    self, h: FloatTensor, y: LongTensor, mask: BoolTensor
) -> FloatTensor:
    """
    compute the numerator term for the log-likelihood
    :param h: hidden matrix (batch_size, seq_len, num_labels)
    :param y: answer labels of each sequence
              in mini batch (batch_size, seq_len)
    :param mask: mask tensor of each sequence
                 in mini batch (batch_size, seq_len)
    :return: The score of numerator term for the log-likelihood
    """
    batch_size, seq_len, _ = h.size()

    # extract first vector of sequences in mini batch
    score = self.start_trans[y[:, 0]]

    h = h.unsqueeze(-1)
    trans = self.trans_matrix.unsqueeze(-1)

    for t in range(seq_len - 1):
        mask_t = mask[:, t].cuda() if CRF.CUDA else mask[:, t]
        mask_t1 = mask[:, t + 1].cuda() if CRF.CUDA else mask[:, t + 1]

        # extract the emission score of the gold label at step t
        # (batch_size)
        h_t = torch.cat([h[b, t, y[b, t]] for b in range(batch_size)])

        # extract the transition score from the t-th label to the t+1-th label
        # (batch_size)
        trans_t = torch.cat([trans[s[t], s[t + 1]] for s in y])

        # add the emission score at step t and the transition score t -> t+1
        # (batch_size)
        score += h_t * mask_t + trans_t * mask_t1

    # extract end label number of each sequence in mini batch
    # (batch_size)
    last_mask_index = mask.long().sum(1) - 1
    last_labels = y.gather(1, last_mask_index.unsqueeze(-1))

    # restore the shape of h
    h = h.view(batch_size, seq_len, self.num_labels)

    # Add the score of the sequences of the maximum length in mini batch
    score += h[:, -1].gather(1, last_labels).squeeze(1) * mask[:, -1]

    # Add the scores from the last tag of each sequence to EOS
    score += self.end_trans[last_labels].view(batch_size)

    return score
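# Hedged aside: the two Python-level list comprehensions in the loop above can
# typically be replaced by direct tensor indexing. This is an equivalent
# reformulation under the same shape assumptions (h: (batch_size, seq_len,
# num_labels), y: (batch_size, seq_len)), not the original author's code.
import torch

def gold_step_scores(h, y, trans_matrix, t):
    batch = torch.arange(h.size(0))
    h_t = h[batch, t, y[:, t]]                    # emission of the gold label at step t
    trans_t = trans_matrix[y[:, t], y[:, t + 1]]  # gold transition t -> t+1
    return h_t, trans_t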
def _update_seq_length_for_generation(
    sequence_lengths: torch.LongTensor,
    unfinished_sequences: torch.LongTensor,
    cur_len: int,
    is_eos_in_next_token: torch.BoolTensor,
) -> Tuple[torch.LongTensor, torch.LongTensor]:
    # mark sentences that were still unfinished and whose next token is EOS
    is_sent_unfinished = unfinished_sequences.mul(
        is_eos_in_next_token.long()).bool()

    # freeze the length of those sentences at the current step
    sequence_lengths = sequence_lengths.masked_fill(
        is_sent_unfinished, cur_len)

    # mark sentences that just produced EOS as finished
    unfinished_sequences = unfinished_sequences.mul(
        (~is_eos_in_next_token).long())

    return sequence_lengths, unfinished_sequences
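# Hypothetical usage inside a greedy decoding loop; eos_token_id, next_tokens,
# and the initial state below are assumptions chosen for illustration.
import torch

eos_token_id = 2
sequence_lengths = torch.full((3,), 10, dtype=torch.long)  # start at max_length
unfinished_sequences = torch.ones(3, dtype=torch.long)

next_tokens = torch.tensor([5, eos_token_id, 7])
sequence_lengths, unfinished_sequences = _update_seq_length_for_generation(
    sequence_lengths, unfinished_sequences, cur_len=4,
    is_eos_in_next_token=next_tokens == eos_token_id,
)
# the second sequence emitted EOS at step 4: its length is frozen at 4 and it
# is marked finished, so later steps leave it untouched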
def _compute_score(
    self, emissions: torch.Tensor, tags: torch.LongTensor, mask: torch.BoolTensor
) -> torch.Tensor:
    # emissions: (seq_length, batch_size, num_tags)
    # tags: (seq_length, batch_size)
    # mask: (seq_length, batch_size)
    assert emissions.dim() == 3 and tags.dim() == 2
    assert emissions.shape[:2] == tags.shape
    assert emissions.size(2) == self.num_tags
    assert mask.shape == tags.shape
    assert mask[0].all()

    seq_length, batch_size = tags.shape
    mask = mask.float()

    # Start transition score and first emission
    # shape: (batch_size,)
    score = self.start_transitions[tags[0]]
    score += emissions[0, torch.arange(batch_size), tags[0]]

    for i in range(1, seq_length):
        # Transition score to next tag, only added if next timestep is valid (mask == 1)
        # shape: (batch_size,)
        score += self.transitions[tags[i - 1], tags[i]] * mask[i]

        # Emission score for next tag, only added if next timestep is valid (mask == 1)
        # shape: (batch_size,)
        score += emissions[i, torch.arange(batch_size), tags[i]] * mask[i]

    # End transition score
    # shape: (batch_size,)
    seq_ends = mask.long().sum(dim=0) - 1
    # shape: (batch_size,)
    last_tags = tags[seq_ends, torch.arange(batch_size)]
    # shape: (batch_size,)
    score += self.end_transitions[last_tags]

    return score
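# A standalone sketch of the same gold-path score for a single unpadded
# sequence; the parameter tensors are plain stand-ins for the module's
# start_transitions / transitions / end_transitions (illustrative only).
import torch

def single_path_score(emissions, tags, start_transitions, transitions,
                      end_transitions):
    # emissions: (seq_length, num_tags), tags: (seq_length,)
    score = start_transitions[tags[0]] + emissions[0, tags[0]]
    for i in range(1, len(tags)):
        score += transitions[tags[i - 1], tags[i]] + emissions[i, tags[i]]
    return score + end_transitions[tags[-1]]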
def count_correct(
    heads: LongTensor,
    types: LongTensor,
    pred_heads: LongTensor,
    pred_types: LongTensor,
    mask: BoolTensor,
    nopunct_mask: BoolTensor,
    proj_mask: BoolTensor,
    root_idx: int = 0,
    type_idx: Optional[int] = None,
) -> Union["Counts", "TypeWiseCounts"]:
    # shape: (bsz, slen)
    assert heads.dim() == 2
    assert types.shape == heads.shape
    assert pred_heads.shape == heads.shape
    assert pred_types.shape == heads.shape
    assert mask.shape == heads.shape
    assert nopunct_mask.shape == heads.shape
    assert proj_mask.shape == heads.shape

    corr_heads = heads == pred_heads
    corr_types = types == pred_types

    if type_idx is None:
        root_mask = heads == root_idx
        nonproj_mask = ~torch.all(proj_mask | (~mask), dim=1, keepdim=True)

        usents = int(torch.all(corr_heads | (~mask), dim=1).long().sum())
        usents_nopunct = int(
            torch.all(corr_heads | (~mask) | (~nopunct_mask),
                      dim=1).long().sum())
        lsents = int(
            torch.all(corr_heads & corr_types | (~mask), dim=1).long().sum())
        lsents_nopunct = int(
            torch.all(corr_heads & corr_types | (~mask) | (~nopunct_mask),
                      dim=1).long().sum())
        uarcs = int((corr_heads & mask).long().sum())
        uarcs_nopunct = int((corr_heads & mask & nopunct_mask).long().sum())
        uarcs_nonproj = int((corr_heads & mask & nonproj_mask).long().sum())
        larcs = int((corr_heads & corr_types & mask).long().sum())
        larcs_nopunct = int(
            (corr_heads & corr_types & mask & nopunct_mask).long().sum())
        larcs_nonproj = int(
            (corr_heads & corr_types & mask & nonproj_mask).long().sum())
        roots = int((corr_heads & mask & root_mask).long().sum())
        n_sents = heads.size(0)
        n_arcs = int(mask.long().sum())
        n_arcs_nopunct = int((mask & nopunct_mask).long().sum())
        n_arcs_nonproj = int((mask & nonproj_mask).long().sum())
        n_roots = int((mask & root_mask).long().sum())

        return Counts(
            usents,
            usents_nopunct,
            lsents,
            lsents_nopunct,
            uarcs,
            uarcs_nopunct,
            uarcs_nonproj,
            larcs,
            larcs_nopunct,
            larcs_nonproj,
            roots,
            n_sents,
            n_arcs,
            n_arcs_nopunct,
            n_arcs_nonproj,
            n_roots,
        )

    assert type_idx is not None
    type_mask = types == type_idx

    uarcs = int((corr_heads & type_mask & mask).long().sum())
    uarcs_nopunct = int(
        (corr_heads & type_mask & nopunct_mask & mask).long().sum())
    larcs = int((corr_heads & corr_types & type_mask & mask).long().sum())
    larcs_nopunct = int((corr_heads & corr_types & type_mask & nopunct_mask
                         & mask).long().sum())
    n_arcs = int((type_mask & mask).long().sum())
    n_arcs_nopunct = int((type_mask & nopunct_mask & mask).long().sum())

    return TypeWiseCounts(type_idx, uarcs, uarcs_nopunct, larcs,
                          larcs_nopunct, n_arcs, n_arcs_nopunct)
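# Hypothetical follow-up: turning the returned counts into attachment scores
# on a toy batch. This assumes Counts exposes its positional fields under the
# names used in the constructor call above (uarcs, larcs, n_arcs, ...).
import torch

heads = torch.tensor([[0, 1, 1]])       # gold heads, one 3-token sentence
types = torch.tensor([[0, 2, 3]])       # gold dependency types
pred_heads = torch.tensor([[0, 1, 2]])  # one head error at the last token
pred_types = torch.tensor([[0, 2, 3]])
true_mask = torch.ones(1, 3, dtype=torch.bool)

counts = count_correct(heads, types, pred_heads, pred_types,
                       true_mask, true_mask, true_mask)
uas = counts.uarcs / counts.n_arcs  # unlabeled attachment score: 2/3
las = counts.larcs / counts.n_arcs  # labeled attachment score: 2/3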
def _viterbi_decode(
    self, emissions: torch.FloatTensor, mask: torch.BoolTensor
) -> List[List[int]]:
    # emissions: (seq_length, batch_size, num_tags)
    # mask: (seq_length, batch_size)
    assert emissions.dim() == 3 and mask.dim() == 2
    assert emissions.shape[:2] == mask.shape
    assert emissions.size(2) == self.num_tags
    assert mask[0].all()

    seq_length, batch_size = mask.shape

    # Start transition and first emission
    # shape: (batch_size, num_tags)
    score = self.start_transitions + emissions[0]
    history = []

    # score is a tensor of size (batch_size, num_tags) where for every batch,
    # value at column j stores the score of the best tag sequence so far that
    # ends with tag j
    # history saves where the best tags candidate transitioned from; this is
    # used when we trace back the best tag sequence

    # Viterbi algorithm recursive case: we compute the score of the best tag
    # sequence for every possible next tag
    for i in range(1, seq_length):
        # Broadcast viterbi score for every possible next tag
        # shape: (batch_size, num_tags, 1)
        broadcast_score = score.unsqueeze(2)

        # Broadcast emission score for every possible current tag
        # shape: (batch_size, 1, num_tags)
        broadcast_emission = emissions[i].unsqueeze(1)

        # Compute the score tensor of size (batch_size, num_tags, num_tags)
        # where for each sample, entry at row i and column j stores the score
        # of the best tag sequence so far that ends with transitioning from
        # tag i to tag j and emitting
        # shape: (batch_size, num_tags, num_tags)
        next_score = broadcast_score + self.transitions + broadcast_emission

        # Find the maximum score over all possible current tag
        # shape: (batch_size, num_tags)
        next_score, indices = next_score.max(dim=1)

        # Set score to the next score if this timestep is valid (mask == 1)
        # and save the index that produces the next score
        # shape: (batch_size, num_tags)
        score = torch.where(mask[i].unsqueeze(1), next_score, score)
        history.append(indices)

    # End transition score
    # shape: (batch_size, num_tags)
    score += self.end_transitions

    # Now, compute the best path for each sample
    # shape: (batch_size,)
    seq_ends = mask.long().sum(dim=0) - 1
    best_tags_list = []

    for idx in range(batch_size):
        # Find the tag which maximizes the score at the last timestep;
        # this is our best tag for the last timestep
        _, best_last_tag = score[idx].max(dim=0)
        best_tags = [best_last_tag.item()]

        # We trace back where the best last tag comes from, append that to
        # our best tag sequence, and trace it back again, and so on
        for hist in reversed(history[: seq_ends[idx]]):
            best_last_tag = hist[idx][best_tags[-1]]
            best_tags.append(best_last_tag.item())

        # Reverse the order because we start from the last timestep
        best_tags.reverse()
        best_tags_list.append(best_tags)

    return best_tags_list
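# Hypothetical call sketch highlighting the (seq_length, batch_size) layout
# this method expects; batch-first tensors would need a transpose first.
# `model` is an assumed module exposing _viterbi_decode as defined above.
import torch

seq_length, batch_size, num_tags = 5, 2, 4
emissions = torch.randn(seq_length, batch_size, num_tags)
mask = torch.ones(seq_length, batch_size, dtype=torch.bool)
mask[3:, 1] = False  # second sequence has length 3
# best = model._viterbi_decode(emissions, mask)
# -> two lists of tag ids, of lengths 5 and 3 (unpadded)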