Example No. 1
    def viterbi_decode(self, h: FloatTensor,
                       mask: BoolTensor) -> LongTensor:
        """
        decode labels using viterbi algorithm
        :param h: hidden matrix (batch_size, seq_len, num_labels)
        :param mask: mask tensor of each sequence
                     in mini batch (batch_size, batch_size)
        :return: labels of each sequence in mini batch
        """

        batch_size, seq_len, _ = h.size()
        # compute the length of each sequence in the mini batch
        seq_lens = mask.long().sum(dim=1)
        # for each sequence in the mini batch, prepare the score
        # from the start state to the first label
        score = [self.start_trans.data + h[:, 0]]
        path = []

        for t in range(1, seq_len):
            # extract the scores of the previous time step
            # (batch_size, num_labels, 1)
            previous_score = score[t - 1].view(batch_size, -1, 1)

            # extract the emission scores at time step t
            # (batch_size, 1, num_labels)
            h_t = h[:, t].view(batch_size, 1, -1)

            # add the transition scores from the labels at t-1
            # to the labels at t;
            # self.trans_matrix holds the score of transitioning
            # from label A to label B
            # (batch_size, num_labels, num_labels)
            score_t = previous_score + self.trans_matrix + h_t

            # keep the maximum score and the label index that attains it
            # for each sequence
            # (batch_size, num_labels)
            best_score, best_path = score_t.max(1)
            score.append(best_score)
            path.append(best_path)

        # predict the labels of each sequence in the mini batch,
        # padding each path with 0 up to seq_len
        best_paths = []
        for i in range(batch_size):
            best_path = self._viterbi_compute_best_path(
                i, seq_lens, score, path)
            pad_path = best_path + [0] * (h.size(1) - len(best_path))
            best_paths.append(pad_path)

        return torch.LongTensor(best_paths)
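The core of the loop above is a single broadcasted max over the previous labels. A minimal self-contained sketch of that recurrence, with made-up sizes and random scores (no CRF module required, all names are illustrative):

import torch

torch.manual_seed(0)
batch_size, seq_len, num_labels = 2, 4, 3
h = torch.randn(batch_size, seq_len, num_labels)  # emission scores
trans = torch.randn(num_labels, num_labels)       # trans[i, j]: score of label i -> j
start = torch.randn(num_labels)

score = start + h[:, 0]                           # (batch_size, num_labels)
for t in range(1, seq_len):
    # (batch_size, num_labels, 1) + (num_labels, num_labels)
    # + (batch_size, 1, num_labels) broadcasts to
    # (batch_size, num_labels, num_labels); the max over dim 1 picks,
    # for each next label j, the best previous label i.
    score, best_prev = (score.unsqueeze(2) + trans + h[:, t].unsqueeze(1)).max(dim=1)
print(score.shape)  # torch.Size([2, 3])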
Example No. 2
    def viterbi_decode(
        self, h: torch.FloatTensor, mask: torch.BoolTensor
    ) -> List[List[int]]:
        """
        decode labels using viterbi algorithm
        :param h: hidden matrix (batch_size, seq_len, num_labels)
        :param mask: mask tensor of each sequence
                     in mini batch (seq_len, batch_size)
        :return: labels of each sequence in mini batch
        """

        batch_size, seq_len, _ = h.size()
        # compute the length of each sequence in the mini batch
        seq_lens = mask.long().sum(dim=1)
        # for each sequence in the mini batch, prepare the score
        # from the start state to the first label
        score = [self.start_trans.data + h[:, 0]]
        path = []

        for t in range(1, seq_len):
            # extract the scores of the previous time step
            # (batch_size, num_labels, 1)
            previous_score = score[t - 1].view(batch_size, -1, 1)

            # extract the emission scores at time step t
            # (batch_size, 1, num_labels)
            h_t = h[:, t].view(batch_size, 1, -1)

            # add the transition scores from the labels at t-1
            # to the labels at t;
            # self.trans_matrix holds the score of transitioning
            # from label A to label B
            # (batch_size, num_labels, num_labels)
            score_t = previous_score + self.trans_matrix + h_t

            # keep the maximum score and the label index that attains it
            # for each sequence
            # (batch_size, num_labels)
            best_score, best_path = score_t.max(1)
            score.append(best_score)
            path.append(best_path)

        # predict the labels of each sequence in the mini batch
        best_paths = [
            self._viterbi_compute_best_path(i, seq_lens, score, path)
            for i in range(batch_size)
        ]

        return best_paths
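Both examples delegate backtracking to `_viterbi_compute_best_path`, which is not shown here. A sketch of what that step typically looks like, assuming `score` and `path` are the per-step best-score and backpointer tensors built in the loop above (the function name and signature are illustrative):

from typing import List

import torch

def viterbi_backtrack(i: int, seq_len_i: int,
                      score: List[torch.Tensor],
                      path: List[torch.Tensor]) -> List[int]:
    # pick the best final label of sample i, then follow the backpointers
    best_last = int(score[seq_len_i - 1][i].argmax())
    labels = [best_last]
    for ptr in reversed(path[: seq_len_i - 1]):
        labels.append(int(ptr[i][labels[-1]]))
    labels.reverse()
    return labels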
Example No. 3
    def _compute_numerator_log_likelihood(
        self, h: FloatTensor, y: LongTensor, mask: BoolTensor
    ) -> FloatTensor:
        """
        compute the numerator term for the log-likelihood
        :param h: hidden matrix (batch_size, seq_len, num_labels)
        :param y: answer labels of each sequence
                  in mini batch (batch_size, seq_len)
        :param mask: mask tensor of each sequence
                     in mini batch (batch_size, seq_len)
        :return: The score of numerator term for the log-likelihood
        """

        batch_size, seq_len, _ = h.size()
        # start with the transition score from the start state
        # to the first label of each sequence
        score = self.start_trans[y[:, 0]]

        h = h.unsqueeze(-1)
        trans = self.trans_matrix.unsqueeze(-1)

        for t in range(seq_len - 1):
            mask_t = mask[:, t].cuda() if CRF.CUDA else mask[:, t]
            mask_t1 = mask[:, t + 1].cuda() if CRF.CUDA else mask[:, t + 1]
            # extract the emission score of the t-th label
            # (batch_size)
            h_t = torch.cat([h[b, t, y[b, t]] for b in range(batch_size)])
            # extract the transition score from the t-th label
            # to the (t+1)-th label
            # (batch_size)
            trans_t = torch.cat([trans[s[t], s[t + 1]] for s in y])
            # add the emission score and the transition score
            # (batch_size)
            score += h_t * mask_t + trans_t * mask_t1

        # extract the index of the last label of each sequence
        # in the mini batch
        # (batch_size)
        last_mask_index = mask.long().sum(1) - 1
        last_labels = y.gather(1, last_mask_index.unsqueeze(-1))
        # restore the original shape of h
        h = h.squeeze(-1)

        # add the emission score at the last time step for the sequences
        # that span the maximum length in the mini batch
        score += h[:, -1].gather(1, last_labels).squeeze(1) * mask[:, -1]
        # add the transition score from the last tag of each sequence to EOS
        score += self.end_trans[last_labels].view(batch_size)

        return score
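The per-batch list comprehensions above can be replaced by a gather over the label dimension. A small runnable equivalence check with made-up sizes (standalone, independent of the class):

import torch

torch.manual_seed(0)
batch_size, seq_len, num_labels = 2, 3, 4
h = torch.randn(batch_size, seq_len, num_labels)
y = torch.randint(num_labels, (batch_size, seq_len))

t = 1
# loop form, as in the example above (before the unsqueeze)
loop_t = torch.stack([h[b, t, y[b, t]] for b in range(batch_size)])
# equivalent vectorized gather over the label dimension
gather_t = h[:, t].gather(1, y[:, t].unsqueeze(1)).squeeze(1)
assert torch.allclose(loop_t, gather_t)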
Example No. 4
    def _update_seq_length_for_generation(
        sequence_lengths: torch.LongTensor,
        unfinished_sequences: torch.LongTensor,
        cur_len: int,
        is_eos_in_next_token: torch.BoolTensor,
    ) -> Tuple[torch.LongTensor, torch.LongTensor]:
        # mark sequences that were still unfinished and just produced EOS
        is_sent_unfinished = unfinished_sequences.mul(
            is_eos_in_next_token.long()).bool()

        # update sentence length
        sequence_lengths = sequence_lengths.masked_fill(
            is_sent_unfinished, cur_len)
        unfinished_sequences = unfinished_sequences.mul(
            (~is_eos_in_next_token).long())
        return sequence_lengths, unfinished_sequences
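A tiny runnable trace of this update with made-up values: the first sequence hits EOS at the current step, the second does not, and the third had already finished earlier:

import torch

sequence_lengths = torch.tensor([10, 10, 10])  # initialized to max length
unfinished_sequences = torch.tensor([1, 1, 0])
is_eos_in_next_token = torch.tensor([True, False, True])
cur_len = 4

is_sent_unfinished = unfinished_sequences.mul(
    is_eos_in_next_token.long()).bool()
sequence_lengths = sequence_lengths.masked_fill(is_sent_unfinished, cur_len)
unfinished_sequences = unfinished_sequences.mul(
    (~is_eos_in_next_token).long())
print(sequence_lengths.tolist())      # [4, 10, 10]
print(unfinished_sequences.tolist())  # [0, 1, 0]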
Example No. 5
    def _compute_score(
        self, emissions: torch.Tensor, tags: torch.LongTensor, mask: torch.BoolTensor
    ) -> torch.Tensor:
        # emissions: (seq_length, batch_size, num_tags)
        # tags: (seq_length, batch_size)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and tags.dim() == 2
        assert emissions.shape[:2] == tags.shape
        assert emissions.size(2) == self.num_tags
        assert mask.shape == tags.shape
        assert mask[0].all()

        seq_length, batch_size = tags.shape
        mask = mask.float()

        # Start transition score and first emission
        # shape: (batch_size,)
        score = self.start_transitions[tags[0]]
        score += emissions[0, torch.arange(batch_size), tags[0]]

        for i in range(1, seq_length):
            # Transition score to next tag, only added if next timestep is valid (mask == 1)
            # shape: (batch_size,)
            score += self.transitions[tags[i - 1], tags[i]] * mask[i]

            # Emission score for next tag, only added if next timestep is valid (mask == 1)
            # shape: (batch_size,)
            score += emissions[i, torch.arange(batch_size), tags[i]] * mask[i]

        # End transition score
        # shape: (batch_size,)
        seq_ends = mask.long().sum(dim=0) - 1
        # shape: (batch_size,)
        last_tags = tags[seq_ends, torch.arange(batch_size)]
        # shape: (batch_size,)
        score += self.end_transitions[last_tags]

        return score
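For a single sample with a full mask, the returned value is just the unrolled path score: start transition, per-step emissions and transitions, and end transition. A standalone sketch with random parameters (no CRF instance needed, names mirror the attributes above):

import torch

torch.manual_seed(0)
seq_length, num_tags = 4, 3
emissions = torch.randn(seq_length, num_tags)
start_transitions = torch.randn(num_tags)
transitions = torch.randn(num_tags, num_tags)
end_transitions = torch.randn(num_tags)
tags = torch.tensor([0, 2, 1, 1])  # gold tag path

score = start_transitions[tags[0]] + emissions[0, tags[0]]
for i in range(1, seq_length):
    score += transitions[tags[i - 1], tags[i]] + emissions[i, tags[i]]
score += end_transitions[tags[-1]]
print(float(score))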
Example No. 6
def count_correct(
    heads: LongTensor,
    types: LongTensor,
    pred_heads: LongTensor,
    pred_types: LongTensor,
    mask: BoolTensor,
    nopunct_mask: BoolTensor,
    proj_mask: BoolTensor,
    root_idx: int = 0,
    type_idx: Optional[int] = None,
) -> Union["Counts", "TypeWiseCounts"]:
    # shape: (bsz, slen)
    assert heads.dim() == 2
    assert types.shape == heads.shape
    assert pred_heads.shape == heads.shape
    assert pred_types.shape == heads.shape
    assert mask.shape == heads.shape
    assert nopunct_mask.shape == heads.shape
    assert proj_mask.shape == heads.shape

    corr_heads = heads == pred_heads
    corr_types = types == pred_types

    if type_idx is None:
        root_mask = heads == root_idx
        nonproj_mask = ~torch.all(proj_mask | (~mask), dim=1, keepdim=True)

        usents = int(torch.all(corr_heads | (~mask), dim=1).long().sum())
        usents_nopunct = int(
            torch.all(corr_heads | (~mask) | (~nopunct_mask),
                      dim=1).long().sum())
        lsents = int(
            torch.all(corr_heads & corr_types | (~mask), dim=1).long().sum())
        lsents_nopunct = int(
            torch.all(corr_heads & corr_types | (~mask) | (~nopunct_mask),
                      dim=1).long().sum())
        uarcs = int((corr_heads & mask).long().sum())
        uarcs_nopunct = int((corr_heads & mask & nopunct_mask).long().sum())
        uarcs_nonproj = int((corr_heads & mask & nonproj_mask).long().sum())
        larcs = int((corr_heads & corr_types & mask).long().sum())
        larcs_nopunct = int(
            (corr_heads & corr_types & mask & nopunct_mask).long().sum())
        larcs_nonproj = int(
            (corr_heads & corr_types & mask & nonproj_mask).long().sum())
        roots = int((corr_heads & mask & root_mask).long().sum())
        n_sents = heads.size(0)
        n_arcs = int(mask.long().sum())
        n_arcs_nopunct = int((mask & nopunct_mask).long().sum())
        n_arcs_nonproj = int((mask & nonproj_mask).long().sum())
        n_roots = int((mask & root_mask).long().sum())

        return Counts(
            usents,
            usents_nopunct,
            lsents,
            lsents_nopunct,
            uarcs,
            uarcs_nopunct,
            uarcs_nonproj,
            larcs,
            larcs_nopunct,
            larcs_nonproj,
            roots,
            n_sents,
            n_arcs,
            n_arcs_nopunct,
            n_arcs_nonproj,
            n_roots,
        )

    assert type_idx is not None
    type_mask = types == type_idx

    uarcs = int((corr_heads & type_mask & mask).long().sum())
    uarcs_nopunct = int(
        (corr_heads & type_mask & nopunct_mask & mask).long().sum())
    larcs = int((corr_heads & corr_types & type_mask & mask).long().sum())
    larcs_nopunct = int((corr_heads & corr_types & type_mask & nopunct_mask
                         & mask).long().sum())
    n_arcs = int((type_mask & mask).long().sum())
    n_arcs_nopunct = int((type_mask & nopunct_mask & mask).long().sum())

    return TypeWiseCounts(type_idx, uarcs, uarcs_nopunct, larcs, larcs_nopunct,
                          n_arcs, n_arcs_nopunct)
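The boolean-mask counting idiom used throughout count_correct is easy to check in isolation. A tiny runnable example with made-up values, turning the same kind of counts into an unlabeled attachment score:

import torch

heads = torch.tensor([[2, 0, 2]])
pred_heads = torch.tensor([[2, 0, 1]])
mask = torch.tensor([[True, True, True]])

corr_heads = heads == pred_heads
uarcs = int((corr_heads & mask).long().sum())
n_arcs = int(mask.long().sum())
print(uarcs / n_arcs)  # 0.666...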
Example No. 7
    def _viterbi_decode(
        self, emissions: torch.FloatTensor, mask: torch.BoolTensor
    ) -> List[List[int]]:
        # emissions: (seq_length, batch_size, num_tags)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        assert mask[0].all()

        seq_length, batch_size = mask.shape

        # Start transition and first emission
        # shape: (batch_size, num_tags)
        score = self.start_transitions + emissions[0]
        history = []

        # score is a tensor of size (batch_size, num_tags) where for every batch,
        # value at column j stores the score of the best tag sequence so far that ends
        # with tag j
        # history saves where the best tags candidate transitioned from; this is used
        # when we trace back the best tag sequence

        # Viterbi algorithm recursive case: we compute the score of the best tag sequence
        # for every possible next tag
        for i in range(1, seq_length):
            # Broadcast viterbi score for every possible next tag
            # shape: (batch_size, num_tags, 1)
            broadcast_score = score.unsqueeze(2)

            # Broadcast emission score for every possible current tag
            # shape: (batch_size, 1, num_tags)
            broadcast_emission = emissions[i].unsqueeze(1)

            # Compute the score tensor of size (batch_size, num_tags, num_tags) where
            # for each sample, entry at row i and column j stores the score of the best
            # tag sequence so far that ends with transitioning from tag i to tag j and emitting
            # shape: (batch_size, num_tags, num_tags)
            next_score = broadcast_score + self.transitions + broadcast_emission

            # Find the maximum score over all possible current tags
            # shape: (batch_size, num_tags)
            next_score, indices = next_score.max(dim=1)

            # Set score to the next score if this timestep is valid (mask == 1)
            # and save the index that produces the next score
            # shape: (batch_size, num_tags)
            score = torch.where(mask[i].unsqueeze(1), next_score, score)
            history.append(indices)

        # End transition score
        # shape: (batch_size, num_tags)
        score += self.end_transitions

        # Now, compute the best path for each sample

        # shape: (batch_size,)
        seq_ends = mask.long().sum(dim=0) - 1
        best_tags_list = []

        for idx in range(batch_size):
            # Find the tag which maximizes the score at the last timestep; this is our best tag
            # for the last timestep
            _, best_last_tag = score[idx].max(dim=0)
            best_tags = [best_last_tag.item()]

            # We trace back where the best last tag comes from, append that to our best tag
            # sequence, and trace it back again, and so on
            for hist in reversed(history[: seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())

            # Reverse the order because we start from the last timestep
            best_tags.reverse()
            best_tags_list.append(best_tags)

        return best_tags_list
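The torch.where carry-forward at each step is what makes padding safe: once a sequence is past its end, its running score stops changing, so the final max and the backtracking are unaffected. A tiny runnable illustration with made-up values:

import torch

score = torch.tensor([[1.0, 2.0],
                      [3.0, 4.0]])
next_score = torch.tensor([[9.0, 8.0],
                           [7.0, 6.0]])
mask_i = torch.tensor([True, False])  # second sample is already past its end

# the masked-out row keeps its old score
print(torch.where(mask_i.unsqueeze(1), next_score, score))
# tensor([[9., 8.],
#         [3., 4.]])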