Пример #1
0
    def prune_step_1_always_keep_alternative(self):
        """Classify each vocabulary piece and record its fallback segmentation.

        Every piece returned by ``self.SentencePiece.get_pieces()`` is
        segmented on its own against the current vocabulary by asking a
        fresh ``Lattice`` for its top two results (``NBest(2, ...)``), to
        learn how the piece would be resegmented if it were removed from
        the vocabulary.

        Each piece is then classified as follows:

        * only one result comes back: the piece is flagged ``True``
          (there is no other way to segment it);
        * the first result has two or more elements: the piece is
          flagged ``False``;
        * the first result has exactly one element: the piece is flagged
          ``True`` and the second result is stored as its alternative.

        A piece whose first result is empty gets no entry at all.

        Return
            always_keep(dict): piece -> bool flag as described above
            alternatives(defaultdict(list)): piece -> second result
        """
        vocab = self.SentencePiece.get_pieces()
        # Both result tables are keyed by the piece itself.
        keep_flags = {}
        fallbacks = defaultdict(list)

        for piece, _score in vocab.items():
            lattice = Lattice()
            lattice.set_sentence(piece)
            lattice.populate_nodes(vocab, self.Trie)
            # NOTE(review): presumably ret_piece=True makes each result a
            # sequence of pieces rather than nodes -- confirm in Lattice.NBest.
            candidates = lattice.NBest(2, ret_piece=True)

            if len(candidates) == 1:
                # No second result: there is only one way to segment it.
                keep_flags[piece] = True
                continue

            best_len = len(candidates[0])
            if best_len >= 2:
                keep_flags[piece] = False
            elif best_len == 1:
                keep_flags[piece] = True
                fallbacks[piece] = candidates[1]

        return keep_flags, fallbacks