Exemplo n.º 1
0
 def __init__(self, model: CompiledModel,
              analyser: BaseMorphologicalAnalyser,
              log_theta: float,
              suf_theta: float,
              max_guessed_tags: int,
              use_beam_search: bool):
     """Store the model and analyser and build the matching decoder.

     When ``use_beam_search`` is true a ``BeamSearch`` decoder is created,
     otherwise a ``BeamedViterbi`` one. Both receive the same arguments:
     ``model``, ``analyser``, ``log_theta``, ``suf_theta`` and
     ``max_guessed_tags``.
     """
     self.model = model
     self.analyser = analyser
     # TODO: maybe take beam_size from the command line?
     decoder_cls = BeamSearch if use_beam_search else BeamedViterbi
     self.decoder = decoder_cls(model, analyser, log_theta, suf_theta, max_guessed_tags)
Exemplo n.º 2
0
class POSTagger:
    """Tag whitespace-tokenised sentences using a decoder chosen at construction."""

    @staticmethod
    def preprocess_sentence(sentence: list):
        """Register pre-analysed words and return the cleaned word list.

        The shared ``util.analysis_queue`` is re-initialised to the length of
        ``sentence``. Every word for which ``AnalysisQueue.ispreanalysed``
        is true is added to that queue under its position and replaced in the
        result by ``AnalysisQueue.clean(word)``; all other words are passed
        through unchanged.

        :param sentence: list of word strings.
        :return: a new list with the same length as ``sentence``.
        """
        util.analysis_queue.init(len(sentence))
        ret = []
        # Kept as an explicit loop: each iteration may have a side effect on
        # the shared analysis queue.
        for i, word in enumerate(sentence):
            if AnalysisQueue.ispreanalysed(word):
                util.analysis_queue.add_word(word, i)
                ret.append(AnalysisQueue.clean(word))
            else:
                ret.append(word)
        return ret

    def __init__(self, model: CompiledModel,
                 analyser: BaseMorphologicalAnalyser,
                 log_theta: float,
                 suf_theta: float,
                 max_guessed_tags: int,
                 use_beam_search: bool):
        """Store the model and analyser and build the matching decoder.

        ``BeamSearch`` is used when ``use_beam_search`` is true, otherwise
        ``BeamedViterbi``; both are constructed with the same arguments.
        """
        self.model = model
        self.analyser = analyser
        # TODO: maybe take beam_size from the command line?
        decoder_cls = BeamSearch if use_beam_search else BeamedViterbi
        self.decoder = decoder_cls(model, analyser, log_theta, suf_theta, max_guessed_tags)

    def tag_sentence(self, sentence: list,  # list of strings
                     max_res: int) -> list:
        """Decode one sentence and return the resulting tagged sentences.

        :param sentence: list of word strings; it is preprocessed first.
        :param max_res: passed to the decoder's ``decode`` call.
        :return: a list of ``Sentence`` objects, one per decoder result.
        """
        sentence = self.preprocess_sentence(sentence)
        tag_list = self.decoder.decode(sentence, max_res)
        # Each decoder result is indexed as (tag sequence, score).
        return [Sentence(self.merge(sentence, result[0]), score=result[1])
                for result in tag_list]

    def merge(self, sentence: list, tags: list) -> list:
        """Pair each word with the tag-vocabulary entry of its tag.

        Pairing stops at the end of the shorter sequence, so surplus words or
        tags are ignored.

        :return: a list of ``Token`` objects whose second argument is ``None``.
        """
        vocab = self.model.data.tag_vocabulary
        return [Token(word, None, vocab.word(tag))
                for word, tag in zip(sentence, tags)]

    def tag(self, source: io.TextIOWrapper, dest: io.TextIOWrapper, max_results_number: int=1):
        """Tag every line of ``source`` and print one output line per input line.

        Blank input lines produce an empty output line, so input and output
        stay line-aligned.
        """
        for line in source:
            sent_str = self.tag_and_format(line, max_results_number)
            print(sent_str, file=dest)

    def tag_and_format(self, line: str, max_res_num: int) -> str:
        """Tag a single line and return its string form.

        Returns the empty string for a line that is empty or only whitespace.
        Scores are shown only when more than one result was requested.
        """
        if line.strip() == "":
            return ""
        tagged = self.tag_sentence(line.split(), max_res_num)
        return self.sentences_to_string(tagged, max_res_num > 1)

    def sentences_to_string(self, sentences: list, show_prob: bool) -> str:
        """Join the string forms of ``sentences`` with tab characters."""
        return "\t".join(self.sent_to_string(s, show_prob) for s in sentences)

    @staticmethod
    def sent_to_string(sentence: Sentence, show_prob: bool) -> str:
        """Return ``str(sentence)``, followed by ``$$<score>$$`` if ``show_prob``."""
        ret = str(sentence)
        if show_prob:
            ret += "$${}$$".format(sentence.score)
        return ret