Exemplo n.º 1
0
 def __init__(self, training_set: TextIO, n: int, end_chr: str = "\x03"):
     """Build the model state from a training stream.

     Args:
         training_set: Text stream handed to ``nwords_counter``.
         n: Forwarded to ``nwords_counter`` and kept on the instance.
         end_chr: Forwarded to ``nwords_counter`` and kept on the instance.

     Raises:
         ValueError: If the word table returned by ``nwords_counter`` is
             empty, because ``max()`` is applied to its values.
     """
     counts, vocabulary = nwords_counter(training_set, n, end_chr)
     self.__nwords = expand_2d(counts)
     self.__n = n
     self.__words = vocabulary
     self.end_chr = end_chr
     # Largest value found in the word table.
     self.__word_max_len = max(vocabulary.values())
Exemplo n.º 2
0
 def __init__(self, model, max_iter: int = 10**100):
     """Initialise the instance from an already-computed model.

     Args:
         model: Either a ``(backwords, words, config)`` tuple, or a
             file-like object from which that triple is unpickled.
         max_iter: Stored unchanged as ``self.max_iter``.
     """
     super().__init__(None)
     # NOTE(security): pickle.load can execute arbitrary code while
     # deserialising; only pass streams that come from a trusted source.
     payload = model if isinstance(model, tuple) else pickle.load(model)
     backwords, words, config = payload
     probabilities = freq2prob(backwords, config['threshold'])
     self.nwords = expand_2d(probabilities)
     self.end_chr = config['end_chr']
     self.words = words
     self.min_len = 4
     # Both attributes take the same configured start marker.
     self.default_start = self.start_chr = config['start_chr']
     self.max_iter = max_iter
Exemplo n.º 3
0
 def __init__(self, training_set: Union[TextIO, None], n: int = 2, splitter: str = ' ', start4word: int = 0,
              skip4word: int = 1, start_chr="\x00",
              end_chr: str = "\x03"):
     """Build the model state from a training stream, if one is given.

     When ``training_set`` is ``None`` the constructor returns at once and
     sets no attributes on the instance.

     Args:
         training_set: Text stream handed to ``nwords_counter``, or ``None``.
         n: Forwarded to ``nwords_counter``; also fixes the length of
             ``default_start`` at ``n - 1``.
         splitter: Forwarded to ``nwords_counter``.
         start4word: Forwarded to ``nwords_counter``.
         skip4word: Forwarded to ``nwords_counter``.
         start_chr: Start marker, forwarded and kept on the instance.
         end_chr: End marker, forwarded and kept on the instance.
     """
     if training_set is None:
         return
     counts, vocabulary = nwords_counter(
         training_set, n, splitter, end_chr, start4word, skip4word,
         start_chr=start_chr,
     )
     self.nwords = expand_2d(counts)
     self.__n = n
     self.words = vocabulary
     self.end_chr = end_chr
     self.min_len = 4
     # Default prefix: the start marker repeated n - 1 times.
     self.default_start = (start_chr,) * (n - 1)
     self.start_chr = start_chr
Exemplo n.º 4
0
 def __init__(self, training_set: Union[TextIO, None], splitter: str = '', start4word: int = 0, skip4word: int = 1,
              threshold: int = 10, start_chr: str = '\x00', end_chr: str = "\x03", max_gram: int = 256,
              max_iter: int = 10 ** 100):
     """Build the model state from a training stream, if one is given.

     The parent initialiser is always invoked with ``None``. When
     ``training_set`` is ``None`` nothing further is set on the instance.

     Args:
         training_set: Text stream handed to ``backwords_counter``, or ``None``.
         splitter: Forwarded to ``backwords_counter``.
         start4word: Forwarded to ``backwords_counter``.
         skip4word: Forwarded to ``backwords_counter``.
         threshold: Forwarded to ``backwords_counter`` by keyword.
         start_chr: Start marker, forwarded and kept on the instance.
         end_chr: End marker, forwarded and kept on the instance.
         max_gram: Forwarded to ``backwords_counter`` by keyword.
         max_iter: Stored unchanged as ``self.max_iter``.
     """
     super().__init__(None)
     if training_set is None:
         return
     counted = backwords_counter(
         training_set, splitter, start_chr, end_chr, start4word, skip4word,
         threshold=threshold, max_gram=max_gram,
     )
     backwords, vocabulary = counted
     self.nwords = expand_2d(backwords)
     self.end_chr = end_chr
     self.words = vocabulary
     self.min_len = 4
     # Both attributes take the same start marker.
     self.default_start = self.start_chr = start_chr
     self.max_iter = max_iter
Exemplo n.º 5
0
 def __init__(self,
              training_set: TextIO,
              splitter: str,
              start4word: int,
              skip4word: int,
              threshold: int,
              start_chr: str = '\x00',
              end_chr: str = "\x03",
              max_gram: int = 256):
     """Build the model state from a training stream.

     The parent initialiser is invoked with ``None`` before the counts are
     computed here.

     Args:
         training_set: Text stream handed to ``backwords_counter``.
         splitter: Forwarded to ``backwords_counter``.
         start4word: Forwarded to ``backwords_counter``.
         skip4word: Forwarded to ``backwords_counter``.
         threshold: Forwarded to ``backwords_counter`` by keyword.
         start_chr: Start marker, forwarded and kept on the instance.
         end_chr: End marker, forwarded and kept on the instance.
         max_gram: Forwarded to ``backwords_counter`` by keyword.
     """
     super().__init__(None)
     counted = backwords_counter(
         training_set, splitter, start_chr, end_chr, start4word, skip4word,
         threshold=threshold, max_gram=max_gram,
     )
     backwords, vocabulary = counted
     self.nwords = expand_2d(backwords)
     self.end_chr = end_chr
     self.words = vocabulary
     self.min_len = 4
     # Both attributes take the same start marker.
     self.default_start = self.start_chr = start_chr
Exemplo n.º 6
0
 def __init__(self, model_path: str):
     """Load a model from ``model_path`` and prepare its lookup tables.

     Args:
         model_path: Passed to ``read_bpe`` as ``model_path``.
     """
     grammars, terminals = read_bpe(model_path=model_path)
     self.__grammars = expand_1d(grammars, minus_log_based=True)
     self.__terminals = expand_2d(terminals, minus_log_based=True)
     # count_luds is given the raw grammars, not the expanded form.
     converted, not_parsed = count_luds(grammars)
     self.__converted = converted
     self.__not_parsed = not_parsed