def __init__(self, training_set: TextIO, n: int, end_chr: str = "\x03"):
    """Populate the instance from the counts of a training stream.

    Args:
        training_set: Text stream passed straight to ``nwords_counter``.
        n: Value forwarded to ``nwords_counter`` and kept on the instance.
        end_chr: End marker forwarded to ``nwords_counter`` and exposed
            as ``self.end_chr``.
    """
    # ``nwords_counter`` yields a pair; the second item must be a mapping
    # because ``.values()`` is called on it below.
    raw_counts, vocabulary = nwords_counter(training_set, n, end_chr)

    self.__nwords = expand_2d(raw_counts)
    self.__n = n
    self.__words = vocabulary
    self.end_chr = end_chr
    # NOTE(review): this is the largest *value* of the mapping, and it raises
    # ValueError when the mapping is empty. The attribute name suggests a
    # length -- confirm what ``nwords_counter`` stores as values.
    largest_value = max(vocabulary.values())
    self.__word_max_len = largest_value
def __init__(self, model, max_iter: int = 10**100):
    """Restore the instance from an already-built model.

    The parent initializer is invoked with ``None`` before any state is
    restored.

    Args:
        model: Either a 3-tuple ``(backwords, words, config)`` or an object
            accepted by ``pickle.load`` that unpickles to such a triple.
            ``config`` is indexed with the keys ``'threshold'``,
            ``'end_chr'`` and ``'start_chr'``.
        max_iter: Stored unchanged as ``self.max_iter``.
    """
    super().__init__(None)

    # NOTE(review): ``pickle.load`` can execute arbitrary code; only feed
    # this constructor model files from a trusted source.
    triple = model if isinstance(model, tuple) else pickle.load(model)
    raw_backwords, vocabulary, settings = triple

    probabilities = freq2prob(raw_backwords, settings['threshold'])
    self.nwords = expand_2d(probabilities)
    self.end_chr = settings['end_chr']
    self.words = vocabulary
    self.min_len = 4
    # Both start attributes take the same configured start character.
    self.default_start = self.start_chr = settings['start_chr']
    self.max_iter = max_iter
def __init__(self, training_set: Union[TextIO, None], n: int = 2, splitter: str = ' ',
             start4word: int = 0, skip4word: int = 1, start_chr="\x00",
             end_chr: str = "\x03"):
    """Populate the instance from a training stream, if one is given.

    When ``training_set`` is ``None`` the constructor returns immediately
    and sets no attributes.

    Args:
        training_set: Text stream passed to ``nwords_counter``, or ``None``
            to skip initialisation.
        n: Forwarded to ``nwords_counter``; also determines the length
            (``n - 1``) of ``self.default_start``.
        splitter: Forwarded to ``nwords_counter``.
        start4word: Forwarded to ``nwords_counter``.
        skip4word: Forwarded to ``nwords_counter``.
        start_chr: Start marker; forwarded to ``nwords_counter`` and stored
            as ``self.start_chr``.
        end_chr: End marker; forwarded to ``nwords_counter`` and stored as
            ``self.end_chr``.
    """
    if training_set is None:
        return

    raw_counts, vocabulary = nwords_counter(
        training_set, n, splitter, end_chr, start4word, skip4word,
        start_chr=start_chr,
    )

    self.nwords = expand_2d(raw_counts)
    self.__n = n
    self.words = vocabulary
    self.end_chr = end_chr
    self.min_len = 4
    # Default context: the start marker repeated n - 1 times.
    self.default_start = (start_chr,) * (n - 1)
    self.start_chr = start_chr
def __init__(self, training_set: Union[TextIO, None], splitter: str = '',
             start4word: int = 0, skip4word: int = 1, threshold: int = 10,
             start_chr: str = '\x00', end_chr: str = "\x03",
             max_gram: int = 256, max_iter: int = 10 ** 100):
    """Populate the instance from a training stream, if one is given.

    The parent initializer is always invoked with ``None``. When
    ``training_set`` is ``None`` nothing else happens: no attribute,
    including ``max_iter``, is set by this method.

    Args:
        training_set: Text stream passed to ``backwords_counter``, or
            ``None`` to skip initialisation.
        splitter: Forwarded to ``backwords_counter``.
        start4word: Forwarded to ``backwords_counter``.
        skip4word: Forwarded to ``backwords_counter``.
        threshold: Forwarded to ``backwords_counter`` by keyword.
        start_chr: Start marker; forwarded to ``backwords_counter`` and
            stored as both ``self.start_chr`` and ``self.default_start``.
        end_chr: End marker; forwarded to ``backwords_counter`` and stored
            as ``self.end_chr``.
        max_gram: Forwarded to ``backwords_counter`` by keyword.
        max_iter: Stored unchanged as ``self.max_iter``.
    """
    super().__init__(None)
    if training_set is None:
        return

    counted, vocabulary = backwords_counter(
        training_set, splitter, start_chr, end_chr, start4word, skip4word,
        threshold=threshold, max_gram=max_gram,
    )

    self.nwords = expand_2d(counted)
    self.end_chr = end_chr
    self.words = vocabulary
    self.min_len = 4
    # Both start attributes take the same start marker.
    self.default_start = self.start_chr = start_chr
    self.max_iter = max_iter
def __init__(self, training_set: TextIO, splitter: str, start4word: int,
             skip4word: int, threshold: int, start_chr: str = '\x00',
             end_chr: str = "\x03", max_gram: int = 256):
    """Populate the instance from the counts of a training stream.

    The parent initializer is invoked with ``None`` first; all state is
    then set here from the result of ``backwords_counter``.

    Args:
        training_set: Text stream passed to ``backwords_counter``.
        splitter: Forwarded to ``backwords_counter``.
        start4word: Forwarded to ``backwords_counter``.
        skip4word: Forwarded to ``backwords_counter``.
        threshold: Forwarded to ``backwords_counter`` by keyword.
        start_chr: Start marker; forwarded to ``backwords_counter`` and
            stored as both ``self.start_chr`` and ``self.default_start``.
        end_chr: End marker; forwarded to ``backwords_counter`` and stored
            as ``self.end_chr``.
        max_gram: Forwarded to ``backwords_counter`` by keyword.
    """
    super().__init__(None)

    counted, vocabulary = backwords_counter(
        training_set, splitter, start_chr, end_chr, start4word, skip4word,
        threshold=threshold, max_gram=max_gram,
    )

    self.nwords = expand_2d(counted)
    self.end_chr = end_chr
    self.words = vocabulary
    self.min_len = 4
    # Both start attributes take the same start marker.
    self.default_start = self.start_chr = start_chr
def __init__(self, model_path: str):
    """Load a model from ``model_path`` and derive the instance state.

    Args:
        model_path: Location handed to ``read_bpe`` by keyword.
    """
    grammar_table, terminal_table = read_bpe(model_path=model_path)

    # Both tables are expanded with ``minus_log_based=True``; the grammar
    # table uses the 1-D expander, the terminal table the 2-D one.
    self.__grammars = expand_1d(grammar_table, minus_log_based=True)
    self.__terminals = expand_2d(terminal_table, minus_log_based=True)

    # ``count_luds`` is given the grammar table as read, not the expanded
    # one, and yields a pair.
    lud_pair = count_luds(grammar_table)
    self.__converted, self.__not_parsed = lud_pair