def get_trigram(mode, use_raw=False, corpi=CORPI, name_stem=''):
    """Return a Trigram model for *mode*, built from corpora or read from disk.

    The model file lives in TRIGRAM_MODEL_DIR and is named
    ``<name_stem><mode>.txt``.

    Args:
        mode: Mode passed to ``load_corpi`` / ``Trigram`` and interpolated
            into the model file name.
        use_raw: When true, build the model with ``load_corpi(mode, corpi)``
            and hand the model file path to ``save_trigrams``. Otherwise
            create ``Trigram(mode=mode)`` and hand the same path to
            ``load_trigrams``.
        corpi: Corpus descriptors forwarded to ``load_corpi``; only used
            when ``use_raw`` is true.
        name_stem: Prefix for the model file name.

    Returns:
        The Trigram instance that was built or loaded.
    """
    if use_raw:
        model = load_corpi(mode, corpi)
        file_op = model.save_trigrams
    else:
        model = Trigram(mode=mode)
        file_op = model.load_trigrams

    # Both branches address the same model file; only the direction differs.
    file_op(os.path.join(TRIGRAM_MODEL_DIR, '%s%s.txt' % (name_stem, mode)))
    return model
def load_corpi(mode, corpi=CORPI):
    """Feed every corpus in *corpi* into a Trigram model and return it.

    Args:
        mode: Either an existing Trigram instance, which is extended in
            place and whose ``.mode`` is used for path lookups, or a mode
            value used to construct a fresh ``Trigram(mode=mode)``.
        corpi: Iterable of ``(base, gz, precooked)`` triples. Precooked
            entries are passed to ``load_trigrams`` via
            ``cooked_corpi_path(base, mode, gz)``; the others are passed to
            ``import_text`` via ``raw_corpi_path(base, gz)``.

    Returns:
        The Trigram instance that received all the corpora.
    """
    if isinstance(mode, Trigram):
        # Caller supplied a model: keep it and recover its mode.
        model, mode = mode, mode.mode
    else:
        model = Trigram(mode=mode)

    for entry in corpi:
        base, gz, precooked = entry
        if not precooked:
            raw_path = raw_corpi_path(base, gz)
            model.import_text(raw_path)
            continue
        model.load_trigrams(cooked_corpi_path(base, mode, gz))

    return model
def trigrams(self):
    """Return two Trigram objects built from ``self.lines``.

    The first is built with ``Trigram.from_lines`` from the first three
    entries of ``self.lines``, the second from all remaining entries.

    Returns:
        A 2-tuple ``(first, second)`` of the ``Trigram.from_lines`` results.
    """
    leading = Trigram.from_lines(*self.lines[:3])
    trailing = Trigram.from_lines(*self.lines[3:])
    return leading, trailing