Пример #1
0
def main(fstpath, path_in, path_out):
    """Run ``analyze_line`` over every matching input file with a worker pool.

    Each input file is read fully, its lines are mapped through
    ``analyze_line`` (with the FST bound as first argument) in a pool of
    16 processes, and the results are joined with newlines and written to
    a file of the same basename under ``path_out``.

    Args:
        fstpath: Forwarded as the first argument to ``fstinter.FST``
            (presumably the path of the FST to load -- confirm).
        path_in: Glob pattern selecting the input files.
        path_out: Directory receiving one output file per input file.
    """
    # Managing the pool with ``with`` guarantees the worker processes are
    # shut down when we leave the block, even if analysis or I/O raises.
    with multiprocessing.Pool(processes=16) as pool:
        fst = fstinter.FST(fstpath, get_cost)
        fst_analyze_line = partial(analyze_line, fst)
        for fnin in glob.glob(path_in):
            fnout = os.path.join(path_out, os.path.basename(fnin))
            with open(fnin, encoding='utf-8') as fin:
                lines = fin.readlines()
            # pool.map blocks until all lines are done and preserves order,
            # so the input file can already be closed at this point.
            out_data = '\n'.join(pool.map(fst_analyze_line, lines))
            with open(fnout, 'w', encoding='utf-8') as fout:
                fout.write(out_data)
Пример #2
0
def main(fstpath, path_in, path_out):
    """Analyze every file matched by ``path_in`` line by line.

    For each input line the whitespace-separated tokens are passed to the
    FST's ``analyses`` method; every resulting token is written as
    ``w:<token>~l:<lemma>~m:<props>`` where ``<props>`` is all morphs after
    the first one joined by ``+``. One output line is written per input
    line, into a file of the same basename under ``path_out``.

    Args:
        fstpath: Forwarded as the first argument to ``fstinter.FST``
            (presumably the path of the FST to load -- confirm).
        path_in: Glob pattern selecting the input files.
        path_out: Directory receiving one output file per input file.
    """
    analyzer = fstinter.FST(fstpath, get_cost)
    entry_template = 'w:{}~l:{}~m:{}'
    for src_path in glob.glob(path_in):
        print("Analyzing {}...".format(src_path))
        dst_path = os.path.join(path_out, os.path.basename(src_path))
        with open(src_path, encoding='utf-8') as src:
            with open(dst_path, 'w', encoding='utf-8') as dst:
                # Progress bar over the input lines; the total comes from
                # the get_num_lines helper.
                progress = tqdm.tqdm(src, total=get_num_lines(src_path))
                for raw_line in progress:
                    words = raw_line.strip().split()
                    entries = [
                        entry_template.format(
                            word,
                            lemma,
                            # Drop the first morph; nothing is left when
                            # fewer than two morphs are present.
                            '+'.join(morphs[1:] if len(morphs) >= 2 else []),
                        )
                        for word, (morphs, lemma) in analyzer.analyses(words)
                    ]
                    dst.write(' '.join(entries) + '\n')
Пример #3
0
 def __init__(self, fstpath):
     """Build the FST object and keep it on the instance as ``self.fst``.

     Args:
         fstpath: Forwarded as the first argument to ``fstinter.FST``
             (presumably the path of the FST to load -- confirm).
     """
     analyzer = fstinter.FST(fstpath, get_cost)
     self.fst = analyzer