示例#1
0
def load_words(filepath: str):
    """Build one item per new word found in a space-separated words file.

    Each non-blank line of ``filepath`` is stripped and split on single
    spaces; the first field is treated as the word. A line is skipped when:

    * it has more than five fields (reported via ``print``),
    * ``contain_alpha`` or ``contain_symbols`` flags the word (reported
      via ``print``),
    * the word is already in the set returned by ``get_exists_words()``
      or was accepted earlier in the same file.

    Accepted words are added to that set, so duplicates inside the file
    yield a single item.

    Args:
        filepath: Path of the UTF-8 encoded words file.

    Returns:
        A list of the values returned by ``cols_to_tangshi_item``, in file
        order.
    """
    known_words = get_exists_words()

    to_xhe = get_full_to_xhe_transformer()
    to_zrm = get_full_to_zrm_transformmer()
    to_lu = get_full_to_lu_transformmer()
    to_bingji = get_full_to_bingji_transformer()

    items = []
    with open(filepath, 'r', encoding='utf8') as source:
        for raw_line in source:
            entry = raw_line.strip()
            if not entry:
                continue

            fields = entry.split(" ")
            if len(fields) > 5:
                print(f"wrong line {entry}")
                continue

            word = fields[0]
            if contain_alpha(word) or contain_symbols(word):
                print(f"contains num or symbols {entry}")
            elif word not in known_words:
                # Argument order follows the callee: bingji comes before lu.
                item = cols_to_tangshi_item(fields, to_xhe, to_zrm,
                                            to_bingji, to_lu)
                items.append(item)
                known_words.add(word)

    return items
示例#2
0
    # Expect exactly one command-line argument (the words file path); the
    # two-name unpacking raises ValueError for any other argument count.
    _, words_path = sys.argv

    # NOTE(review): this empty set is overwritten by the next statement.
    exist_words = set()
    # Collect the `word` attribute of every row selected from WordPhoneTable.
    # NOTE(review): `map`/`filter` are called with only a function here, so
    # they are presumably curried versions (e.g. toolz.curried) -- confirm
    # against the file's imports.
    exist_words = pipe(WordPhoneTable.select(),
                       map(lambda e: e.word),
                       set
                       )

    # Union in the `word` of every row selected from DelWordTable, so those
    # words are also excluded by the membership filter below.
    exist_words = exist_words | pipe(DelWordTable.select(),
                                     map(lambda e: e.word),
                                     set
                                     )

    xhe_transformer = get_full_to_xhe_transformer();
    zrm_transformer = get_full_to_zrm_transformmer();
    # NOTE(review): lu_transformer is not used in the visible part of this
    # block.
    lu_transformer = get_full_to_lu_transformmer();

    with open(words_path, "r", encoding='utf8') as fin:
        # Pipeline over the lines of the file; each stage receives the list
        # of columns produced by the first stage.
        to_add_words = pipe(fin,
                            # Strip the line and split it on single spaces.
                            # NOTE(review): a blank line becomes [''] and is
                            # not rejected by the filters shown unless ''
                            # is in exist_words -- confirm this is intended.
                            map(lambda e: e.strip().split(' ')),
                            # filter(lambda e: len(e) in (1, 2)),
                            # Keep rows whose first column has at most five
                            # characters.
                            filter(lambda e: len(e[0]) <= 5),
                            # Drop rows whose first column is flagged by
                            # contain_alpha or contain_symbols.
                            filter(lambda e: not contain_alpha(
                                e[0]) and not contain_symbols(e[0])),
                            # Drop rows whose first column is already in
                            # exist_words.
                            filter(lambda e: e[0] not in exist_words),
                            # Convert the remaining columns with
                            # cols_to_word_phone_table.
                            map(lambda e: cols_to_word_phone_table(e, xhe_transformer, zrm_transformer))
                            )

        # The insert happens while `fin` is still open; if the pipeline is
        # lazy (TODO confirm), it reads the file during bulk_create.
        # NOTE(review): db.atomic() is presumably a transaction context
        # manager -- confirm against the database layer in use.
        with db.atomic():
            WordPhoneTable.bulk_create(to_add_words, batch_size=100)
示例#3
0
def main():
    """Run the lu character check, then the lu word check, then print "done".

    Both checks receive the same transformer obtained from
    ``get_full_to_lu_transformmer()``.
    """
    lu_transformer = get_full_to_lu_transformmer()

    check_lu_char(lu_transformer)
    check_lu_word(lu_transformer)

    print("done")