Пример #1
0
import sys
from tables import db, CharShapeTable
from toolz.curried import pipe, map, filter
from common import for_each

if __name__ == "__main__":
    # Consistency checks on CharShapeTable.
    #
    # Check 1 (fatal): no row may have an empty `lu_shapes` value. Only the
    # number of offending rows is reported, so let the database count them
    # instead of loading every row into memory.
    # NOTE(review): the message says "null" but the filter matches the empty
    # string; rows whose lu_shapes is SQL NULL would not be counted here —
    # confirm the column cannot be NULL.
    empty_lu_shape_count = (
        CharShapeTable.select()
        .where(CharShapeTable.lu_shapes == '')
        .count()
    )
    if empty_lu_shape_count:
        print(f"{empty_lu_shape_count} null lushape items.",
              file=sys.stderr)
        sys.exit(1)

    # Check 2 (report only): list every row whose `shapes` differs from its
    # `lu_shapes`. The rows themselves are printed, so they are materialized.
    mismatched_items = list(
        CharShapeTable.select().where(
            CharShapeTable.shapes != CharShapeTable.lu_shapes))
    if mismatched_items:
        print(f"{len(mismatched_items)} lu shape != xhe shapes.",
              file=sys.stderr)
        # The summary goes to stderr; the individual rows go to stdout,
        # exactly as before.
        for item in mismatched_items:
            print(item)

    print("done")
    # NOTE(review): `eles` and `proc` are not defined anywhere in the visible
    # source. This loop looks like the tail of a helper whose `def` line is
    # missing from this view — confirm before relying on it.
    # As written, it calls `proc` once for each element of `eles`, in order.
    for e in eles:
        proc(e)


if __name__ == "__main__":
    # Script entry point: builds char -> shape and char -> phones lookup
    # tables from the database and prepares an output file path under a
    # fixed output directory.
    # NOTE(review): the imports for Path, os, reduceby, valmap, groupby,
    # CharPhoneTable and the generate_* helpers are not visible in this
    # view — presumably they sit above this block; confirm.

    # The script takes no command-line arguments; anything extra is rejected.
    if len(sys.argv) != 1:
        print("USAGE: python3 generate_dd_txt.py ")
        sys.exit(1)

    # `fname` is the script path (argv[0]); it is not used in the visible
    # lines. The output directory name is hard-coded.
    fname, output_dir = sys.argv[0], "zrm_phone_xhe_shape"

    # Create the output directory on first run.
    if not Path(output_dir).exists():
        os.makedirs(output_dir)

    # Map each char to a single `shapes` value. The reducer returns its
    # first argument, so (assuming `reduceby` is toolz's curried reduceby)
    # the first (char, shapes) pair seen for a char wins and later
    # duplicates are ignored.
    char_to_shape = pipe(CharShapeTable.select(),
                         map(lambda e: (e.char, e.shapes)),
                         reduceby(lambda e: e[0], lambda e1, e2: e1),
                         valmap(lambda e: e[1]), dict)
    print(f"total {len(char_to_shape)} char shapes")

    # Map each char to the list of all its `zrm` values: group the
    # (char, zrm) pairs by char, then keep only the zrm part of each pair.
    char_to_phones = pipe(CharPhoneTable.select(),
                          map(lambda e: (e.char, e.zrm)),
                          groupby(lambda e: e[0]),
                          valmap(lambda phones: [e[1] for e in phones]), dict)
    print(f"total {len(char_to_phones)} char phones")

    # Both helpers are defined outside this view; 60000 is passed to each
    # (presumably a limit or threshold — TODO confirm against the helpers).
    one_hit_char_items = generate_one_hit_char(60000)
    top_single_chars_items = generate_topest_char(char_to_phones, 60000)
    sys_top_chars_data = f"{output_dir}/sys_top_chars_data.txt"
    # NOTE(review): the `with` statement on the next line has no body in the
    # visible source — the rest of this script appears to be missing.
    with open(sys_top_chars_data, 'w', encoding='utf8') as fout:
Пример #3
0
if __name__ == "__main__":
    # Load tab-separated "char<TAB>shapes" rows from the file named on the
    # command line into CharShapeTable. Blank lines are ignored, lines that
    # do not have exactly two columns are reported and skipped, and rows
    # already present in the table are reported and left untouched.
    if len(sys.argv) != 2:
        print("USAGE: python3 dump_char_shape_table.py char_shape.txt")
        sys.exit(1)

    char_shape_path = sys.argv[1]
    with open(char_shape_path, 'r', encoding='utf8') as fin:
        for raw_line in fin:
            line = raw_line.strip()
            if not line:
                continue

            fields = line.split('\t')
            if len(fields) != 2:
                print(f"ERROR line {line} in file {char_shape_path}")
                continue

            # Exactly two columns are guaranteed at this point.
            char, shapes = (field.strip() for field in fields)

            # Skip the insert when an identical (char, shapes) row exists.
            already_stored = CharShapeTable.select().where(
                CharShapeTable.char == char,
                CharShapeTable.shapes == shapes).count() > 0
            if already_stored:
                print(f"WARNING: char shape already exists, {line}")
                continue

            new_row = CharShapeTable(char=char,
                                     shapes=shapes,
                                     priority=1,
                                     updatedt=datetime.now())
            new_row.save()
    print('done')