示例#1
0
def load_mrd(dir):
    """Build, load and return an ``MrdDataSource`` for the files in *dir*.

    The data source is constructed from ``morphs.utf8.mrd`` and
    ``gramtab.utf8.mrd`` located directly inside *dir*, with
    ``strip_EE=True``.  After ``load()`` finishes, rule frequencies are
    computed via ``calculate_rule_freq()`` and the same object is returned.
    Progress messages are printed before each of the two steps.
    """
    print('parsing source dictionary file...')
    morphs_path = os.path.join(dir, 'morphs.utf8.mrd')
    gramtab_path = os.path.join(dir, 'gramtab.utf8.mrd')
    source = MrdDataSource(morphs_path, gramtab_path, strip_EE=True)
    source.load()

    print('calculating rule frequencies...')
    source.calculate_rule_freq()

    return source
示例#2
0
                 coding, 'utf8')

    convert_file(os.path.join(src_dir, 'Morph', gramtab),
                 os.path.join(dest_dir, lang, 'gramtab.utf8.mrd'),
                 coding, 'utf8')


def cleanup_after_convert(dir):
    """Delete the intermediate UTF-8 ``.mrd`` files from *dir*.

    Removes ``morphs.utf8.mrd`` first and ``gramtab.utf8.mrd`` second.
    ``os.unlink`` raises if a file is missing, so a failure on the first
    file leaves the second one in place.  A separator line is printed once
    both files are gone.
    """
    print('cleaning up...')
    # Order matters only for which file survives if an unlink fails.
    for leftover in ('morphs.utf8.mrd', 'gramtab.utf8.mrd'):
        os.unlink(os.path.join(dir, leftover))
    print("========")


if __name__ == '__main__':
    MrdDataSource.setup_psyco()

    src_dir = 'dicts/src/Dicts'
    dest_dir = 'dicts/converted'

    LANGUAGES = 'en', 'ru'
    FORMATS = 'cdb', 'sqlite', # 'tinycdb', 'cdblib', # 'shelve',

    for lang in LANGUAGES:
        convert_dicts(src_dir, dest_dir, lang)
        dest = os.path.join(dest_dir, lang)

        mrd = load_mrd(dest)
        make_pickled(dest, mrd)

        for fmt in FORMATS: