def compile_dict(in_filename, out_path=None, overwrite=False, prediction_options=None):
    """
    Build a Pymorphy2 dictionary from an OpenCorpora .xml dictionary.

    The work is delegated to ``opencorpora_dict.convert_to_pymorphy2``.

    :param in_filename: passed on as ``opencorpora_dict_path``.
    :param out_path: output location; ``None`` selects the default ``"dict"``.
    :param overwrite: forwarded unchanged to the converter.
    :param prediction_options: forwarded unchanged to the converter.
    :return: ``None``; the converter's result is not returned.
    """
    # Only None triggers the default; any other value is used as given.
    target_path = "dict" if out_path is None else out_path

    opencorpora_dict.convert_to_pymorphy2(
        opencorpora_dict_path=in_filename,
        out_path=target_path,
        overwrite=overwrite,
        prediction_options=prediction_options,
    )
def compile_dict(in_filename, out_path=None, overwrite=False, prediction_options=None):
    """
    Convert an OpenCorpora .xml dictionary into a Pymorphy2 dictionary.

    This is a thin wrapper around ``opencorpora_dict.convert_to_pymorphy2``:
    every argument is handed over by keyword, and a missing ``out_path``
    (``None``) is replaced with ``'dict'``.

    :param in_filename: given to the converter as ``opencorpora_dict_path``.
    :param out_path: output location, or ``None`` for the default.
    :param overwrite: handed to the converter as-is.
    :param prediction_options: handed to the converter as-is.
    :return: ``None``.
    """
    # Collect the keyword arguments first so the call itself stays trivial.
    converter_kwargs = {
        'opencorpora_dict_path': in_filename,
        'out_path': 'dict' if out_path is None else out_path,
        'overwrite': overwrite,
        'prediction_options': prediction_options,
    }
    opencorpora_dict.convert_to_pymorphy2(**converter_kwargs)
if args['--clear']: shutil.rmtree(out_path) else: logger.error("Output path exists: %r", out_path) sys.exit(1) compile_options = dict( (key.replace('-', '_'), int(args['--' + key])) for key in ('min-ending-freq', 'min-paradigm-popularity', 'max-suffix-length') ) compile_options["paradigm_prefixes"] = lang.PARADIGM_PREFIXES opencorpora_dict.convert_to_pymorphy2( opencorpora_dict_path=dict_xml, out_path=out_path, source_name=args['--source-name'], language_code=args['--lang'], compile_options=compile_options, ) if args["--corpus"]: add_conditional_tag_probability( corpus_filename=args["--corpus"], out_path=out_path, min_word_freq=int(args['--min-word-freq']), logger=logger, ) rev = get_corpus_revision(args["--corpus"]) meta_filename = os.path.join(out_path, "meta.json") update_meta(meta_filename, {"corpus_revision": rev})
shutil.rmtree(out_path) else: logger.error("Output path exists: %r", out_path) sys.exit(1) compile_options = dict( (key.replace('-', '_'), int(args['--' + key])) for key in ('min-ending-freq', 'min-paradigm-popularity', 'max-suffix-length') ) # compile_options["paradigm_prefixes"] = lang.PARADIGM_PREFIXES compile_options["paradigm_prefixes"] = ['', 'naj'] # not sure, maybe just ['']? opencorpora_dict.convert_to_pymorphy2( opencorpora_dict_path=dict_xml, out_path=out_path, source_name=args['--source-name'], language_code=args['--lang'], compile_options=compile_options, ) if args["--corpus"]: add_conditional_tag_probability( corpus_filename=args["--corpus"], out_path=out_path, min_word_freq=int(args['--min-word-freq']), logger=logger, ) rev = get_corpus_revision(args["--corpus"]) meta_filename = os.path.join(out_path, "meta.json") update_meta(meta_filename, {"corpus_revision": rev})