Exemplo n.º 1
0
def compile_dict(in_filename, out_path=None, overwrite=False, prediction_options=None):
    """
    Build a Pymorphy2 dictionary out of an OpenCorpora .xml dictionary.

    ``in_filename`` is handed to the converter as the OpenCorpora
    dictionary path. When ``out_path`` is None the output path falls
    back to ``"dict"``. ``overwrite`` and ``prediction_options`` are
    forwarded to ``opencorpora_dict.convert_to_pymorphy2`` unchanged.
    """
    # Only None triggers the fallback; any other value is used as given.
    target_path = "dict" if out_path is None else out_path

    opencorpora_dict.convert_to_pymorphy2(
        opencorpora_dict_path=in_filename,
        out_path=target_path,
        overwrite=overwrite,
        prediction_options=prediction_options,
    )
Exemplo n.º 2
0
def compile_dict(in_filename, out_path=None, overwrite=False, prediction_options=None):
    """
    Convert an OpenCorpora .xml dictionary into a Pymorphy2 dictionary.

    All work is delegated to ``opencorpora_dict.convert_to_pymorphy2``.
    If ``out_path`` is None, ``'dict'`` is used as the output path;
    the remaining arguments are passed through as they were received.
    """
    # Collect the converter's keyword arguments first, then make one call.
    conversion_kwargs = {
        'opencorpora_dict_path': in_filename,
        'out_path': 'dict' if out_path is None else out_path,
        'overwrite': overwrite,
        'prediction_options': prediction_options,
    }
    opencorpora_dict.convert_to_pymorphy2(**conversion_kwargs)
Exemplo n.º 3
0
        if args['--clear']:
            shutil.rmtree(out_path)
        else:
            logger.error("Output path exists: %r", out_path)
            sys.exit(1)

    compile_options = dict(
        (key.replace('-', '_'), int(args['--' + key]))
        for key in ('min-ending-freq', 'min-paradigm-popularity', 'max-suffix-length')
    )
    compile_options["paradigm_prefixes"] = lang.PARADIGM_PREFIXES

    opencorpora_dict.convert_to_pymorphy2(
        opencorpora_dict_path=dict_xml,
        out_path=out_path,
        source_name=args['--source-name'],
        language_code=args['--lang'],
        compile_options=compile_options,
    )

    if args["--corpus"]:
        add_conditional_tag_probability(
            corpus_filename=args["--corpus"],
            out_path=out_path,
            min_word_freq=int(args['--min-word-freq']),
            logger=logger,
        )
        rev = get_corpus_revision(args["--corpus"])
        meta_filename = os.path.join(out_path, "meta.json")
        update_meta(meta_filename, {"corpus_revision": rev})
Exemplo n.º 4
0
            shutil.rmtree(out_path)
        else:
            logger.error("Output path exists: %r", out_path)
            sys.exit(1)

    compile_options = dict(
        (key.replace('-', '_'), int(args['--' + key]))
        for key in ('min-ending-freq', 'min-paradigm-popularity', 'max-suffix-length')
    )
    # compile_options["paradigm_prefixes"] = lang.PARADIGM_PREFIXES
    compile_options["paradigm_prefixes"] = ['', 'naj']  # not sure, maybe just ['']?

    opencorpora_dict.convert_to_pymorphy2(
        opencorpora_dict_path=dict_xml,
        out_path=out_path,
        source_name=args['--source-name'],
        language_code=args['--lang'],
        compile_options=compile_options,
    )

    if args["--corpus"]:
        add_conditional_tag_probability(
            corpus_filename=args["--corpus"],
            out_path=out_path,
            min_word_freq=int(args['--min-word-freq']),
            logger=logger,
        )
        rev = get_corpus_revision(args["--corpus"])
        meta_filename = os.path.join(out_path, "meta.json")
        update_meta(meta_filename, {"corpus_revision": rev})