def wiki_dump_from_gs():
    """Build a Wikipedia-relations dump for the gold-standard mention vocabulary.

    Loads mention phrases from ``args.mentions``, looks each phrase up first in
    a Wikipedia ElasticSearch index (used only when ``args.host``, ``args.port``
    and ``args.index`` are all provided) and falls back to the online Wikipedia
    service when elastic is unavailable or returns no pages.  Matched pages are
    accumulated via ``add_page`` into the module-level ``result_dump``, which is
    finally written as JSON to ``args.output``.
    """
    logger.info('Starting, process will connect with ElasticSearch and online wikipedia site...')
    mentions_files = [args.mentions]
    dump_file = args.output
    vocab = load_mentions_vocab_from_files(mentions_files)

    if args.host and args.port and args.index:
        wiki_elastic = WikipediaRelationExtraction(WikipediaSearchMethod.ELASTIC,
                                                   host=args.host,
                                                   port=args.port,
                                                   index=args.index)
    else:
        logger.info(
            'Running without Wikipedia elastic search, Note that this will '
            'take much longer to process only using online service')
        wiki_elastic = None

    wiki_online = WikipediaRelationExtraction(WikipediaSearchMethod.ONLINE)

    for phrase in vocab:
        # Strip quote/backslash characters that would corrupt the search query.
        phrase = phrase.replace("'", "").replace('"', "").replace('\\', "").strip()
        logger.info('Try to retrieve \'%s\' from elastic search', phrase)
        pages = None
        if wiki_elastic:
            pages = wiki_elastic.get_phrase_related_pages(phrase)
        # Fall back to the online service when elastic is off or found nothing.
        # (`not pages.get_pages()` already covers the empty-list case; the old
        # extra len()==0 check was redundant.)
        if not pages or not pages.get_pages():
            logger.info('Not on elastic, retrieve \'%s\' from wiki online site', phrase)
            pages = wiki_online.get_phrase_related_pages(phrase)
        for search_page in pages.get_pages():
            add_page(search_page, phrase)

    # Log before writing so an error during the (potentially large) JSON dump
    # still reveals which file was being produced.
    logger.info('Saving dump to file-%s', dump_file)
    with open(dump_file, 'w') as myfile:
        json.dump(result_dump, myfile, default=json_dumper)
def glove_dump():
    """Pickle the GloVe embeddings restricted to the mention vocabulary.

    The vocabulary is read from ``args.mentions`` with stop-word filtering
    disabled; the matching GloVe vectors from ``args.glove`` are serialized as
    a ``[word_to_ix, embeddings]`` pair into ``args.output``.
    """
    vocabulary = load_mentions_vocab_from_files([args.mentions], False)
    word_to_ix, embeddings = load_glove_for_vocab(args.glove, vocabulary)

    logger.info('Words in vocabulary %d', len(vocabulary))
    logger.info('Found %d words from vocabulary', len(word_to_ix))

    destination = args.output
    with open(destination, 'wb') as handle:
        pickle.dump([word_to_ix, embeddings], handle)
    logger.info('Saving dump to file-%s', destination)
def vo_dump():
    """Dump the VerbOcean relations that match the event mention vocabulary.

    Loads the vocabulary from ``args.mentions`` (stop words filtered), keeps
    only the VerbOcean entries from ``args.vo`` whose key occurs in the
    vocabulary, and writes the resulting mapping as JSON to ``args.output``.
    """
    vocabulary = load_mentions_vocab_from_files([args.mentions], True)
    verbocean = VerboceanRelationExtraction.load_verbocean_file(args.vo)
    # Restrict the full VerbOcean table to terms present in the vocabulary.
    vo_for_vocab = {term: verbocean[term] for term in vocabulary if term in verbocean}

    logger.info('Found %d words from vocabulary', len(vo_for_vocab))
    logger.info('Preparing to save refDict output file')

    destination = args.output
    with open(destination, 'w') as handle:
        json.dump(vo_for_vocab, handle)
    logger.info('Done saved to-%s', destination)
def ref_dict_dump():
    """Dump the referent-dict entries that match the entity mention vocabulary.

    Loads the vocabulary from ``args.mentions`` (stop words filtered), keeps
    only the referent-dict entries from ``args.ref_dict`` whose key occurs in
    the vocabulary, and writes the resulting mapping as JSON to ``args.output``.
    """
    logger.info('Extracting referent dict dump, this may take a while...')
    vocabulary = load_mentions_vocab_from_files([args.mentions], True)
    reference_dict = ReferentDictRelationExtraction.load_reference_dict(args.ref_dict)

    # Restrict the full referent dict to terms present in the vocabulary.
    ref_dict_for_vocab = {term: reference_dict[term]
                          for term in vocabulary if term in reference_dict}

    logger.info('Found %d words from vocabulary', len(ref_dict_for_vocab))
    logger.info('Preparing to save refDict output file')

    destination = args.output
    with open(destination, 'w') as handle:
        json.dump(ref_dict_for_vocab, handle)
    logger.info('Done saved to-%s', destination)