def extract_named_entities():
    """Run every entity-extraction stage that is switched on in ``flags.arg``.

    The stages are checked in a fixed order, and each enabled one builds its
    own ``entity.EntityWorkflow`` and executes it via ``workflow.run``:

    1. ``extract_wikilinks`` -- extract the Wikipedia link graph.
    2. ``build_idf``         -- add one IDF-table task per language in
                                ``flags.arg.languages`` to a single workflow.
    3. ``fuse_ner_items``    -- fuse NER items.
    4. ``build_ner_kb``      -- build the NER knowledge base.
    """

    def launch(task_name, configure, banner=None):
        # Shared stage skeleton: optional announcement, then create the
        # workflow, let the caller add its tasks, and finally execute it.
        if banner is not None:
            log.info(banner)
        pipeline = entity.EntityWorkflow(task_name)
        configure(pipeline)
        workflow.run(pipeline.wf)

    def add_idf_tasks(pipeline):
        # One log line and one IDF task per language; all of the tasks are
        # executed together by the single run that follows.
        for lang in flags.arg.languages:
            log.info("Build " + lang + " IDF table")
            pipeline.build_idf(language=lang)

    # Wikipedia link graph.
    if flags.arg.extract_wikilinks:
        launch(
            "wiki-links",
            lambda pipeline: pipeline.extract_wikilinks(),
            banner="Extract Wikipedia link graph",
        )

    # IDF tables (no stage-level banner; logging happens per language).
    if flags.arg.build_idf:
        launch("idf-table", add_idf_tasks)

    # Fused NER items.
    if flags.arg.fuse_ner_items:
        launch(
            "fuse-ner-items",
            lambda pipeline: pipeline.fuse_items(),
            banner="Fuse NER items",
        )

    # NER knowledge base.
    if flags.arg.build_ner_kb:
        launch(
            "ner-knowledge-base",
            lambda pipeline: pipeline.build_knowledge_base(),
            banner="Build NER knowledge base",
        )
def sling_entity_link(sling_input_corpus, sling_output_corpus):
    """Run SLING entity linking and create the linked output corpus.

    Both corpora are registered as "records/document" resources on a
    "wiki-label" entity workflow, a document labeling step is wired from the
    input to the output, and the workflow is then executed.

    Args:
      sling_input_corpus: Resource specification of the documents to link.
      sling_output_corpus: Resource specification for the linked documents.
    """
    record_format = "records/document"
    linker = entity.EntityWorkflow("wiki-label")

    # Register the resources in order: input corpus first, then output corpus.
    source_docs, linked_docs = [
        linker.wf.resource(corpus, format=record_format)
        for corpus in (sling_input_corpus, sling_output_corpus)
    ]

    linker.label_documents(indocs=source_docs, outdocs=linked_docs)
    workflow.run(linker.wf)