# Example #1
0
def extract_named_entities():
  """Run the named-entity extraction stages enabled on flags.arg.

  Every stage is gated by its own flag and, when enabled, builds a separate
  entity.EntityWorkflow that is executed with workflow.run before the next
  stage is considered. The stages, in order:

    1. extract_wikilinks: Wikipedia link graph extraction.
    2. build_idf: IDF tables; build_idf is called once per language in
       flags.arg.languages and all of them run in one workflow.
    3. fuse_ner_items: NER item fusion.
    4. build_ner_kb: NER knowledge base construction.
  """
  args = flags.arg

  # Stage 1: Wikipedia link graph.
  if args.extract_wikilinks:
    log.info("Extract Wikipedia link graph")
    links = entity.EntityWorkflow("wiki-links")
    links.extract_wikilinks()
    workflow.run(links.wf)

  # Stage 2: IDF tables, one build_idf call per configured language, all
  # scheduled on the same workflow and run together.
  if args.build_idf:
    idf = entity.EntityWorkflow("idf-table")
    for lang in args.languages:
      log.info("Build " + lang + " IDF table")
      idf.build_idf(language=lang)
    workflow.run(idf.wf)

  # Stage 3: fused NER items.
  if args.fuse_ner_items:
    log.info("Fuse NER items")
    fuser = entity.EntityWorkflow("fuse-ner-items")
    fuser.fuse_items()
    workflow.run(fuser.wf)

  # Stage 4: NER knowledge base.
  if args.build_ner_kb:
    log.info("Build NER knowledge base")
    kb = entity.EntityWorkflow("ner-knowledge-base")
    kb.build_knowledge_base()
    workflow.run(kb.wf)
# Example #2
0
def sling_entity_link(sling_input_corpus, sling_output_corpus):
    """Run SLING entity linking and create the linked output corpus.

    Both corpora are registered on a "wiki-label" entity workflow as
    resources with the "records/document" format; the workflow's
    label_documents step is wired from the input to the output and the
    workflow is then run.

    Args:
      sling_input_corpus: resource location of the unannotated documents.
      sling_output_corpus: resource location for the annotated documents.
    """
    doc_format = "records/document"
    linker = entity.EntityWorkflow("wiki-label")

    # Declare the input and output document resources on the workflow.
    source_docs = linker.wf.resource(sling_input_corpus, format=doc_format)
    linked_docs = linker.wf.resource(sling_output_corpus, format=doc_format)

    # Wire the labeling step between them, then execute.
    linker.label_documents(indocs=source_docs, outdocs=linked_docs)
    workflow.run(linker.wf)