示例#1
0
def test_displacy_parse_ents(en_vocab):
    """Verify displaCy's entity parse of a Doc, without and with a KB ID.

    A single ORG span covering the second token is set on the Doc twice:
    first with no ``kb_id``, then with ``kb_id="Q95"``. Each time the parsed
    result must be a dict holding the expected text and exactly one entity
    entry whose ``kb_url`` is ``"#"``.
    """
    doc = Doc(
        en_vocab, words=["But", "Google", "is", "starting", "from", "behind"]
    )
    # Each scenario: (extra Span keyword arguments, kb_id expected in output).
    scenarios = (
        ({}, ""),
        ({"kb_id": "Q95"}, "Q95"),
    )
    for span_kwargs, expected_kb_id in scenarios:
        org_span = Span(doc, 1, 2, label=doc.vocab.strings["ORG"], **span_kwargs)
        doc.ents = [org_span]
        parsed = displacy.parse_ents(doc)
        expected_entity = {
            "start": 4,
            "end": 10,
            "label": "ORG",
            "kb_id": expected_kb_id,
            "kb_url": "#",
        }
        assert isinstance(parsed, dict)
        assert parsed["text"] == "But Google is starting from behind "
        assert parsed["ents"] == [expected_entity]
示例#2
0
def test_displacy_parse_ents(en_vocab):
    """Check the displaCy entity format produced for a Doc with one ORG span."""
    tokens = ["But", "Google", "is", "starting", "from", "behind"]
    doc = get_doc(en_vocab, words=tokens)
    org_label = doc.vocab.strings["ORG"]
    doc.ents = [Span(doc, 1, 2, label=org_label)]

    parsed = displacy.parse_ents(doc)

    expected_entity = {"start": 4, "end": 10, "label": "ORG"}
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    assert parsed["ents"] == [expected_entity]
示例#3
0
def test_displacy_parse_ents_with_kb_id_options(en_vocab):
    """Check that a ``kb_url_template`` option is reflected in the entity parse.

    The Doc carries one ORG span with ``kb_id="Q95"``; parsing it with a
    Wikidata URL template must yield a ``kb_url`` with that ID filled in.
    """
    tokens = ["But", "Google", "is", "starting", "from", "behind"]
    doc = Doc(en_vocab, words=tokens)
    org_label = doc.vocab.strings["ORG"]
    doc.ents = [Span(doc, 1, 2, label=org_label, kb_id="Q95")]

    parse_options = {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
    parsed = displacy.parse_ents(doc, parse_options)

    expected_entity = {
        "start": 4,
        "end": 10,
        "label": "ORG",
        "kb_id": "Q95",
        "kb_url": "https://www.wikidata.org/wiki/Q95",
    }
    assert isinstance(parsed, dict)
    assert parsed["text"] == "But Google is starting from behind "
    assert parsed["ents"] == [expected_entity]
示例#4
0
def render_ner(text):
    """Return displaCy's entity parse of ``text``.

    ``text`` is first passed through the module-level ``NLP`` callable; the
    result of that call is handed to ``displacy.parse_ents``.
    """
    return displacy.parse_ents(NLP(text))
示例#5
0
import json

# Input text
text = """Fycompa 4 mg film-coated tablets
crocine 12 mg film-coated tablets 
Quadrameterer 1.3 GBq/mL solution for injection 
Topotecan123 Actavis 1mg powder for concentrate for solution 
4mg –packs of 7, 28, 84 and 98
Topotecan2344 Actavis 2mg powder for concentrate for solution
Topotecan45 Actavis 1mg powder for concentrate for solution
"""

# Load the trained model and run it over the input text
nlp = spacy.load("cogna")
doc = nlp(text)
options = {
    "ents": ["PACKAGE_ITEM_QTY", "Tablet", "injection"],
    "colors": {"PACKAGE_ITEM_QTY": "Red", "Tablet": "Yellow"},
}
#print(displacy.render(doc, style="dep", page=False, minify=False, jupyter=None, options=options, manual=False))

# Dump displaCy's entity parse of the document as JSON
results = displacy.parse_ents(doc, options=options)
print(json.dumps(results))


# NER output data as a DataFrame (tabular format): one row per entity
param = [[ent.text, ent.label_] for ent in doc.ents]
headers = ['Entity', 'Category']
# Supplying the column names at construction time also works when no entities
# were found; assigning ``df.columns`` afterwards raises on an empty frame.
df = pd.DataFrame(param, columns=headers)
# Use the frame directly. The previous round trip through ``str(df)`` and a
# whitespace-separated re-parse split multi-word entity text (e.g. "4 mg")
# into extra columns and depended on pandas' display truncation.
df_table = df

print(tabulate(df_table, headers='keys', tablefmt='psql'))


# Output data visualization in spaCy
示例#6
0
def apply_spacy_model(source_generator, spacy_nlp_model):
    """Wrap ``source_generator`` so each item is replaced by its entity parse.

    Each item is converted with ``str()``, passed to ``spacy_nlp_model``, and
    the result is given to ``displacy.parse_ents``. The wrapping itself is
    delegated to ``Generators.generator_modifier``, whose return value is
    returned unchanged.
    """

    def parse_entities(text_string):
        processed = spacy_nlp_model(str(text_string))
        return displacy.parse_ents(processed)

    return Generators.generator_modifier(source_generator, parse_entities)
示例#7
0
# Read the whole input file into memory.
with open(path) as f:
    text = f.read()

doc = nlp(text)
matches = matcher(doc)

for match_id, start, end in matches:
    # Resolve the match ID to its string form and slice out the matched span.
    string_id, span = nlp.vocab.strings[match_id], doc[start:end]

parse_dict = displacy.parse_ents(doc)
ents_arr = parse_dict['ents']

# Rename each entity's 'label' key to 'type' (in place), keeping entity order.
spans = []
for ent in ents_arr:
    ent['type'] = ent.pop('label')
    spans.append(ent)

# Reformat into the displacy-ent.js format (possibly deprecated), e.g.
#   {'spans': [{'end': 20, 'start': 5, 'type': "PERSON"},
#              {'end': 67, 'start': 61, 'type': "ORG"}]}
ent_dict = {'spans': spans}