Exemplo n.º 1
0
def analyze():
    """Analyze the posted JSON ``"text"`` and answer with its concepts as JSON.

    Aborts with HTTP 400 when the request carries no (or an empty) JSON body.
    When the body lacks a ``"text"`` key, a JSON object with an ``"Error"``
    message is returned instead of the concept list.

    ``english`` and ``entities`` are callables defined outside this function
    (presumably an analyzer and an entity domain -- confirm at the definition
    site).
    """
    payload = request.json
    if not payload:
        abort(400)

    if "text" in payload:
        annotated = entities(english(payload["text"]))

        # Unpack every concept into a plain dict so it can be serialized.
        flattened = []
        for item in Concept.iter(annotated):
            flattened.append({**item})

        print("concepts:", flattened)
        return jsonify(flattened)

    return jsonify({"Error": '"text" param missing in post request.'})
Exemplo n.º 2
0
from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error

try:
    # NOTE: despite its name, `english` is created with language="dutch".
    english = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    doc = english("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)

    # Only concepts whose uri is in this set are reported below.
    requested_concepts = {'Person', 'Position', 'Company'}

    def concept_filter(concept):
        """Tell whether the concept's uri is one of the requested ones."""
        return concept.uri in requested_concepts

    # First pass: literal and stem of every requested concept.
    for match in Concept.iter(doc, concept_filter):
        print(f"literal: {match.literal:<20}, stem={match.stem}")

    # Second pass: the same concepts, each unpacked into a dict.
    print("-" * 40)
    for match in Concept.iter(doc, concept_filter):
        print({**match})

    def is_person(concept):
        """Tell whether the concept's uri is exactly "Person"."""
        return concept.uri == "Person"

    def is_person_part(concept):
        """Tell whether the concept's uri starts with "Person"."""
        return concept.uri.startswith("Person")

    # Third pass: iterate inside each Person concept for Person* concepts.
    print("-" * 40)
    for person in Concept.iter(doc, is_person):
        for person_parts in Concept.iter(person, is_person_part):
            # NOTE(review): the label says "stem" but the literal is printed
            # -- confirm which one is intended.
            print(
                f"Person: {person_parts.uri:<20}, stem={person_parts.literal}")

except Error as ex:
    print("Exception:", ex)
Exemplo n.º 3
0
# Concept uris that are listed next to the identified topics.
entity_filter = set(["Person", "Company", "Address", "City", "Facility"])

st.write("EyeOnText English Topics and Entities")
name = st.text_area("Enter Your text",
                    """John Smith works at EyeOnText in Antwerp.""")
if st.button("Analyze"):
    input_text = name
    analyzer = Language("english")
    entities = Domain("english-entity")
    topicit = TopicIdentifier(language="english")

    doc = entities(analyzer(input_text))
    topics = topicit.get_topics(doc, 20)

    # Rows are keyed by their display text, so an entity whose literal equals
    # a topic's text replaces that topic row.
    combined_topics = {}
    for topic in topics:
        combined_topics[topic[0]] = ["topic", topic[0], topic[1]]

    for concept in Concept.iter(doc,
                                lambda concept: concept.uri in entity_filter):
        # Entities carry no score of their own; they are listed with 1.0.
        combined_topics[concept.literal] = [concept.uri, concept.literal, 1.0]

    # Sort the uris: iterating a set of strings directly gives an order that
    # can differ from run to run, which made the heading unstable.
    st.write(f"topics and {','.join(sorted(entity_filter))}")
    st.write(
        pd.DataFrame(list(combined_topics.values()),
                     columns=["type", "topic", "relevancy"]))
Exemplo n.º 4
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Language, Domain, Filter
from eot.wowool.annotation import Concept

# Analyze a fixed sentence with the english language and company domain.
a = Language("english")
dc = Domain("english-company")
doc = dc(a("this is a EyeOnText."))

# Domain compiled from inline rule source: the first rule hands Company
# matches to ::python::myplugin::call_this, the second one adds an Other
# annotation whose "name" attribute is the matched literal.
call_plugin = Domain(source="""
rule:{ Company }= ::python::myplugin::call_this;
rule:{ Company }= Other@(name=f"{rule.literal()}" );

""")

doc = call_plugin(doc)
# Named concept_filter so the builtin filter() is not shadowed at module level.
concept_filter = Filter(['Other', 'Company', 'PLUGIN_COMPANY'])
doc = concept_filter(doc)
concepts = list(Concept.iter(doc))
uris = [c.uri for c in concepts]
print(doc)

# PLUGIN_COMPANY is presumably produced by the plugin callback -- its
# definition is not part of this script.
assert "PLUGIN_COMPANY" in uris, "Missing some plugin annotation"
assert "Company" in uris, "Missing some plugin annotation"
assert concepts[0].attributes["name"][0] == "EyeOnText", "Missing attributes."
Exemplo n.º 5
0
#  Copyright (c) 2020 EyeOnText, All Rights Reserved.
#  NOTICE:  All information contained herein is, and remains the property of EyeOnText.

from eot.wowool.native import Analyzer, Domain
from eot.wowool.annotation import Concept
from eot.wowool.error import Error

try:
    dutch = Analyzer(language="dutch")
    entities = Domain("dutch-entity")

    doc = dutch("Jan Van Den Berg werkte als hoofdarts bij Omega Pharma.")
    doc = entities(doc)

    # Extra domain compiled from inline rules; both rules span from a Person
    # to a Company, the first one only around the stem 'werken'.
    mydomain = Domain(source=r"""
        rule:{ Person .. <'werken'> .. Company }= PersonWorkCompany@(verb="work");
        rule:{ Person .. Company }= PersonCompany;
    """)
    doc = mydomain(doc)

    # Only concepts whose uri is in this set are printed.
    requested_concepts = {
        'Person', 'Company', 'PersonWorkCompany', 'PersonCompany'
    }

    def concept_filter(concept):
        """Tell whether the concept's uri is one of the requested ones."""
        return concept.uri in requested_concepts

    for match in Concept.iter(doc, concept_filter):
        print(f"{match.uri} -> {match.literal}")

except Error as ex:
    print("Exception:", ex)
Exemplo n.º 6
0
try:
    analyzer = Analyzer(language="dutch")
    # Inline rule source; it is also echoed to stdout below, so its text is
    # part of the program output.
    rule_source = """
// Compound Sample:
// capture all the word with verzekering
lexicon:(input="component"){
    verzekering } = INSURANCE_COMP;

// capture only the real verzekering not verzekeringsmaatschapijen
lexicon:(input="head"){
    verzekering } = INSURANCE_HEAD;

// capture the cost of the insurance.
rule:{ h'verzekering' { <+currency> } = INSURANCE_PRICE };
    """
    compounds = Domain(source=rule_source)
    # Named input_text so the builtin input() is not shadowed at module level.
    input_text = "Er zijn verzekeringsmaatschapijen €40.000.000 en verzekeringen: autoverzekeringen €100, fietsverzekering €10"
    doc = compounds(analyzer(input_text))

    # Separator line shared by every section of the report.
    separator = "-" * 80
    print(separator)
    print(rule_source)
    print(separator)
    print(input_text)
    print(separator)
    print(f"{'uri':<20s} | {'literal':<30s} | {'stem'}")
    print(separator)
    # One row per concept whose uri starts with "INSURANCE".
    for concept in Concept.iter(
            doc, lambda concept: concept.uri.startswith("INSURANCE")):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")
except Error as ex:
    print("Exception:", ex)
// capture the cost of the insurance.
rule:{ {h'försäkring'} = INSURANCE_TYPE { Num +currency } = INSURANCE_PRICE };
    """
    compounds = Domain(source=rule_source)
    input = "Det finns försäkringsbolag 40000 euro och försäkring: bilförsäkring 100 euro, cykelförsäkring 200 SEK "
    doc = compounds(analyzer(input))
    print(doc)

    print("-" * 80)
    print(rule_source)
    print("-" * 80)
    print(input)
    print("-" * 80)
    print(f"{'uri':<20s} | {'literal':<30s} | {'stem'}")
    print("-" * 80)
    for concept in Concept.iter(
            doc, lambda concept: concept.uri == "INSURANCE_COMPONENT"):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")
    print("-" * 80)
    for concept in Concept.iter(
            doc, lambda concept: concept.uri == "INSURANCE_HEAD"):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")
    print("-" * 80)
    for concept in Concept.iter(
            doc, lambda concept: concept.uri == "INSURANCE_PRICE" or concept.
            uri == "INSURANCE_TYPE"):
        print(f"{concept.uri:<20s} | {concept.literal:<30s} | {concept.stem}")

except Error as ex:
    print("Exception:", ex)
    f = open(output_directory + "cypher-out.cypher", "w")
    webpage = open(output_directory + "index.html", "w")
    for i, ip in enumerate(InputProviders(input_provider_path)):
        print(f"Processing File no {i}: {ip.id()}")
        doc = english(ip)
        doc = entities(doc)
        doc = myrule(doc)
        requested_concepts = set([
            'EngineType', 'Battery', 'Flying', 'Range', 'BatteryDensity',
            'EnginePower', 'Manufacturer', 'City', 'Time', 'Price', 'Website',
            'Reference_Title', 'Reference_Name', 'Year', 'Speed', 'Management',
            'System'
        ])
        concept_filter = lambda concept: concept.uri in requested_concepts
        for concept in Concept.iter(doc):
            # print( f"Tagname: {concept.uri}, literal: {concept.literal:<20}, stem={concept.stem}" )
            # Unpack concept
            print({**concept})

        print("-" * 10)
        graphit = EntityGraph(graph_config)

        from pathlib import Path
        filename = Path(ip.id()).stem
        graphit.slots = {}
        graphit.slots['Document'] = {"data": filename}
        graphit.slots['Title'] = {"expr": "Reference_Title"}
        results = graphit(doc)

        print(results.df_from)