def _company_match_string(company):
    """Return the cleaned text of one company result, used for name matching.

    The cleaned first and second elements are concatenated when both are
    present; otherwise only the cleaned first element is used.
    """
    if company[1] is not None and company[0] is not None:
        return str(clean(company[0]) + clean(company[1]))
    return str(clean(company[0]))


def get_employee_name(text, return_source=False):
    """Return the first person found in ``text`` that passes the employee filters.

    A search is only attempted when at least one definition extracted from
    ``text`` (lower-cased) appears in ``TRIGGER_LIST_EMPLOYEE``.  Persons are
    then examined in extraction order and the first one that is neither
    a "false person" nor part of a company name is returned.

    A person is rejected when:
      * its lower-cased string contains any entry of ``FALSE_PEOPLE``, or
      * its cleaned string is contained in the cleaned string of any
        extracted company.

    Args:
        text: The text to search.
        return_source: When true, return a ``(name, text)`` tuple instead of
            just the name.

    Returns:
        ``str(person)`` for the first accepted person, or ``None`` when no
        trigger definition or no acceptable person is found.  Wrapped in a
        tuple together with ``text`` when ``return_source`` is true.
    """
    found_employee = None

    # Stop scanning definitions at the first trigger hit.
    defined_employee_found = any(
        d.lower() in TRIGGER_LIST_EMPLOYEE for d in get_definitions(text))

    if defined_employee_found:
        persons = list(get_persons(text))
        companies = list(get_companies(text))

        for p in persons:
            person_string = str(p)
            person_lower = person_string.lower()
            if any(f in person_lower for f in FALSE_PEOPLE):
                continue

            # persons and companies return slightly different values for the
            # same text, so both sides are passed through clean() before
            # comparing.  The person side does not depend on the company,
            # so it is computed once per person.
            employee_full_string = str(clean(p))

            # Containment (which also covers equality) handles the case where
            # get_companies picks up more surrounding text than get_persons,
            # e.g.: EMPLOYMENT AGREEMENT WHEREAS, Kensey Nash Corporation,
            # a Delaware corporation (the "Company") and Todd M. DeWitt
            # (the "Executive") entered into that certain Amended
            # and Restated Employment Agreement,...
            person_is_a_company = any(
                employee_full_string in _company_match_string(c)
                for c in companies
                if len(c) > 0)
            if person_is_a_company:
                continue

            # take first person found meeting our employee criteria
            found_employee = person_string
            break

    if return_source:
        return found_employee, text
    return found_employee
示例#2
0
import lexnlp.nlp.en.segments.sentences as lex_sentences
import lexnlp.extract.en.dates as lex_dates
import lexnlp.extract.en.entities.nltk_maxent as lex_entities

# Location of the sample brief to analyse (machine-specific absolute path).
direct_path = "/Users/brandon/Documents/Northwestern Courses/Winter 2019/CS+Law Innovation Lab/Orrick, Harrington, & Sutcliffe/Documents/Dish_Sample.txt"

with open(direct_path, 'r') as file:
    brief = file.read()

# Pre-process the raw document, then split it into sentences.
processed_brief = lex_sentences.pre_process_document(brief)
sentences_brief = lex_sentences.get_sentence_list(processed_brief)

# Each fact pairs an extracted person with the sentence it was found in.
facts = []
for sentence in sentences_brief:
    facts.extend(
        (entity, sentence)
        for entity in lex_entities.get_persons(sentence))

# Print every fact as a question/answer pair followed by a divider.
for who, context in facts:
    print("Question:\nWhy is {} relevant?\n\nAnswer:\n{}".format(
        who, context))
    print("\n---------------\n")
'''
Question:
Why is Farmers Branch relevant?

Answer:
In 2009, DISH began a pilot program to test QPC, a new incentive-based system at several locations, including two of its eight offices in the North Texas region: Farmers Branch and North Richland Hills.

---------------
 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """Return the persons extracted from ``text`` as a list, or ``None``.

     ``field`` and ``kwargs`` are accepted but not used here.

     Returns:
         A non-empty list of whatever ``get_persons`` produced for ``text``,
         or ``None`` when nothing was produced.
     """
     persons = get_persons(text, return_source=False)
     # Materialize before testing emptiness: if get_persons hands back a
     # lazy iterator/generator it is always truthy, so testing the raw
     # result would return [] instead of None when nothing was found.
     found = list(persons) if persons else []
     return found or None
示例#4
0
 def extract_persons(self, text=None):
     """Return the persons found in ``text`` as a list.

     When ``text`` is falsy (``None`` or empty), ``self.text`` is used instead.
     """
     source = text or self.text
     persons = lex_entities.get_persons(source)
     return list(persons)