def get_employee_name(text, return_source=False):
    """
    Return the first person in ``text`` that qualifies as the employee.

    The search only runs when at least one term yielded by
    ``get_definitions(text)`` is, lower-cased, a member of
    ``TRIGGER_LIST_EMPLOYEE``.  The results of ``get_persons(text)`` are then
    examined in order, and a person is rejected when either:

    * its lower-cased string contains any entry of ``FALSE_PEOPLE``; or
    * its cleaned string is contained in the cleaned string built from any
      non-empty result of ``get_companies(text)``.

    The first person passing both checks is returned as ``str``.

    :param text: text to search
    :param return_source: when true, return ``(name, text)`` rather than
        just ``name``
    :return: the employee name, or ``None`` when no person qualifies;
        wrapped in a ``(name, text)`` tuple when ``return_source`` is true
    """
    employee = None

    defined_terms = list(get_definitions(text))
    has_employee_term = any(
        term.lower() in TRIGGER_LIST_EMPLOYEE for term in defined_terms)

    if has_employee_term:
        candidates = list(get_persons(text))
        organizations = list(get_companies(text))

        for candidate in candidates:
            # Discard anything matching a known false-positive marker.
            if any(marker in str(candidate).lower()
                   for marker in FALSE_PEOPLE):
                continue

            matches_company = False
            for org in organizations:
                if len(org) == 0:
                    continue

                # Person and company results differ slightly for the same
                # text, so both sides are passed through clean() before
                # being compared.
                if org[1] is None or org[0] is None:
                    company_string = str(clean(org[0]))
                else:
                    company_string = str(clean(org[0]) + clean(org[1]))
                person_string = str(clean(candidate))

                # A containment test (which also covers equality) handles
                # the case where the company result carries more of the
                # surrounding text than the person result, e.g.:
                #   EMPLOYMENT AGREEMENT WHEREAS, Kensey Nash Corporation,
                #   a Delaware corporation (the "Company") and Todd M. DeWitt
                #   (the "Executive") entered into that certain Amended
                #   and Restated Employment Agreement,...
                if person_string in company_string:
                    matches_company = True

            if not matches_company:
                # The first person meeting the employee criteria wins.
                employee = str(candidate)
                break

    if return_source:
        return employee, text
    return employee
import lexnlp.nlp.en.segments.sentences as lex_sentences import lexnlp.extract.en.dates as lex_dates import lexnlp.extract.en.entities.nltk_maxent as lex_entities direct_path = "/Users/brandon/Documents/Northwestern Courses/Winter 2019/CS+Law Innovation Lab/Orrick, Harrington, & Sutcliffe/Documents/Dish_Sample.txt" with open(direct_path, 'r') as file: brief = file.read() processed_brief = lex_sentences.pre_process_document(brief) sentences_brief = lex_sentences.get_sentence_list(processed_brief) facts = [] for sentence in sentences_brief: entities = lex_entities.get_persons(sentence) for entity in entities: facts.append((entity, sentence)) for fact in facts: print("Question:\nWhy is {} relevant?\n\nAnswer:\n{}".format( fact[0], fact[1])) print("\n---------------\n") ''' Question: Why is Farmers Branch relevant? Answer: In 2009, DISH began a pilot program to test QPC, a new incentive-based system at several locations, including two of its eight offices in the North Texas region: Farmers Branch and North Richland Hills. ---------------
def _extract_variants_from_text(self, field, text: str, **kwargs):
    """
    Return the persons found in ``text`` as a list, or ``None`` if none.

    :param field: not used by this implementation
    :param text: text passed to ``get_persons``
    :param kwargs: not used by this implementation
    :return: non-empty ``list`` of whatever ``get_persons`` yields, or
        ``None`` when it produces nothing
    """
    persons = get_persons(text, return_source=False)
    if not persons:
        return None
    # Materialise before testing for emptiness: if get_persons hands back a
    # lazy iterator (e.g. a generator), that object is always truthy, so the
    # check above alone would let an empty result through as [] rather
    # than None.
    found = list(persons)
    return found or None
def extract_persons(self, text=None):
    """
    Return everything ``lex_entities.get_persons`` yields, as a list.

    :param text: text to search; when falsy (``None`` or empty),
        ``self.text`` is used instead
    :return: list of the results produced by ``lex_entities.get_persons``
    """
    source = text or self.text
    return [*lex_entities.get_persons(source)]