def find_landlord_tenant(text: str):
    """
    Pick landlord and tenant companies out of ``text``.

    Companies are extracted from the original text. The values returned by
    ``min_index_of_word`` for the landlord and tenant synonym lists (computed
    on the lower-cased text) decide which role the first company gets; the
    second company gets the other role.

    :param text: text to analyse
    :return: ``(landlord, tenant)``. Each item is an upper-cased
        "NAME TYPE" string (type omitted when it is ``None``), ``None`` when
        there is no company for that role, or the unchanged company entry
        when its first element is ``None``.
    """
    # NOTE: a regex-based lookup (find_landlord_tenant_re) used to be tried
    # first; that call is currently disabled.

    def _render(entry):
        # Missing entries and entries without a name are passed through as is.
        if entry is None or entry[0] is None:
            return entry
        name = entry[0].upper()
        suffix = '' if entry[1] is None else ' ' + entry[1].upper()
        return '{0}{1}'.format(name, suffix)

    found = list(get_companies(text, detail_type=True, name_upper=True, strict=True))
    lowered = text.lower()
    landlord_pos = min_index_of_word(lowered, _LANDLORD_SYNONYMS)
    tenant_pos = min_index_of_word(lowered, _TENANT_SYNONYMS)

    first = found[0] if len(found) > 0 else None
    second = found[1] if len(found) > 1 else None

    # Whichever role scores the lower index is given the first company.
    if landlord_pos < tenant_pos:
        landlord, tenant = first, second
    else:
        landlord, tenant = second, first

    return _render(landlord), _render(tenant)
def get_employer_name(text, return_source=False):
    """
    Return the first company found in ``text`` as a comma-joined string.

    Companies are only looked up when the definitions found in the text
    include at least one term from ``TRIGGER_LIST_COMPANY`` and at least one
    term from ``TRIGGER_LIST_EMPLOYEE`` (compared in lower case).

    :param text: text to analyse
    :param return_source: when true, return ``(employer, text)`` instead of
        just the employer
    :return: the elements of the first company joined with ``', '``, or
        ``None`` when the trigger terms or companies are missing
    """
    defined_terms = list(get_definitions(text))
    has_employer_term = any(
        term.lower() in TRIGGER_LIST_COMPANY for term in defined_terms)
    has_employee_term = any(
        term.lower() in TRIGGER_LIST_EMPLOYEE for term in defined_terms)

    employer = None
    if has_employer_term and has_employee_term:
        found = list(get_companies(text))
        if found:
            # Only the first company is reported.
            employer = ', '.join(str(part) for part in found[0])

    return (employer, text) if return_source else employer
def parse(self, log: ProcessLogger, text, text_unit_id, _text_unit_lang,
          document_initial_load: bool = False, **kwargs) -> Optional[ParseResults]:
    """
    Locate companies in ``text`` and build party usages for the text unit.

    Every company returned by ``get_companies`` is resolved to a ``Party``
    via ``get_or_create`` and contributes one unsaved ``PartyUsage`` to the
    result. Previously the result was built from the loop variables of a
    single iteration, so at most one usage was reported no matter how many
    companies were found.

    :param log: process logger (not used in this method)
    :param text: text of the text unit
    :param text_unit_id: id of the text unit the usages belong to
    :param _text_unit_lang: language of the text unit (not used here)
    :param document_initial_load: when false, existing ``PartyUsage`` rows
        of this text unit are deleted before parsing
    :param kwargs: ignored
    :return: ``ParseResults`` mapping ``PartyUsage`` to the list of usages,
        or ``None`` when no company was found
    """
    # Here we override saving logic to workaround race conditions on party creation vs party usage saving
    if not document_initial_load:
        PartyUsage.objects.filter(text_unit_id=text_unit_id).delete()

    found = list(get_companies(text, count_unique=True, detail_type=True, name_upper=True))
    if not found:
        return None

    usages = []
    for name, _type, type_abbr, type_label, type_desc, count in found:
        defaults = dict(type=_type,
                        type_label=type_label,
                        type_description=type_desc)
        # type_abbr is part of the lookup, so a missing one is stored as ''.
        party, _ = Party.objects.get_or_create(
            name=name,
            type_abbr=type_abbr or '',
            defaults=defaults)
        usages.append(
            PartyUsage(text_unit_id=text_unit_id, party=party, count=count))

    return ParseResults({PartyUsage: usages})
def _extract_variants_from_text(self, field, text: str, **kwargs):
    """
    List the companies found in ``text`` as upper-cased "NAME TYPE" strings.

    :param field: not used in this method
    :param text: text to analyse
    :param kwargs: ignored
    :return: one string per company (the type part is omitted when it is
        ``None``), or ``None`` when no company was found
    """
    found = list(get_companies(text, detail_type=True, name_upper=True, strict=True))
    if not found:
        return None

    variants = []
    for entry in found:
        name = entry[0].upper()
        suffix = '' if entry[1] is None else ' ' + entry[1].upper()
        variants.append('{0}{1}'.format(name, suffix))
    return variants
def get_employee_name(text, return_source=False):
    """
    Return the first person in ``text`` that qualifies as the employee.

    Persons are only looked up when one of the definitions found in the text
    (lower-cased) is in ``TRIGGER_LIST_EMPLOYEE``. A person is rejected when
    its lower-cased string contains any entry of ``FALSE_PEOPLE``, or when
    its cleaned form is contained in the cleaned form of any company found
    in the text.

    :param text: text to analyse
    :param return_source: when true, return ``(employee, text)`` instead of
        just the employee
    :return: ``str`` of the first accepted person, or ``None``
    """
    employee = None
    has_employee_term = any(
        term.lower() in TRIGGER_LIST_EMPLOYEE
        for term in list(get_definitions(text)))

    if has_employee_term:
        persons = list(get_persons(text))
        companies = list(get_companies(text))

        for candidate in persons:
            # Scan the whole FALSE_PEOPLE list; any hit disqualifies.
            is_false_person = False
            for marker in FALSE_PEOPLE:
                if marker in str(candidate).lower():
                    is_false_person = True
            if is_false_person:
                continue

            matches_company = False
            for company in companies:
                if len(company) == 0:
                    continue
                # persons and companies return slightly different values for
                # same text so need to standardize to compare
                if company[1] is None or company[0] is None:
                    company_key = str(clean(company[0]))
                else:
                    company_key = str(clean(company[0]) + clean(company[1]))
                candidate_key = str(clean(candidate))
                # handle this- where get_companies picks up more surrounding text
                # than get_persons: EMPLOYMENT AGREEMENT WHEREAS, Kensey Nash Corporation,
                # a Delaware corporation (the “Company”) and Todd M. DeWitt
                # (the “Executive”) entered into that certain Amended
                # and Restated Employment Agreement,...
                # Substring containment also covers the exact-equality case.
                if candidate_key in company_key:
                    matches_company = True

            if not matches_company:
                # take first person found meeting our employee criteria
                employee = str(candidate)
                break

    return (employee, text) if return_source else employee
def extraction_function(self, field, possible_value, text):
    """
    Resolve a company value for ``field``.

    A truthy ``possible_value`` is returned as is. When ``possible_value`` is
    ``None`` and ``text`` is empty the result is ``None``. Otherwise companies
    are extracted from ``text`` and one of them is selected through
    ``ValueExtractionHint.get_value`` using ``field.item_number``.

    :param field: object providing ``item_number``
    :param possible_value: value already known for the field, if any
    :param text: text to search for companies
    :return: ``possible_value``, an upper-cased "NAME TYPE" string (type
        omitted when it is ``None``), or ``None``
    """
    if possible_value:
        return possible_value
    if possible_value is None and not text:
        return None

    found = list(get_companies(text, detail_type=True, name_upper=True, strict=True))
    chosen = ValueExtractionHint.get_value(found, field.item_number)
    if not chosen:
        return None

    name = chosen[0].upper()
    suffix = '' if chosen[1] is None else ' ' + chosen[1].upper()
    return '{0}{1}'.format(name, suffix)
def get_companies(cls, text: str, strict: bool = False, use_gnp: bool = False,
                  detail_type: bool = False, count_unique: bool = False,
                  name_upper: bool = False, parse_name_abbr: bool = False,
                  return_source: bool = False):
    """
    Call ``get_companies`` with the ban-list filter of ``cls`` applied.

    All arguments are forwarded unchanged; the value of
    ``cls.get_banlist_filter()`` is additionally passed as ``banlist_usage``.

    NOTE(review): presumably a classmethod whose inner call resolves to a
    module-level ``get_companies`` rather than to this method - confirm
    against the enclosing class.

    :return: whatever the underlying ``get_companies`` call returns
    """
    options = dict(
        strict=strict,
        use_gnp=use_gnp,
        detail_type=detail_type,
        count_unique=count_unique,
        name_upper=name_upper,
        parse_name_abbr=parse_name_abbr,
        return_source=return_source,
        banlist_usage=cls.get_banlist_filter(),
    )
    return get_companies(text, **options)
def parse(self, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """
    Locate companies in ``text`` and build party usages for the text unit.

    Each company returned by ``get_companies`` is resolved to a ``Party``
    via ``get_or_create`` and yields one unsaved ``PartyUsage``.

    :param text: text of the text unit
    :param text_unit_id: id of the text unit the usages belong to
    :param _text_unit_lang: language of the text unit (not used here)
    :param kwargs: ignored
    :return: ``ParseResults`` mapping ``PartyUsage`` to the list of usages;
        ``None`` when no company was found
    """
    found = list(get_companies(text, count_unique=True, detail_type=True, name_upper=True))
    if not found:
        return None

    usages = []
    for name, _type, type_abbr, type_label, type_desc, count in found:
        party, _ = Party.objects.get_or_create(
            name=name,
            type_abbr=type_abbr or '',
            defaults={
                'type': _type,
                'type_label': type_label,
                'type_description': type_desc,
            })
        usages.append(
            PartyUsage(text_unit_id=text_unit_id, party=party, count=count))
    return ParseResults({PartyUsage: usages})
def extract_companies(self, text=None):
    """
    Return the companies found by ``lex_entities.get_companies`` as a list.

    :param text: text to analyse; ``self.text`` is used when this is empty
        or ``None``
    :return: list of the items produced by ``lex_entities.get_companies``
    """
    source = text if text else self.text
    return list(lex_entities.get_companies(source))
def getCompanies(self):
    """
    Store the companies found in ``self.bill_text`` on the bill.

    Each company becomes a string made of its first element, a space and the
    ``str`` of its second element; the resulting list is written to
    ``self.bill.info['companies']``.
    """
    self.bill.info['companies'] = [
        str(entry[0] + " " + str(entry[1]))
        for entry in list(get_companies(self.bill_text))
    ]