def test_bankr_courts_wo_nltk(self):
    """Expect exactly one court match for a 'Bankr. E.D.N.C.' mention.

    Runs ``get_courts`` with ``simplified_normalization=True``. The test
    name suggests this path works without NLTK; that is not verifiable
    from this test alone.
    """
    sample = 'One one Bankr. E.D.N.C. two two two.'
    matches = get_courts(sample,
                         court_config_list=self.build_courts_config(),
                         simplified_normalization=True)
    # Materialise the result so it can be counted.
    located = list(matches)
    self.assertEqual(1, len(located))
def parse(self, log: ProcessLogger, text, text_unit_id, text_unit_lang, **kwargs) -> ParseResults:
    """Find court mentions in a text unit and build per-court usage records.

    :param log: process logger (not used by this implementation).
    :param text: text of the unit to scan.
    :param text_unit_id: id stored on every produced ``CourtUsage``.
    :param text_unit_lang: language passed to ``get_courts`` as the single
        entry of ``text_languages``.
    :param kwargs: accepted for interface compatibility; ignored here.
    :return: ``ParseResults`` mapping ``CourtUsage`` to one ``CourtUsage``
        per distinct court id, each with the number of times that id was
        found; ``None`` when no court is found.
    """
    from collections import Counter

    court_config = dict_data_cache.get_court_config()
    found = [dict_entities.get_entity_id(i[0])
             for i in courts.get_courts(text,
                                        court_config_list=court_config,
                                        text_languages=[text_unit_lang])]
    if not found:
        # Nothing located: keep the original "no result" behaviour.
        return None

    # Count every id in a single pass instead of re-scanning ``found``
    # with list.count() once per unique id.
    counts = Counter(found)
    # Iterate over set(found), as before, so the output order is unchanged.
    return ParseResults({
        CourtUsage: [CourtUsage(text_unit_id=text_unit_id,
                                court_id=court_id,
                                count=counts[court_id])
                     for court_id in set(found)]})
def parse_courts_legacy_function(self, text: str):
    """Run ``get_courts`` on *text* using the US courts CSV dictionary.

    The CSV is downloaded on every call. Each row becomes one
    ``entity_config`` built from its "Court ID", "Court Name" and
    ";"-separated "Alias" columns, with 0 as the third argument.
    """
    dictionary_url = (
        "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
        ".csv")
    table = pandas.read_csv(dictionary_url)

    # Create config objects
    configs = [
        entity_config(
            record["Court ID"],
            record["Court Name"],
            0,
            # A null alias cell means the court has no aliases.
            [] if pandas.isnull(record["Alias"]) else record["Alias"].split(";"))
        for _, record in table.iterrows()
    ]
    return get_courts(text, configs)
def parse(self, log: ProcessLogger, text, text_unit_id, text_unit_lang,
          document_initial_load: bool = False, **kwargs) -> ParseResults:
    """Find court mentions in a text unit and build per-court usage records.

    :param log: process logger (not used by this implementation).
    :param text: text of the unit to scan.
    :param text_unit_id: id stored on every produced ``CourtUsage``.
    :param text_unit_lang: language passed to ``get_courts`` as the single
        entry of ``text_languages``.
    :param document_initial_load: accepted for interface compatibility;
        not read by this implementation.
    :param kwargs: accepted for interface compatibility; ignored here.
    :return: ``ParseResults`` mapping ``CourtUsage`` to one ``CourtUsage``
        per distinct court id, each with the number of times that id was
        found; ``None`` when no court is found.
    """
    from collections import Counter

    from apps.extract.app_vars import SIMPLE_LOCATOR_TOKENIZATION

    # The app variable is forwarded as ``simplified_normalization``.
    simple_norm = SIMPLE_LOCATOR_TOKENIZATION.val
    court_config = dict_data_cache.get_court_config()
    found = [i[0].id
             for i in courts.get_courts(text,
                                        court_config_list=court_config,
                                        text_languages=[text_unit_lang],
                                        simplified_normalization=simple_norm)]
    if not found:
        # Nothing located: keep the original "no result" behaviour.
        return None

    # Count every id in a single pass instead of re-scanning ``found``
    # with list.count() once per unique id.
    counts = Counter(found)
    # Iterate over set(found), as before, so the output order is unchanged.
    return ParseResults({
        CourtUsage: [CourtUsage(text_unit_id=text_unit_id,
                                court_id=court_id,
                                count=counts[court_id])
                     for court_id in set(found)]})
def parse_courts_legacy_function(self, text: str):
    """Run ``get_courts`` on *text* using the US courts CSV dictionary.

    The CSV is downloaded on every call. Each row becomes one
    ``DictionaryEntry`` (priority 0, ``name_is_alias=True``) whose aliases
    come from the ";"-separated "Alias" column.
    """
    dictionary_url = (
        "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
        ".csv")
    table = pandas.read_csv(dictionary_url)

    # Create config objects
    entries = []
    for _, record in table.iterrows():
        alias_cell = record['Alias']
        if pandas.isnull(alias_cell):
            # A null alias cell means the court has no aliases.
            alias_objects = []
        else:
            alias_objects = list(map(DictionaryEntryAlias, alias_cell.split(';')))
        entry = DictionaryEntry(id=int(record['Court ID']),
                                name=record['Court Name'],
                                priority=0,
                                name_is_alias=True,
                                aliases=alias_objects)
        entries.append(entry)
    return get_courts(text, entries)
def test_bankr_courts(self):
    """Expect exactly one court match for a 'Bankr. E.D.N.C.' mention."""
    sample = 'One one Bankr. E.D.N.C. two two two.'
    matches = get_courts(sample,
                         court_config_list=self.build_courts_config())
    # Materialise the result so it can be counted.
    located = list(matches)
    self.assertEqual(1, len(located))
def extract_courts(self, text=None):
    """Return the courts found by ``lex_courts.get_courts`` as a list.

    :param text: text to scan; when falsy (``None`` or empty),
        ``self.text`` is scanned instead.
    """
    source = text or self.text
    return list(lex_courts.get_courts(source))
def parse_courts_legacy_function(self, text: str):
    """Run ``get_courts`` on *text* with the config from ``self.load_en_courts()``."""
    en_courts = self.load_en_courts()
    return get_courts(text, en_courts)