def test_prepare_alias_blacklist_dict():
    """
    Verify the structure produced by ``prepare_alias_blacklist_dict``.

    The input rows are ``(alias, language, flag)`` triples.  With stemming
    disabled the result is expected to be keyed by language, each value being
    a pair of lists: rows whose flag is False appear in the first list
    lower-cased and padded with spaces, rows whose flag is True appear in the
    second list padded with spaces but with their case preserved.
    An empty input is expected to yield ``None``.
    """
    blacklist_rows = [
        ('Alias1', 'lang1', False),
        ('ABBREV1', 'lang1', True),
        ('Alias2', None, False),
        ('Alias3', 'lang1', False),
    ]

    # Expected (first list, second list) pairs per language key.
    lang1_entry = ([' alias1 ', ' alias3 '], [' ABBREV1 '])
    no_lang_entry = ([' alias2 '], [])

    prepared = prepare_alias_blacklist_dict(blacklist_rows, use_stemmer=False)
    assert_dict_equal(prepared, {'lang1': lang1_entry, None: no_lang_entry})

    # Nothing to prepare -> no dictionary at all.
    assert_true(prepare_alias_blacklist_dict([]) is None)
def test_alias_is_blacklisted():
    """
    Verify lookups against a prepared alias blacklist.

    Checks three cases: an alias present in the prepared blacklist for its
    language is reported as blacklisted, an alias absent from it is not, and
    passing ``None`` instead of a prepared blacklist reports nothing as
    blacklisted.
    """
    blacklist_rows = [
        ('Alias1', 'lang1', False),
        ('ABBREV1', 'lang1', True),
        ('Alias2', None, False),
        ('Alias3', 'lang1', False),
    ]
    blacklist = prepare_alias_blacklist_dict(blacklist_rows, use_stemmer=False)

    # Present in the 'lang1' entries of the prepared blacklist.
    known_hit = alias_is_blacklisted(blacklist, ' ABBREV1 ', 'lang1', True)
    assert_true(known_hit)

    # Same language, but this alias was never blacklisted.
    unknown_alias = alias_is_blacklisted(blacklist, ' AAA ', 'lang1', True)
    assert_false(unknown_alias)

    # No prepared blacklist supplied at all.
    without_blacklist = alias_is_blacklisted(None, 'aaaa', 'l', False)
    assert_false(without_blacklist)
def test_geoentities_alias_filtering():
    """
    Run ``get_geoentities`` over the CSV test data with a custom blacklist.

    A blacklist built from three rows is passed to the extractor through
    ``prepared_alias_black_list``; each extracted item is reduced to the name
    of its first element before comparison with the expected data.
    """
    blacklist_rows = [
        ('Afghanistan', None, False),
        ('Mississippi', 'en', False),
        ('AL', 'en', True),
    ]
    prepared_alias_blacklist = prepare_alias_blacklist_dict(blacklist_rows)

    def entity_names(actual):
        # Keep only the entity name taken from the first element of each item.
        return [get_entity_name(c[0]) for c in actual]

    lexnlp_tests.test_extraction_func_on_test_data(
        get_geoentities,
        geo_config_list=_CONFIG,
        prepared_alias_black_list=prepared_alias_blacklist,
        actual_data_converter=entity_names,
        debug_print=True,
        start_from_csv_line=6,
    )
""" from typing import List, Tuple, Union, Dict, Generator, Any from lexnlp.config.en import geoentities_config from lexnlp.extract.en.dict_entities import find_dict_entities, conflicts_take_first_by_id, \ prepare_alias_blacklist_dict, conflicts_top_by_priority __author__ = "ContraxSuite, LLC; LexPredict, LLC" __copyright__ = "Copyright 2015-2019, ContraxSuite, LLC" __license__ = "https://github.com/LexPredict/lexpredict-lexnlp/blob/master/LICENSE" __version__ = "0.2.5" __maintainer__ = "LexPredict, LLC" __email__ = "*****@*****.**" _ALIAS_BLACK_LIST_PREPARED = prepare_alias_blacklist_dict( geoentities_config.ALIAS_BLACK_LIST) def get_geoentities( text: str, geo_config_list: List[Tuple[int, str, List[Tuple[str, str, bool, int]]]], priority: bool = False, priority_by_id: bool = False, text_languages: List[str] = None, min_alias_len: int = geoentities_config.MIN_ALIAS_LEN, prepared_alias_black_list: Union[None, Dict[str, Tuple[ List[str], List[str]]]] = _ALIAS_BLACK_LIST_PREPARED ) -> Generator[Tuple[Tuple, Tuple], Any, Any]: """ Searches for geo entities from the provided config list and yields pairs of (entity, alias). Entity is: (entity_id, name, [list of aliases])