def load_entities_dict():
    """Load geo entity configs and their aliases from the test-data CSV files.

    Reads ``geoentities.csv`` and ``geoaliases.csv`` from the parent directory
    of ``lexnlp_tests.this_test_data_path()``. One entity config is built per
    row of the entities file (keyed by its ``id`` column); each alias row is
    then attached to the entity named by its ``entity_id`` column. Alias rows
    that reference an unknown entity are skipped.

    Returns:
        The ``dict.values()`` view over the created entity configs.
    """
    data_dir = os.path.dirname(lexnlp_tests.this_test_data_path())
    entities_fn = os.path.join(data_dir, 'geoentities.csv')
    aliases_fn = os.path.join(data_dir, 'geoaliases.csv')

    entities = {}

    # newline='' lets the csv module do its own line-ending handling, so
    # newlines embedded in quoted fields are read back unchanged.
    with open(entities_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            # An empty priority cell is treated as priority 0.
            priority = int(row['priority']) if row['priority'] else 0
            entities[row['id']] = entity_config(
                row['id'], row['name'], priority, name_is_alias=True)

    with open(aliases_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            entity = entities.get(row['entity_id'])
            if not entity:
                continue
            alias_type = row['type']
            # Alias types starting with "iso", or exactly "abbreviation",
            # are passed on as abbreviations.
            is_abbreviation = (alias_type.startswith('iso')
                               or alias_type == 'abbreviation')
            add_aliases_to_entity(
                entity, row['alias'], row['locale'], is_abbreviation)

    return entities.values()
def cache_geo_config():
    """Build the geo entity configuration from the database and cache it.

    Creates one entity config per ``GeoEntity`` row (keyed by primary key),
    attaches every ``GeoAlias`` row to the entity it references, and stores
    the resulting list of entity configs via ``DbCache.put_to_db`` under
    ``CACHE_KEY_GEO_CONFIG``. Nothing is returned.
    """
    geo_config = {}
    for name, pk, priority in GeoEntity.objects.values_list('name', 'pk', 'priority'):
        # A missing (falsy) priority is stored as 0.
        geo_config[pk] = dict_entities.entity_config(
            pk, name, priority or 0, name_is_alias=True)

    alias_rows = GeoAlias.objects.values_list('pk', 'alias', 'type', 'entity', 'locale')
    for alias_id, alias_text, alias_type, entity_id, alias_lang in alias_rows:
        # Use .get() so that an alias pointing at an entity which was not
        # loaded above is skipped instead of raising KeyError; the original
        # truthiness guard could never be reached for a missing key.
        entity = geo_config.get(entity_id)
        if not entity:
            continue
        # Alias types starting with "iso" or "abbrev" are flagged as abbreviations.
        is_abbrev = alias_type.startswith(('iso', 'abbrev'))
        dict_entities.add_aliases_to_entity(entity,
                                            aliases_csv=alias_text,
                                            language=alias_lang,
                                            is_abbreviation=is_abbrev,
                                            alias_id=alias_id)

    res = list(geo_config.values())
    DbCache.put_to_db(CACHE_KEY_GEO_CONFIG, res)
def load_entities_dict_by_path(entities_fn: str, aliases_fn: str):
    """Load entity configs and their aliases from two CSV files.

    Args:
        entities_fn: Path to a UTF-8 CSV file with ``id``, ``name`` and
            ``priority`` columns; one entity config is created per row.
        aliases_fn: Path to a UTF-8 CSV file with ``entity_id``, ``alias``,
            ``locale`` and ``type`` columns; each row is attached to the
            entity whose ``id`` equals its ``entity_id``. Rows that reference
            an unknown entity are skipped.

    Returns:
        The ``dict.values()`` view over the created entity configs.
    """
    import csv

    entities = {}

    # newline='' lets the csv module do its own line-ending handling, so
    # newlines embedded in quoted fields are read back unchanged.
    with open(entities_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            # An empty priority cell is treated as priority 0.
            priority = int(row['priority']) if row['priority'] else 0
            entities[row['id']] = entity_config(
                row['id'], row['name'], priority, name_is_alias=True)

    with open(aliases_fn, 'r', encoding='utf8', newline='') as f:
        for row in csv.DictReader(f):
            entity = entities.get(row['entity_id'])
            if not entity:
                continue
            alias_type = row['type']
            # Alias types starting with "iso", or exactly "abbreviation",
            # are passed on as abbreviations.
            is_abbreviation = (alias_type.startswith('iso')
                               or alias_type == 'abbreviation')
            add_aliases_to_entity(
                entity, row['alias'], row['locale'], is_abbreviation)

    return entities.values()