def migrate_entity_map(app_path, old_entity_map_path):
    """Populate gazetteer.txt files for each entity using an old entity map.

    For every entry under the ``'entities'`` key of the old entity map this
    function:

    1. creates the entity folder when the entity type is not already known
       to the app,
    2. writes an empty ``mapping.json`` (``[]``) if none exists yet,
    3. loads the existing ``entity-data/<entity>-entities.tsv`` data, falling
       back to an empty dict when the file cannot be read,
    4. adds every synonym found in the old ``map`` / ``text-map`` /
       ``clause-map`` fields that is not already present, giving it the
       smallest value already in the data (or ``1.0`` when there is none),
    5. dumps the merged data to the entity's gazetteer text file.

    Args:
        app_path: Root directory of the application being migrated.
        old_entity_map_path: Path to the old-style entity map JSON file. It
            must contain an ``'entities'`` list whose items each have an
            ``'entity-name'`` key.
    """
    configure_logs(format='%(asctime)-15s: %(message)s')

    # Load old entity map
    old_entity_map = load_json_file(old_entity_map_path)
    entity_types = path.get_entity_types(app_path)

    # Fields which existed in an old style entity map.
    map_fields = ('map', 'text-map', 'clause-map')

    for e in old_entity_map['entities']:
        entity_type = e['entity-name']

        if entity_type not in entity_types:
            logging.info('Creating entity folder for %r', entity_type)
            # exist_ok=True: `entity_types` is not refreshed inside this loop,
            # so a second entry with the same name must not crash on a folder
            # that the first entry just created. makedirs also creates any
            # missing parent directories.
            # NOTE(review): relpath is kept from the original implementation;
            # confirm whether a relative path is actually required here.
            os.makedirs(
                os.path.relpath(path.get_entity_folder(app_path, entity_type)),
                exist_ok=True,
            )

        mapping_path = path.get_entity_map_path(app_path, entity_type)
        if not os.path.exists(mapping_path):
            # Create an empty mapping.json for this entity
            dump_json_file(mapping_path, [])

        gaz_txt_path = path.get_entity_gaz_path(app_path, entity_type)

        # Format only the file name: formatting the whole joined path would
        # misinterpret any '{' or '}' that happens to appear in app_path.
        entity_data_path = os.path.join(
            app_path, 'entity-data', '{}-entities.tsv'.format(entity_type))

        try:
            entity_data = load_gazetteer_txt(entity_data_path)
        except OSError:
            # No readable legacy data file for this entity; start from scratch.
            entity_data = {}

        # Value assigned to synonyms that are new to the data.
        # presumably the values are popularity scores; verify against
        # load_gazetteer_txt
        min_pop = min(entity_data.values()) if entity_data else 1.0

        for field in map_fields:
            try:
                entity_mappings = e[field]
            except KeyError:
                # no map of this type for this entity
                continue

            # TODO: create a new mapping.json from this info
            # This would only be possible when there are no commas and we have no
            # (the original TODO text ends here, mid-sentence)
            for synonym in entity_mappings:
                # Keep the existing value when the synonym is already known.
                entity_data.setdefault(synonym, min_pop)

        dump_gazetteer_txt(gaz_txt_path, entity_data)
def test_get_entity_types():
    """The app at APP_PATH exposes exactly one entity type, ``store_name``."""
    discovered = path.get_entity_types(APP_PATH)

    # Exactly one entity type is expected ...
    type_count = len(discovered)
    assert type_count == 1

    # ... and it must be the store name entity.
    has_store_name = "store_name" in discovered
    assert has_store_name