示例#1
0
def migrate_entity_map(app_path, old_entity_map_path):
    """Populates gazetteer.txt files for each entity using an old entity map.

    For every entry under the ``'entities'`` key of the old entity map this:

    1. creates the entity folder when the entity type is not already known,
    2. creates an empty ``mapping.json`` when none exists,
    3. loads any existing ``entity-data/<entity>-entities.tsv`` data,
    4. adds every synonym found in the old-style map fields, and
    5. hands the merged data to ``dump_gazetteer_txt``.

    Args:
        app_path: Root path of the application; forwarded to the ``path``
            helpers and used to locate the ``entity-data`` folder.
        old_entity_map_path: Path of the old entity map JSON file.
    """
    configure_logs(format='%(asctime)-15s: %(message)s')

    # Load old entity map
    old_entity_map = load_json_file(old_entity_map_path)

    entity_types = path.get_entity_types(app_path)

    # fields which existed in an old style entity map
    map_fields = ('map', 'text-map', 'clause-map')

    for entity in old_entity_map['entities']:
        entity_type = entity['entity-name']

        if entity_type not in entity_types:
            logging.info('Creating entity folder for %r', entity_type)
            # ``entity_types`` is a snapshot taken before the loop, so an
            # entity listed twice in the old map would reach this branch
            # twice. ``exist_ok`` keeps the second pass from raising, and
            # ``makedirs`` also creates a missing parent directory.
            os.makedirs(
                os.path.relpath(path.get_entity_folder(app_path, entity_type)),
                exist_ok=True)

        mapping_path = path.get_entity_map_path(app_path, entity_type)
        if not os.path.exists(mapping_path):
            # Create an empty mapping.json for this entity
            dump_json_file(mapping_path, [])

        gaz_txt_path = path.get_entity_gaz_path(app_path, entity_type)
        # Format only the file name: formatting the joined path would choke
        # on (or mis-substitute) any '{' / '}' contained in ``app_path``.
        entity_data_path = os.path.join(
            app_path, 'entity-data', '{}-entities.tsv'.format(entity_type))

        try:
            entity_data = load_gazetteer_txt(entity_data_path)
        except OSError:
            # No readable legacy data for this entity; start from scratch.
            entity_data = {}

        # Synonyms that are not in the existing data get the smallest value
        # already present, or 1.0 when there is no existing data.
        # NOTE(review): values are presumably popularity scores -- confirm.
        min_pop = min(entity_data.values()) if entity_data else 1.0

        for field in map_fields:
            if field not in entity:
                # no map of this type for this entity
                continue
            # TODO: create a new mapping.json from this info.
            # (The original note was left unfinished; it said this would only
            # be possible when there are no commas.)
            for synonym in entity[field]:
                # Keep an existing value; only new synonyms receive min_pop.
                entity_data.setdefault(synonym, min_pop)

        dump_gazetteer_txt(gaz_txt_path, entity_data)
示例#2
0
def test_get_entity_types():
    """Check that exactly one entity type, ``store_name``, is reported for APP_PATH."""
    discovered = path.get_entity_types(APP_PATH)
    expected_count = 1
    assert len(discovered) == expected_count
    assert "store_name" in discovered