示例#1
0
    def before_index(self, data_dict):
        """Add derived label/id fields to a dataset dict before indexing.

        Depending on which keys are present in ``data_dict`` this adds:

        * ``res_format_label``: schema labels of every ``res_format`` value;
        * ``frequency_id`` / ``frequency_label``: built from the JSON-encoded
          ``frequency`` value (an object with ``type`` and ``value`` keys);
        * ``theme`` (rewritten as a list), ``theme_id``, ``theme_es`` and
          ``theme_gl``: built from the serialized list stored in ``theme``.

        :param data_dict: dataset dict about to be indexed; modified in place
        :returns: the same ``data_dict``
        """
        dataset = sh.scheming_get_schema('dataset', 'dataset')

        if 'res_format' in data_dict:
            format_field = sh.scheming_field_by_name(
                dataset.get('resource_fields'), 'format')
            # Keep only the formats that have a label in the schema.
            data_dict['res_format_label'] = []
            for res_format in data_dict['res_format']:
                res_format_label = sh.scheming_choices_label(
                    format_field['choices'], res_format)
                if res_format_label:
                    data_dict['res_format_label'].append(res_format_label)

        if 'frequency' in data_dict:
            frequency = data_dict['frequency']
            if frequency:
                freq = json.loads(frequency)
                ftype = freq['type']
                fvalue = freq['value']
                data_dict['frequency_id'] = '{value}-{type}'.format(
                    type=ftype, value=fvalue)
                data_dict['frequency_label'] = (
                    helpers.csc_dataset_display_frequency(fvalue, ftype))

        if 'theme' in data_dict:
            theme_field = sh.scheming_field_by_name(
                dataset.get('dataset_fields'), 'theme')
            raw_theme = data_dict['theme']

            if isinstance(raw_theme, (list, tuple)):
                # Already a sequence of keys: nothing to parse.
                theme_keys = list(raw_theme)
            else:
                # Prefer a real JSON parse: it copes with '["a","b"]' (no
                # space after the comma) and with an empty list, both of
                # which the string-splitting fallback gets wrong.
                try:
                    parsed = json.loads(raw_theme)
                except (TypeError, ValueError):
                    parsed = None
                if isinstance(parsed, list):
                    theme_keys = parsed
                else:
                    # Legacy parser for values that are not a JSON list.
                    stripped = raw_theme.replace('[', '').replace(']', '')
                    theme_keys = [part.replace('"', '')
                                  for part in stripped.split('", "')]

            data_dict['theme'] = []
            data_dict['theme_id'] = []
            data_dict['theme_es'] = []
            data_dict['theme_gl'] = []

            for theme_key in theme_keys:
                data_dict['theme'].append(theme_key)
                data_dict['theme_id'].append(helpers.csc_theme_id(theme_key))
                # Copy the per-language labels of the matching schema choice.
                for option in theme_field.get('choices'):
                    if option['value'] == theme_key:
                        data_dict['theme_es'].append(option['label']['es'])
                        data_dict['theme_gl'].append(option['label']['gl'])
        return data_dict
示例#2
0
def spc_thematic_area_list(context, data_dict):
    """Return the choices of the ``thematic_area_string`` dataset field.

    Access is checked first through ``tk.check_access`` using the
    ``spc_thematic_area_list`` auth function name.
    """
    tk.check_access('spc_thematic_area_list', context, data_dict)
    dataset_schema = scheming_helpers.scheming_get_dataset_schema('dataset')
    thematic_field = scheming_helpers.scheming_field_by_name(
        dataset_schema['dataset_fields'], 'thematic_area_string')
    return scheming_helpers.scheming_field_choices(thematic_field)
示例#3
0
def _dge_harvest_list_dataset_field_labels(name_field=None, value_field=None):
    '''
    Returns the available values that the given dataset name_field may have to the given value_field
    '''
    result = {}
    if name_field is not None:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        values = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                           name_field) or []
        if values and values['choices']:
            for option in values['choices']:
                if option and option['value']:
                    if value_field:
                        if option['value'] == value_field:
                            return {
                                option.get('value'): {
                                    'label': option.get('label'),
                                    'description': option.get('description'),
                                    'dcat_ap': option.get('dcat_ap'),
                                    'notation': option.get('notation')
                                }
                            }
                    else:
                        result[option.get('value')] = {
                            'label': option.get('label'),
                            'description': option.get('description'),
                            'dcat_ap': option.get('dcat_ap'),
                            'notation': option.get('notation')
                        }
    return result
示例#4
0
def has_published_date_field_in_schema(dataset_type):
    """Tell whether the schema of ``dataset_type`` has a ``published_date`` field.

    :param dataset_type: dataset type name used to look up the schema
    :returns: ``True`` when the field exists; ``False`` when the type is
        empty, no schema is found for it, or the field is absent
    """
    if not dataset_type:
        return False
    dataset_scheme = h.scheming_get_schema('dataset', dataset_type)
    # The lookup may yield nothing for an unknown type; treat it as "no field".
    if not dataset_scheme:
        return False
    fields = dataset_scheme.get('dataset_fields')
    if not fields:
        return False
    return bool(h.scheming_field_by_name(fields, "published_date"))
def datawa_scheming_select_options(field_name):
    """Return a ``{value: label}`` mapping of a dataset field's choices.

    Errors raised while looking up the field or its choices (for example
    when the field does not exist or declares no ``choices``) propagate to
    the caller unchanged.

    :param field_name: name of the field in the ``dataset`` schema
    """
    schema = sh.scheming_get_dataset_schema("dataset")
    choices = sh.scheming_field_by_name(
        schema["dataset_fields"], field_name)["choices"]
    return {choice["value"]: choice["label"] for choice in choices}
示例#6
0
def get_choice_label(name, value, is_resource=False):
    """Return the label of ``value`` among the choices of a schema field.

    The field is looked up in the ``deposited-dataset`` schema.

    :param name: field name
    :param value: choice value to translate
    :param is_resource: look in ``resource_fields`` instead of
        ``dataset_fields``
    :returns: the label of the matching choice, or ``value`` itself when the
        field is unknown or none of its choices matches
    """
    schema = scheming_get_dataset_schema('deposited-dataset')
    fields = schema['resource_fields'] if is_resource else schema[
        'dataset_fields']
    field = scheming_field_by_name(fields, name)
    if not field:
        # Unknown field: fall back to the raw value instead of crashing.
        return value
    for choice in field.get('choices', []):
        if choice.get('value') == value:
            return choice.get('label')
    return value
def datawa_scheming_select_options(field_name):
    """Return a ``{value: label}`` mapping of a dataset field's choices.

    Errors raised while looking up the field or its choices (for example
    when the field does not exist or declares no ``choices``) propagate to
    the caller unchanged.

    :param field_name: name of the field in the ``dataset`` schema
    """
    schema = sh.scheming_get_dataset_schema('dataset')
    choices = sh.scheming_field_by_name(
        schema['dataset_fields'], field_name)['choices']
    return {choice['value']: choice['label'] for choice in choices}
示例#8
0
def get_field_label(name, is_resource=False):
    """Return the label of a field of the ``deposited-dataset`` schema.

    :param name: field name
    :param is_resource: look in ``resource_fields`` instead of
        ``dataset_fields``
    :returns: the field's ``label`` (or ``name`` when it has none); ``None``
        after logging a warning when the field is not found
    """
    schema = scheming_get_dataset_schema('deposited-dataset')
    if is_resource:
        fields = schema['resource_fields']
    else:
        fields = schema['dataset_fields']

    field = scheming_field_by_name(fields, name)
    if not field:
        log.warning(
            'Could not get field {} from deposited-dataset schema'.format(
                name))
        return None
    return field.get('label', name)
示例#9
0
def get_data_container_choice_label(name, value):
    """Return the label of ``value`` for a ``data-container`` schema field.

    :param name: field name in the organization schema
    :param value: choice value to translate
    :returns: label of the first matching choice, ``value`` when no choice
        matches, or ``None`` (after a warning) when the field is unknown
    """
    fields = scheming_get_organization_schema('data-container')['fields']
    field = scheming_field_by_name(fields, name)
    if not field:
        log.warning(
            'Could not get field {} from data-container schema'.format(name))
        return None

    matching_labels = (option.get('label')
                       for option in field.get('choices', [])
                       if option.get('value') == value)
    # Fall back to the raw value when no choice matches.
    return next(matching_labels, value)
示例#10
0
 def validator(key, data, errors, context):
     """Require a value when the field named by ``required_if_value_in`` has one.

     ``field`` and ``schema`` come from the enclosing scope.

     :returns: the value stored under ``key``
     :raises Invalid: when this value is empty but the other field is set
     """
     value = data.get(key)
     try:
         other_value = data[(field.get('required_if_value_in'),)]
     except KeyError:
         # The other field is not part of the submitted data.
         other_value = ""

     if value and value is not missing:
         return value
     if not other_value or other_value is missing:
         # Neither field is set, so nothing is required.
         return value
     raise Invalid(_('Required since "%s" is defined.') % sh.scheming_field_by_name(schema['dataset_fields'], field.get('required_if_value_in'))['label'])
示例#11
0
def _get_classfication_field(dataset_type):
    """Return the ``classification`` resource field of a dataset schema.

    :param dataset_type: dataset type name used to look up the schema
    :returns: the field definition, or ``None`` when the schema, its
        ``resource_fields`` or the field itself is missing
    """
    scheme = h.scheming_get_schema('dataset', dataset_type)
    resource_fields = scheme.get('resource_fields') if scheme else None
    if not resource_fields:
        return None
    # Normalise any falsy lookup result to None.
    return h.scheming_field_by_name(resource_fields, "classification") or None
def _map_gdl_to_publication(data_dict, obj):
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, str(data_dict['id']))),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [a['name'] for a in data_dict['authors']],
        # "subject": data_dict,
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        # "contributor": [a['name'] for a in data_dict['authors']],
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        # "publication_type": data_dict,
        # "format": data_dict,
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        # "language": data_dict,
        # "relation": data_dict,
        # "spatial": data_dict,
        # "rights": data_dict,
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }
    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        dataset["thematic_area_string"] = thematic_area_mapping.get(
            thematic_area)
    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        choices = sh.scheming_field_by_name(schema['dataset_fields'],
                                            'member_countries')['choices']
        member_country = F.first(
            F.filter(
                F.compose(F.rpartial(contains, related_country),
                          itemgetter('label')), choices))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']
    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + str(
            data_dict['id'])
        res = {'name': data_dict['file'], 'url': res_url}
        res['format'] = splitext(res['name'])[1].lstrip('.')
        dataset['resources'] = [res]

    return dataset
示例#13
0
def get_choice_label(name, value, is_resource=False):
    """Return the label of ``value`` among the choices of a schema field.

    The field is looked up in the ``deposited-dataset`` schema.

    :param name: field name
    :param value: choice value to translate
    :param is_resource: look in ``resource_fields`` instead of
        ``dataset_fields``
    :returns: label of the first matching choice, ``value`` when no choice
        matches, or ``None`` (after a warning) when the field is unknown
    """
    schema = scheming_get_dataset_schema('deposited-dataset')
    section = 'resource_fields' if is_resource else 'dataset_fields'
    field = scheming_field_by_name(schema[section], name)
    if not field:
        log.warning(
            'Could not get field {} from deposited-dataset schema'.format(
                name))
        return None

    for option in field.get('choices', []):
        if option.get('value') == value:
            label = option.get('label')
            break
    else:
        # No choice matched: hand back the raw value.
        label = value
    return label
示例#14
0
def _csc_dcat_list_resource_field_values(name_field=None):
    '''
    Returns the available values that the given resource name_field may have
    '''
    result = []
    if name_field is not None:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        values = sh.scheming_field_by_name(dataset.get('resource_fields'),
                name_field) or []
        if values and values['choices']:
            for option in values['choices']:
                if option and option['value']:
                    result.append(option['value'])
    return result
示例#15
0
def dge_list_themes(themes=None):
    """Translate a list of theme values into ``(theme id, label)`` pairs.

    :param themes: theme values; ``None`` or an empty list yields ``[]``
    :type themes: string list

    :returns: one ``(dge_theme_id(theme), label)`` tuple per theme that has
        a label in the ``theme`` dataset field; themes without a label are
        skipped
    :rtype: (string, string) list
    """
    # The default argument is None, which cannot be iterated.
    if not themes:
        return []
    dataset = sh.scheming_get_schema('dataset', 'dataset')
    theme_field = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                            'theme')
    label_list = []
    for theme in themes:
        label = sh.scheming_choices_label(theme_field['choices'], theme)
        if label:
            label_list.append((dge_theme_id(theme), label))
    return label_list
示例#16
0
def dge_resource_format_label(res_format=None):
    """Return the schema label of a resource format.

    :param res_format: format value
    :type res_format: string

    :returns: the label declared for the format in the ``format`` resource
        field, or ``res_format`` unchanged when it is empty or has no label
    :rtype: string
    """
    # Test the argument, not the ``format`` builtin (which is always truthy).
    if res_format:
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        formats = sh.scheming_field_by_name(dataset.get('resource_fields'),
                                            'format')
        res_format_label = sh.scheming_choices_label(formats['choices'],
                                                     res_format)
        if res_format_label:
            return res_format_label
    return res_format
    def after_search(self, search_results, search_params):
        """Replace facet item display names with schema choice labels.

        The schema is chosen from the type of the first result. A facet is
        translated only when the matching dataset field (facet title without
        ``_facet``) declares ``choices`` or a ``choices_helper``.

        :param search_results: search result dict; facet items are updated
            in place
        :param search_params: unused
        :returns: ``search_results``
        """
        facets = search_results.get('search_facets')
        results = search_results.get('results')
        if not facets or not results:
            return search_results

        schema = scheming_helpers.scheming_get_dataset_schema(
            results[0]['type'])
        if not schema:
            # No schema for this dataset type: nothing to translate.
            return search_results
        dataset_fields = schema['dataset_fields']

        for facet in facets.values():
            items = facet['items']
            if not items:
                continue
            # The field and its choices are the same for every item of the
            # facet, so resolve them once per facet.
            field_name = facet['title'].replace('_facet', '')
            field = scheming_helpers.scheming_field_by_name(
                dataset_fields, field_name)
            if not field or not (field.get('choices')
                                 or field.get('choices_helper')):
                continue
            choices = scheming_helpers.scheming_field_choices(field)
            for item in items:
                item['display_name'] = scheming_helpers.scheming_choices_label(
                    choices, item['name'])

        return search_results
    def _extract_additional_fields(self, content, package_dict):
        package_dict['thematic_area_string'] = self.topic

        if not package_dict.get('license_id'):
            package_dict['license_id'] = 'notspecified'

        skip_keys = {'set_spec', 'description'}

        for key, value in content.items():
            if key in package_dict or key in skip_keys:
                continue
            if key == 'type':
                key = 'publication_type'
            package_dict[key] = value

        package_dict.pop('extras', None)
        package_dict['type'] = 'publications'
        package_dict.pop('maintainer_email', None)

        coverage = package_dict.pop('coverage', None)
        if coverage:
            schema = scheming_get_dataset_schema('publications')
            field = scheming_field_by_name(schema['dataset_fields'],
                                           'member_countries')
            choices = scheming_field_choices(field)
            package_dict['member_countries'] = [
                choice['value']
                for choice in choices if choice['label'] in coverage
            ] or ['other']
            polygons = [
                t['geometry'] for t in eez.collection
                if any(country in t['properties']['GeoName']
                       for country in coverage)
            ]
            # TODO: for now we are taking first polygon from possible
            # list because of SOLR restriction of spatial field
            # size. In future we may add additional logic here
            if polygons:
                package_dict['coverage'] = json.dumps(polygons[0])

        return package_dict
示例#19
0
    def after_search(self, search_results, search_params):
        """Translate the ``theme_id`` and ``administration_level`` facets.

        Nothing is changed outside the frontend or when the results carry no
        facets. Theme labels are taken from the ``theme`` dataset field in
        the request language, falling back to the ``ckan.locale_default``
        setting (default ``es``).

        :param search_results: search result dict; facet items are updated
            in place
        :param search_params: unused
        :returns: ``search_results``
        """
        if not is_frontend():
            return search_results

        # Translate the unselected search facets.
        facets = search_results.get('search_facets')
        if not facets:
            return search_results

        desired_lang_code = pylons.request.environ['CKAN_LANG']
        fallback_lang_code = pylons.config.get('ckan.locale_default', 'es')

        # Build a theme id -> translated label map from the schema choices.
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        categoria = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                              'theme')
        dict_categoria = {}
        for option in categoria.get('choices'):
            labels = option.get('label')
            label_option = labels.get(desired_lang_code, None)
            if not label_option:
                label_option = labels.get(fallback_lang_code, None)
            dict_categoria[helpers.dge_theme_id(
                option.get('value'))] = label_option

        facet = facets.get('theme_id', None)
        if facet:
            # A facet without an ``items`` list has nothing to translate.
            for item in facet.get('items') or []:
                item['display_name'] = dict_categoria.get(
                    item.get('name'), item.get('display_name'))
                item['class'] = item.get('name')

        facet = facets.get('administration_level', None)
        if facet:
            for item in facet.get('items') or []:
                item['display_name'] = (
                    helpers.dge_get_translated_administration_level(
                        item.get('name')))
        return search_results
示例#20
0
    def after_search(self, search_results, search_params):
        """Translate the ``theme_id`` and ``frequency_id`` facets.

        Theme labels are taken from the ``theme`` dataset field in the
        request language, falling back to the ``ckan.locale_default``
        setting (default ``es``). Frequency facet names are split on ``-``
        and rendered with ``helpers.csc_dataset_display_frequency``.

        :param search_results: search result dict; facet items are updated
            in place
        :param search_params: unused
        :returns: ``search_results``
        """
        # Translate the unselected search facets.
        facets = search_results.get('search_facets')
        if not facets:
            return search_results

        desired_lang_code = request.environ.get('CKAN_LANG')
        fallback_lang_code = config.get('ckan.locale_default', 'es')

        # Build a theme id -> translated label map from the schema choices.
        dataset = sh.scheming_get_schema('dataset', 'dataset')
        categoria = sh.scheming_field_by_name(dataset.get('dataset_fields'),
                                              'theme')
        dict_categoria = {}
        for option in categoria.get('choices'):
            labels = option.get('label')
            label_option = labels.get(desired_lang_code, None)
            if not label_option:
                label_option = labels.get(fallback_lang_code, None)
            dict_categoria[helpers.csc_theme_id(
                option.get('value'))] = label_option

        facet = facets.get('theme_id', None)
        if facet:
            # A facet without an ``items`` list has nothing to translate.
            for item in facet.get('items') or []:
                item['display_name'] = dict_categoria.get(
                    item.get('name'), item.get('display_name'))
                item['class'] = item.get('name')

        facet = facets.get('frequency_id', None)
        if facet:
            for item in facet.get('items') or []:
                parts = item.get('name', '').split('-')
                if len(parts) < 2:
                    # Name is not in two-part form: keep its display name.
                    continue
                item['display_name'] = helpers.csc_dataset_display_frequency(
                    parts[0], parts[1])
        return search_results
示例#21
0
def dge_list_reduce_resource_format_label(resources=None, field_name='format'):
    """Return the schema labels of the formats used by a list of resources.

    :param resources: resource dicts
    :type resources: dict list

    :param field_name: resource key passed to ``h.dict_list_reduce`` to
        collect the format values
    :type field_name: string

    :returns: labels of the collected formats, skipping those without a
        label in the ``format`` resource field
    :rtype: string list
    """
    collected_formats = h.dict_list_reduce(resources, field_name)
    schema = sh.scheming_get_schema('dataset', 'dataset')
    format_field = sh.scheming_field_by_name(schema.get('resource_fields'),
                                             'format')
    candidate_labels = (
        sh.scheming_choices_label(format_field['choices'], fmt)
        for fmt in collected_formats)
    return [label for label in candidate_labels if label]
示例#22
0
    def import_stage(self, harvest_object):
        '''
        The import stage will receive a HarvestObject object and will be
        responsible for:
            - performing any necessary action with the fetched object (e.g
              create a CKAN package).
              Note: if this stage creates or updates a package, a reference
              to the package must be added to the HarvestObject.
              Additionally, the HarvestObject must be flagged as current.
            - creating the HarvestObject - Package relation (if necessary)
            - creating and storing any suitable HarvestObjectErrors that may
              occur.
            - returning True if everything went as expected, False otherwise.

        :param harvest_object: HarvestObject object
        :returns: True if everything went right, False if errors were found
        '''
        # Validate the argument before touching any of its attributes.
        if not harvest_object:
            logger.error('No harvest object received')
            self._save_object_error('No harvest object received')
            return False
        logger.debug("in import stage: %s" % harvest_object.guid)
        try:
            self._set_config(harvest_object.job.source.config)

            package_dict = json.loads(harvest_object.content)
            data_dict = {}
            data_dict['id'] = package_dict['id']
            data_dict['title'] = package_dict['title']
            data_dict['name'] = munge_title_to_name(package_dict['name'])

            data_dict['notes'] = markdown_extract(
                package_dict.get('description'))

            tags = package_dict.get('keyword', [])
            data_dict['tag_string'] = ', '.join(
                [munge_tag(tag) for tag in tags])

            data_dict['private'] = False

            # The license id is the last path segment of the license value.
            license_id = package_dict.get('license',
                                          'cc-by').strip('/').split('/')[-1]

            if license_id == 'de2a56f5-a565-481a-8589-406dc40b5588':
                license_id = 'sprep-public-license'
            data_dict['license_id'] = license_id or 'notspecified'

            data_dict['created'] = _parse_drupal_date(package_dict['issued'])
            data_dict['modified'] = _parse_drupal_date(
                package_dict['modified'])

            c_point, c_email = package_dict['contactPoint'][
                'fn'], package_dict['contactPoint']['hasEmail'].split(':')[-1]
            if c_email != '*****@*****.**':
                data_dict['contact_uri'] = c_point
                data_dict['contact_email'] = c_email
            data_dict['resources'] = []
            for res in package_dict.get('distribution', []):
                res['url'] = res.get('downloadURL') or res.get('accessURL')
                res['name'] = res['title']
                res['description'] = markdown_extract(res.get('description'))
                data_dict['resources'].append(res)

            if 'spatial' in package_dict:
                data_dict['spatial'] = package_dict.pop('spatial')

                # Turn the matched coordinate pairs into a polygon and keep
                # it only if it is a valid, closed shape.
                try:
                    geometry = {
                        "type":
                        "Polygon",
                        "coordinates":
                        [[[float(c) for c in pair.split()]
                          for pair in RE_SPATIAL.match(
                              data_dict['spatial']).group(1).split(', ')]]
                    }
                    shape = shapely.geometry.asShape(geometry)
                    if shape.is_valid and shape.is_closed:
                        data_dict['spatial'] = json.dumps(geometry)
                    else:
                        del data_dict['spatial']

                except KeyError:
                    pass
                except (AttributeError, ValueError):
                    # Unparseable spatial value: drop it.
                    del data_dict['spatial']

            # add owner_org
            source_dataset = get_action('package_show')(
                {
                    'ignore_auth': True
                }, {
                    'id': harvest_object.source.id
                })

            owner_org = source_dataset.get('owner_org')
            data_dict['owner_org'] = owner_org
            data_dict['member_countries'] = country_mapping[None]
            if 'isPartOf' in package_dict:
                country = package_dict['isPartOf'].split('.')[0]
                data_dict['member_countries'] = country_mapping.get(
                    country, country_mapping[None])
                # Prefer the "<country>-data" organization when it exists.
                org = model.Session.query(
                    model.Group).filter_by(name=country + '-data').first()
                if org:
                    data_dict['owner_org'] = org.id

            # NOTE(review): 'spatial' was popped from package_dict above, so
            # this first branch looks unreachable and the country extent
            # below replaces any harvested geometry -- confirm the intent.
            if 'spatial' in package_dict:
                data_dict['spatial'] = package_dict['spatial']
                try:
                    data_dict['spatial'] = json.dumps({
                        "type":
                        "Polygon",
                        "coordinates":
                        [[[float(c) for c in pair.split()]
                          for pair in RE_SPATIAL.match(
                              data_dict['spatial']).group(1).split(', ')]]
                    })
                except KeyError:
                    pass
            else:
                schema = sh.scheming_get_dataset_schema('dataset')
                choices = sh.scheming_field_by_name(
                    schema['dataset_fields'], 'member_countries')['choices']
                member_country = sh.scheming_choices_label(
                    choices, data_dict['member_countries'])
                if member_country:
                    spatial = get_extent_for_country(member_country)
                    if spatial:
                        data_dict['spatial'] = spatial['value']

            data_dict['source'] = package_dict.get('landingPage')

            data_dict['theme'] = package_dict.get('theme', [])

            data_dict['thematic_area_string'] = _map_theme_to_topic(
                data_dict['theme'])

            data_dict['harvest_source'] = 'SPREP'

            self._create_or_update_package(data_dict, harvest_object,
                                           'package_show')

            Session.commit()
            stored_package = get_action('package_show')({
                'ignore_auth': True
            }, {
                'id': data_dict['id']
            })
            for res in stored_package.get('resources', []):
                get_action('resource_create_default_resource_views')(
                    {
                        'ignore_auth': True
                    }, {
                        'package': stored_package,
                        'resource': res
                    })

            logger.debug("Finished record")
        except Exception:
            # Any failure is logged and recorded against the harvest object;
            # SystemExit / KeyboardInterrupt are left to propagate.
            logger.exception('Something went wrong!')
            self._save_object_error('Exception in import stage',
                                    harvest_object)
            return False
        return True
示例#23
0
def dge_harvest_catalog_show(context, data_dict):
    """Serialize the catalog to RDF or CSV and dump it to a file.

    Pages through ``_dge_harvest_search_ckan_datasets`` until every dataset
    reported by the search (or ``limit`` of them, when a limit is given) has
    been collected, serializes the datasets according to
    ``data_dict['format']`` and writes the result to the file path
    configured for that format.

    :param context: CKAN action context, forwarded to the access check and
        to the dataset search.
    :param data_dict: request parameters. ``format`` selects the output
        format, ``limit`` (default ``-1``, meaning no limit) stops the
        pagination once that many datasets have been collected, and
        ``page`` is overwritten while paginating.
    :returns: the serialized catalog, or ``None`` when nothing was
        serialized or an error occurred (errors are logged, not raised).
    """
    method_log_prefix = '[%s][dge_harvest_catalog_show]' % __name__
    output = None
    try:
        log.debug('%s Init method. Inputs context=%s, data_dict=%s',
                  method_log_prefix, context, data_dict)
        ini = datetime.datetime.now()
        toolkit.check_access('dge_harvest_catalog_show', context, data_dict)

        page = 1
        data_dict['page'] = page
        limit = data_dict.get('limit', -1)
        _format = data_dict.get('format')
        # Only the CSV export needs a columns definition file.
        columnsfilepath = None
        if _format == RDF_FORMAT:
            filepath = config.get('ckanext.dge_harvest.rdf.filepath',
                                  '/tmp/catalog.rdf')
        elif _format == CSV_FORMAT:
            filepath = config.get('ckanext.dge_harvest.csv.filepath',
                                  '/tmp/catalog.csv')
            columnsfilepath = config.get(
                'ckanext.dge_harvest.csv.columns.filepath',
                '/usr/lib/ckan/default/src/ckanext-dge-harvest/ckanext/dge_harvest/commands/columns.json'
            )
        else:
            filepath = '/tmp/catalog.' + _format
        query = _dge_harvest_search_ckan_datasets(context, data_dict)
        dataset_dicts = query['results']
        total_datasets = query['count']
        log.debug('%s Total_datasets obtenidos en la query: %s',
                  method_log_prefix, total_datasets)
        if limit > -1 and limit < total_datasets:
            total_datasets = limit
        num = len(dataset_dicts)
        log.debug('%s Total_datasets a exportar: %s',
                  method_log_prefix, total_datasets)

        while total_datasets > num:
            page = page + 1
            data_dict['page'] = page
            query = _dge_harvest_search_ckan_datasets(context, data_dict)
            results = query['results']
            if not results:
                # The search reported more datasets than it returns; stop
                # here instead of requesting empty pages forever.
                log.warning('%s Empty result page %s with %s of %s datasets '
                            'collected, stopping pagination',
                            method_log_prefix, page, num, total_datasets)
                break
            dataset_dicts.extend(results)
            total_datasets = query['count']
            log.debug('%s Total_datasets obtenidos en la query: %s',
                      method_log_prefix, total_datasets)
            # Re-apply the cap: the fresh count would otherwise discard it
            # and the loop would fetch the whole catalog.
            if limit > -1 and limit < total_datasets:
                total_datasets = limit
            num = len(dataset_dicts)
            log.debug('%s Total_datasets a exportar: %s',
                      method_log_prefix, num)

        if _format == RDF_FORMAT:
            serializer = DGERDFSerializer()
            output = serializer.serialize_catalog(
                {},
                dataset_dicts,
                _format=data_dict.get('format'),
                pagination_info=None)
        elif _format == CSV_FORMAT and columnsfilepath:
            # Caches shared by all datasets, so each organization title and
            # each format label is resolved only once.
            organizations = {}
            formats = {}
            themes = dhh.dge_harvest_dict_theme_option_label()
            spatial_coverages = dhh.dge_harvest_dict_spatial_coverage_option_label(
            )
            _dataset = sh.scheming_get_schema('dataset', 'dataset')
            res_format = sh.scheming_field_by_name(
                _dataset.get('resource_fields'), 'format')
            format_values = res_format['choices']
            datasets = []
            for dataset in dataset_dicts:
                ds = {}

                # URL
                ds['url'] = dataset_uri(dataset)

                # Description
                descriptions = _from_dict_to_string(
                    dataset.get(dhc.DS_DESCRIPTION, None))
                ds['description'] = _encode_value(descriptions, True)

                # Title
                titles = _from_dict_to_string(
                    dataset.get(dhc.DS_TITLE_TRANSLATED, None))
                ds['title'] = _encode_value(titles, True)

                # Theme (exported with its Spanish label)
                theme_values = dataset.get(dhc.DS_THEME, None)
                theme_labels = []
                if theme_values:
                    for theme_key in theme_values:
                        theme = themes.get(theme_key)
                        if theme and theme.get('label'):
                            theme_labels.append(theme.get('label').get('es'))
                    theme_value = _from_list_to_string(theme_labels)
                    ds['theme'] = _encode_value(theme_value, True)

                # Keywords
                tags = dataset.get(dhc.DS_TAGS)
                if tags:
                    tag_names = [tag.get('name', None) for tag in tags]
                    tag_names = [tag_name for tag_name in tag_names
                                 if tag_name]
                    ds['tags'] = _encode_value(
                        MAIN_SEPARATOR.join(tag_names) if tag_names else None,
                        True)

                # Identifier
                ds['identifier'] = _encode_value(
                    dataset.get('identifier', None), True)

                # Created
                ds['issued_date'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_ISSUED_DATE, None)))

                # Modified
                ds['modified_date'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_MODIFIED_DATE, None)))

                # Accrual periodicity
                frequency = dataset.get(dhc.DS_FREQUENCY)
                if frequency:
                    stype = frequency.get('type', '')
                    if stype:
                        stype = 'http://www.w3.org/2006/time#' + stype
                    svalue = frequency.get('value', '')
                    sfrequency = '[TYPE]%s[VALUE]%s' % (stype, svalue)
                    ds['frequency'] = _encode_value(sfrequency, True)

                # Language
                languages = _from_list_to_string(dataset.get(dhc.DS_LANGUAGE))
                ds['language'] = _encode_value(languages, True)

                # Publisher
                publisher = dataset.get(dhc.DS_PUBLISHER, None)
                if publisher:
                    if publisher in organizations:
                        ds['publisher'] = _encode_value(
                            organizations.get(publisher, None), True)
                    else:
                        organization = h.get_organization(publisher, False)
                        if organization:
                            organizations[publisher] = organization.get(
                                'title',
                                organization.get('display_name', None))
                            ds['publisher'] = _encode_value(
                                organizations.get(publisher), True)

                # License
                ds['license_id'] = _encode_value(dataset.get(dhc.DS_LICENSE),
                                                 True)

                # Spatial (exported with its Spanish label)
                spatial_values = dataset.get(dhc.DS_SPATIAL, None)
                spatial_labels = []
                if spatial_values:
                    for spatial_key in spatial_values:
                        spatial = spatial_coverages.get(spatial_key)
                        if spatial and spatial.get('label') and spatial.get(
                                'label').get('es'):
                            spatial_labels.append(
                                spatial.get('label').get('es'))
                    spatials = _from_list_to_string(spatial_labels)
                    ds['spatial'] = _encode_value(spatials, True)

                # Temporal: one "from-to" entry per period that has a date
                temporal_coverage = dataset.get(dhc.DS_TEMPORAL_COVERAGE)
                if temporal_coverage:
                    periods = []
                    for tc in temporal_coverage.values():
                        if tc:
                            tc_from = _from_iso8601_date_to_string(
                                tc.get('from', None))
                            tc_to = _from_iso8601_date_to_string(
                                tc.get('to', None))
                            if tc_from or tc_to:
                                periods.append('%s-%s' % ((tc_from or ''),
                                                          (tc_to or '')))
                    ds['coverage_new'] = _encode_value(
                        MAIN_SEPARATOR.join(periods) if periods else None,
                        True)

                # Valid
                ds['valid'] = _encode_value(
                    _from_iso8601_date_to_string(
                        dataset.get(dhc.DS_VALID, None)), True)

                # References
                references = _from_list_to_string(
                    dataset.get(dhc.DS_REFERENCE, None))
                ds['references'] = _encode_value(references, True)

                # Normative
                conforms_to = _from_list_to_string(
                    dataset.get(dhc.DS_NORMATIVE, None))
                ds['conforms_to'] = _encode_value(conforms_to, True)

                # Resources: every part that is missing is rendered as an
                # empty string, never as the text "None".
                sresources = []
                for resource in dataset.get(dhc.DS_RESOURCES) or []:
                    if not resource:
                        continue
                    name = _from_dict_to_string(
                        resource.get(dhc.DS_RESOURCE_NAME_TRANSLATED, None),
                        'TITLE_') or ''

                    url = resource.get(dhc.DS_RESOURCE_ACCESS_URL)
                    access_url = '[ACCESS_URL]%s' % (url) if url else ''

                    media_type = ''
                    format_value = resource.get(dhc.DS_RESOURCE_FORMAT, None)
                    if format_value:
                        if format_value not in formats:
                            formats[
                                format_value] = sh.scheming_choices_label(
                                    format_values, format_value)
                        format_label = formats.get(format_value, None)
                        if format_label:
                            media_type = '[MEDIA_TYPE]%s' % (format_label)

                    size = resource.get(dhc.DS_RESOURCE_BYTE_SIZE)
                    byte_size = '[BYTE_SIZE]%s' % (size) if size else ''

                    relation = _from_list_to_string(
                        resource.get(dhc.DS_RESOURCE_RELATION, None),
                        SECONDARY_SEPARATOR)
                    relations = '[RELATION]%s' % (relation) if relation else ''

                    sresource = '%s%s%s%s%s' % (name, access_url, media_type,
                                                byte_size, relations)
                    if sresource:
                        sresources.append(sresource)
                # Built only from this dataset's resources, so a dataset
                # without resources exports an empty value instead of a
                # leftover from a previous column.
                ds['resources'] = _encode_value(
                    MAIN_SEPARATOR.join(sresources) if sresources else None,
                    True)

                datasets.append(ds)
            log.debug('%s Numero de datasets con datos a exportar...%s',
                      method_log_prefix, len(datasets))
            output = losser.losser.table(datasets,
                                         columnsfilepath,
                                         csv=True,
                                         pretty=False)

        if filepath and output is not None:
            # A failed dump is logged but does not abort the action; the
            # serialized catalog is still returned to the caller.
            try:
                with open(filepath, "w") as catalog_file:
                    catalog_file.write(output)
            except Exception as e:
                log.error('%s Could not write catalog to %s. %s: %s',
                          method_log_prefix, filepath, type(e).__name__, e)
        elif filepath:
            # Do not open (and thereby truncate) the previous export when
            # there is nothing to write.
            log.warning('%s Nothing serialized for format %s, file %s left '
                        'untouched', method_log_prefix, _format, filepath)

        end = datetime.datetime.now()
        log.debug(
            "%s Time in serialize %s catalog [%s] with %s datasets ... %s milliseconds",
            method_log_prefix, _format, filepath, total_datasets,
            int((end - ini).total_seconds() * 1000))
    except Exception as e:
        log.error("%s Exception %s: %s",
                  method_log_prefix, type(e).__name__, e)
        output = None
    return output
示例#24
0
def get_field_label(name, is_resource=False):
    """Return the display label of a 'deposited-dataset' schema field.

    :param name: name of the field to look up.
    :param is_resource: when true, search the schema's ``resource_fields``
        instead of its ``dataset_fields``.
    :returns: the field's ``label``, or ``name`` itself when the field has
        no label or is not part of the schema.
    """
    schema = scheming_get_dataset_schema('deposited-dataset')
    fields = schema['resource_fields' if is_resource else 'dataset_fields']
    field = scheming_field_by_name(fields, name)
    if not field:
        # scheming_field_by_name gives None when no field matches; fall
        # back to the raw name instead of failing on ``None.get``.
        return name
    return field.get('label', name)
示例#25
0
    def before_index(self, data_dict):
        """Add label and translation fields to a dataset before indexing.

        Works on ``data_dict`` in place and returns it:

        * ``res_format``: adds ``res_format_label`` with the schema label of
          every resource format that has one.
        * ``publisher``: replaces it with the organization id and adds the
          display name plus the administration level code and its
          translations.
        * ``theme``: splits the serialized list and adds the theme ids and
          the translated labels found in the schema choices.
        """
        languages = ('es', 'en', 'ca', 'eu', 'gl')
        schema = sh.scheming_get_schema('dataset', 'dataset')

        if 'res_format' in data_dict:
            format_field = sh.scheming_field_by_name(
                schema.get('resource_fields'), 'format')
            # New SOLR field holding the human readable format labels
            data_dict['res_format_label'] = format_labels = []
            for format_key in data_dict['res_format']:
                label = sh.scheming_choices_label(format_field['choices'],
                                                  format_key)
                if label:
                    format_labels.append(label)

        if 'publisher' in data_dict:
            org_ref = data_dict['publisher']
            if is_frontend():
                fetch_publisher = toolkit.get_action(
                    'dge_organization_publisher')
                organization = fetch_publisher({'model': model},
                                               {'id': org_ref})
            else:
                organization = h.get_organization(org_ref)
            data_dict['publisher'] = organization.get('id')
            data_dict['publisher_display_name'] = organization.get(
                'display_name')

            level = helpers.dge_get_organization_administration_level_code(
                organization)
            # Missing or unknown codes are indexed under the default unit
            if not (level and level in TRANSLATED_UNITS):
                level = DEFAULT_UNIT
            data_dict['administration_level'] = level
            for lang in languages:
                data_dict['administration_level_' + lang] = (
                    TRANSLATED_UNITS[level][lang] or '')

        if 'theme' in data_dict:
            theme_field = sh.scheming_field_by_name(
                schema.get('dataset_fields'), 'theme')
            serialized_themes = data_dict['theme']

            # Start from empty multi-valued fields
            data_dict['theme'] = []
            data_dict['theme_id'] = []
            for lang in languages:
                data_dict['theme_' + lang] = []

            # Strip the list brackets, then split on the quoted separator
            unbracketed = serialized_themes.replace('[', '').replace(']', '')
            for raw_term in unbracketed.split('", "'):
                term = raw_term.replace('"', '')
                data_dict['theme'].append(term)
                data_dict['theme_id'].append(helpers.dge_theme_id(term))
                # Copy the translated labels of every matching schema choice
                for choice in theme_field.get('choices'):
                    if choice['value'] != term:
                        continue
                    for lang in languages:
                        data_dict['theme_' + lang].append(
                            choice['label'][lang])
        return data_dict
示例#26
0
def _get_process_state_field(dataset_type):
    """Look up the ``process_state`` field of a dataset schema.

    :param dataset_type: dataset type whose scheming schema is inspected.
    :returns: whatever ``h.scheming_field_by_name`` yields for the
        ``process_state`` field, or ``None`` when no schema is available
        for ``dataset_type``.
    """
    schema = h.scheming_get_schema('dataset', dataset_type)
    if schema:
        return h.scheming_field_by_name(schema['dataset_fields'],
                                        "process_state")
    return None
    def import_stage(self, harvest_object):
        """Create, update or delete the dataset behind a harvest object.

        The action depends on the object's ``status`` extra (forced to
        ``'change'`` when ``self.force_import`` is set): ``'delete'``
        removes the dataset, ``'new'`` creates it and ``'change'`` updates
        it, falling back to a create when the update reports ``NotFound``.
        Before saving, ``member_countries`` labels are mapped to schema
        choice values and the first matching EEZ polygon is stored as
        ``coverage``. After saving, the default resource views are created.

        :param harvest_object: the harvest object to import.
        :returns: ``True`` when the object was processed, ``False`` when
            there is no harvest object, it has no content or no package
            dict could be built from it.
        """
        log.debug('In PRDREngergyResourcesHarvester import_stage')
        if not harvest_object:
            log.error('No harvest object received')
            return False

        self._set_config(harvest_object.job.source.config)

        if self.force_import:
            status = 'change'
        else:
            status = self._get_object_extra(harvest_object, 'status')
        if status == 'delete':
            context = {
                'model': model,
                'session': model.Session,
                'user': self._get_user_name()
            }

            p.toolkit.get_action('package_delete')(
                context, {
                    'id': harvest_object.package_id
                })
            log.info('Deleted package {0} with guid {1}'.format(
                harvest_object.package_id, harvest_object.guid))

            return True
        if harvest_object.content is None:
            self._save_object_error(
                'Empty content for object %s' % harvest_object.id,
                harvest_object, 'Import')
            return False

        # Get the last harvested object (if any).
        # `== True` is intentional: it builds the SQL comparison.
        previous_object = model.Session.query(HarvestObject) \
            .filter(HarvestObject.guid == harvest_object.guid) \
            .filter(HarvestObject.current == True) \
            .first()

        # Flag previous object as not current anymore
        if previous_object and not self.force_import:
            previous_object.current = False
            previous_object.add()

        package_dict = self._get_package_dict(harvest_object)
        if not package_dict:
            return False

        if not package_dict.get('name'):
            package_dict['name'] = \
                self._get_package_name(harvest_object, package_dict['title'])

        # copy across resource ids from the existing dataset, otherwise they'll
        # be recreated with new ids

        if status == 'change':
            existing_dataset = self._get_existing_dataset(harvest_object.guid)
            if existing_dataset:
                copy_across_resource_ids(existing_dataset, package_dict)

        # Allow custom harvesters to modify the package dict before creating
        # or updating the package
        package_dict = self.modify_package_dict(package_dict, harvest_object)
        # Unless already set by an extension, get the owner organization (if
        # any) from the harvest source dataset
        if not package_dict.get('owner_org'):
            source_dataset = model.Package.get(harvest_object.source.id)
            if source_dataset.owner_org:
                package_dict['owner_org'] = source_dataset.owner_org

        if not package_dict.get('license_id'):
            package_dict['license_id'] = 'notspecified'

        # Flag this object as the current one
        harvest_object.current = True
        harvest_object.add()

        context = {
            'user': self._get_user_name(),
            'return_id_only': True,
            'ignore_auth': True,
        }

        package_schema = scheming_get_dataset_schema('dataset')
        field = scheming_field_by_name(package_schema['dataset_fields'],
                                       'member_countries')
        choices = scheming_field_choices(field)

        # A record without member countries is treated like an empty list,
        # so it ends up with the 'other' fallback instead of a KeyError.
        mem_temp_list = [
            x for x in (package_dict.get('member_countries') or [])
            if x is not None
        ]
        # Translate harvested country labels into schema choice values
        package_dict['member_countries'] = [
            choice['value']
            for choice in choices if choice['label'] in mem_temp_list
        ] or ['other']

        polygons = [
            t['geometry'] for t in eez.collection
            if any(country in t['properties']['GeoName']
                   for country in mem_temp_list)
        ]
        # TODO: for now we are taking first polygon from possible
        # list because of SOLR restriction of spatial field
        # size. In future we may add additional logic here
        if polygons:
            package_dict['coverage'] = json.dumps(polygons[0])

        # Stays None when the status is neither 'new' nor 'change'; nothing
        # is saved in that case and the view creation below is skipped.
        package_id = None

        if status == 'new':
            # We need to explicitly provide a package ID
            package_dict['id'] = unicode(uuid.uuid4())

            # Save reference to the package on the object
            harvest_object.package_id = package_dict['id']
            harvest_object.add()

            # Defer constraints and flush so the dataset can be indexed with
            # the harvest object id (on the after_show hook from the harvester
            # plugin)
            model.Session.execute(
                'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
            model.Session.flush()
            package_id = \
                p.toolkit.get_action('package_create')(context, package_dict)
            log.info('Created dataset with id %s', package_id)

        elif status == 'change':
            package_dict['id'] = harvest_object.package_id
            try:
                package_id = \
                    p.toolkit.get_action('package_update')(context, package_dict)
                log.info('Updated dataset with id %s', package_id)
            except NotFound:
                log.info(
                    'Update returned NotFound, trying to create new Dataset.')
                if not harvest_object.package_id:
                    package_dict['id'] = unicode(uuid.uuid4())
                    harvest_object.package_id = package_dict['id']
                    harvest_object.add()
                else:
                    package_dict['id'] = harvest_object.package_id
                package_id = \
                    p.toolkit.get_action('package_create')(context, package_dict)
                log.info('Created dataset with id %s', package_id)
        model.Session.commit()

        if package_id:
            # Each action call gets its own copy of the context
            stored_package = p.toolkit.get_action('package_show')(
                context.copy(), {
                    'id': package_id
                })
            for res in stored_package.get('resources', []):
                p.toolkit.get_action('resource_create_default_resource_views')(
                    context.copy(), {
                        'package': stored_package,
                        'resource': res
                    })

        return True
    def graph_from_dataset(self, dataset_dict, dataset_ref):
        '''
        Given a CKAN dataset dict, creates an RDF graph

        The class RDFLib graph (accessible via `self.g`) should be updated on
        this method

        `dataset_dict` is a dict with the dataset metadata like the one
        returned by `package_show`. `dataset_ref` is an rdflib URIRef object
        that must be used to reference the dataset when working with the graph.
        '''
        method_log_prefix = '[%s][graph_from_dataset]' % type(
            self).__name__
        #log.debug('%s Init method. Inputs dataset_dict=%r, dataset_ref=%r' % (method_log_prefix, dataset_dict, dataset_ref))
        #log.debug('%s Init method. Inputs, dataset_ref=%r' % (method_log_prefix, dataset_ref))
        try:
            g = self.g

            for prefix, namespace in namespaces.iteritems():
                g.bind(prefix, namespace)

            g.add((dataset_ref, RDF.type, DCAT.Dataset))

            # Title
            self._add_translated_triple_field_from_dict(
                dataset_dict, dataset_ref, DCT.title, DS_TITLE_TRANSLATED, None)

            # Description
            self._add_translated_triple_field_from_dict(
                dataset_dict, dataset_ref, DCT.description, DS_DESCRIPTION, None)

            # Theme
            value = self._get_dict_value(dataset_dict, DS_THEME)
            if value:
                themes = dataset_dict.get(EXPORT_AVAILABLE_THEMES, {})
                for theme in value:
                    #self._add_resource_list_triple(dataset_ref, DCAT.theme, value)
                    theme_values = themes.get(theme, {})
                    labels = theme_values.get('label')
                    descriptions = theme_values.get('description')
                    dcat_ap = theme_values.get('dcat_ap')
                    notation = theme_values.get('notation')
                    self._add_resource_list_triple(
                        dataset_ref, DCAT.theme, theme, labels, descriptions, dcat_ap, notation)

            # Tags
            for tag in dataset_dict.get('tags', []):
                self.g.add(
                    (dataset_ref, DCAT.keyword, Literal(tag['name'])))

            # Identifier
            self._add_triple_from_dict(
                dataset_dict, dataset_ref, DCT.identifier, DS_IDENTIFIER, None, False, False)

            # Issued, Modified dates
            self._add_date_triple(dataset_ref, DCT.issued, self._get_value_from_dict(
                dataset_dict, DS_ISSUED_DATE, ['metadata_created']))
            self._add_date_triple(dataset_ref, DCT.modified, self._get_value_from_dict(
                dataset_dict, DS_MODIFIED_DATE, ['metadata_modified']))
            self._add_date_triple(dataset_ref, DCT.valid, self._get_value_from_dict(
                dataset_dict, DS_VALID, None))

            # Accrual periodicity
            frequency = dataset_dict.get(DS_FREQUENCY)
            if frequency:
                ftypes = {'seconds': TIME.seconds,
                          'minutes': TIME.minutes,
                          'hours': TIME.hours,
                          'days': TIME.days,
                          'weeks': TIME.weeks,
                          'months': TIME.months,
                          'years': TIME.years}
                ftype = frequency.get('type')
                fvalue = frequency.get('value')
                if ftype and ftype in ftypes.keys() and fvalue:
                    duration = BNode()
                    frequency = BNode()
                    g.add((frequency, RDF.type, DCT.Frequency))
                    g.add((duration, RDF.type, TIME.DurationDescription))
                    g.add((dataset_ref, DCT.accrualPeriodicity, frequency))
                    g.add((frequency, RDF.value, duration))
                    g.add((duration, ftypes.get(ftype), Literal(
                        fvalue, datatype=XSD.decimal)))

            # Languages
            self._add_triple_from_dict(
                dataset_dict, dataset_ref, DCT.language, DS_LANGUAGE, None, True, False)

            # Publisher
            pub_dir3 = False
            publishers = dataset_dict.get(
                EXPORT_AVAILABLE_PUBLISHERS, {})
            organization_id = dataset_dict.get('owner_org')
            if organization_id in publishers:
                publisher = publishers.get(organization_id)
            else:
                org = h.get_organization(organization_id, False)
                publisher = [None, None, None]
                if org:
                    publisher = [org.get('title'), None, None]
                    if org['extras']:
                        for extra in org.get('extras'):
                            if extra and 'key' in extra and extra['key'] == ORG_PROP_ID_UD_ORGANICA:
                                notation = extra.get('value')
                                if notation and notation != '':
                                    pub_dir3 = True
                                    publisher[1] = PUBLISHER_PREFIX + notation
                                    publisher[2] = notation
                if pub_dir3:
                    publishers[organization_id] = publisher
                    dataset_dict[EXPORT_AVAILABLE_PUBLISHERS] = publishers
                else:
                    #publisher 
                    organizations = cdh.csc_dcat_organizations_available()
                    publisher_ref = config.get('ckanext.csc_dcat.catalog.publisher', None)
                    if publisher_ref and len(publisher_ref.strip()) > 0:
                        publisher_ref = publisher_ref.strip()
                        publisher = [publisher_ref, None, None]
                        s_publisher = publisher_ref.upper().split('/')
                        if s_publisher and len(s_publisher) > 0:
                            organization_minhap = s_publisher[-1]
                            org = organizations.get(organization_minhap, None)
                            if org:
                                publisher = [org[1], PUBLISHER_PREFIX +
                                        organization_minhap, organization_minhap]
            if publisher[1]:
                self._add_resource_list_triple(
                        dataset_ref, DCT.publisher, publisher[1], publisher[0], None, None, publisher[2])
            else:
                g.add((dataset_ref, DCT.publisher, URIRef(publisher[0])))

            # Spatial Coverage
            value = self._get_dict_value(dataset_dict, DS_SPATIAL)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.spatial, value)

            # Temporal
            temporal_coverage = self._get_dataset_value(
                dataset_dict, DS_TEMPORAL_COVERAGE)
            i = 1
            if temporal_coverage:
                for key, value in temporal_coverage.items():
                    if (value):
                        start = end = None
                        if 'from' in value:
                            start = value.get('from')
                        if 'to' in value:
                            end = value.get('to')
                        if start or end:
                            temporal_extent = URIRef(
                                "%s/%s-%s" % (dataset_ref, 'PeriodOfTime', i))
                            g.add(
                                (temporal_extent, RDF.type, DCT.PeriodOfTime))
                            if start:
                                self._add_date_triple(
                                    temporal_extent, SCHEMA.startDate, start)
                            if end:
                                self._add_date_triple(
                                    temporal_extent, SCHEMA.endDate, end)
                            g.add((dataset_ref, DCT.temporal, temporal_extent))
                            i = i+1

            # References
            value = self._get_dict_value(dataset_dict, DS_REFERENCE)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.references, value)

            # Conforms To
            value = self._get_dict_value(dataset_dict, DS_NORMATIVE)
            if value:
                self._add_resource_list_triple(
                    dataset_ref, DCT.conformsTo, value)

            # License (dataset license)
            if dataset_dict.get(DS_LICENSE):
                g.add((dataset_ref, DCT.license, URIRef(
                        dataset_dict.get(DS_LICENSE))))

            # Distributions/Resources
            for resource_dict in dataset_dict.get('resources', []):
                uri_resource = '%s/resource/%s' % (
                    dataset_ref, resource_dict['id'])
                distribution = URIRef(uri_resource)
                g.add((dataset_ref, DCAT.distribution, distribution))
                g.add((distribution, RDF.type, DCAT.Distribution))

                # Identifier
                self._add_triple_from_dict(
                    resource_dict, distribution, DCT.identifier, DS_RESOURCE_IDENTIFIER, None, False, False)

                # Title
                self._add_translated_triple_field_from_dict(
                    resource_dict, distribution, DCT.title, DS_RESOURCE_NAME_TRANSLATED, None)

                # License (dataset license)
                if dataset_dict.get(DS_LICENSE):
                    g.add((distribution, DCT.license, URIRef(
                        dataset_dict.get(DS_LICENSE))))

                # Access URL
                if resource_dict.get(DS_RESOURCE_ACCESS_URL):
                    g.add((distribution, DCAT.accessURL, Literal(
                        resource_dict.get(DS_RESOURCE_ACCESS_URL), datatype=XSD.anyURI)))

                # Format
                if resource_dict.get(DS_RESOURCE_FORMAT, None):
                    imt = URIRef("%s/format" % uri_resource)
                    g.add((imt, RDF.type, DCT.IMT))
                    g.add((distribution, DCT['format'], imt))

                    format = resource_dict.get(
                        DS_RESOURCE_FORMAT, None)
                    formats = dataset_dict.get(
                        EXPORT_AVAILABLE_RESOURCE_FORMATS, {})
                    label = None
                    if format and format in formats:
                        label = formats.get(format, None)
                    else:
                        _dataset = sh.scheming_get_schema(
                            'dataset', 'dataset')
                        res_format = sh.scheming_field_by_name(_dataset.get('resource_fields'),
                                                               'format')
                        formats[format] = sh.scheming_choices_label(
                            res_format['choices'], format)
                        label = formats.get(format, None)
                        dataset_dict[EXPORT_AVAILABLE_RESOURCE_FORMATS] = formats
                    if label:
                        g.add((imt, RDFS.label, Literal(label)))
                    g.add((imt, RDF.value, Literal(
                        resource_dict[DS_RESOURCE_FORMAT])))

                # Size
                if resource_dict.get(DS_RESOURCE_BYTE_SIZE):
                    try:
                        g.add((distribution, DCAT.byteSize,
                               Literal(float(resource_dict[DS_RESOURCE_BYTE_SIZE]),
                                       datatype=XSD.decimal)))
                    except (ValueError, TypeError):
                        g.add((distribution, DCAT.byteSize,
                               Literal(resource_dict[DS_RESOURCE_BYTE_SIZE])))
                # Relation
                value = self._get_dict_value(
                    dataset_dict, DS_NORMATIVE)
                if value:
                    self._add_resource_list_triple(
                        distribution, DCT.relation, value)

        except Exception, e:
            log.error("%s [dataset_ref: %s]. Unexpected Error %s: %s" % (
                method_log_prefix, dataset_ref, type(e).__name__, e))